Skip to content

Commit

Permalink
Change the searchbox search algorithm to the Dice-Sørensen coefficient
Browse files Browse the repository at this point in the history
  • Loading branch information
Dlurak committed Sep 29, 2024
1 parent 117ef06 commit 405d9ef
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 43 deletions.
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
"canvas": "^2.11.2",
"canvg": "^4.0.2",
"date-fns": "^3.6.0",
"dice-coefficient": "^2.1.1",
"image-size": "^1.1.1",
"isomorphic-unfetch": "^4.0.2",
"isomorphic-xml2js": "^0.1.3",
Expand Down
77 changes: 50 additions & 27 deletions src/components/SearchBox/options/preset.tsx
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
import match from 'autosuggest-highlight/match';
import sum from 'lodash/sum';
import orderBy from 'lodash/orderBy';
import groupBy from 'lodash/groupBy';
import { diceCoefficient } from 'dice-coefficient';
import FolderIcon from '@mui/icons-material/Folder';
import { Grid, Typography } from '@mui/material';
import React from 'react';
Expand Down Expand Up @@ -61,47 +65,66 @@ type PresetOptions = Promise<{
after: PresetOption[];
}>;

export const getPresetOptions = async (inputValue: string): PresetOptions => {
export const getPresetOptions = async (
inputValue: string,
threshold = 0.3,
): PresetOptions => {
if (inputValue.length <= 2) {
return { before: [], after: [] };
}

const results = (await getPresetsForSearch()).map((preset) => {
const name = num(preset.name, inputValue) * 10;
const textsByOne = preset.texts.map((term) => num(term, inputValue));
const sum = name + textsByOne.reduce((a, b) => a + b, 0);
return { name, textsByOne, sum, presetForSearch: preset };
const presets = await getPresetsForSearch();
const rawResults = presets.map((preset) => {
const nameSimilarity = diceCoefficient(preset.name, inputValue);
const textsByOneSimilarity = preset.texts.map((term) =>
diceCoefficient(term, inputValue),
);
return {
nameSimilarity,
textsByOneSimilarity,
sum: nameSimilarity * 10 + sum(textsByOneSimilarity),
presetForSearch: preset,
};
});
const grouped = groupBy(rawResults, ({ sum, nameSimilarity }) => {
if (nameSimilarity > threshold) {
return 'name';
}
if (nameSimilarity === 0 && sum > threshold) {
return 'rest';
}
});

const nameMatches = results
.filter((result) => result.name > 0)
.map((result) => ({
type: 'preset' as const,
preset: result,
}));

const rest = results
.filter((result) => result.name === 0 && result.sum > 0)
.map((result) => ({
type: 'preset' as const,
preset: result,
}));

const allResults = [...nameMatches, ...rest];
const allResults = [
...orderBy(grouped.name, ({ sum }) => sum, 'desc'),
...orderBy(grouped.rest, ({ sum }) => sum, 'desc'),
].map((result) => ({
type: 'preset' as const,
preset: result,
}));
const before = allResults.slice(0, 2);
const after = allResults.slice(2);

return { before, after };
};

/**
 * Builds the parenthesised hint appended to a preset's name in the search box.
 *
 * The hint shows the entry of `presetForSearch.texts` whose similarity score
 * (same index in `textsByOneSimilarity`) is the highest, but only when that
 * score is strictly greater than `nameSimilarity`.
 *
 * @param preset - scored preset as produced for the preset search options
 * @returns ` (<best text>…)` when a text outscores the name, otherwise `''`
 */
const getAdditionalText = (preset: PresetOption['preset']) => {
  const scores = preset.textsByOneSimilarity;
  // With no scores at all Math.max() yields -Infinity, so the name always wins.
  const bestScore = Math.max(...scores);
  const nameWins = preset.nameSimilarity >= bestScore;

  // On ties between texts, indexOf picks the first one with the best score.
  return nameWins
    ? ''
    : ` (${preset.presetForSearch.texts[scores.indexOf(bestScore)]}…)`;
};

export const renderPreset = ({ preset }: PresetOption, inputValue: string) => {
const { name } = preset.presetForSearch;
const additionalText =
preset.name === 0
? ` (${preset.presetForSearch.texts.find(
(_, idx) => preset.textsByOne[idx] > 0,
)}…)`
: '';
const additionalText = getAdditionalText(preset);

return (
<>
Expand Down
26 changes: 12 additions & 14 deletions src/components/SearchBox/options/stars.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,26 @@ import React from 'react';
import { getHumanDistance, IconPart } from '../utils';
import type { Star } from '../../utils/StarsContext';
import { StarOption } from '../types';
import match from 'autosuggest-highlight/match';
import { LonLat } from '../../../services/types';
import { diceCoefficient } from 'dice-coefficient';

export const getStarsOptions = (
stars: Star[],
inputValue: string,
): StarOption[] => {
const ratedStars = sortBy(
stars
.map((star) => ({
star,
// TODO matching is not optimal, maybe Sørensen–Dice coefficient
// https://www.npmjs.com/package/dice-coefficient
matching:
inputValue === ''
? Infinity
: match(star.label, inputValue, {
insideWords: true,
findAllOccurrences: true,
}).length,
}))
.filter(({ matching }) => matching > 0),
.map((star) => {
if (inputValue === '') {
return { star, matching: Infinity };
}
const matching = diceCoefficient(star.label, inputValue);
return {
star,
matching,
};
})
.filter(({ matching }) => matching > 0.2),
({ matching }) => matching,
);
return ratedStars.map(({ star }) => ({ type: 'star', star }));
Expand Down
4 changes: 2 additions & 2 deletions src/components/SearchBox/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ export type OverpassOption = GenericOption<
export type PresetOption = GenericOption<
'preset',
{
name: number;
textsByOne: number[];
nameSimilarity: number;
textsByOneSimilarity: number[];
sum: number;
presetForSearch: {
key: string;
Expand Down
12 changes: 12 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3142,6 +3142,13 @@ detect-node@^2.0.4, detect-node@^2.1.0:
resolved "https://registry.yarnpkg.com/detect-node/-/detect-node-2.1.0.tgz#c9c70775a49c3d03bc2c06d9a73be550f978f8b1"
integrity sha512-T0NIuQpnTvFDATNuHN5roPwSBG83rFsuO+MXXH9/3N1eFbn4wcPjttvjMLEPWJ0RGUYgQE7cGgS3tNxbqCGM7g==

dice-coefficient@^2.1.1:
version "2.1.1"
resolved "https://registry.yarnpkg.com/dice-coefficient/-/dice-coefficient-2.1.1.tgz#d8ebb51f021ab6069994e7ef36842184771f616f"
integrity sha512-vPTcHmOQAuGvU6eyBtj7QCBwDJh2I7QpbBU51lbgfv7592KjBl6dm0baRBSh9ekt2X91MNAz7OpJrXCIUtDzlw==
dependencies:
n-gram "^2.0.0"

diff-sequences@^29.6.3:
version "29.6.3"
resolved "https://registry.yarnpkg.com/diff-sequences/-/diff-sequences-29.6.3.tgz#4deaf894d11407c51efc8418012f9e70b84ea921"
Expand Down Expand Up @@ -5915,6 +5922,11 @@ murmurhash-js@^1.0.0:
resolved "https://registry.yarnpkg.com/murmurhash-js/-/murmurhash-js-1.0.0.tgz#b06278e21fc6c37fa5313732b0412bcb6ae15f51"
integrity sha1-sGJ44h/Gw3+lMTcysEEry2rhX1E=

n-gram@^2.0.0:
version "2.0.2"
resolved "https://registry.yarnpkg.com/n-gram/-/n-gram-2.0.2.tgz#e544a7dffefc49c22d898b2f491e787941b3a2ba"
integrity sha512-S24aGsn+HLBxUGVAUFOwGpKs7LBcG4RudKU//eWzt/mQ97/NMKQxDWHyHx63UNWk/OOdihgmzoETn1tf5nQDzQ==

nan@^2.17.0:
version "2.20.0"
resolved "https://registry.yarnpkg.com/nan/-/nan-2.20.0.tgz#08c5ea813dd54ed16e5bd6505bf42af4f7838ca3"
Expand Down

0 comments on commit 405d9ef

Please sign in to comment.