Grafana/data: Extract fuzzy search core (#107110)

* Move fuzzy search to grafana/data

* Move @leeoniya/ufuzzy package

* Cleanup

* Use exact version

* mark export as internal
pull/106335/merge
Alex Khomenko 2 days ago committed by GitHub
parent 24884154dc
commit 3d1b820827
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 1
      packages/grafana-data/package.json
  2. 3
      packages/grafana-data/src/index.ts
  3. 103
      packages/grafana-data/src/utils/fuzzySearch.test.ts
  4. 63
      packages/grafana-data/src/utils/fuzzySearch.ts
  5. 1
      packages/grafana-ui/package.json
  6. 74
      packages/grafana-ui/src/components/Combobox/filter.test.ts
  7. 74
      packages/grafana-ui/src/components/Combobox/filter.ts
  8. 2
      yarn.lock

@ -57,6 +57,7 @@
"dependencies": {
"@braintree/sanitize-url": "7.0.1",
"@grafana/schema": "12.1.0-pre",
"@leeoniya/ufuzzy": "1.0.18",
"@types/d3-interpolate": "^3.0.0",
"@types/string-hash": "1.1.3",
"@types/systemjs": "6.15.1",

@ -258,8 +258,9 @@ export * as arrayUtils from './utils/arrayUtils';
export { store, Store } from './utils/store';
export { LocalStorageValueProvider } from './utils/LocalStorageValueProvider';
export { throwIfAngular } from './utils/throwIfAngular';
export { fuzzySearch } from './utils/fuzzySearch';
// Tranformations
// Transformations
export { standardTransformers } from './transformations/transformers';
export {
fieldMatchers,

@ -0,0 +1,103 @@
import { fuzzySearch } from './fuzzySearch';
// Unit tests for fuzzySearch, which returns indices into the haystack rather than the
// matched strings; most tests map the indices back to strings before asserting.
describe('fuzzySearch', () => {
  it('should return all indices when needle is empty', () => {
    const haystack = ['A', 'B', 'C', 'D'];
    const result = fuzzySearch(haystack, '');

    expect(result).toEqual([0, 1, 2, 3]);
  });

  it('should properly rank by match quality', () => {
    const haystack = ['A', 'AA', 'AB', 'AC', 'BC', 'C', 'CD'];
    const needle = 'C';
    const result = fuzzySearch(haystack, needle);
    const matches = result.map((idx) => haystack[idx]);

    expect(matches).toEqual(['C', 'CD', 'AC', 'BC']);
  });

  it('should handle case sensitivity and order by match quality', () => {
    const haystack = [
      'client_service_namespace',
      'namespace',
      'alert_namespace',
      'container_namespace',
      'Namespace',
      'client_k8s_namespace_name',
      'foobar',
    ];
    const needle = 'Names';
    const result = fuzzySearch(haystack, needle);
    const matches = result.map((idx) => haystack[idx]);

    expect(matches).toEqual([
      'Namespace',
      'namespace',
      'alert_namespace',
      'container_namespace',
      'client_k8s_namespace_name',
      'client_service_namespace',
    ]);
  });

  it('should do substring match when needle contains non-ascii characters', () => {
    const haystack = ['A水', 'AA', 'AB', 'AC', 'BC', 'C', 'CD'];
    const needle = '水';
    const result = fuzzySearch(haystack, needle);

    expect(result.map((idx) => haystack[idx])).toEqual(['A水']);
  });

  it('should handle multiple non-latin characters', () => {
    const haystack = ['台灣省', '台中市', '台北市', '台南市', '南投縣', '高雄市', '台中第一高級中學'];
    const needle = '南';
    const result = fuzzySearch(haystack, needle);

    expect(result.map((idx) => haystack[idx])).toEqual(['台南市', '南投縣']);
  });

  it('should do substring match when needle contains only symbols', () => {
    const haystack = ['=', '<=', '>', '!~'];
    const needle = '=';
    const result = fuzzySearch(haystack, needle);

    expect(result.map((idx) => haystack[idx])).toEqual(['=', '<=']);
  });

  it('should handle empty haystack', () => {
    const haystack: string[] = [];
    const needle = 'test';
    const result = fuzzySearch(haystack, needle);

    expect(result).toEqual([]);
  });

  it('should handle no matches', () => {
    const haystack = ['apple', 'banana', 'cherry'];
    const needle = 'xyz';
    const result = fuzzySearch(haystack, needle);

    expect(result).toEqual([]);
  });

  it('should return indices in the correct order', () => {
    const haystack = ['zebra', 'apple', 'aardvark', 'application'];
    const needle = 'app';
    const result = fuzzySearch(haystack, needle);

    // 'zebra' and 'aardvark' contain no "p", so only 'apple' (1) and 'application' (3) can
    // match. Pinning the index set (order-independent) stops the per-item loop below from
    // passing vacuously when the result is empty.
    expect([...result].sort((a, b) => a - b)).toEqual([1, 3]);

    result.forEach((index) => {
      expect(haystack[index].toLowerCase()).toContain('app');
    });
  });

  it('should handle partial matches', () => {
    const haystack = ['Dashboard', 'Dashboards', 'dash-config', 'config-dash'];
    const needle = 'dash';
    const result = fuzzySearch(haystack, needle);

    // Every entry contains "dash" (ignoring case), so all of them are expected back.
    expect(result.length).toEqual(haystack.length);
    result.forEach((index) => {
      expect(haystack[index].toLowerCase()).toContain('dash');
    });
  });
});

@ -0,0 +1,63 @@
import uFuzzy from '@leeoniya/ufuzzy';
// Matches any single character outside the printable ASCII range (space through tilde).
// Despite the name, this also matches ASCII control characters such as tab and newline.
// The `m` flag has no effect on this pattern because it contains no anchors.
// https://catonmat.net/my-favorite-regex :)
const REGEXP_NON_ASCII = /[^ -~]/m;
// https://www.asciitable.com/
// matches only these: `~!@#$%^&*()_+-=[]\{}|;':",./<>?
// i.e. a run made up exclusively of ASCII punctuation (no letters, digits or spaces).
const REGEXP_ONLY_SYMBOLS = /^[\x21-\x2F\x3A-\x40\x5B-\x60\x7B-\x7E]+$/m;
// limit max terms in needle that qualify for re-ordering
// (passed to uf.search as its third argument)
const outOfOrderLimit = 5;
// beyond 25 chars fall back to substring search
const maxNeedleLength = 25;
// beyond 5 terms fall back to substring match
const maxFuzzyTerms = 5;
// when number of matches <= 1e4, do ranking + sorting by quality
// (passed to uf.search as its fourth argument)
const rankThreshold = 1e4;
// typo tolerance mode
// shared uFuzzy instance used for both splitting the needle into terms and searching
const uf = new uFuzzy({ intraMode: 1 });
/**
 * Finds the entries of `haystack` that match `needle` and returns their indices.
 *
 * Matching strategy, in order:
 * - An empty needle matches every entry, in haystack order.
 * - A needle that contains a character outside printable ASCII, consists solely of ASCII
 *   symbols, is longer than `maxNeedleLength`, or splits into more than `maxFuzzyTerms` terms
 *   is matched with a plain case-sensitive substring check, in haystack order.
 * - Any other needle is handed to uFuzzy. When uFuzzy returns ranking info, the indices follow
 *   that ranking; otherwise its filtered indices are returned as-is.
 *
 * @param haystack - strings to search through
 * @param needle - search term
 * @returns indices into `haystack`; an empty array when nothing matches
 * @internal
 */
export function fuzzySearch(haystack: string[], needle: string): number[] {
  // Nothing to filter on: every entry qualifies.
  if (needle === '') {
    return haystack.map((_entry, position) => position);
  }

  // Inputs that fuzzy matching handles poorly are matched literally instead.
  const needsSubstringFallback =
    REGEXP_NON_ASCII.test(needle) ||
    REGEXP_ONLY_SYMBOLS.test(needle) ||
    // very long needles are often pasted from somewhere else
    needle.length > maxNeedleLength ||
    uf.split(needle).length > maxFuzzyTerms;

  if (needsSubstringFallback) {
    const hits: number[] = [];
    for (const [position, entry] of haystack.entries()) {
      if (entry.includes(needle)) {
        hits.push(position);
      }
    }
    return hits;
  }

  const [filtered, info, order] = uf.search(haystack, needle, outOfOrderLimit, rankThreshold);

  if (!filtered?.length) {
    return [];
  }

  // `order` holds positions into `info.idx`, which in turn holds haystack indices.
  if (info && order) {
    return order.map((rank) => info.idx[rank]);
  }

  return filtered;
}

@ -72,7 +72,6 @@
"@grafana/i18n": "12.1.0-pre",
"@grafana/schema": "12.1.0-pre",
"@hello-pangea/dnd": "18.0.1",
"@leeoniya/ufuzzy": "1.0.18",
"@monaco-editor/react": "4.7.0",
"@popperjs/core": "2.11.8",
"@react-aria/dialog": "3.5.27",

@ -1,74 +0,0 @@
import { fuzzyFind } from './filter';
// Tests for fuzzyFind, which maps search hits in a string haystack back to option objects.
describe('combobox filter', () => {
  // Wraps each string in a `{ value }` option, runs fuzzyFind with the strings as the
  // haystack, and returns the values of the matched options in result order.
  const findValues = (stringOptions: string[], needle: string) => {
    const options = stringOptions.map((value) => ({ value }));
    const matches = fuzzyFind(options, stringOptions, needle);
    return matches.map((m) => m.value);
  };

  it('should properly rank by match quality', () => {
    const found = findValues(['A', 'AA', 'AB', 'AC', 'BC', 'C', 'CD'], 'C');

    expect(found).toEqual(['C', 'CD', 'AC', 'BC']);
  });

  it('orders by match quality and case sensitivty', () => {
    const found = findValues(
      [
        'client_service_namespace',
        'namespace',
        'alert_namespace',
        'container_namespace',
        'Namespace',
        'client_k8s_namespace_name',
        'foobar',
      ],
      'Names'
    );

    expect(found).toEqual([
      'Namespace',
      'namespace',
      'alert_namespace',
      'container_namespace',
      'client_k8s_namespace_name',
      'client_service_namespace',
    ]);
  });

  describe('non-ascii', () => {
    it('should do substring match when needle is non-latin', () => {
      const found = findValues(['A水', 'AA', 'AB', 'AC', 'BC', 'C', 'CD'], '水');

      expect(found).toEqual(['A水']);
    });

    it('second case for non-latin characters', () => {
      const found = findValues(
        ['台灣省', '台中市', '台北市', '台南市', '南投縣', '高雄市', '台中第一高級中學'],
        '南'
      );

      expect(found).toEqual(['台南市', '南投縣']);
    });
  });

  describe('operators', () => {
    it('should do substring match when needle is only symbols', () => {
      const found = findValues(['=', '<=', '>', '!~'], '=');

      expect(found).toEqual(['=', '<=']);
    });
  });
});

@ -1,23 +1,6 @@
import uFuzzy from '@leeoniya/ufuzzy';
import { fuzzySearch } from '@grafana/data';
import { ALL_OPTION_VALUE, ComboboxOption } from './types';
// Matches any single character outside the printable ASCII range (space through tilde).
// Despite the name, this also matches ASCII control characters such as tab and newline.
// The `m` flag has no effect on this pattern because it contains no anchors.
// https://catonmat.net/my-favorite-regex :)
const REGEXP_NON_ASCII = /[^ -~]/m;
// https://www.asciitable.com/
// matches only these: `~!@#$%^&*()_+-=[]\{}|;':",./<>?
// i.e. a run made up exclusively of ASCII punctuation (no letters, digits or spaces).
const REGEXP_ONLY_SYMBOLS = /^[\x21-\x2F\x3A-\x40\x5B-\x60\x7B-\x7E]+$/m;
// limit max terms in needle that qualify for re-ordering
// (passed to uf.search as its third argument)
const outOfOrderLimit = 5;
// beyond 25 chars fall back to substring search
const maxNeedleLength = 25;
// beyond 5 terms fall back to substring match
const maxFuzzyTerms = 5;
// when number of matches <= 1e4, do ranking + sorting by quality
// (passed to uf.search as its fourth argument)
const rankThreshold = 1e4;
// typo tolerance mode
// shared uFuzzy instance used for both splitting the needle into terms and searching
const uf = new uFuzzy({ intraMode: 1 });
import { ComboboxOption } from './types';
export function itemToString<T extends string | number>(item?: ComboboxOption<T> | null) {
if (item == null) {
@ -26,60 +9,11 @@ export function itemToString<T extends string | number>(item?: ComboboxOption<T>
return item.label ?? item.value.toString();
}
// TODO: Remove when MultiCombobox async has been merged
/**
 * Builds a predicate for filtering combobox options by typed text.
 *
 * The returned predicate keeps an option when any of the following holds:
 * - `inputValue` is empty,
 * - the option's label contains `inputValue` (case-insensitive),
 * - the option's stringified value contains `inputValue` (case-insensitive),
 * - the option's stringified value equals `ALL_OPTION_VALUE`.
 *
 * @param inputValue - text currently typed by the user
 * @returns predicate telling whether an option should stay visible
 */
export function itemFilter<T extends string | number>(inputValue: string) {
  // Lower-cased once here so the predicate does not redo it for every option.
  const query = inputValue.toLowerCase();

  return (item: ComboboxOption<T>) => {
    if (!inputValue) {
      return true;
    }

    const labelMatches = item.label?.toLowerCase().includes(query);
    if (labelMatches) {
      return true;
    }

    const valueMatches = item.value?.toString().toLowerCase().includes(query);
    if (valueMatches) {
      return true;
    }

    return item.value.toString() === ALL_OPTION_VALUE;
  };
}
/**
 * Returns the entries of `options` whose corresponding `haystack` string matches `needle`.
 * `haystack[i]` is used as the searchable text for `options[i]`, so both arrays are indexed
 * in parallel.
 *
 * NOTE(review): this span appears to show both sides of a diff without +/- markers — the
 * inline implementation ending at `return matches;` and, after it, a two-line body that
 * delegates to fuzzySearch. Confirm against the original diff which one is current.
 */
export function fuzzyFind<T extends string | number>(
options: Array<ComboboxOption<T>>,
haystack: string[],
needle: string
) {
let matches: Array<ComboboxOption<T>> = [];
// an empty needle keeps every option (the same array instance is returned)
if (needle === '') {
matches = options;
}
// fallback to substring matches to avoid badness
else if (
// contains non-ascii
REGEXP_NON_ASCII.test(needle) ||
// is only ascii symbols (operators)
REGEXP_ONLY_SYMBOLS.test(needle) ||
// too long (often copy-paste from somewhere)
needle.length > maxNeedleLength ||
uf.split(needle).length > maxFuzzyTerms
) {
// case-sensitive substring check, results kept in haystack order
for (let i = 0; i < haystack.length; i++) {
let item = haystack[i];
if (item.includes(needle)) {
matches.push(options[i]);
}
}
}
// fuzzy search
else {
const [idxs, info, order] = uf.search(haystack, needle, outOfOrderLimit, rankThreshold);
if (idxs?.length) {
if (info && order) {
// `order` holds positions into `info.idx`, which holds haystack indices
matches = order.map((idx) => options[info.idx[idx]]);
} else {
// no ranking info available: keep the order of the filtered indices
matches = idxs.map((idx) => options[idx]);
}
}
}
return matches;
// delegate the search to fuzzySearch and map the returned indices back to options
const indices = fuzzySearch(haystack, needle);
return indices.map((idx) => options[idx]);
}

@ -3021,6 +3021,7 @@ __metadata:
"@braintree/sanitize-url": "npm:7.0.1"
"@grafana/schema": "npm:12.1.0-pre"
"@grafana/tsconfig": "npm:^2.0.0"
"@leeoniya/ufuzzy": "npm:1.0.18"
"@rollup/plugin-node-resolve": "npm:16.0.0"
"@types/d3-interpolate": "npm:^3.0.0"
"@types/history": "npm:4.7.11"
@ -3643,7 +3644,6 @@ __metadata:
"@grafana/schema": "npm:12.1.0-pre"
"@grafana/tsconfig": "npm:^2.0.0"
"@hello-pangea/dnd": "npm:18.0.1"
"@leeoniya/ufuzzy": "npm:1.0.18"
"@monaco-editor/react": "npm:4.7.0"
"@popperjs/core": "npm:2.11.8"
"@react-aria/dialog": "npm:3.5.27"

Loading…
Cancel
Save