Improve text extraction logic for searching

This commit is contained in:
Dane Everitt 2022-03-12 14:36:00 -05:00
parent e15e1572aa
commit 95183edffd
No known key found for this signature in database
GPG Key ID: EEA66103B3D71F53
4 changed files with 128 additions and 0 deletions

View File

@ -0,0 +1,45 @@
import extractSearchFilters from '@/helpers/extractSearchFilters';
/**
 * One table row: the search string to parse, followed by either the exact
 * filters expected back or the sentinel `0`, which stands for "the whole
 * input comes back under the default `*` key".
 */
type TestCase = [ string, 0 | Record<string, string[]> ];

describe('@/helpers/extractSearchFilters.ts', () => {
    // Sentinel used in the table below in place of an explicit filter object.
    const WHOLE_INPUT = 0x00;

    const table: TestCase[] = [
        // Blank input yields no filters at all.
        [ '', {} ],
        // No "name:value" tokens, so everything falls through to the default key.
        [ 'hello world', WHOLE_INPUT ],
        [ 'bar:xyz foo:abc', { bar: [ 'xyz' ], foo: [ 'abc' ] } ],
        // Free text next to a recognised filter is dropped.
        [ 'hello foo:abc', { foo: [ 'abc' ] } ],
        [ 'hello foo:abc world another bar:xyz hodor', { foo: [ 'abc' ], bar: [ 'xyz' ] } ],
        // A filter name with nothing after the colon is ignored.
        [ 'foo:1 foo:2 foo: 3 foo:4', { foo: [ '1', '2', '4' ] } ],
        // Only the first colon separates the name from the value.
        [ ' foo:123 foo:bar:123 foo: foo:string', { foo: [ '123', 'bar:123', 'string' ] } ],
        // "baz" is not in the list of accepted filters passed below.
        [ 'foo:1 bar:2 baz:3', { foo: [ '1' ], bar: [ '2' ] } ],
        [ 'hello "world this" is quoted', WHOLE_INPUT ],
        // A filter-looking token inside a quoted phrase is not picked up.
        [ 'hello "world foo:123 is" quoted', WHOLE_INPUT ],
        [
            'hello foo:"this is quoted" bar:"this \\"is deeply\\" quoted" world foo:another',
            {
                foo: [ 'this is quoted', 'another' ],
                bar: [ 'this "is deeply" quoted' ],
            },
        ],
    ];

    it.each(table)('should return expected filters: [%s]', (input, output) => {
        const expected = output === WHOLE_INPUT ? { '*': [ input ] } : output;

        expect(extractSearchFilters(input, [ 'foo', 'bar' ])).toStrictEqual({ filters: expected });
    });

    it('should allow modification of the default parameter', () => {
        // Nothing matches: the input lands under the custom default key.
        const unmatched = extractSearchFilters('hello world', [ 'foo' ], 'default_param');
        expect(unmatched).toStrictEqual({
            filters: { default_param: [ 'hello world' ] },
        });

        // Something matches: the custom default key is not used at all.
        const matched = extractSearchFilters('foo:123 bar', [ 'foo' ], 'default_param');
        expect(matched).toStrictEqual({
            filters: { foo: [ '123' ] },
        });
    });
});

View File

@ -0,0 +1,40 @@
import { QueryBuilderParams } from '@/api/http';
import splitStringWhitespace from '@/helpers/splitStringWhitespace';
/**
 * Turns a free-form search string into a set of named filters.
 *
 * The input is tokenized with `splitStringWhitespace`. Every token of the form
 * `name:value` whose `name` appears in `params` adds `value` to that filter;
 * repeated names accumulate their values in the order they are encountered.
 * Only the first colon separates the name from the value, so `foo:bar:123`
 * produces the value `bar:123` for `foo`. Tokens without a value, or whose
 * name is not listed in `params`, are discarded.
 *
 * If no token matched, the whole input (exactly as given) is returned under
 * `defaultFilter`. A blank input returns an empty set of filters.
 *
 * @param str The raw search text.
 * @param params The filter names that are accepted.
 * @param defaultFilter Key receiving the entire input when nothing matched; `*` unless overridden.
 * @returns An object whose `filters` property maps each filter name to its collected values.
 */
const extractSearchFilters = <T extends string, D extends string = string> (
    str: string,
    params: T[],
    defaultFilter: D = '*' as D,
): QueryBuilderParams<T> | QueryBuilderParams<D> => {
    // Blank input short-circuits: not even the default filter is populated.
    if (str.trim().length === 0) {
        return { filters: {} };
    }

    const matched: Map<T, string[]> = new Map();

    for (const token of splitStringWhitespace(str)) {
        // Cut at the first colon only; later colons belong to the value.
        const colon = token.indexOf(':');
        const key = (colon === -1 ? token : token.slice(0, colon)) as T;
        const value = colon === -1 ? '' : token.slice(colon + 1);

        // @ts-ignore
        if (key && value && params.includes(key)) {
            const collected = matched.get(key);
            if (collected) {
                collected.push(value);
            } else {
                matched.set(key, [ value ]);
            }
        }
    }

    if (matched.size > 0) {
        return {
            filters: Object.fromEntries(matched) as unknown as QueryBuilderParams<T>['filters'],
        };
    }

    // Nothing recognised: hand the original string back under the default key.
    return {
        filters: {
            [defaultFilter]: [ str ] as Readonly<string[]>,
        } as unknown as QueryBuilderParams<D>['filters'],
    };
};

export default extractSearchFilters;

View File

@ -0,0 +1,16 @@
import splitStringWhitespace from '@/helpers/splitStringWhitespace';
describe('@/helpers/splitStringWhitespace.ts', () => {
    // Each row is [ input string, parts expected from splitting it ].
    const table: [ string, string[] ][] = [
        [ '', [] ],
        [ 'hello world', [ 'hello', 'world' ] ],
        [ ' hello world ', [ 'hello', 'world' ] ],
        [ 'hello123 world 123 $$ s ', [ 'hello123', 'world', '123', '$$', 's' ] ],
        [ 'hello world! how are you?', [ 'hello', 'world!', 'how', 'are', 'you?' ] ],
        // Quoted text stays together and loses its surrounding quotes.
        [ 'hello "foo bar baz" world', [ 'hello', 'foo bar baz', 'world' ] ],
        // Escaped quotes inside a quoted section are kept as literal quotes.
        [ 'hello "foo \\"bar bar \\" baz" world', [ 'hello', 'foo "bar bar " baz', 'world' ] ],
        // Unescaped inner quotes simply toggle quoting on and off.
        [ 'hello "foo "bar baz" baz" world', [ 'hello', 'foo bar', 'baz baz', 'world' ] ],
    ];

    it.each(table)('should handle string: %s', (input, expected) => {
        const actual = splitStringWhitespace(input);

        expect(actual).toStrictEqual(expected);
    });
});

View File

@ -0,0 +1,27 @@
/**
 * Takes a string and splits it into an array by whitespace, ignoring any
 * text that is wrapped in double quotes. You must escape quotes within a
 * quoted string (`\"`), otherwise it will just split on those.
 *
 * Details of the behavior:
 *  - Any whitespace character (space, tab, newline, ...) outside of quotes
 *    separates parts, and a run of consecutive whitespace counts as a single
 *    separator, so no empty parts are produced by repeated spaces.
 *  - Whitespace inside quotes is preserved verbatim, including newlines.
 *  - A backslash escapes the character that follows it: the backslash is
 *    dropped and the character is taken literally (`\"` is a quote character,
 *    `\ ` is a space that does not split).
 *  - The quote characters themselves are never part of the output.
 *  - An explicitly quoted empty string (`""`) between other parts yields an
 *    empty part; if it is the only content, the result is an empty array.
 *
 * Derived from https://stackoverflow.com/a/46946420
 *
 * @param str The text to split.
 * @returns The parts in order of appearance, or an empty array for blank input.
 */
const splitStringWhitespace = (str: string): string[] => {
    const whitespace = /\s/;
    const parts: string[] = [];

    let current = '';
    // True once the part being built has seen a character or a quote. This is
    // what lets `""` count as a part while repeated separators do not.
    let started = false;
    let quoted = false;

    // Each token is one character, optionally preceded by an escaping backslash.
    // `[\s\S]` is used instead of `.` so that line terminators are tokenized too
    // rather than being silently removed from the input.
    for (const token of (str.trim().match(/\\?[\s\S]/g) || [])) {
        if (token === '"') {
            quoted = !quoted;
            started = true;
        } else if (!quoted && token.length === 1 && whitespace.test(token)) {
            // Unquoted, unescaped whitespace ends the current part (if there is one).
            if (started) {
                parts.push(current);
                current = '';
                started = false;
            }
        } else {
            // Two-character tokens are escapes: keep only the escaped character.
            current += token.length > 1 ? token.slice(1) : token;
            started = true;
        }
    }

    if (started) {
        parts.push(current);
    }

    // A lone empty part (for example the input `""`) is reported as "no parts".
    if (parts.length === 1 && parts[0] === '') {
        return [];
    }

    return parts;
};

export default splitStringWhitespace;