Improve text extraction logic for searching

2022-03-12 14:36:00 -05:00 · 2022-03-12 14:36:00 -05:00 · 95183edffd
parent e15e1572aa
commit 95183edffd
4 changed files with 128 additions and 0 deletions
--- a/resources/scripts/helpers/extractSearchFilters.spec.ts
+++ b/resources/scripts/helpers/extractSearchFilters.spec.ts
@ -0,0 +1,45 @@
 import extractSearchFilters from '@/helpers/extractSearchFilters';
 /**
 * A single table entry: the raw search input followed by the filters it is
 * expected to produce. A literal `0` in the second slot means the helper is
 * expected to hand the whole input back under the default `*` key.
 */
 type TestCase = [ string, 0 | Record<string, string[]> ];
 describe('@/helpers/extractSearchFilters.ts', () => {
    // Sentinel marking rows that should fall back to the default filter.
    const FALLS_BACK_TO_DEFAULT = 0;

    const table: TestCase[] = [
        // Blank input produces no filters at all.
        [ '', {} ],
        // Nothing matches a known filter, so the input is returned wholesale.
        [ 'hello world', FALLS_BACK_TO_DEFAULT ],
        [ 'hello "world this" is quoted', FALLS_BACK_TO_DEFAULT ],
        [ 'hello "world foo:123 is" quoted', FALLS_BACK_TO_DEFAULT ],
        // Known filters are collected; free text around them is discarded.
        [ 'bar:xyz foo:abc', { bar: [ 'xyz' ], foo: [ 'abc' ] } ],
        [ 'hello foo:abc', { foo: [ 'abc' ] } ],
        [ 'hello foo:abc world another bar:xyz hodor', { foo: [ 'abc' ], bar: [ 'xyz' ] } ],
        // Repeated filters accumulate, and filters without a value are skipped.
        [ 'foo:1 foo:2 foo: 3 foo:4', { foo: [ '1', '2', '4' ] } ],
        [ ' foo:123 foo:bar:123 foo: foo:string', { foo: [ '123', 'bar:123', 'string' ] } ],
        // Filters that were not requested ("baz") are ignored.
        [ 'foo:1 bar:2 baz:3', { foo: [ '1' ], bar: [ '2' ] } ],
        // Quoted values may contain spaces and escaped quotes.
        [ 'hello foo:"this is quoted" bar:"this \\"is deeply\\" quoted" world foo:another', {
            foo: [ 'this is quoted', 'another' ],
            bar: [ 'this "is deeply" quoted' ],
        } ],
    ];

    it.each(table)('should return expected filters: [%s]', (input, output) => {
        const expectedFilters = output === FALLS_BACK_TO_DEFAULT ? { '*': [ input ] } : output;

        expect(extractSearchFilters(input, [ 'foo', 'bar' ])).toStrictEqual({ filters: expectedFilters });
    });

    it('should allow modification of the default parameter', () => {
        // No known filter present: everything lands under the custom default key.
        const withoutFilters = extractSearchFilters('hello world', [ 'foo' ], 'default_param');
        expect(withoutFilters).toStrictEqual({
            filters: { default_param: [ 'hello world' ] },
        });

        // A known filter is present: the default key is not used at all.
        const withFilters = extractSearchFilters('foo:123 bar', [ 'foo' ], 'default_param');
        expect(withFilters).toStrictEqual({
            filters: { foo: [ '123' ] },
        });
    });
 });
--- a/resources/scripts/helpers/extractSearchFilters.ts
+++ b/resources/scripts/helpers/extractSearchFilters.ts
@ -0,0 +1,40 @@
 import { QueryBuilderParams } from '@/api/http';
 import splitStringWhitespace from '@/helpers/splitStringWhitespace';
 /**
 * Parses a free-form search string into query builder filters.
 *
 * The input is tokenised with `splitStringWhitespace`. A token of the form
 * `name:value` contributes `value` to the filter `name` when `name` is listed
 * in `params` and `value` is not empty. Only the first colon separates the
 * name from the value, so `foo:bar:123` yields the value `bar:123` for `foo`.
 *
 * - Blank input yields an empty filter set.
 * - When no token matches a known filter, the original (untrimmed) string is
 *   returned as the single value of `defaultFilter`.
 * - When at least one token matches, all non-matching tokens are discarded.
 *
 * @param str Raw search input.
 * @param params Filter names that should be recognised.
 * @param defaultFilter Key used when no filter matched; defaults to `*`.
 * @returns An object whose `filters` property maps filter names to the values
 * collected for them, in order of appearance.
 */
 const extractSearchFilters = <T extends string, D extends string = string> (
    str: string,
    params: T[],
    defaultFilter: D = '*' as D,
 ): QueryBuilderParams<T> | QueryBuilderParams<D> => {
    if (str.trim().length === 0) {
        return { filters: {} };
    }

    // Widened to string so that an arbitrary token name can be tested for
    // membership without suppressing the type checker.
    const allowed = new Set<string>(params);
    const filters = new Map<T, string[]>();

    for (const segment of splitStringWhitespace(str)) {
        const separator = segment.indexOf(':');
        // No colon at all, or nothing in front of it: not a filter expression.
        if (separator <= 0) {
            continue;
        }

        const name = segment.slice(0, separator);
        const value = segment.slice(separator + 1);
        if (!value || !allowed.has(name)) {
            continue;
        }

        // Membership in "allowed" was just verified, so the name is one of T.
        const key = name as T;
        const collected = filters.get(key);
        if (collected) {
            collected.push(value);
        } else {
            filters.set(key, [ value ]);
        }
    }

    if (filters.size === 0) {
        return {
            filters: {
                [defaultFilter]: [ str ] as Readonly<string[]>,
            } as unknown as QueryBuilderParams<D>['filters'],
        };
    }

    return {
        filters: Object.fromEntries(filters) as unknown as QueryBuilderParams<T>['filters'],
    };
 };
 export default extractSearchFilters;
--- a/resources/scripts/helpers/splitStringWhitespace.spec.ts
+++ b/resources/scripts/helpers/splitStringWhitespace.spec.ts
@ -0,0 +1,16 @@
 import splitStringWhitespace from '@/helpers/splitStringWhitespace';
 describe('@/helpers/splitStringWhitespace.ts', () => {
    // Each row pairs an input string with the tokens it should be split into.
    const table: [ string, string[] ][] = [
        // Empty input yields no tokens.
        [ '', [] ],
        // Plain words, with surrounding whitespace ignored.
        [ 'hello world', [ 'hello', 'world' ] ],
        [ '   hello world ', [ 'hello', 'world' ] ],
        // Digits and punctuation are ordinary token characters.
        [ 'hello123 world 123 $$ s ', [ 'hello123', 'world', '123', '$$', 's' ] ],
        [ 'hello world! how are you?', [ 'hello', 'world!', 'how', 'are', 'you?' ] ],
        // Quoted text stays together; escaped quotes are kept as literal quotes.
        [ 'hello "foo bar baz" world', [ 'hello', 'foo bar baz', 'world' ] ],
        [ 'hello "foo \\"bar bar \\" baz" world', [ 'hello', 'foo "bar bar " baz', 'world' ] ],
        // Unescaped inner quotes simply close and reopen the quoted section.
        [ 'hello "foo "bar baz" baz" world', [ 'hello', 'foo bar', 'baz baz', 'world' ] ],
    ];

    it.each(table)('should handle string: %s', (input, output) => {
        const tokens = splitStringWhitespace(input);

        expect(tokens).toStrictEqual(output);
    });
 });
--- a/resources/scripts/helpers/splitStringWhitespace.ts
+++ b/resources/scripts/helpers/splitStringWhitespace.ts
@ -0,0 +1,27 @@
 /**
 * Splits a string into tokens on whitespace, keeping text wrapped in double
 * quotes together as a single token (the quotes themselves are removed).
 *
 * - Any whitespace character (space, tab, line break, ...) outside of quotes
 *   separates tokens, and a run of several whitespace characters counts as a
 *   single separator.
 * - A backslash escapes the character that follows it: `\"` is a literal
 *   quote and an escaped whitespace character does not split the token.
 * - Quotes inside a quoted section must be escaped, otherwise they simply
 *   close (and later reopen) the quoted section.
 * - An explicitly quoted empty string (`""`) between other tokens yields an
 *   empty token; an input consisting of nothing else yields no tokens.
 *
 * Derived from https://stackoverflow.com/a/46946420
 *
 * @param str The string to split; leading and trailing whitespace is ignored.
 * @returns The tokens in order of appearance, or an empty array when the
 * input contains none.
 */
 const splitStringWhitespace = (str: string): string[] => {
    // Each chunk is either one character or a backslash plus the character it
    // escapes. "[\s\S]" is used rather than "." so line breaks are not lost.
    const chunks = str.trim().match(/\\?[\s\S]/g) || [];
    const whitespace = /^\s$/;

    const tokens: string[] = [];
    let current = '';
    // True once the current token has been opened by a character or a quote.
    // This lets `""` produce an empty token while whitespace runs do not.
    let started = false;
    let quoted = false;

    for (const chunk of chunks) {
        if (chunk === '"') {
            quoted = !quoted;
            started = true;
        } else if (!quoted && whitespace.test(chunk)) {
            // Separator: close the token in progress, if there is one.
            if (started) {
                tokens.push(current);
                current = '';
                started = false;
            }
        } else {
            // A two character chunk is always "\x"; keep only the escaped character.
            current += chunk.length === 2 ? chunk.slice(1) : chunk;
            started = true;
        }
    }

    if (started) {
        tokens.push(current);
    }

    // A lone empty token (for example the input `""`) is reported as no tokens.
    if (tokens.length === 1 && tokens[0] === '') {
        return [];
    }

    return tokens;
 };
 export default splitStringWhitespace;