From 67a044b6fbe486eafb03773b251090bb85cd3f92 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 2 Jun 2026 05:20:15 +0000 Subject: [PATCH 1/2] fix(common-utils): emit SQL filter for SQL-expression field keys "Add to Filters" on a nested-JSON attribute builds a ClickHouse SQL expression (e.g. JSONExtractString(LogAttributes['weird.key.payload'], 'abc.def.jqk/abcd')) as the filter field. filtersToQuery previously emitted this as the field side of a Lucene term, producing a query that lucene.parse cannot parse (opens a range at the '[' and throws SyntaxError), crashing the search page at chart render time. Detect raw SQL-expression keys (containing parens/brackets/quotes/ whitespace that cannot appear in a Lucene field name) and emit a SQL IN/NOT IN/BETWEEN filter instead, which flows straight into the WHERE clause and round-trips through parseQuery/extractInClauses. Plain field paths keep using Lucene. Co-authored-by: Mike Shi --- .../src/__tests__/filters.test.ts | 89 +++++++++++++++++++ .../__tests__/nestedJsonAddToFilters.test.ts | 62 +++++++++++++ packages/common-utils/src/filters.ts | 74 ++++++++++++++- 3 files changed, 224 insertions(+), 1 deletion(-) create mode 100644 packages/common-utils/src/__tests__/nestedJsonAddToFilters.test.ts diff --git a/packages/common-utils/src/__tests__/filters.test.ts b/packages/common-utils/src/__tests__/filters.test.ts index 9ba03e7c30..c2c6942e03 100644 --- a/packages/common-utils/src/__tests__/filters.test.ts +++ b/packages/common-utils/src/__tests__/filters.test.ts @@ -192,6 +192,95 @@ describe('filters', () => { { type: 'lucene', condition: 'LogAttributes.latency:[0 TO 100]' }, ]); }); + + // A SQL-expression key (e.g. produced by "Add to Filters" on a nested-JSON + // attribute) cannot be represented as a Lucene field name. Emitting a + // Lucene `field:"value"` for it makes lucene.parse throw at render time, so + // filtersToQuery must emit a SQL filter instead. See HDX nested-JSON crash. + describe('SQL-expression field keys (nested-JSON "Add to Filters")', () => { + const jsonExtractKey = + "JSONExtractString(LogAttributes['weird.key.payload'], 'abc.def.jqk/abcd')"; + + it('emits a SQL IN filter for a JSONExtractString key', () => { + const filters = { + [jsonExtractKey]: { + included: new Set(['asdf-14']), + excluded: new Set(), + }, + }; + expect(filtersToQuery(filters)).toEqual([ + { type: 'sql', condition: `${jsonExtractKey} IN ('asdf-14')` }, + ]); + }); + + it('emits a SQL NOT IN filter for excluded values', () => { + const filters = { + [jsonExtractKey]: { + included: new Set(), + excluded: new Set(['asdf-14']), + }, + }; + expect(filtersToQuery(filters)).toEqual([ + { type: 'sql', condition: `${jsonExtractKey} NOT IN ('asdf-14')` }, + ]); + }); + + it('emits separate SQL filters for combined included/excluded', () => { + const filters = { + [jsonExtractKey]: { + included: new Set(['a', 'b']), + excluded: new Set(['c']), + }, + }; + expect(filtersToQuery(filters)).toEqual([ + { type: 'sql', condition: `${jsonExtractKey} IN ('a', 'b')` }, + { type: 'sql', condition: `${jsonExtractKey} NOT IN ('c')` }, + ]); + }); + + it('SQL-escapes single quotes and backslashes in values', () => { + const filters = { + [jsonExtractKey]: { + included: new Set(["it's a\\test"]), + excluded: new Set(), + }, + }; + expect(filtersToQuery(filters)).toEqual([ + { + type: 'sql', + condition: `${jsonExtractKey} IN ('it''s a\\\\test')`, + }, + ]); + }); + + it('emits a SQL BETWEEN filter for ranges', () => { + const filters = { + [jsonExtractKey]: { + included: new Set(), + excluded: new Set(), + range: { min: 10, max: 500 }, + }, + }; + expect(filtersToQuery(filters)).toEqual([ + { type: 'sql', condition: `${jsonExtractKey} BETWEEN 10 AND 500` }, + ]); + }); + + it('still emits Lucene for plain field paths', () => { + const filters = { + 'LogAttributes.weird.key.payload': { + included: new Set(['asdf-14']), + excluded: new Set(), + }, + }; + expect(filtersToQuery(filters)).toEqual([ + { + type: 'lucene', + condition: 'LogAttributes.weird.key.payload:"asdf-14"', + }, + ]); + }); + }); }); describe('parseLuceneFilter', () => { diff --git a/packages/common-utils/src/__tests__/nestedJsonAddToFilters.test.ts b/packages/common-utils/src/__tests__/nestedJsonAddToFilters.test.ts new file mode 100644 index 0000000000..6fed350408 --- /dev/null +++ b/packages/common-utils/src/__tests__/nestedJsonAddToFilters.test.ts @@ -0,0 +1,62 @@ +import { filtersToQuery } from '@/filters'; +import { parse } from '@/queryParser'; + +/** + * Regression coverage for the nested-JSON "Add to Filters" crash. + * + * "Add to Filters" on an attribute reached through a parsed JSON string column + * (e.g. `weird.key.payload` holding a JSON string) builds a ClickHouse SQL + * expression as the filter field: `JSONExtractString(LogAttributes['weird.key + * .payload'], 'abc.def.jqk/abcd')`. The previous code emitted this as the field + * side of a Lucene term (`:"value"`), which `lucene.parse` cannot parse — + * it opens a range expression at the `[` after `LogAttributes` and throws + * `SyntaxError: Expected ".", "TO", ... but "]" found.` at chart render time. + * + * `filtersToQuery` now detects SQL-expression field keys and emits a SQL filter + * instead, so the broken Lucene string is never produced. + */ +describe('nested-JSON "Add to Filters" does not produce unparseable Lucene', () => { + const sqlExprKey = + "JSONExtractString(LogAttributes['weird.key.payload'], 'abc.def.jqk/abcd')"; + + it('locks in the bug: the old Lucene condition throws when parsed', () => { + // The unescaped `[`/`]`/`(`/`)` make the Lucene PEG parser open a range + // expression after `LogAttributes` and fail. + const oldLuceneCondition = `${sqlExprKey}:"asdf-14"`; + expect(() => parse(oldLuceneCondition)).toThrow(); + }); + + it('minimal repro: a bare `field:[term]` (no TO) throws', () => { + expect(() => parse('key:[foo]')).toThrow(); + }); + + it('filtersToQuery now emits a SQL filter (never handed to lucene.parse)', () => { + const filters = filtersToQuery({ + [sqlExprKey]: { + included: new Set(['asdf-14']), + excluded: new Set(), + }, + }); + + expect(filters).toEqual([ + { type: 'sql', condition: `${sqlExprKey} IN ('asdf-14')` }, + ]); + }); + + it('any Lucene filter the handler still emits parses successfully', () => { + // Plain field paths keep using Lucene; whatever Lucene condition the + // handler emits must parse without throwing. + const filters = filtersToQuery({ + 'LogAttributes.weird.key.payload': { + included: new Set(['asdf-14']), + excluded: new Set(), + }, + }); + + for (const filter of filters) { + if (filter.type === 'lucene' && 'condition' in filter) { + expect(() => parse(filter.condition)).not.toThrow(); + } + } + }); +}); diff --git a/packages/common-utils/src/filters.ts b/packages/common-utils/src/filters.ts index e5ded218a4..758c3d6f40 100644 --- a/packages/common-utils/src/filters.ts +++ b/packages/common-utils/src/filters.ts @@ -24,6 +24,32 @@ const escapeLuceneQuotedTerm = (s: string) => { return s.replace(/\\/g, '\\\\').replace(/"/g, '\\"'); }; +/** + * Escape a value for use inside a single-quoted SQL string literal. Backslashes + * are doubled and single quotes are SQL-escaped (`''`). This is the inverse of + * the unescaping `getBooleanOrUnquotedString` performs in the app's + * `parseQuery`, so SQL filters emitted here round-trip back into FilterState. + */ +const escapeSqlString = (s: string): string => + s.replace(/\\/g, '\\\\').replace(/'/g, "''"); + +/** Render a FilterState value as a SQL literal (quoted string or bare boolean). */ +const toSqlLiteral = (v: string | boolean): string => + typeof v === 'boolean' ? String(v) : `'${escapeSqlString(v)}'`; + +/** + * A FilterState key is a raw ClickHouse expression (e.g. a `JSONExtractString( + * LogAttributes['k.v'], 'p')` produced by "Add to Filters" on a nested-JSON + * attribute) rather than a plain field path whenever it contains characters + * that cannot appear in a Lucene field name — parentheses, brackets, quotes, or + * whitespace. Emitting a Lucene `field:"value"` for such a key makes + * `lucene.parse` throw at render time (HDX nested-JSON "Add to Filters" crash), + * so we emit a SQL filter instead; the raw expression is already valid SQL and + * flows straight into the WHERE clause. + */ +const isSqlExpressionField = (field: string): boolean => + /[()[\]{}"'\s]/.test(field); + /** * Escape backslashes and colons so the field name survives Lucene parsing. * Map sub-keys can legitimately contain `:` (e.g. `LogAttributes['foo:bar']` @@ -37,6 +63,42 @@ const escapeLuceneQuotedTerm = (s: string) => { const escapeLuceneFieldName = (key: string): string => key.replace(/\\/g, '\\\\').replace(/:/g, '\\:'); +/** + * Build SQL `IN` / `NOT IN` / `BETWEEN` filters for a single field whose key is + * a raw ClickHouse expression. The emitted shape matches what the app's + * `parseQuery`/`extractInClauses` reads back, so these filters round-trip. + */ +const filterStateValuesToSql = ( + column: string, + values: FilterState[string], +): Filter[] => { + const conditions: Filter[] = []; + + if (values.included.size > 0) { + conditions.push({ + type: 'sql' as const, + condition: `${column} IN (${Array.from(values.included) + .map(toSqlLiteral) + .join(', ')})`, + }); + } + if (values.excluded.size > 0) { + conditions.push({ + type: 'sql' as const, + condition: `${column} NOT IN (${Array.from(values.excluded) + .map(toSqlLiteral) + .join(', ')})`, + }); + } + if (values.range != null) { + conditions.push({ + type: 'sql' as const, + condition: `${column} BETWEEN ${values.range.min} AND ${values.range.max}`, + }); + } + return conditions; +}; + export const filtersToQuery = (filters: FilterState): Filter[] => { return Object.entries(filters) .filter( @@ -46,8 +108,18 @@ export const filtersToQuery = (filters: FilterState): Filter[] => { values.range != null, ) .flatMap(([key, values]) => { + const normalizedField = parseKeyPath(key).join('.'); + + // Raw SQL expressions (e.g. JSONExtractString(...) from nested-JSON "Add + // to Filters") can't be represented as a Lucene field name without + // breaking lucene.parse, so emit SQL filters for them instead. The + // original `key` already holds the valid ClickHouse expression. + if (isSqlExpressionField(normalizedField)) { + return filterStateValuesToSql(key, values); + } + const conditions: Filter[] = []; - const luceneField = escapeLuceneFieldName(parseKeyPath(key).join('.')); + const luceneField = escapeLuceneFieldName(normalizedField); if (values.included.size > 0) { const terms = Array.from(values.included).map( From d30f97162bc017f2a27743d35f7e1449e0bce08f Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 2 Jun 2026 05:23:04 +0000 Subject: [PATCH 2/2] test(app): round-trip SQL-expression filters through filtersToQuery/parseQuery Verify nested-JSON "Add to Filters" SQL filters (JSONExtractString IN (...)) survive the filtersToQuery -> parseQuery URL round-trip, including single-quote SQL escaping and combined included/excluded values. Co-authored-by: Mike Shi --- .../app/src/__tests__/searchFilters.test.ts | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/packages/app/src/__tests__/searchFilters.test.ts b/packages/app/src/__tests__/searchFilters.test.ts index 1d30fc7314..ca55adb59d 100644 --- a/packages/app/src/__tests__/searchFilters.test.ts +++ b/packages/app/src/__tests__/searchFilters.test.ts @@ -636,6 +636,70 @@ describe('searchFilters', () => { }); }); + // "Add to Filters" on a nested-JSON attribute produces a ClickHouse SQL + // expression as the filter key (JSONExtractString(...)). filtersToQuery emits + // these as SQL filters (not Lucene, which lucene.parse can't parse), and + // parseQuery must read them back so the selection survives a URL round-trip. + describe('round-trip: SQL-expression field filters (nested-JSON "Add to Filters")', () => { + const sqlExprKey = + "JSONExtractString(LogAttributes['weird.key.payload'], 'abc.def.jqk/abcd')"; + + it('round-trips a JSONExtractString included filter as SQL', () => { + const filters = { + [sqlExprKey]: { + included: new Set(['asdf-14']), + excluded: new Set(), + }, + }; + const query = filtersToQuery(filters); + expect(query).toEqual([ + { type: 'sql', condition: `${sqlExprKey} IN ('asdf-14')` }, + ]); + const parsed = parseQuery(query); + expect(parsed.filters).toEqual({ + [sqlExprKey]: { + included: new Set(['asdf-14']), + excluded: new Set(), + }, + }); + expect(parsed.passthroughFilters).toEqual([]); + }); + + it('round-trips JSONExtractString included + excluded values', () => { + const filters = { + [sqlExprKey]: { + included: new Set(['a', 'b']), + excluded: new Set(['c']), + }, + }; + const query = filtersToQuery(filters); + expect(query).toEqual([ + { type: 'sql', condition: `${sqlExprKey} IN ('a', 'b')` }, + { type: 'sql', condition: `${sqlExprKey} NOT IN ('c')` }, + ]); + const parsed = parseQuery(query); + expect(parsed.filters).toEqual({ + [sqlExprKey]: { + included: new Set(['a', 'b']), + excluded: new Set(['c']), + }, + }); + }); + + it('round-trips values with single quotes through SQL escaping', () => { + const filters = { + [sqlExprKey]: { + included: new Set(["it's a test"]), + excluded: new Set(), + }, + }; + const parsed = parseQuery(filtersToQuery(filters)); + expect(parsed.filters[sqlExprKey].included).toEqual( + new Set(["it's a test"]), + ); + }); + }); + describe('round-trip: Lucene filters', () => { it('round-trips Map filter with single included value', () => { const filters = {