Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions packages/app/src/__tests__/searchFilters.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,70 @@ describe('searchFilters', () => {
});
});

// "Add to Filters" on a nested-JSON attribute produces a ClickHouse SQL
// expression as the filter key (JSONExtractString(...)). filtersToQuery emits
// these as SQL filters (not Lucene, which lucene.parse can't parse), and
// parseQuery must read them back so the selection survives a URL round-trip.
describe('round-trip: SQL-expression field filters (nested-JSON "Add to Filters")', () => {
const sqlExprKey =
"JSONExtractString(LogAttributes['weird.key.payload'], 'abc.def.jqk/abcd')";

it('round-trips a JSONExtractString included filter as SQL', () => {
const filters = {
[sqlExprKey]: {
included: new Set<string | boolean>(['asdf-14']),
excluded: new Set<string | boolean>(),
},
};
const query = filtersToQuery(filters);
expect(query).toEqual([
{ type: 'sql', condition: `${sqlExprKey} IN ('asdf-14')` },
]);
const parsed = parseQuery(query);
expect(parsed.filters).toEqual({
[sqlExprKey]: {
included: new Set(['asdf-14']),
excluded: new Set(),
},
});
expect(parsed.passthroughFilters).toEqual([]);
});

it('round-trips JSONExtractString included + excluded values', () => {
const filters = {
[sqlExprKey]: {
included: new Set<string | boolean>(['a', 'b']),
excluded: new Set<string | boolean>(['c']),
},
};
const query = filtersToQuery(filters);
expect(query).toEqual([
{ type: 'sql', condition: `${sqlExprKey} IN ('a', 'b')` },
{ type: 'sql', condition: `${sqlExprKey} NOT IN ('c')` },
]);
const parsed = parseQuery(query);
expect(parsed.filters).toEqual({
[sqlExprKey]: {
included: new Set(['a', 'b']),
excluded: new Set(['c']),
},
});
});

it('round-trips values with single quotes through SQL escaping', () => {
const filters = {
[sqlExprKey]: {
included: new Set<string | boolean>(["it's a test"]),
excluded: new Set<string | boolean>(),
},
};
const parsed = parseQuery(filtersToQuery(filters));
expect(parsed.filters[sqlExprKey].included).toEqual(
new Set(["it's a test"]),
);
});
});

describe('round-trip: Lucene filters', () => {
it('round-trips Map filter with single included value', () => {
const filters = {
Expand Down
89 changes: 89 additions & 0 deletions packages/common-utils/src/__tests__/filters.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,95 @@ describe('filters', () => {
{ type: 'lucene', condition: 'LogAttributes.latency:[0 TO 100]' },
]);
});

// A SQL-expression key (e.g. produced by "Add to Filters" on a nested-JSON
// attribute) cannot be represented as a Lucene field name. Emitting a
// Lucene `field:"value"` for it makes lucene.parse throw at render time, so
// filtersToQuery must emit a SQL filter instead. See HDX nested-JSON crash.
describe('SQL-expression field keys (nested-JSON "Add to Filters")', () => {
const jsonExtractKey =
"JSONExtractString(LogAttributes['weird.key.payload'], 'abc.def.jqk/abcd')";

it('emits a SQL IN filter for a JSONExtractString key', () => {
const filters = {
[jsonExtractKey]: {
included: new Set<string | boolean>(['asdf-14']),
excluded: new Set<string | boolean>(),
},
};
expect(filtersToQuery(filters)).toEqual([
{ type: 'sql', condition: `${jsonExtractKey} IN ('asdf-14')` },
]);
});

it('emits a SQL NOT IN filter for excluded values', () => {
const filters = {
[jsonExtractKey]: {
included: new Set<string | boolean>(),
excluded: new Set<string | boolean>(['asdf-14']),
},
};
expect(filtersToQuery(filters)).toEqual([
{ type: 'sql', condition: `${jsonExtractKey} NOT IN ('asdf-14')` },
]);
});

it('emits separate SQL filters for combined included/excluded', () => {
const filters = {
[jsonExtractKey]: {
included: new Set<string | boolean>(['a', 'b']),
excluded: new Set<string | boolean>(['c']),
},
};
expect(filtersToQuery(filters)).toEqual([
{ type: 'sql', condition: `${jsonExtractKey} IN ('a', 'b')` },
{ type: 'sql', condition: `${jsonExtractKey} NOT IN ('c')` },
]);
});

it('SQL-escapes single quotes and backslashes in values', () => {
const filters = {
[jsonExtractKey]: {
included: new Set<string | boolean>(["it's a\\test"]),
excluded: new Set<string | boolean>(),
},
};
expect(filtersToQuery(filters)).toEqual([
{
type: 'sql',
condition: `${jsonExtractKey} IN ('it''s a\\\\test')`,
},
]);
});

it('emits a SQL BETWEEN filter for ranges', () => {
const filters = {
[jsonExtractKey]: {
included: new Set<string | boolean>(),
excluded: new Set<string | boolean>(),
range: { min: 10, max: 500 },
},
};
expect(filtersToQuery(filters)).toEqual([
{ type: 'sql', condition: `${jsonExtractKey} BETWEEN 10 AND 500` },
]);
});

it('still emits Lucene for plain field paths', () => {
const filters = {
'LogAttributes.weird.key.payload': {
included: new Set<string | boolean>(['asdf-14']),
excluded: new Set<string | boolean>(),
},
};
expect(filtersToQuery(filters)).toEqual([
{
type: 'lucene',
condition: 'LogAttributes.weird.key.payload:"asdf-14"',
},
]);
});
});
});

describe('parseLuceneFilter', () => {
Expand Down
62 changes: 62 additions & 0 deletions packages/common-utils/src/__tests__/nestedJsonAddToFilters.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import { filtersToQuery } from '@/filters';
import { parse } from '@/queryParser';

/**
* Regression coverage for the nested-JSON "Add to Filters" crash.
*
* "Add to Filters" on an attribute reached through a parsed JSON string column
* (e.g. `weird.key.payload` holding a JSON string) builds a ClickHouse SQL
* expression as the filter field: `JSONExtractString(LogAttributes['weird.key
* .payload'], 'abc.def.jqk/abcd')`. The previous code emitted this as the field
* side of a Lucene term (`<expr>:"value"`), which `lucene.parse` cannot parse —
* it opens a range expression at the `[` after `LogAttributes` and throws
* `SyntaxError: Expected ".", "TO", ... but "]" found.` at chart render time.
*
* `filtersToQuery` now detects SQL-expression field keys and emits a SQL filter
* instead, so the broken Lucene string is never produced.
*/
describe('nested-JSON "Add to Filters" does not produce unparseable Lucene', () => {
const sqlExprKey =
"JSONExtractString(LogAttributes['weird.key.payload'], 'abc.def.jqk/abcd')";

it('locks in the bug: the old Lucene condition throws when parsed', () => {
// The unescaped `[`/`]`/`(`/`)` make the Lucene PEG parser open a range
// expression after `LogAttributes` and fail.
const oldLuceneCondition = `${sqlExprKey}:"asdf-14"`;
expect(() => parse(oldLuceneCondition)).toThrow();
});

it('minimal repro: a bare `field:[term]` (no TO) throws', () => {
expect(() => parse('key:[foo]')).toThrow();
});

it('filtersToQuery now emits a SQL filter (never handed to lucene.parse)', () => {
const filters = filtersToQuery({
[sqlExprKey]: {
included: new Set<string | boolean>(['asdf-14']),
excluded: new Set<string | boolean>(),
},
});

expect(filters).toEqual([
{ type: 'sql', condition: `${sqlExprKey} IN ('asdf-14')` },
]);
});

it('any Lucene filter the handler still emits parses successfully', () => {
// Plain field paths keep using Lucene; whatever Lucene condition the
// handler emits must parse without throwing.
const filters = filtersToQuery({
'LogAttributes.weird.key.payload': {
included: new Set<string | boolean>(['asdf-14']),
excluded: new Set<string | boolean>(),
},
});

for (const filter of filters) {
if (filter.type === 'lucene' && 'condition' in filter) {
expect(() => parse(filter.condition)).not.toThrow();
}
}
});
});
74 changes: 73 additions & 1 deletion packages/common-utils/src/filters.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,32 @@ const escapeLuceneQuotedTerm = (s: string) => {
return s.replace(/\\/g, '\\\\').replace(/"/g, '\\"');
};

/**
* Escape a value for use inside a single-quoted SQL string literal. Backslashes
* are doubled and single quotes are SQL-escaped (`''`). This is the inverse of
* the unescaping `getBooleanOrUnquotedString` performs in the app's
* `parseQuery`, so SQL filters emitted here round-trip back into FilterState.
*/
const escapeSqlString = (s: string): string =>
s.replace(/\\/g, '\\\\').replace(/'/g, "''");

/** Render a FilterState value as a SQL literal (quoted string or bare boolean). */
const toSqlLiteral = (v: string | boolean): string =>
typeof v === 'boolean' ? String(v) : `'${escapeSqlString(v)}'`;

/**
* A FilterState key is a raw ClickHouse expression (e.g. a `JSONExtractString(
* LogAttributes['k.v'], 'p')` produced by "Add to Filters" on a nested-JSON
* attribute) rather than a plain field path whenever it contains characters
* that cannot appear in a Lucene field name — parentheses, brackets, quotes, or
* whitespace. Emitting a Lucene `field:"value"` for such a key makes
* `lucene.parse` throw at render time (HDX nested-JSON "Add to Filters" crash),
* so we emit a SQL filter instead; the raw expression is already valid SQL and
* flows straight into the WHERE clause.
*/
const isSqlExpressionField = (field: string): boolean =>
/[()[\]{}"'\s]/.test(field);

/**
* Escape backslashes and colons so the field name survives Lucene parsing.
* Map sub-keys can legitimately contain `:` (e.g. `LogAttributes['foo:bar']`
Expand All @@ -37,6 +63,42 @@ const escapeLuceneQuotedTerm = (s: string) => {
const escapeLuceneFieldName = (key: string): string =>
key.replace(/\\/g, '\\\\').replace(/:/g, '\\:');

/**
* Build SQL `IN` / `NOT IN` / `BETWEEN` filters for a single field whose key is
* a raw ClickHouse expression. The emitted shape matches what the app's
* `parseQuery`/`extractInClauses` reads back, so these filters round-trip.
*/
const filterStateValuesToSql = (
column: string,
values: FilterState[string],
): Filter[] => {
const conditions: Filter[] = [];

if (values.included.size > 0) {
conditions.push({
type: 'sql' as const,
condition: `${column} IN (${Array.from(values.included)
.map(toSqlLiteral)
.join(', ')})`,
});
}
if (values.excluded.size > 0) {
conditions.push({
type: 'sql' as const,
condition: `${column} NOT IN (${Array.from(values.excluded)
.map(toSqlLiteral)
.join(', ')})`,
});
}
if (values.range != null) {
conditions.push({
type: 'sql' as const,
condition: `${column} BETWEEN ${values.range.min} AND ${values.range.max}`,
});
}
return conditions;
};

export const filtersToQuery = (filters: FilterState): Filter[] => {
return Object.entries(filters)
.filter(
Expand All @@ -46,8 +108,18 @@ export const filtersToQuery = (filters: FilterState): Filter[] => {
values.range != null,
)
.flatMap(([key, values]) => {
const normalizedField = parseKeyPath(key).join('.');

// Raw SQL expressions (e.g. JSONExtractString(...) from nested-JSON "Add
// to Filters") can't be represented as a Lucene field name without
// breaking lucene.parse, so emit SQL filters for them instead. The
// original `key` already holds the valid ClickHouse expression.
if (isSqlExpressionField(normalizedField)) {
return filterStateValuesToSql(key, values);
}

const conditions: Filter[] = [];
const luceneField = escapeLuceneFieldName(parseKeyPath(key).join('.'));
const luceneField = escapeLuceneFieldName(normalizedField);

if (values.included.size > 0) {
const terms = Array.from(values.included).map(
Expand Down
Loading