diff --git a/README.md b/README.md index ea3eed2..5afaac7 100644 --- a/README.md +++ b/README.md @@ -10,12 +10,14 @@ filter('role = "magical_girl" AND power >= 3', { role: "magical_girl", power: 5 onesixty is a zero-dependency TypeScript implementation of [AIP-160](https://google.aip.dev/160), the filtering language used across Google APIs. Parse filter expressions into a type-safe AST, evaluate them against plain objects, or compile them into your own backend (SQL, Elasticsearch, etc.). -- **Zero dependencies,** just TypeScript, nothing else +- **Zero dependencies,** 12kB gzipped. Just TypeScript, nothing else +- **Fast.** 1M+ filter evaluations per second, 10-15x faster than alternatives - **Full AIP-160 grammar:** comparisons, `AND`/`OR`/`NOT`, field traversal, `:` (has), functions, wildcards, parentheses - **Compile once, run many:** parse a filter once, evaluate it against thousands of objects - **Async functions:** custom functions can return promises - **Serializable:** compiled filters survive `JSON.stringify` for storage and transfer - **Structured errors:** every error is a typed class with machine-readable data, not just a message string +- **Tolerant mode:** collect all errors and get a best-effort CST for editor integrations and as-you-type validation - **Bring your own backend:** use the AST directly to generate SQL, Elasticsearch queries, or anything else ## Install @@ -27,6 +29,8 @@ yarn add onesixty bun add onesixty ``` +Try it interactively: `pnpm playground` + ## Usage ### One-shot filtering @@ -71,6 +75,7 @@ filter("distance(lat, lng) < 100", coords, { Async functions work too. Use `filterAsync` or `compile().evaluateAsync()`: ```ts +// Check if the current request is authorized for a resource const f = compile("authorized(resource)"); await f.evaluateAsync(request, { @@ -96,35 +101,61 @@ const f = CompiledFilter.fromSerialized(JSON.parse(json)); f.evaluate({ status: "contracted" }); // true ``` -### Custom evaluation (SQL, Elasticsearch, etc.) 
+### Pipeline API -You don't need to use the built-in evaluator. Parse the filter into an AST and walk it yourself: +For advanced use cases, the full parse-transform-evaluate pipeline is exposed as separate functions: ```ts -import { parse, transform, type ASTNode } from "onesixty"; +import { parse, transform, evaluate, type ASTNode } from "onesixty"; + +// Parse and transform in two steps: string -> CST -> AST +const ast = transform(parse('status = "contracted" AND grief <= 50')); + +// Evaluate directly against an object +evaluate(ast, { status: "contracted", grief: 30 }); // true +``` +You can also skip the built-in evaluator entirely and handle evaluation through your own means with the AST. + +```ts +// Walk the AST to build a WHERE clause +const params: string[] = []; function toSQL(node: ASTNode | null): string { - if (node === null) return "1=1"; - switch (node.type) { - case "and": - return node.children.map(toSQL).join(" AND "); - case "or": - return `(${node.children.map(toSQL).join(" OR ")})`; - case "not": - return `NOT (${toSQL(node.child)})`; - case "restriction": - return node.comparable.type === "member" - ? `${node.comparable.path.join(".")} ${node.comparator} ?` - : `${node.comparable.qualifiedName}() ${node.comparator} ?`; - default: - return "1=1"; + if (!node) return "1=1"; + if (node.type === "and") return node.children.map(toSQL).join(" AND "); + if (node.type === "not") return `NOT (${toSQL(node.child)})`; + if (node.type === "restriction" && node.comparable.type === "member") { + params.push(node.arg?.type === "value" ? node.arg.value : ""); + return `${node.comparable.path.join(".")} ${node.comparator} $${params.length}`; } + return "1=1"; } -toSQL(transform(parse('status = "contracted" AND grief <= 50'))); -// "status = ? AND grief <= ?" +toSQL(ast); // "status = $1 AND grief <= $2", params: ["contracted", "50"] ``` +### Tolerant parsing + +By default, `parse` throws on the first syntax error. 
Pass `tolerant: true` to collect all errors and get a best-effort CST instead. This is useful for editor integrations, as-you-type validation, and anywhere you want diagnostics without aborting. + +```ts +import { parse, toCleanTree, transform } from "onesixty"; + +const result = parse("status = AND power >= 3", { tolerant: true }); + +result.ok; // false - there are errors +result.errors; // [ExpectedValueError: Expected a value after '=', found 'AND'] +result.cst; // complete CST - 'status = ' AND 'power >= 3' + +// If the tree is clean, narrow it to a strict FilterNode for evaluation +const clean = toCleanTree(result); +if (clean) { + const ast = transform(clean); +} +``` + +The tolerant parser never throws. It uses insertion-based recovery to fill in missing values with zero-width placeholders, so subsequent valid expressions are still parsed. `toCleanTree` returns `null` if any errors were found, or a strict `FilterNode` you can pass to `transform`. + ### Error handling All errors are typed classes with structured data. Catch them broadly or narrowly: @@ -147,6 +178,50 @@ Every error subclass exposes the relevant tokens, positions, and context as type --- +## Benchmarks + +Measured on a MacBook M4 Pro with Node.js 24, using `vitest bench`. All numbers are operations per second (higher is better). The comparison target is [`@tcn/aip-160`](https://www.npmjs.com/package/@tcn/aip-160), the other AIP-160 implementation on npm. 
+ +### End-to-end: parse + evaluate + +| Expression | onesixty | @tcn/aip-160 | Ratio | +| --------------------------------------- | --------: | -----------: | ----: | +| `a = 1` | 2,285,479 | 149,225 | 15x | +| 4 restrictions with AND, has, traversal | 572,782 | 43,775 | 13x | +| OR + nested path + NOT + wildcard | 352,348 | 35,659 | 10x | +| Global text search on nested object | 1,650,356 | 335,975 | 5x | + +### Compile once, evaluate many (x100 loop) + +| Approach | onesixty | @tcn/aip-160 | Ratio | +| -------------------------- | -------: | -----------: | ----: | +| `compile()` + `evaluate()` | 144,462 | n/a | n/a | +| `filter()` (re-parse each) | 10,464 | 670 | 16x | + +### Stress tests + +| Scenario | onesixty | @tcn/aip-160 | Ratio | +| -------------------------------------- | --------: | -----------: | ----: | +| 50 chained AND restrictions | 42,447 | 2,461 | 17x | +| 32 levels of parentheses | 131,251 | 10,538 | 12x | +| Last key in 1,000-key object | 1,962,401 | 113,214 | 17x | +| Global search miss on 1,000-key object | 93,253 | 118,047 | 0.8x | +| Array fanout: 1,000 elements | 20,984 | n/a | n/a | + +### Pipeline stages (onesixty internals) + +| Stage | ops/sec | +| --------------------------- | ---------: | +| tokenize | 4,794,457 | +| parse | 1,212,612 | +| parse + transform | 1,172,134 | +| evaluate (pre-compiled AST) | 12,154,248 | +| filter (end-to-end) | 1,034,112 | + +Run the benchmarks yourself with `pnpm bench`. + +--- + ## Reference ### Filter syntax @@ -168,7 +243,7 @@ onesixty implements the full [AIP-160](https://google.aip.dev/160) grammar. 
See | Traversal | `user.soul_gem.city = "Mitakihara"` | Dot-separated field paths | | Functions | `cohort(request.user)` | Custom functions, qualified names (`math.abs()`) | | Parentheses | `(a OR b) AND c` | Grouping and precedence override | -| Wildcards | `name = "prod-*"` | Only in quoted strings with `=` | +| Wildcards | `name = "Mami-*"` | Only in quoted strings with `=` | **Precedence:** OR binds tighter than AND. `a AND b OR c` means `a AND (b OR c)`. @@ -201,10 +276,12 @@ The `comparable` field on restrictions is always `ASTMemberNode | ASTFunctionNod
Parse options -| Option | Type | Default | Description | -| ----------- | -------- | ------- | --------------------------------- | -| `maxDepth` | `number` | `64` | Maximum parenthesis nesting depth | -| `maxLength` | `number` | `8192` | Maximum input string length | +| Option | Type | Default | Description | +| ----------- | --------- | ------- | --------------------------------------------------------------------- | +| `maxDepth` | `number` | `64` | Maximum parenthesis nesting depth | +| `maxLength` | `number` | `8192` | Maximum input string length | +| `tolerant` | `boolean` | `false` | Collect errors instead of throwing; return type becomes `ParseResult` | +| `maxErrors` | `number` | `20` | Stop recovery after this many errors (only with `tolerant`) |
@@ -224,18 +301,6 @@ See the JSDoc on `EvaluateOptions` for full details on each option. -### Pipeline API - -For advanced use cases, the full parse-transform-evaluate pipeline is exposed as separate functions. See the JSDoc on `parse`, `transform`, and `evaluate` for details. - -```ts -import { parse, transform, evaluate } from "onesixty"; - -const cst = parse("grief <= 50"); // string -> CST -const ast = transform(cst); // CST -> AST -evaluate(ast, { grief: 30 }); // AST + object -> boolean -``` - ## License [Apache-2.0](./LICENSE) diff --git a/api/index.d.mts b/api/index.d.mts index 3345931..660c1d1 100644 --- a/api/index.d.mts +++ b/api/index.d.mts @@ -88,23 +88,11 @@ type ComparatorKind = TokenKind.Equals | TokenKind.NotEquals | TokenKind.LessTha */ declare function isComparatorKind(kind: TokenKind): kind is ComparatorKind; //#endregion -//#region src/lexer.d.ts +//#region src/errors.d.ts /** - * Tokenize an AIP-160 filter expression into a stream of tokens. - * - * @param input - The raw filter expression string. - * @returns An array of tokens, always terminated by an `EOF` token. - * - * @example - * ```ts - * tokenize('age >= 21 AND name = "Alice"'); - * // [Text("age"), GreaterEquals(">="), Text("21"), And("AND"), - * // Text("name"), Equals("="), String("Alice"), EOF] - * ``` + * Thrown when an internal assumption is violated. Indicates a bug in onesixty, + * not invalid user input. If you encounter this error, please report it. */ -declare function tokenize(input: string): Token[]; -//#endregion -//#region src/errors.d.ts declare class InvariantError extends Error { constructor(message: string); } @@ -304,41 +292,91 @@ declare class InvalidFieldTypeError extends FilterError { constructor(path: string, expected: "string" | "boolean" | "array" | "object" | "span" | "comparator"); } //#endregion +//#region src/lexer.d.ts +/** + * Result of tokenizing with `tolerant: true`. 
+ */ +interface TokenizeResult { + /** Token stream, always terminated by an `EOF` token. */ + tokens: Token[]; + /** Lexer errors collected during tokenization. Empty on success. */ + errors: LexerError[]; +} +/** + * Tokenize an AIP-160 filter expression into a stream of tokens. + * + * @param input - The raw filter expression string. + * @returns An array of tokens, always terminated by an `EOF` token. + * + * @example + * ```ts + * tokenize('age >= 21 AND name = "Alice"'); + * // [Text("age"), GreaterEquals(">="), Text("21"), And("AND"), + * // Text("name"), Equals("="), String("Alice"), EOF] + * ``` + */ +declare function tokenize(input: string): Token[]; +/** + * Tokenize in tolerant mode: collect errors instead of throwing. + * + * @param input - The raw filter expression string. + * @param options - Must include `tolerant: true`. + * @returns A {@link TokenizeResult} with the token stream and any errors. + */ +declare function tokenize(input: string, options: { + tolerant: true; +}): TokenizeResult; +//#endregion //#region src/parser.d.ts interface NodeBase { span: Span; } +/** + * Resolves to {@link ErrorNode} when `T` is `true`, or `never` when `false`. + * + * Used throughout the CST interfaces so that strict-mode types + * (`T = false`, the default) never include `ErrorNode` in their unions, + * while tolerant-mode types (`T = true`) do. + */ +type MaybeError<T extends boolean> = T extends true ? ErrorNode : never; +/** @internal Base for {@link FilterNode}; use `FilterNode` in consumer code. */ +interface FilterNodeBase<T extends boolean> extends NodeBase { + type: "Filter"; + /** The top-level expression, or `null` for empty filters. */ + expression: ExpressionNode<T> | MaybeError<T> | null; +} /** * Root node of the concrete syntax tree. * * Corresponds to the EBNF production `filter = [expression]`. * An empty or whitespace-only input produces a `FilterNode` with * `expression: null`. 
+ * + * In tolerant mode (`FilterNode<true>`), an additional `trailing` field + * holds an {@link ErrorNode} wrapping any tokens after the expression. */ -interface FilterNode extends NodeBase { - type: "Filter"; - /** The top-level expression, or `null` for empty filters. */ - expression: ExpressionNode | null; -} +type FilterNode<T extends boolean = false> = FilterNodeBase<T> & (T extends true ? { + trailing: ErrorNode | null; +} : unknown); /** * A sequence of AND-joined sequences. * * Corresponds to `expression = sequence {WS AND WS sequence}`. */ -interface ExpressionNode extends NodeBase { +interface ExpressionNode<T extends boolean = false> extends NodeBase { type: "Expression"; /** One or more sequences joined by explicit `AND`. */ - sequences: SequenceNode[]; + sequences: SequenceNode<T>[]; } /** * A run of implicitly AND-joined factors (whitespace-separated). * * Corresponds to `sequence = factor {WS factor}`. */ -interface SequenceNode extends NodeBase { +interface SequenceNode<T extends boolean = false> extends NodeBase { type: "Sequence"; /** One or more factors separated by whitespace (implicit AND). */ - factors: FactorNode[]; + factors: FactorNode<T>[]; } /** * A group of OR-joined terms. @@ -346,25 +384,25 @@ interface SequenceNode extends NodeBase { * Corresponds to `factor = term {WS OR WS term}`. * OR binds tighter than AND in AIP-160. */ -interface FactorNode extends NodeBase { +interface FactorNode<T extends boolean = false> extends NodeBase { type: "Factor"; /** One or more terms joined by explicit `OR`. */ - terms: TermNode[]; + terms: TermNode<T>[]; } /** * An optionally negated simple expression. * * Corresponds to `term = [NOT WS | "-"] simple`. */ -interface TermNode extends NodeBase { +interface TermNode<T extends boolean = false> extends NodeBase { type: "Term"; /** Whether the term is preceded by `NOT` or `-`. */ negated: boolean; /** The inner expression (restriction or composite). */ - simple: SimpleNode; + simple: SimpleNode<T> | MaybeError<T>; } /** A term's inner expression: a {@link RestrictionNode} or a {@link CompositeNode}. 
*/ -type SimpleNode = RestrictionNode | CompositeNode; +type SimpleNode = RestrictionNode | CompositeNode; /** * A field restriction or bare value (global search). * @@ -372,17 +410,24 @@ type SimpleNode = RestrictionNode | CompositeNode; * When `comparator` and `arg` are `null`, this is a bare value * used for global text search. */ -interface RestrictionNode extends NodeBase { +interface RestrictionNode extends NodeBase { type: "Restriction"; /** The left-hand side: a field path or function call. */ - comparable: ComparableNode; + comparable: ComparableNode; /** The comparison operator, or `null` for bare values (global restrictions). */ comparator: ComparatorKind | null; - /** The right-hand side value, or `null` for bare values. */ - arg: ArgNode | null; + /** + * The right-hand side value, or `null` for bare values. + * + * In tolerant mode, insertion-based recovery may synthesize a zero-width + * placeholder {@link MemberNode} (empty `value.token.value`, `span.start === span.end`) + * when the value is missing (e.g. `a = AND ...`). The corresponding error is + * recorded in {@link ParseResult.errors}. + */ + arg: ArgNode | MaybeError | null; } /** The left-hand side of a restriction: a {@link MemberNode} or {@link FunctionCallNode}. */ -type ComparableNode = MemberNode | FunctionCallNode; +type ComparableNode = MemberNode | FunctionCallNode; /** * A field path (e.g. `a.b.c`). * @@ -401,24 +446,31 @@ interface MemberNode extends NodeBase { * Corresponds to `function = name "(" [argList] ")"`. * Only produced when `(` is immediately adjacent to the name (no whitespace). */ -interface FunctionCallNode extends NodeBase { +interface FunctionCallNode extends NodeBase { type: "FunctionCall"; /** The function name segments (e.g. `["math", "abs"]` for `math.abs()`). */ name: ValueNode[]; - /** The function arguments (may be empty). */ - args: ArgNode[]; + /** + * The function arguments (may be empty). 
+ * + * In tolerant mode, insertion-based recovery may synthesize a zero-width + * placeholder {@link MemberNode} (empty `value.token.value`, `span.start === span.end`) + * when an argument is missing (e.g. `fn(a,)`). The corresponding error is + * recorded in {@link ParseResult.errors}. + */ + args: (ArgNode | MaybeError)[]; } /** A function or comparator argument: a {@link ComparableNode} or {@link CompositeNode}. */ -type ArgNode = ComparableNode | CompositeNode; +type ArgNode = ComparableNode | CompositeNode; /** * A parenthesized sub-expression (e.g. `(a OR b)`). * * Corresponds to `composite = "(" expression ")"`. */ -interface CompositeNode extends NodeBase { +interface CompositeNode extends NodeBase { type: "Composite"; /** The enclosed expression. */ - expression: ExpressionNode; + expression: ExpressionNode | MaybeError; } /** * A leaf text or string literal value. @@ -430,12 +482,29 @@ interface ValueNode extends NodeBase { /** The underlying token. */ token: Token; } +/** + * A placeholder node representing invalid syntax that the parser + * recovered from. + * + * Only present in the CST when parsing with `tolerant: true`. + * Contains the original error and any tokens that were skipped + * during recovery. + */ +interface ErrorNode extends NodeBase { + type: "Error"; + /** The error that was recovered from. */ + error: ParserError | LexerError; + /** Tokens that were skipped during recovery (may be empty). */ + skipped: Token[]; + /** Position where the parser expected something. For editor diagnostics. */ + expectedAt: Span; +} /** * Discriminated union of all concrete syntax tree node types. * * Use the `type` field to narrow to a specific node interface. 
*/ -type CSTNode = FilterNode | ExpressionNode | SequenceNode | FactorNode | TermNode | RestrictionNode | CompositeNode | MemberNode | FunctionCallNode | ValueNode; +type CSTNode = FilterNode | ExpressionNode | SequenceNode | FactorNode | TermNode | RestrictionNode | CompositeNode | MemberNode | FunctionCallNode | ValueNode | MaybeError; /** * Options for the {@link parse} function. * @@ -453,6 +522,35 @@ interface ParseOptions { * @default 8192 */ maxLength?: number; + /** + * When `true`, collect errors and return a best-effort CST instead of + * throwing on the first error. The return type changes to {@link ParseResult}. + * @default false + */ + tolerant?: boolean; + /** + * Maximum number of errors to collect before stopping recovery. + * Only meaningful when `tolerant` is `true`. Lexer errors count toward + * this budget. The actual count may slightly exceed this limit when + * errors occur during stack unwinding. + * @default 20 + */ + maxErrors?: number; +} +/** + * Result of parsing with `tolerant: true`. + * + * The CST uses `FilterNode` so that {@link ErrorNode} appears in the + * type unions. Use {@link toCleanTree} to narrow to `FilterNode` (strict) + * before passing to {@link transform}. + */ +interface ParseResult { + /** The CST root. Always returned, even when errors were found. */ + cst: FilterNode; + /** All errors collected during lexing and parsing. Empty on success. */ + errors: (LexerError | ParserError)[]; + /** `true` when no errors were found during lexing or parsing. */ + ok: boolean; } /** * Parse an AIP-160 filter expression into a concrete syntax tree (CST). @@ -471,7 +569,56 @@ interface ParseOptions { * const cst = parse('status = "active" AND age >= 21'); * ``` */ -declare function parse(input: string, options?: ParseOptions): FilterNode; +declare function parse(input: string, options?: ParseOptions & { + tolerant?: false; +}): FilterNode; +/** + * Parse in tolerant mode: collect errors instead of throwing. 
+ * + * Returns a {@link ParseResult} containing the best-effort CST and all + * errors found. The CST may contain {@link ErrorNode}s as placeholders + * for invalid syntax. + * + * @param input - The raw filter expression string. + * @param options - Must include `tolerant: true`. + * @returns A {@link ParseResult} with the CST and any errors. + * + * @example + * ```ts + * const { cst, errors, ok } = parse('a AND AND b', { tolerant: true }); + * if (!ok) { + * for (const error of errors) console.log(error.description); + * } + * ``` + */ +declare function parse(input: string, options: ParseOptions & { + tolerant: true; +}): ParseResult; +/** + * Walk a CST and return `true` if any node is an {@link ErrorNode}. + * + * Useful for checking whether a tolerant parse produced a clean tree + * before passing it to {@link transform}. + */ +declare function hasErrorNodes(node: CSTNode): boolean; +/** + * Narrow a tolerant parse result to a strict `FilterNode` if it contains no errors. + * + * Returns the narrowed `FilterNode` when the result is error-free, + * or `null` if it has any errors (including insertion-recovery placeholders + * that don't appear as {@link ErrorNode}s in the tree). The returned value + * is safe to pass to {@link transform}. 
+ * + * @example + * ```ts + * const result = parse(input, { tolerant: true }); + * const clean = toCleanTree(result); + * if (clean) { + * const ast = transform(clean); + * } + * ``` + */ +declare function toCleanTree(result: ParseResult): FilterNode | null; //#endregion //#region src/transform.d.ts /** @@ -907,4 +1054,4 @@ declare class CompiledFilter { static fromSerialized(data: SerializedFilter, options?: EvaluateOptions): CompiledFilter; } //#endregion -export { type ASTFunctionNode, type ASTMemberNode, type ASTNode, type ASTRestrictionNode, type ASTValueNode, type AndNode, type ArgNode, type AsyncEvaluateOptions, type AsyncFilterOptions, type CSTNode, type ComparableNode, type Comparator, type ComparatorKind, type ComparisonOperator, CompiledFilter, type CompositeNode, DepthLimitError, EmptyExpressionError, EvaluateError, type EvaluateOptions, ExpectedExpressionError, ExpectedIdentifierError, ExpectedValueError, type ExpressionNode, type FactorNode, FilterError, type FilterNode, type FilterOptions, type FunctionCallNode, type GlobalNode, InputLengthError, InvalidFieldTypeError, InvalidFunctionNameError, InvalidNegationError, InvariantError, LexerError, type MemberNode, type NotNode, type OrNode, type ParseOptions, ParserError, type RestrictionNode, type SequenceNode, type SerializedFilter, type SimpleNode, type Span, type TermNode, type Token, TokenKind, UnclosedDelimiterError, UnexpectedCharacterError, UnexpectedTokenError, UnknownFunctionError, UnknownNodeTypeError, UnsupportedVersionError, UnterminatedStringError, type ValueNode, compile, evaluate, evaluateAsync, filter, filterAsync, isComparatorKind, parse, tokenize, transform }; \ No newline at end of file +export { type ASTFunctionNode, type ASTMemberNode, type ASTNode, type ASTRestrictionNode, type ASTValueNode, type AndNode, type ArgNode, type AsyncEvaluateOptions, type AsyncFilterOptions, type CSTNode, type ComparableNode, type Comparator, type ComparatorKind, type ComparisonOperator, 
CompiledFilter, type CompositeNode, DepthLimitError, EmptyExpressionError, type ErrorNode, EvaluateError, type EvaluateOptions, ExpectedExpressionError, ExpectedIdentifierError, ExpectedValueError, type ExpressionNode, type FactorNode, FilterError, type FilterNode, type FilterOptions, type FunctionCallNode, type GlobalNode, InputLengthError, InvalidFieldTypeError, InvalidFunctionNameError, InvalidNegationError, InvariantError, LexerError, type MaybeError, type MemberNode, type NotNode, type OrNode, type ParseOptions, type ParseResult, ParserError, type RestrictionNode, type SequenceNode, type SerializedFilter, type SimpleNode, type Span, type TermNode, type Token, TokenKind, type TokenizeResult, UnclosedDelimiterError, UnexpectedCharacterError, UnexpectedTokenError, UnknownFunctionError, UnknownNodeTypeError, UnsupportedVersionError, UnterminatedStringError, type ValueNode, compile, evaluate, evaluateAsync, filter, filterAsync, hasErrorNodes, isComparatorKind, parse, toCleanTree, tokenize, transform }; \ No newline at end of file diff --git a/bench/tcn-aip160.bench.ts b/bench/tcn-aip160.bench.ts new file mode 100644 index 0000000..60069a1 --- /dev/null +++ b/bench/tcn-aip160.bench.ts @@ -0,0 +1,110 @@ +import { bench, describe, beforeAll } from "vitest"; +import { AipFilter, parseFilterString } from "@tcn/aip-160"; + +// @tcn/aip-160 is fully async due to lazy grammar compilation. +// We pre-warm the instance so benchmarks measure steady-state performance, +// not one-time grammar loading. 
+const aip = new AipFilter(); +beforeAll(async () => { + await aip.filter("a = 1", [{ a: 1 }]); +}); + +const EXPR = 'power >= 5 AND status = "contracted"'; +const TARGET = [{ power: 7, status: "contracted", name: "Madoka" }]; + +describe("pipeline stages", () => { + bench("parseFilterString", async () => { + await parseFilterString(EXPR); + }); + + bench("filter (end-to-end, single item)", async () => { + await aip.filter(EXPR, TARGET); + }); +}); + +describe("compile-once vs re-parse", () => { + // @tcn/aip-160 has no compile step - every call re-parses. + bench("filter() x100", async () => { + for (let i = 0; i < 100; i++) { + await aip.filter(EXPR, TARGET); + } + }); +}); + +const SIMPLE_EXPR = "a = 1"; +const SIMPLE_TARGET = [{ a: 1 }]; + +const MEDIUM_EXPR = "a = 1 AND b = 2 AND d > 10"; +const MEDIUM_TARGET = [{ a: 1, b: 2, c: "contracted", d: 20 }]; + +// @tcn/aip-160 doesn't support the full onesixty feature set (no `:` has +// operator, no `NOT wildcard`), so we use the closest equivalent expressions. 
+const COMPLEX_EXPR = "(a = 1 OR b = 2) AND d > 10"; +const COMPLEX_TARGET = [{ a: 1, b: 3, c: "yes", d: 15, g: "kyubey" }]; + +const NESTED_TARGET = [ + { + a: { b: { c: { d: { e: { f: { name: "madoka" } } } } } }, + x: { y: "other" }, + }, +]; + +describe("expression complexity", () => { + bench("simple: a = 1", async () => { + await aip.filter(SIMPLE_EXPR, SIMPLE_TARGET); + }); + + bench("medium: 3 restrictions", async () => { + await aip.filter(MEDIUM_EXPR, MEDIUM_TARGET); + }); + + bench("complex: OR + nested path", async () => { + await aip.filter(COMPLEX_EXPR, COMPLEX_TARGET); + }); + + bench("global search: bare value on nested object", async () => { + await aip.filter('"madoka"', NESTED_TARGET); + }); +}); + +// Large expression: 50 chained AND restrictions +const LARGE_EXPR = Array.from({ length: 50 }, (_, i) => `f${i} = ${i}`).join(" AND "); +const LARGE_TARGET = [Object.fromEntries(Array.from({ length: 50 }, (_, i) => [`f${i}`, i]))]; + +// Deep nesting: 32 levels of parentheses +const DEEP_EXPR = "(".repeat(32) + "a = 1" + ")".repeat(32); + +// Large target: 1000 keys, filter hits the last one +const LARGE_OBJ_TARGET = [ + Object.fromEntries(Array.from({ length: 1000 }, (_, i) => [`key${i}`, i])), +]; + +describe("stress: large expressions", () => { + bench("parseFilterString (50 restrictions)", async () => { + await parseFilterString(LARGE_EXPR); + }); + + bench("filter (50 restrictions)", async () => { + await aip.filter(LARGE_EXPR, LARGE_TARGET); + }); +}); + +describe("stress: deep nesting", () => { + bench("parseFilterString (32 levels)", async () => { + await parseFilterString(DEEP_EXPR); + }); + + bench("filter (32 levels)", async () => { + await aip.filter(DEEP_EXPR, [{ a: 1 }]); + }); +}); + +describe("stress: large targets", () => { + bench("filter last key in 1000-key object", async () => { + await aip.filter("key999 = 999", LARGE_OBJ_TARGET); + }); + + bench("global search miss on 1000-key object", async () => { + await 
aip.filter('"nonexistent"', LARGE_OBJ_TARGET); + }); +}); diff --git a/package.json b/package.json index fe9c88d..fd9cb19 100644 --- a/package.json +++ b/package.json @@ -52,6 +52,7 @@ "prepublishOnly": "pnpm run build" }, "devDependencies": { + "@tcn/aip-160": "^1.2.5", "@types/node": "^25.5.0", "@typescript/native-preview": "7.0.0-dev.20260315.1", "bumpp": "^11.0.0", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 7a11825..fcbcc3e 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -8,6 +8,9 @@ importers: .: devDependencies: + '@tcn/aip-160': + specifier: ^1.2.5 + version: 1.2.5 '@types/node': specifier: ^25.5.0 version: 25.5.0 @@ -628,6 +631,9 @@ packages: '@standard-schema/spec@1.1.0': resolution: {integrity: sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w==} + '@tcn/aip-160@1.2.5': + resolution: {integrity: sha512-QEMVY6fFaiwxnNXo2nM54x2p5OeTWT6I5BOt10/QZGRToghV1CMbb7MTdTlaIPF8Ol7gJEdcnQzOpOmGLXOQuA==} + '@tybys/wasm-util@0.10.1': resolution: {integrity: sha512-9tTaPJLSiejZKx+Bmog4uSubteqTvFrVrURwkmHixBo0G4seD0zUxp98E1DzUBJxLQ3NPwXrGKDiVjwx/DpPsg==} @@ -745,6 +751,9 @@ packages: resolution: {integrity: sha512-NUPRluOfOiTKBKvWPtSD4PhFvWCqOi0BGStNWs57X9js7XGTprSmFoz5F0tWhR4WPjNeR9jXqdC7/UpSJTnlRg==} engines: {node: '>=18'} + clarity-pattern-parser@11.5.4: + resolution: {integrity: sha512-utqGefhOq85w7ihCO7yZf72OnGsXnYTSA4dhNJ3k9IsWQqVThiEkW8tYbtjUEK92dXKzL4E81uJ+Wp8ge4nJow==} + convert-source-map@2.0.0: resolution: {integrity: sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==} @@ -1489,6 +1498,10 @@ snapshots: '@standard-schema/spec@1.1.0': {} + '@tcn/aip-160@1.2.5': + dependencies: + clarity-pattern-parser: 11.5.4 + '@tybys/wasm-util@0.10.1': dependencies: tslib: 2.8.1 @@ -1611,6 +1624,8 @@ snapshots: chai@6.2.2: {} + clarity-pattern-parser@11.5.4: {} + convert-source-map@2.0.0: {} defu@6.1.4: {} diff --git a/src/errors.ts b/src/errors.ts index 4c63415..0ca9dd6 100644 
--- a/src/errors.ts +++ b/src/errors.ts @@ -1,9 +1,13 @@ import type { Span, Token } from "./types"; import { TokenKind, isComparatorKind } from "./types"; +/** + * Thrown when an internal assumption is violated. Indicates a bug in onesixty, + * not invalid user input. If you encounter this error, please report it. + */ export class InvariantError extends Error { constructor(message: string) { - super(`onesixty internal error: ${message}. This is a bug. Please report it.`); + super(`onesixty internal error: ${message}. This is a bug, please report it.`); } } diff --git a/src/evaluate.ts b/src/evaluate.ts index 32be3a2..d4c4e6f 100644 --- a/src/evaluate.ts +++ b/src/evaluate.ts @@ -150,7 +150,6 @@ function evaluateRestriction( const fieldValue = resolve(target, fieldPath); - // Unset field -> skip (non-match), even for != if (fieldValue == null) return false; return compare(fieldValue, node.comparator, argValue, wildcardEnabled, options); diff --git a/src/index.ts b/src/index.ts index d60c608..edda08c 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,5 +1,5 @@ export { type Span, type Token, TokenKind, type ComparatorKind, isComparatorKind } from "./types"; -export { tokenize } from "./lexer"; +export { tokenize, type TokenizeResult } from "./lexer"; export { FilterError, LexerError, @@ -25,11 +25,16 @@ export { } from "./errors"; export { parse, + hasErrorNodes, + toCleanTree, + type MaybeError, type ParseOptions, + type ParseResult, type ArgNode, type CSTNode, type ComparableNode, type CompositeNode, + type ErrorNode, type ExpressionNode, type FactorNode, type FilterNode, diff --git a/src/lexer.ts b/src/lexer.ts index e6aa3fa..8794e13 100644 --- a/src/lexer.ts +++ b/src/lexer.ts @@ -1,5 +1,6 @@ import type { Token } from "./types"; import { TokenKind } from "./types"; +import type { LexerError } from "./errors"; import { UnexpectedCharacterError, UnterminatedStringError } from "./errors"; // Pre-computed: 1 = valid text character, 0 = delimiter/whitespace @@ 
-36,8 +37,12 @@ const CH_BACKSLASH = 0x5c; // \ class Lexer { private pos = 0; + public readonly errors: LexerError[] = []; - public constructor(private readonly input: string) {} + public constructor( + private readonly input: string, + private readonly tolerant: boolean, + ) {} public tokenize(): Token[] { const tokens: Token[] = []; @@ -75,11 +80,15 @@ class Lexer { tokens.push(this.emit(TokenKind.NotEquals, "!=")); continue; } - throw new UnexpectedCharacterError( + const bangError = new UnexpectedCharacterError( "!", { start: this.pos, end: this.pos + 1 }, this.input, ); + if (!this.tolerant) throw bangError; + this.errors.push(bangError); + this.pos++; + continue; } case CH_EQ: tokens.push(this.emit(TokenKind.Equals, "=")); @@ -137,7 +146,12 @@ class Lexer { } if (scanPos >= len) { - throw new UnterminatedStringError(quote, { start, end: len }, this.input); + const strError = new UnterminatedStringError(quote, { start, end: len }, this.input); + if (!this.tolerant) throw strError; + this.errors.push(strError); + const value = this.input.slice(this.pos, len); + this.pos = len; + return { kind: TokenKind.String, value, start, end: len }; } const value = this.input.slice(this.pos, scanPos); this.pos = scanPos + 1; @@ -174,7 +188,10 @@ class Lexer { this.pos++; } if (this.pos >= len) { - throw new UnterminatedStringError(quote, { start, end: len }, this.input); + const escError = new UnterminatedStringError(quote, { start, end: len }, this.input); + if (!this.tolerant) throw escError; + this.errors.push(escError); + return { kind: TokenKind.String, value, start, end: len }; } this.pos++; return { kind: TokenKind.String, value, start, end: this.pos }; @@ -199,6 +216,16 @@ class Lexer { } } +/** + * Result of tokenizing with `tolerant: true`. + */ +export interface TokenizeResult { + /** Token stream, always terminated by an `EOF` token. */ + tokens: Token[]; + /** Lexer errors collected during tokenization. Empty on success. 
*/ + errors: LexerError[]; +} + /** * Tokenize an AIP-160 filter expression into a stream of tokens. * @@ -212,6 +239,20 @@ class Lexer { * // Text("name"), Equals("="), String("Alice"), EOF] * ``` */ -export function tokenize(input: string): Token[] { - return new Lexer(input).tokenize(); +export function tokenize(input: string): Token[]; +/** + * Tokenize in tolerant mode: collect errors instead of throwing. + * + * @param input - The raw filter expression string. + * @param options - Must include `tolerant: true`. + * @returns A {@link TokenizeResult} with the token stream and any errors. + */ +export function tokenize(input: string, options: { tolerant: true }): TokenizeResult; +export function tokenize(input: string, options?: { tolerant: boolean }): Token[] | TokenizeResult { + const lexer = new Lexer(input, options?.tolerant === true); + const tokens = lexer.tokenize(); + if (options?.tolerant === true) { + return { tokens, errors: lexer.errors }; + } + return tokens; } diff --git a/src/parser.ts b/src/parser.ts index 8991ee2..95ad0f3 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -1,6 +1,7 @@ import type { ComparatorKind, Span, Token } from "./types"; import { TokenKind, isComparatorKind } from "./types"; import { tokenize } from "./lexer"; +import type { LexerError, ParserError } from "./errors"; import { DepthLimitError, EmptyExpressionError, @@ -19,28 +20,46 @@ interface NodeBase { span: Span; } +/** + * Resolves to {@link ErrorNode} when `T` is `true`, or `never` when `false`. + * + * Used throughout the CST interfaces so that strict-mode types + * (`T = false`, the default) never include `ErrorNode` in their unions, + * while tolerant-mode types (`T = true`) do. + */ +export type MaybeError = T extends true ? ErrorNode : never; + +/** @internal Base for {@link FilterNode}; use `FilterNode` in consumer code. */ +interface FilterNodeBase extends NodeBase { + type: "Filter"; + /** The top-level expression, or `null` for empty filters. 
*/ + expression: ExpressionNode | MaybeError | null; +} + /** * Root node of the concrete syntax tree. * * Corresponds to the EBNF production `filter = [expression]`. * An empty or whitespace-only input produces a `FilterNode` with * `expression: null`. + * + * In tolerant mode (`FilterNode`), an additional `trailing` field + * holds an {@link ErrorNode} wrapping any tokens after the expression. */ -export interface FilterNode extends NodeBase { - type: "Filter"; - /** The top-level expression, or `null` for empty filters. */ - expression: ExpressionNode | null; -} +export type FilterNode = FilterNodeBase & + (T extends true + ? { /** Tokens after the expression that could not be parsed. */ trailing: ErrorNode | null } + : unknown); /** * A sequence of AND-joined sequences. * * Corresponds to `expression = sequence {WS AND WS sequence}`. */ -export interface ExpressionNode extends NodeBase { +export interface ExpressionNode extends NodeBase { type: "Expression"; /** One or more sequences joined by explicit `AND`. */ - sequences: SequenceNode[]; + sequences: SequenceNode[]; } /** @@ -48,10 +67,10 @@ export interface ExpressionNode extends NodeBase { * * Corresponds to `sequence = factor {WS factor}`. */ -export interface SequenceNode extends NodeBase { +export interface SequenceNode extends NodeBase { type: "Sequence"; /** One or more factors separated by whitespace (implicit AND). */ - factors: FactorNode[]; + factors: FactorNode[]; } /** @@ -60,10 +79,10 @@ export interface SequenceNode extends NodeBase { * Corresponds to `factor = term {WS OR WS term}`. * OR binds tighter than AND in AIP-160. */ -export interface FactorNode extends NodeBase { +export interface FactorNode extends NodeBase { type: "Factor"; /** One or more terms joined by explicit `OR`. */ - terms: TermNode[]; + terms: TermNode[]; } /** @@ -71,16 +90,16 @@ export interface FactorNode extends NodeBase { * * Corresponds to `term = [NOT WS | "-"] simple`. 
*/ -export interface TermNode extends NodeBase { +export interface TermNode extends NodeBase { type: "Term"; /** Whether the term is preceded by `NOT` or `-`. */ negated: boolean; /** The inner expression (restriction or composite). */ - simple: SimpleNode; + simple: SimpleNode | MaybeError; } /** A term's inner expression: a {@link RestrictionNode} or a {@link CompositeNode}. */ -export type SimpleNode = RestrictionNode | CompositeNode; +export type SimpleNode = RestrictionNode | CompositeNode; /** * A field restriction or bare value (global search). @@ -89,18 +108,25 @@ export type SimpleNode = RestrictionNode | CompositeNode; * When `comparator` and `arg` are `null`, this is a bare value * used for global text search. */ -export interface RestrictionNode extends NodeBase { +export interface RestrictionNode extends NodeBase { type: "Restriction"; /** The left-hand side: a field path or function call. */ - comparable: ComparableNode; + comparable: ComparableNode; /** The comparison operator, or `null` for bare values (global restrictions). */ comparator: ComparatorKind | null; - /** The right-hand side value, or `null` for bare values. */ - arg: ArgNode | null; + /** + * The right-hand side value, or `null` for bare values. + * + * In tolerant mode, insertion-based recovery may synthesize a zero-width + * placeholder {@link MemberNode} (empty `value.token.value`, `span.start === span.end`) + * when the value is missing (e.g. `a = AND ...`). The corresponding error is + * recorded in {@link ParseResult.errors}. + */ + arg: ArgNode | MaybeError | null; } /** The left-hand side of a restriction: a {@link MemberNode} or {@link FunctionCallNode}. */ -export type ComparableNode = MemberNode | FunctionCallNode; +export type ComparableNode = MemberNode | FunctionCallNode; /** * A field path (e.g. `a.b.c`). @@ -121,26 +147,33 @@ export interface MemberNode extends NodeBase { * Corresponds to `function = name "(" [argList] ")"`. 
* Only produced when `(` is immediately adjacent to the name (no whitespace). */ -export interface FunctionCallNode extends NodeBase { +export interface FunctionCallNode extends NodeBase { type: "FunctionCall"; /** The function name segments (e.g. `["math", "abs"]` for `math.abs()`). */ name: ValueNode[]; - /** The function arguments (may be empty). */ - args: ArgNode[]; + /** + * The function arguments (may be empty). + * + * In tolerant mode, insertion-based recovery may synthesize a zero-width + * placeholder {@link MemberNode} (empty `value.token.value`, `span.start === span.end`) + * when an argument is missing (e.g. `fn(a,)`). The corresponding error is + * recorded in {@link ParseResult.errors}. + */ + args: (ArgNode | MaybeError)[]; } /** A function or comparator argument: a {@link ComparableNode} or {@link CompositeNode}. */ -export type ArgNode = ComparableNode | CompositeNode; +export type ArgNode = ComparableNode | CompositeNode; /** * A parenthesized sub-expression (e.g. `(a OR b)`). * * Corresponds to `composite = "(" expression ")"`. */ -export interface CompositeNode extends NodeBase { +export interface CompositeNode extends NodeBase { type: "Composite"; /** The enclosed expression. */ - expression: ExpressionNode; + expression: ExpressionNode | MaybeError; } /** @@ -154,30 +187,74 @@ export interface ValueNode extends NodeBase { token: Token; } +/** + * A placeholder node representing invalid syntax that the parser + * recovered from. + * + * Only present in the CST when parsing with `tolerant: true`. + * Contains the original error and any tokens that were skipped + * during recovery. + */ +export interface ErrorNode extends NodeBase { + type: "Error"; + /** The error that was recovered from. */ + error: ParserError | LexerError; + /** Tokens that were skipped during recovery (may be empty). */ + skipped: Token[]; + /** Position where the parser expected something. For editor diagnostics. 
*/ + expectedAt: Span; +} + /** * Discriminated union of all concrete syntax tree node types. * * Use the `type` field to narrow to a specific node interface. */ -export type CSTNode = - | FilterNode - | ExpressionNode - | SequenceNode - | FactorNode - | TermNode - | RestrictionNode - | CompositeNode +export type CSTNode = + | FilterNode + | ExpressionNode + | SequenceNode + | FactorNode + | TermNode + | RestrictionNode + | CompositeNode | MemberNode - | FunctionCallNode - | ValueNode; + | FunctionCallNode + | ValueNode + | MaybeError; export type { ComparatorKind }; const KEYWORDS = new Set([TokenKind.And, TokenKind.Or, TokenKind.Not]); +// Synchronization sets for error recovery. +// SYNC_EXPRESSION includes comparators so partial restrictions like `= 1` aren't +// entirely skipped when the comparable fails - this preserves more structure for +// syntax highlighting. +const SYNC_EXPRESSION = new Set([ + TokenKind.And, + TokenKind.Or, + TokenKind.RParen, + TokenKind.EOF, + TokenKind.Equals, + TokenKind.NotEquals, + TokenKind.LessThan, + TokenKind.LessEquals, + TokenKind.GreaterThan, + TokenKind.GreaterEquals, + TokenKind.Has, +]); +const SYNC_ARG = new Set([ + TokenKind.Comma, + TokenKind.RParen, + TokenKind.And, + TokenKind.Or, + TokenKind.EOF, +]); + // Precedence (loosest -> tightest): // -// Expression = Sequence { AND Sequence } <- explicit AND (loosest) +// Expression = Sequence { AND Sequence } <- explicit AND (loosest) // Sequence = Factor { Factor } <- implicit AND (juxtaposition) // Factor = Term { OR Term } <- OR // Term = [NOT | -] Simple <- negation (tightest) @@ -192,35 +269,125 @@ const KEYWORDS = new Set([TokenKind.And, TokenKind.Or, TokenKind.Not]); class Parser { private pos = 0; private depth = 0; + private hitErrorLimit = false; + public readonly errors: (ParserError | LexerError)[] = []; + public trailing: ErrorNode | null = null; public constructor( private readonly tokens: Token[], private readonly source: string, private readonly maxDepth: 
number, - ) {} + private readonly tolerant: boolean, + private readonly maxErrors: number, + ) { + const last = tokens[tokens.length - 1]; + invariant(last !== undefined && last.kind === TokenKind.EOF, "Token stream must end with EOF"); + } + + private shouldRecover(error: ParserError): true { + if (!this.tolerant) throw error; + this.errors.push(error); + if (this.errors.length >= this.maxErrors) { + this.hitErrorLimit = true; + } + return true; + } - public parse(): FilterNode { + private recover(error: ParserError, syncSet: Set): ErrorNode { + this.shouldRecover(error); + const start = this.peek(); + const skipped: Token[] = []; + while (!this.at(TokenKind.EOF) && !syncSet.has(this.peek().kind)) { + skipped.push(this.advance()); + } + const last = skipped[skipped.length - 1]; + const expectedAt: Span = { start: start.start, end: start.end }; + return { + type: "Error", + error, + skipped, + expectedAt, + span: { start: start.start, end: last ? last.end : start.start }, + }; + } + + private recoverEmpty(error: ParserError, at: Token): ErrorNode { + this.shouldRecover(error); + const expectedAt: Span = { start: at.start, end: at.start }; + return { + type: "Error", + error, + skipped: [], + expectedAt, + span: { start: at.start, end: at.start }, + }; + } + + private synthesizeMember(position: number): MemberNode { + const placeholder: ValueNode = { + type: "Value", + token: { kind: TokenKind.Text, value: "", start: position, end: position }, + span: { start: position, end: position }, + }; + return { type: "Member", value: placeholder, fields: [], span: placeholder.span }; + } + + public parse(): FilterNodeBase { if (this.at(TokenKind.EOF)) { const span = this.peek(); return { type: "Filter", expression: null, span: { start: span.start, end: span.end } }; } const expression = this.parseExpression(); - if (!this.at(TokenKind.EOF)) { - throw new UnexpectedTokenError(this.peek(), this.source); + if (!this.at(TokenKind.EOF) && !this.hitErrorLimit) { + const error = 
new UnexpectedTokenError(this.peek(), this.source); + this.shouldRecover(error); + const start = this.peek(); + const skipped: Token[] = []; + while (!this.at(TokenKind.EOF)) { + skipped.push(this.advance()); + } + const last = skipped[skipped.length - 1]; + this.trailing = { + type: "Error", + error, + skipped, + expectedAt: { start: start.start, end: start.end }, + span: { start: start.start, end: last ? last.end : start.start }, + }; } return { type: "Filter", expression, span: expression.span }; } // Expression: sequence {AND sequence} - private parseExpression(): ExpressionNode { + private parseExpression(): ExpressionNode { const first = this.parseSequence(); - let last = first; - const sequences: SequenceNode[] = [first]; + let last: SequenceNode = first; + const sequences: SequenceNode[] = [first]; - while (this.at(TokenKind.And)) { + while (this.at(TokenKind.And) && !this.hitErrorLimit) { const andToken = this.advance(); - this.expectExpressionAfter(andToken); + if (!this.canStartExpression()) { + const errorNode = this.recoverEmpty( + new ExpectedExpressionError(andToken, this.peek(), this.source), + this.peek(), + ); + const term: TermNode = { + type: "Term", + negated: false, + simple: errorNode, + span: errorNode.span, + }; + const factor: FactorNode = { type: "Factor", terms: [term], span: errorNode.span }; + const seq: SequenceNode = { + type: "Sequence", + factors: [factor], + span: errorNode.span, + }; + last = seq; + sequences.push(last); + continue; + } last = this.parseSequence(); sequences.push(last); } @@ -234,12 +401,12 @@ class Parser { // Sequence: factor {factor} - private parseSequence(): SequenceNode { + private parseSequence(): SequenceNode { const first = this.parseFactor(); - let last = first; - const factors: FactorNode[] = [first]; + let last: FactorNode = first; + const factors: FactorNode[] = [first]; - while (this.canStartFactor()) { + while (this.canStartFactor() && !this.hitErrorLimit) { last = this.parseFactor(); 
factors.push(last); } @@ -253,14 +420,28 @@ class Parser { // Factor: term {OR term} - private parseFactor(): FactorNode { + private parseFactor(): FactorNode { const first = this.parseTerm(); - let last = first; - const terms: TermNode[] = [first]; + let last: TermNode = first; + const terms: TermNode[] = [first]; - while (this.at(TokenKind.Or)) { + while (this.at(TokenKind.Or) && !this.hitErrorLimit) { const orToken = this.advance(); - this.expectExpressionAfter(orToken); + if (!this.canStartExpression()) { + const errorNode = this.recoverEmpty( + new ExpectedExpressionError(orToken, this.peek(), this.source), + this.peek(), + ); + const term: TermNode = { + type: "Term", + negated: false, + simple: errorNode, + span: errorNode.span, + }; + last = term; + terms.push(last); + continue; + } last = this.parseTerm(); terms.push(last); } @@ -274,12 +455,21 @@ class Parser { // Term: [NOT | -] simple - private parseTerm(): TermNode { + private parseTerm(): TermNode { const kind = this.peek().kind; if (kind === TokenKind.Not || kind === TokenKind.Minus) { const negToken = this.advance(); if (!this.canStartSimple()) { - throw new ExpectedExpressionError(negToken, this.peek(), this.source); + const errorNode = this.recoverEmpty( + new ExpectedExpressionError(negToken, this.peek(), this.source), + this.peek(), + ); + return { + type: "Term", + negated: true, + simple: errorNode, + span: { start: negToken.start, end: negToken.end }, + }; } const simple = this.parseSimple(); return { type: "Term", negated: true, simple, span: this.spanFromTo(negToken, simple.span) }; @@ -291,7 +481,7 @@ class Parser { // Simple: restriction | composite - private parseSimple(): SimpleNode { + private parseSimple(): SimpleNode | ErrorNode { if (this.at(TokenKind.LParen)) { return this.parseComposite(); } @@ -300,9 +490,9 @@ class Parser { // Composite: ( expression ) - private parseComposite(): CompositeNode { + private parseComposite(): CompositeNode | ErrorNode { if (this.depth + 1 > 
this.maxDepth) { - throw new DepthLimitError(this.maxDepth, this.peek(), this.source); + return this.recoverDepthLimit(new DepthLimitError(this.maxDepth, this.peek(), this.source)); } this.depth++; @@ -310,13 +500,35 @@ class Parser { if (this.at(TokenKind.RParen)) { this.depth--; - throw new EmptyExpressionError(this.peek(), this.source); + const error = new EmptyExpressionError(this.peek(), this.source); + this.shouldRecover(error); + const rparen = this.advance(); + const errorNode: ErrorNode = { + type: "Error", + error, + skipped: [], + expectedAt: { start: rparen.start, end: rparen.start }, + span: { start: rparen.start, end: rparen.start }, + }; + return { + type: "Composite", + expression: errorNode, + span: { start: lparen.start, end: rparen.end }, + }; } const expression = this.parseExpression(); if (!this.at(TokenKind.RParen)) { - throw new UnclosedDelimiterError("parenthesis", this.peek(), this.source, lparen.start); + this.shouldRecover( + new UnclosedDelimiterError("parenthesis", this.peek(), this.source, lparen.start), + ); + this.depth--; + return { + type: "Composite", + expression, + span: { start: lparen.start, end: expression.span.end }, + }; } const rparen = this.advance(); @@ -324,10 +536,33 @@ class Parser { return { type: "Composite", expression, span: { start: lparen.start, end: rparen.end } }; } + private recoverDepthLimit(error: DepthLimitError): ErrorNode { + this.shouldRecover(error); + const start = this.peek(); + const skipped: Token[] = []; + let nesting = 0; + while (!this.at(TokenKind.EOF)) { + if (this.peek().kind === TokenKind.RParen && nesting === 0) break; + const token = this.advance(); + skipped.push(token); + if (token.kind === TokenKind.LParen) nesting++; + if (token.kind === TokenKind.RParen) nesting--; + } + const last = skipped[skipped.length - 1]; + return { + type: "Error", + error, + skipped, + expectedAt: { start: start.start, end: start.end }, + span: { start: start.start, end: last ? 
last.end : start.start }, + }; + } + // Restriction: comparable [comparator arg] - private parseRestriction(): RestrictionNode { + private parseRestriction(): RestrictionNode | ErrorNode { const comparable = this.parseComparable(); + if (comparable.type === "Error") return comparable; const comparator = this.comparatorKind(); if (comparator != null) { @@ -347,9 +582,13 @@ class Parser { // Comparable: member | function - private parseComparable(): ComparableNode { + private parseComparable(): ComparableNode | ErrorNode { const head = this.parseValue(); + if (head.type === "Error") return head; + return this.parseComparableRest(head); + } + private parseComparableRest(head: ValueNode): ComparableNode { if (this.peek().kind !== TokenKind.Dot) { const next = this.peek(); if (next.kind === TokenKind.LParen && next.start === head.span.end) { @@ -359,19 +598,31 @@ class Parser { } const fields: ValueNode[] = []; - while (true) { + let lastErrorNode: ErrorNode | null = null; + while (!this.hitErrorLimit) { const dot = this.eat(TokenKind.Dot); if (!dot) break; - fields.push(this.parseField(dot)); + const field = this.parseField(dot); + if (field.type === "Error") { + lastErrorNode = field; + break; + } + fields.push(field); + } + + if (fields.length === 0) { + return { type: "Member", value: head, fields: [], span: head.span }; } const lastField = fields[fields.length - 1]; invariant(lastField !== undefined, "Expected at least one field after dot"); - const lparen = this.peek(); - if (lparen.kind === TokenKind.LParen) { - if (lparen.start === lastField.span.end) { - return this.parseFunctionCall(head, fields); + if (lastErrorNode === null) { + const lparen = this.peek(); + if (lparen.kind === TokenKind.LParen) { + if (lparen.start === lastField.span.end) { + return this.parseFunctionCall(head, fields); + } } } @@ -381,21 +632,30 @@ class Parser { // FunctionCall: name "(" [argList] ")" - private parseFunctionCall(head: ValueNode, nameParts: ValueNode[]): FunctionCallNode { 
+ private parseFunctionCall(head: ValueNode, nameParts: ValueNode[]): FunctionCallNode { // EBNF: function uses `name` not `field`: name : TEXT | keyword (no STRING) for (const part of [head, ...nameParts]) { if (part.token.kind === TokenKind.String) { - throw new InvalidFunctionNameError(part.token, this.source); + this.shouldRecover(new InvalidFunctionNameError(part.token, this.source)); + break; } } const lparen = this.advance(); - let args: ArgNode[] = []; + let args: (ArgNode | ErrorNode)[] = []; if (!this.at(TokenKind.RParen)) { args = this.parseArgList(lparen); } if (!this.at(TokenKind.RParen)) { - throw new UnclosedDelimiterError("functionCall", this.peek(), this.source); + this.shouldRecover(new UnclosedDelimiterError("functionCall", this.peek(), this.source)); + const lastArg = args[args.length - 1]; + const endPos = lastArg ? lastArg.span.end : lparen.end; + return { + type: "FunctionCall", + name: [head, ...nameParts], + args, + span: { start: head.span.start, end: endPos }, + }; } const rparen = this.advance(); @@ -409,7 +669,7 @@ class Parser { // Arg: comparable | composite - private parseArg(after: Token): ArgNode { + private parseArg(after: Token): ArgNode | ErrorNode { switch (this.peek().kind) { case TokenKind.LParen: return this.parseComposite(); @@ -418,17 +678,19 @@ class Parser { return this.parseComparable(); case TokenKind.Minus: return this.parseNegativeValue(); - default: - throw new ExpectedValueError(after, this.peek(), this.source); + default: { + this.shouldRecover(new ExpectedValueError(after, this.peek(), this.source)); + return this.synthesizeMember(after.end); + } } } // ArgList: arg {"," arg} - private parseArgList(lparen: Token): ArgNode[] { - const args: ArgNode[] = [this.parseArg(lparen)]; + private parseArgList(lparen: Token): (ArgNode | ErrorNode)[] { + const args: (ArgNode | ErrorNode)[] = [this.parseArg(lparen)]; - while (true) { + while (!this.hitErrorLimit) { const comma = this.eat(TokenKind.Comma); if (!comma) break; if 
( @@ -437,7 +699,9 @@ class Parser { !this.at(TokenKind.LParen) && !this.at(TokenKind.Minus) ) { - throw new ExpectedValueError(comma, this.peek(), this.source); + this.shouldRecover(new ExpectedValueError(comma, this.peek(), this.source)); + args.push(this.synthesizeMember(comma.end)); + continue; } args.push(this.parseArg(comma)); } @@ -447,14 +711,23 @@ class Parser { // NegativeValue: "-" comparable - private parseNegativeValue(): MemberNode { + private parseNegativeValue(): MemberNode | ErrorNode { const minusToken = this.advance(); if (!this.at(TokenKind.Text) && !this.at(TokenKind.String)) { - throw new ExpectedValueError(minusToken, this.peek(), this.source); + return this.recover(new ExpectedValueError(minusToken, this.peek(), this.source), SYNC_ARG); } const comparable = this.parseComparable(); + if (comparable.type === "Error") return comparable; if (comparable.type !== "Member") { - throw new InvalidNegationError("function", minusToken, this.source); + const error = new InvalidNegationError("function", minusToken, this.source); + this.shouldRecover(error); + return { + type: "Error", + error, + skipped: [minusToken], + expectedAt: { start: minusToken.start, end: minusToken.end }, + span: { start: minusToken.start, end: comparable.span.end }, + }; } // For dotted paths, only allow if the result forms a valid number (e.g. 
-3.14) if (comparable.fields.length > 0) { @@ -462,7 +735,7 @@ class Parser { "-" + [comparable.value.token.value, ...comparable.fields.map((f) => f.token.value)].join("."); if (Number.isNaN(Number(joined))) { - throw new InvalidNegationError("fieldPath", minusToken, this.source); + this.shouldRecover(new InvalidNegationError("fieldPath", minusToken, this.source)); } } const oldToken = comparable.value.token; @@ -487,24 +760,24 @@ class Parser { // Value: TEXT | STRING - private parseValue(): ValueNode { + private parseValue(): ValueNode | ErrorNode { if (this.at(TokenKind.Text) || this.at(TokenKind.String)) { const token = this.advance(); return { type: "Value", token, span: { start: token.start, end: token.end } }; } - throw new ExpectedIdentifierError(this.peek(), this.source); + return this.recover(new ExpectedIdentifierError(this.peek(), this.source), SYNC_EXPRESSION); } // Field: value | keyword (after a dot) - private parseField(dot: Token): ValueNode { + private parseField(dot: Token): ValueNode | ErrorNode { if (this.at(TokenKind.Text) || this.at(TokenKind.String) || KEYWORDS.has(this.peek().kind)) { const token = this.advance(); return { type: "Value", token, span: { start: token.start, end: token.end } }; } - throw new ExpectedValueError(dot, this.peek(), this.source); + return this.recover(new ExpectedValueError(dot, this.peek(), this.source), SYNC_EXPRESSION); } private peek(): Token { @@ -532,9 +805,8 @@ class Parser { return null; } - private expectExpressionAfter(operator: Token): void { - if (this.canStartSimple() || this.at(TokenKind.Not) || this.at(TokenKind.Minus)) return; - throw new ExpectedExpressionError(operator, this.peek(), this.source); + private canStartExpression(): boolean { + return this.canStartSimple() || this.at(TokenKind.Not) || this.at(TokenKind.Minus); } private canStartSimple(): boolean { @@ -581,6 +853,38 @@ export interface ParseOptions { * @default 8192 */ maxLength?: number; + + /** + * When `true`, collect errors and 
return a best-effort CST instead of + * throwing on the first error. The return type changes to {@link ParseResult}. + * @default false + */ + tolerant?: boolean; + + /** + * Maximum number of errors to collect before stopping recovery. + * Only meaningful when `tolerant` is `true`. Lexer errors count toward + * this budget. The actual count may slightly exceed this limit when + * errors occur during stack unwinding. + * @default 20 + */ + maxErrors?: number; +} + +/** + * Result of parsing with `tolerant: true`. + * + * The CST uses `FilterNode` so that {@link ErrorNode} appears in the + * type unions. Use {@link toCleanTree} to narrow to `FilterNode` (strict) + * before passing to {@link transform}. + */ +export interface ParseResult { + /** The CST root. Always returned, even when errors were found. */ + cst: FilterNode; + /** All errors collected during lexing and parsing. Empty on success. */ + errors: (LexerError | ParserError)[]; + /** `true` when no errors were found during lexing or parsing. */ + ok: boolean; } /** @@ -600,11 +904,133 @@ export interface ParseOptions { * const cst = parse('status = "active" AND age >= 21'); * ``` */ -export function parse(input: string, options?: ParseOptions): FilterNode { +export function parse(input: string, options?: ParseOptions & { tolerant?: false }): FilterNode; +/** + * Parse in tolerant mode: collect errors instead of throwing. + * + * Returns a {@link ParseResult} containing the best-effort CST and all + * errors found. The CST may contain {@link ErrorNode}s as placeholders + * for invalid syntax. + * + * @param input - The raw filter expression string. + * @param options - Must include `tolerant: true`. + * @returns A {@link ParseResult} with the CST and any errors. 
+ * + * @example + * ```ts + * const { cst, errors, ok } = parse('a AND AND b', { tolerant: true }); + * if (!ok) { + * for (const error of errors) console.log(error.description); + * } + * ``` + */ +export function parse(input: string, options: ParseOptions & { tolerant: true }): ParseResult; +export function parse(input: string, options?: ParseOptions): FilterNode | ParseResult { const maxLength = options?.maxLength ?? 8192; + const tolerant = options?.tolerant === true; + const maxErrors = options?.maxErrors ?? 20; + if (input.length > maxLength) { - throw new InputLengthError(maxLength, { start: maxLength, end: input.length }, input); + const error = new InputLengthError(maxLength, { start: maxLength, end: input.length }, input); + if (!tolerant) throw error; + return { + cst: { + type: "Filter" as const, + expression: null, + trailing: null, + span: { start: 0, end: input.length }, + }, + errors: [error], + ok: false, + }; + } + + let tokens: Token[]; + let lexerErrors: LexerError[] = []; + + if (tolerant) { + const result = tokenize(input, { tolerant: true }); + tokens = result.tokens; + lexerErrors = result.errors; + } else { + tokens = tokenize(input); + } + + const parser = new Parser(tokens, input, options?.maxDepth ?? 64, tolerant, maxErrors); + for (const e of lexerErrors) { + parser.errors.push(e); + } + + const base = parser.parse(); + + if (tolerant) { + const cst: FilterNode = { ...base, trailing: parser.trailing }; + return { + cst, + errors: parser.errors, + ok: parser.errors.length === 0, + }; + } + + // In strict mode the parser throws on any error, so no ErrorNodes were + // ever constructed. The cast is safe: FilterNodeBase with no ErrorNodes + // is structurally identical to FilterNode. + return base as unknown as FilterNode; +} + +/** + * Walk a CST and return `true` if any node is an {@link ErrorNode}. + * + * Useful for checking whether a tolerant parse produced a clean tree + * before passing it to {@link transform}. 
+ */ +export function hasErrorNodes(node: CSTNode): boolean { + if (node.type === "Error") return true; + switch (node.type) { + case "Filter": + return ( + (node.expression !== null && hasErrorNodes(node.expression)) || + (node.trailing !== undefined && node.trailing !== null) + ); + case "Expression": + return node.sequences.some(hasErrorNodes); + case "Sequence": + return node.factors.some(hasErrorNodes); + case "Factor": + return node.terms.some(hasErrorNodes); + case "Term": + return hasErrorNodes(node.simple); + case "Restriction": + return hasErrorNodes(node.comparable) || (node.arg !== null && hasErrorNodes(node.arg)); + case "Composite": + return hasErrorNodes(node.expression); + case "Member": + return false; + case "FunctionCall": + return node.args.some(hasErrorNodes); + case "Value": + return false; } - const tokens = tokenize(input); - return new Parser(tokens, input, options?.maxDepth ?? 64).parse(); +} + +/** + * Narrow a tolerant parse result to a strict `FilterNode` if it contains no errors. + * + * Returns the narrowed `FilterNode` when the result is error-free, + * or `null` if it has any errors (including insertion-recovery placeholders + * that don't appear as {@link ErrorNode}s in the tree). The returned value + * is safe to pass to {@link transform}. 
+ * + * @example + * ```ts + * const result = parse(input, { tolerant: true }); + * const clean = toCleanTree(result); + * if (clean) { + * const ast = transform(clean); + * } + * ``` + */ +export function toCleanTree(result: ParseResult): FilterNode | null { + if (!result.ok) return null; + return result.cst as unknown as FilterNode; } diff --git a/tests/__snapshots__/snapshots.test.ts.snap b/tests/__snapshots__/snapshots.test.ts.snap index 734629a..a7651ad 100644 --- a/tests/__snapshots__/snapshots.test.ts.snap +++ b/tests/__snapshots__/snapshots.test.ts.snap @@ -402,6 +402,2366 @@ exports[`snapshot: filter > with options 1`] = `true`; exports[`snapshot: filterAsync > async match 1`] = `true`; +exports[`snapshot: parse (tolerant) > AND AND (expected expression) 1`] = ` +{ + "cst": { + "expression": { + "sequences": [ + { + "factors": [ + { + "span": { + "end": 1, + "start": 0, + }, + "terms": [ + { + "negated": false, + "simple": { + "arg": null, + "comparable": { + "fields": [], + "span": { + "end": 1, + "start": 0, + }, + "type": "Member", + "value": { + "span": { + "end": 1, + "start": 0, + }, + "token": { + "end": 1, + "kind": 15, + "start": 0, + "value": "a", + }, + "type": "Value", + }, + }, + "comparator": null, + "span": { + "end": 1, + "start": 0, + }, + "type": "Restriction", + }, + "span": { + "end": 1, + "start": 0, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 1, + "start": 0, + }, + "type": "Sequence", + }, + { + "factors": [ + { + "span": { + "end": 6, + "start": 6, + }, + "terms": [ + { + "negated": false, + "simple": { + "error": [Error: Expected an expression after 'AND', found keyword 'AND'], + "expectedAt": { + "end": 6, + "start": 6, + }, + "skipped": [], + "span": { + "end": 6, + "start": 6, + }, + "type": "Error", + }, + "span": { + "end": 6, + "start": 6, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 6, + "start": 6, + }, + "type": "Sequence", + }, + { + "factors": 
[ + { + "span": { + "end": 11, + "start": 10, + }, + "terms": [ + { + "negated": false, + "simple": { + "arg": null, + "comparable": { + "fields": [], + "span": { + "end": 11, + "start": 10, + }, + "type": "Member", + "value": { + "span": { + "end": 11, + "start": 10, + }, + "token": { + "end": 11, + "kind": 15, + "start": 10, + "value": "b", + }, + "type": "Value", + }, + }, + "comparator": null, + "span": { + "end": 11, + "start": 10, + }, + "type": "Restriction", + }, + "span": { + "end": 11, + "start": 10, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 11, + "start": 10, + }, + "type": "Sequence", + }, + ], + "span": { + "end": 11, + "start": 0, + }, + "type": "Expression", + }, + "span": { + "end": 11, + "start": 0, + }, + "trailing": null, + "type": "Filter", + }, + "errors": [ + [Error: Expected an expression after 'AND', found keyword 'AND'], + ], + "ok": false, +} +`; + +exports[`snapshot: parse (tolerant) > NOT without expression 1`] = ` +{ + "cst": { + "expression": { + "sequences": [ + { + "factors": [ + { + "span": { + "end": 3, + "start": 0, + }, + "terms": [ + { + "negated": true, + "simple": { + "error": [Error: Expected an expression after 'NOT', found keyword 'AND'], + "expectedAt": { + "end": 4, + "start": 4, + }, + "skipped": [], + "span": { + "end": 4, + "start": 4, + }, + "type": "Error", + }, + "span": { + "end": 3, + "start": 0, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 3, + "start": 0, + }, + "type": "Sequence", + }, + { + "factors": [ + { + "span": { + "end": 9, + "start": 8, + }, + "terms": [ + { + "negated": false, + "simple": { + "arg": null, + "comparable": { + "fields": [], + "span": { + "end": 9, + "start": 8, + }, + "type": "Member", + "value": { + "span": { + "end": 9, + "start": 8, + }, + "token": { + "end": 9, + "kind": 15, + "start": 8, + "value": "a", + }, + "type": "Value", + }, + }, + "comparator": null, + "span": { + "end": 9, + "start": 8, + }, + 
"type": "Restriction", + }, + "span": { + "end": 9, + "start": 8, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 9, + "start": 8, + }, + "type": "Sequence", + }, + ], + "span": { + "end": 9, + "start": 0, + }, + "type": "Expression", + }, + "span": { + "end": 9, + "start": 0, + }, + "trailing": null, + "type": "Filter", + }, + "errors": [ + [Error: Expected an expression after 'NOT', found keyword 'AND'], + ], + "ok": false, +} +`; + +exports[`snapshot: parse (tolerant) > OR OR (expected expression) 1`] = ` +{ + "cst": { + "expression": { + "sequences": [ + { + "factors": [ + { + "span": { + "end": 9, + "start": 0, + }, + "terms": [ + { + "negated": false, + "simple": { + "arg": null, + "comparable": { + "fields": [], + "span": { + "end": 1, + "start": 0, + }, + "type": "Member", + "value": { + "span": { + "end": 1, + "start": 0, + }, + "token": { + "end": 1, + "kind": 15, + "start": 0, + "value": "a", + }, + "type": "Value", + }, + }, + "comparator": null, + "span": { + "end": 1, + "start": 0, + }, + "type": "Restriction", + }, + "span": { + "end": 1, + "start": 0, + }, + "type": "Term", + }, + { + "negated": false, + "simple": { + "error": [Error: Expected an expression after 'OR', found keyword 'OR'], + "expectedAt": { + "end": 5, + "start": 5, + }, + "skipped": [], + "span": { + "end": 5, + "start": 5, + }, + "type": "Error", + }, + "span": { + "end": 5, + "start": 5, + }, + "type": "Term", + }, + { + "negated": false, + "simple": { + "arg": null, + "comparable": { + "fields": [], + "span": { + "end": 9, + "start": 8, + }, + "type": "Member", + "value": { + "span": { + "end": 9, + "start": 8, + }, + "token": { + "end": 9, + "kind": 15, + "start": 8, + "value": "b", + }, + "type": "Value", + }, + }, + "comparator": null, + "span": { + "end": 9, + "start": 8, + }, + "type": "Restriction", + }, + "span": { + "end": 9, + "start": 8, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 9, + "start": 
0, + }, + "type": "Sequence", + }, + ], + "span": { + "end": 9, + "start": 0, + }, + "type": "Expression", + }, + "span": { + "end": 9, + "start": 0, + }, + "trailing": null, + "type": "Filter", + }, + "errors": [ + [Error: Expected an expression after 'OR', found keyword 'OR'], + ], + "ok": false, +} +`; + +exports[`snapshot: parse (tolerant) > depth limit exceeded 1`] = ` +{ + "cst": { + "expression": { + "sequences": [ + { + "factors": [ + { + "span": { + "end": 5, + "start": 0, + }, + "terms": [ + { + "negated": false, + "simple": { + "expression": { + "sequences": [ + { + "factors": [ + { + "span": { + "end": 4, + "start": 1, + }, + "terms": [ + { + "negated": false, + "simple": { + "error": [Error: Maximum nesting depth exceeded (1)], + "expectedAt": { + "end": 2, + "start": 1, + }, + "skipped": [ + { + "end": 2, + "kind": 10, + "start": 1, + "value": "(", + }, + { + "end": 3, + "kind": 15, + "start": 2, + "value": "a", + }, + { + "end": 4, + "kind": 11, + "start": 3, + "value": ")", + }, + ], + "span": { + "end": 4, + "start": 1, + }, + "type": "Error", + }, + "span": { + "end": 4, + "start": 1, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 4, + "start": 1, + }, + "type": "Sequence", + }, + ], + "span": { + "end": 4, + "start": 1, + }, + "type": "Expression", + }, + "span": { + "end": 5, + "start": 0, + }, + "type": "Composite", + }, + "span": { + "end": 5, + "start": 0, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 5, + "start": 0, + }, + "type": "Sequence", + }, + ], + "span": { + "end": 5, + "start": 0, + }, + "type": "Expression", + }, + "span": { + "end": 5, + "start": 0, + }, + "trailing": null, + "type": "Filter", + }, + "errors": [ + [Error: Maximum nesting depth exceeded (1)], + ], + "ok": false, +} +`; + +exports[`snapshot: parse (tolerant) > empty input 1`] = ` +{ + "cst": { + "expression": null, + "span": { + "end": 0, + "start": 0, + }, + "trailing": null, + "type": 
"Filter", + }, + "errors": [], + "ok": true, +} +`; + +exports[`snapshot: parse (tolerant) > empty parentheses 1`] = ` +{ + "cst": { + "expression": { + "sequences": [ + { + "factors": [ + { + "span": { + "end": 2, + "start": 0, + }, + "terms": [ + { + "negated": false, + "simple": { + "expression": { + "error": [Error: Expected an expression inside parentheses], + "expectedAt": { + "end": 1, + "start": 1, + }, + "skipped": [], + "span": { + "end": 1, + "start": 1, + }, + "type": "Error", + }, + "span": { + "end": 2, + "start": 0, + }, + "type": "Composite", + }, + "span": { + "end": 2, + "start": 0, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 2, + "start": 0, + }, + "type": "Sequence", + }, + ], + "span": { + "end": 2, + "start": 0, + }, + "type": "Expression", + }, + "span": { + "end": 2, + "start": 0, + }, + "trailing": null, + "type": "Filter", + }, + "errors": [ + [Error: Expected an expression inside parentheses], + ], + "ok": false, +} +`; + +exports[`snapshot: parse (tolerant) > input length exceeded 1`] = ` +{ + "cst": { + "expression": null, + "span": { + "end": 100, + "start": 0, + }, + "trailing": null, + "type": "Filter", + }, + "errors": [ + [Error: Filter exceeds maximum length of 10 characters], + ], + "ok": false, +} +`; + +exports[`snapshot: parse (tolerant) > lexer + parser errors combined 1`] = ` +{ + "cst": { + "expression": { + "sequences": [ + { + "factors": [ + { + "span": { + "end": 33, + "start": 0, + }, + "terms": [ + { + "negated": false, + "simple": { + "arg": { + "fields": [], + "span": { + "end": 33, + "start": 4, + }, + "type": "Member", + "value": { + "span": { + "end": 33, + "start": 4, + }, + "token": { + "end": 33, + "kind": 16, + "start": 4, + "value": "unterminated AND b AND AND c", + }, + "type": "Value", + }, + }, + "comparable": { + "fields": [], + "span": { + "end": 1, + "start": 0, + }, + "type": "Member", + "value": { + "span": { + "end": 1, + "start": 0, + }, + "token": { + "end": 
1, + "kind": 15, + "start": 0, + "value": "a", + }, + "type": "Value", + }, + }, + "comparator": 3, + "span": { + "end": 33, + "start": 0, + }, + "type": "Restriction", + }, + "span": { + "end": 33, + "start": 0, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 33, + "start": 0, + }, + "type": "Sequence", + }, + ], + "span": { + "end": 33, + "start": 0, + }, + "type": "Expression", + }, + "span": { + "end": 33, + "start": 0, + }, + "trailing": null, + "type": "Filter", + }, + "errors": [ + [Error: Unterminated string], + ], + "ok": false, +} +`; + +exports[`snapshot: parse (tolerant) > maxErrors stops recovery 1`] = ` +{ + "cst": { + "expression": { + "sequences": [ + { + "factors": [ + { + "span": { + "end": 2, + "start": 0, + }, + "terms": [ + { + "negated": false, + "simple": { + "expression": { + "error": [Error: Expected an expression inside parentheses], + "expectedAt": { + "end": 1, + "start": 1, + }, + "skipped": [], + "span": { + "end": 1, + "start": 1, + }, + "type": "Error", + }, + "span": { + "end": 2, + "start": 0, + }, + "type": "Composite", + }, + "span": { + "end": 2, + "start": 0, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + { + "span": { + "end": 5, + "start": 3, + }, + "terms": [ + { + "negated": false, + "simple": { + "expression": { + "error": [Error: Expected an expression inside parentheses], + "expectedAt": { + "end": 4, + "start": 4, + }, + "skipped": [], + "span": { + "end": 4, + "start": 4, + }, + "type": "Error", + }, + "span": { + "end": 5, + "start": 3, + }, + "type": "Composite", + }, + "span": { + "end": 5, + "start": 3, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 5, + "start": 0, + }, + "type": "Sequence", + }, + ], + "span": { + "end": 5, + "start": 0, + }, + "type": "Expression", + }, + "span": { + "end": 5, + "start": 0, + }, + "trailing": null, + "type": "Filter", + }, + "errors": [ + [Error: Expected an expression inside parentheses], + 
[Error: Expected an expression inside parentheses], + ], + "ok": false, +} +`; + +exports[`snapshot: parse (tolerant) > missing arg after comma (insertion recovery) 1`] = ` +{ + "cst": { + "expression": { + "sequences": [ + { + "factors": [ + { + "span": { + "end": 6, + "start": 0, + }, + "terms": [ + { + "negated": false, + "simple": { + "arg": null, + "comparable": { + "args": [ + { + "fields": [], + "span": { + "end": 4, + "start": 3, + }, + "type": "Member", + "value": { + "span": { + "end": 4, + "start": 3, + }, + "token": { + "end": 4, + "kind": 15, + "start": 3, + "value": "a", + }, + "type": "Value", + }, + }, + { + "fields": [], + "span": { + "end": 5, + "start": 5, + }, + "type": "Member", + "value": { + "span": { + "end": 5, + "start": 5, + }, + "token": { + "end": 5, + "kind": 15, + "start": 5, + "value": "", + }, + "type": "Value", + }, + }, + ], + "name": [ + { + "span": { + "end": 2, + "start": 0, + }, + "token": { + "end": 2, + "kind": 15, + "start": 0, + "value": "fn", + }, + "type": "Value", + }, + ], + "span": { + "end": 6, + "start": 0, + }, + "type": "FunctionCall", + }, + "comparator": null, + "span": { + "end": 6, + "start": 0, + }, + "type": "Restriction", + }, + "span": { + "end": 6, + "start": 0, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 6, + "start": 0, + }, + "type": "Sequence", + }, + ], + "span": { + "end": 6, + "start": 0, + }, + "type": "Expression", + }, + "span": { + "end": 6, + "start": 0, + }, + "trailing": null, + "type": "Filter", + }, + "errors": [ + [Error: Expected an argument after ',', found ')'], + ], + "ok": false, +} +`; + +exports[`snapshot: parse (tolerant) > missing value after comparator (insertion recovery) 1`] = ` +{ + "cst": { + "expression": { + "sequences": [ + { + "factors": [ + { + "span": { + "end": 3, + "start": 0, + }, + "terms": [ + { + "negated": false, + "simple": { + "arg": { + "fields": [], + "span": { + "end": 3, + "start": 3, + }, + "type": "Member", + 
"value": { + "span": { + "end": 3, + "start": 3, + }, + "token": { + "end": 3, + "kind": 15, + "start": 3, + "value": "", + }, + "type": "Value", + }, + }, + "comparable": { + "fields": [], + "span": { + "end": 1, + "start": 0, + }, + "type": "Member", + "value": { + "span": { + "end": 1, + "start": 0, + }, + "token": { + "end": 1, + "kind": 15, + "start": 0, + "value": "a", + }, + "type": "Value", + }, + }, + "comparator": 3, + "span": { + "end": 3, + "start": 0, + }, + "type": "Restriction", + }, + "span": { + "end": 3, + "start": 0, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 3, + "start": 0, + }, + "type": "Sequence", + }, + { + "factors": [ + { + "span": { + "end": 13, + "start": 8, + }, + "terms": [ + { + "negated": false, + "simple": { + "arg": { + "fields": [], + "span": { + "end": 13, + "start": 12, + }, + "type": "Member", + "value": { + "span": { + "end": 13, + "start": 12, + }, + "token": { + "end": 13, + "kind": 15, + "start": 12, + "value": "1", + }, + "type": "Value", + }, + }, + "comparable": { + "fields": [], + "span": { + "end": 9, + "start": 8, + }, + "type": "Member", + "value": { + "span": { + "end": 9, + "start": 8, + }, + "token": { + "end": 9, + "kind": 15, + "start": 8, + "value": "b", + }, + "type": "Value", + }, + }, + "comparator": 3, + "span": { + "end": 13, + "start": 8, + }, + "type": "Restriction", + }, + "span": { + "end": 13, + "start": 8, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 13, + "start": 8, + }, + "type": "Sequence", + }, + ], + "span": { + "end": 13, + "start": 0, + }, + "type": "Expression", + }, + "span": { + "end": 13, + "start": 0, + }, + "trailing": null, + "type": "Filter", + }, + "errors": [ + [Error: Expected a value after '=', found keyword 'AND'], + ], + "ok": false, +} +`; + +exports[`snapshot: parse (tolerant) > multiple errors 1`] = ` +{ + "cst": { + "expression": { + "sequences": [ + { + "factors": [ + { + "span": { + "end": 2, 
+ "start": 0, + }, + "terms": [ + { + "negated": false, + "simple": { + "expression": { + "error": [Error: Expected an expression inside parentheses], + "expectedAt": { + "end": 1, + "start": 1, + }, + "skipped": [], + "span": { + "end": 1, + "start": 1, + }, + "type": "Error", + }, + "span": { + "end": 2, + "start": 0, + }, + "type": "Composite", + }, + "span": { + "end": 2, + "start": 0, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 2, + "start": 0, + }, + "type": "Sequence", + }, + { + "factors": [ + { + "span": { + "end": 9, + "start": 7, + }, + "terms": [ + { + "negated": false, + "simple": { + "expression": { + "error": [Error: Expected an expression inside parentheses], + "expectedAt": { + "end": 8, + "start": 8, + }, + "skipped": [], + "span": { + "end": 8, + "start": 8, + }, + "type": "Error", + }, + "span": { + "end": 9, + "start": 7, + }, + "type": "Composite", + }, + "span": { + "end": 9, + "start": 7, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 9, + "start": 7, + }, + "type": "Sequence", + }, + ], + "span": { + "end": 9, + "start": 0, + }, + "type": "Expression", + }, + "span": { + "end": 9, + "start": 0, + }, + "trailing": null, + "type": "Filter", + }, + "errors": [ + [Error: Expected an expression inside parentheses], + [Error: Expected an expression inside parentheses], + ], + "ok": false, +} +`; + +exports[`snapshot: parse (tolerant) > negative field path 1`] = ` +{ + "cst": { + "expression": { + "sequences": [ + { + "factors": [ + { + "span": { + "end": 8, + "start": 0, + }, + "terms": [ + { + "negated": false, + "simple": { + "arg": { + "fields": [ + { + "span": { + "end": 8, + "start": 7, + }, + "token": { + "end": 8, + "kind": 15, + "start": 7, + "value": "c", + }, + "type": "Value", + }, + ], + "span": { + "end": 8, + "start": 4, + }, + "type": "Member", + "value": { + "span": { + "end": 6, + "start": 4, + }, + "token": { + "end": 6, + "kind": 15, + "start": 4, + 
"value": "-b", + }, + "type": "Value", + }, + }, + "comparable": { + "fields": [], + "span": { + "end": 1, + "start": 0, + }, + "type": "Member", + "value": { + "span": { + "end": 1, + "start": 0, + }, + "token": { + "end": 1, + "kind": 15, + "start": 0, + "value": "a", + }, + "type": "Value", + }, + }, + "comparator": 3, + "span": { + "end": 8, + "start": 0, + }, + "type": "Restriction", + }, + "span": { + "end": 8, + "start": 0, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 8, + "start": 0, + }, + "type": "Sequence", + }, + ], + "span": { + "end": 8, + "start": 0, + }, + "type": "Expression", + }, + "span": { + "end": 8, + "start": 0, + }, + "trailing": null, + "type": "Filter", + }, + "errors": [ + [Error: Negative sign can only be applied to a numeric value, not a field path], + ], + "ok": false, +} +`; + +exports[`snapshot: parse (tolerant) > quoted function name 1`] = ` +{ + "cst": { + "expression": { + "sequences": [ + { + "factors": [ + { + "span": { + "end": 6, + "start": 0, + }, + "terms": [ + { + "negated": false, + "simple": { + "arg": null, + "comparable": { + "args": [], + "name": [ + { + "span": { + "end": 4, + "start": 0, + }, + "token": { + "end": 4, + "kind": 16, + "start": 0, + "value": "fn", + }, + "type": "Value", + }, + ], + "span": { + "end": 6, + "start": 0, + }, + "type": "FunctionCall", + }, + "comparator": null, + "span": { + "end": 6, + "start": 0, + }, + "type": "Restriction", + }, + "span": { + "end": 6, + "start": 0, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 6, + "start": 0, + }, + "type": "Sequence", + }, + ], + "span": { + "end": 6, + "start": 0, + }, + "type": "Expression", + }, + "span": { + "end": 6, + "start": 0, + }, + "trailing": null, + "type": "Filter", + }, + "errors": [ + [Error: Quoted strings cannot be used as function names], + ], + "ok": false, +} +`; + +exports[`snapshot: parse (tolerant) > trailing AND 1`] = ` +{ + "cst": { + 
"expression": { + "sequences": [ + { + "factors": [ + { + "span": { + "end": 1, + "start": 0, + }, + "terms": [ + { + "negated": false, + "simple": { + "arg": null, + "comparable": { + "fields": [], + "span": { + "end": 1, + "start": 0, + }, + "type": "Member", + "value": { + "span": { + "end": 1, + "start": 0, + }, + "token": { + "end": 1, + "kind": 15, + "start": 0, + "value": "a", + }, + "type": "Value", + }, + }, + "comparator": null, + "span": { + "end": 1, + "start": 0, + }, + "type": "Restriction", + }, + "span": { + "end": 1, + "start": 0, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 1, + "start": 0, + }, + "type": "Sequence", + }, + { + "factors": [ + { + "span": { + "end": 5, + "start": 5, + }, + "terms": [ + { + "negated": false, + "simple": { + "error": [Error: Expected an expression after 'AND', found end of input], + "expectedAt": { + "end": 5, + "start": 5, + }, + "skipped": [], + "span": { + "end": 5, + "start": 5, + }, + "type": "Error", + }, + "span": { + "end": 5, + "start": 5, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 5, + "start": 5, + }, + "type": "Sequence", + }, + ], + "span": { + "end": 5, + "start": 0, + }, + "type": "Expression", + }, + "span": { + "end": 5, + "start": 0, + }, + "trailing": null, + "type": "Filter", + }, + "errors": [ + [Error: Expected an expression after 'AND', found end of input], + ], + "ok": false, +} +`; + +exports[`snapshot: parse (tolerant) > trailing content 1`] = ` +{ + "cst": { + "expression": { + "sequences": [ + { + "factors": [ + { + "span": { + "end": 5, + "start": 0, + }, + "terms": [ + { + "negated": false, + "simple": { + "arg": { + "fields": [], + "span": { + "end": 5, + "start": 4, + }, + "type": "Member", + "value": { + "span": { + "end": 5, + "start": 4, + }, + "token": { + "end": 5, + "kind": 15, + "start": 4, + "value": "1", + }, + "type": "Value", + }, + }, + "comparable": { + "fields": [], + "span": { + "end": 1, + 
"start": 0, + }, + "type": "Member", + "value": { + "span": { + "end": 1, + "start": 0, + }, + "token": { + "end": 1, + "kind": 15, + "start": 0, + "value": "a", + }, + "type": "Value", + }, + }, + "comparator": 3, + "span": { + "end": 5, + "start": 0, + }, + "type": "Restriction", + }, + "span": { + "end": 5, + "start": 0, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 5, + "start": 0, + }, + "type": "Sequence", + }, + ], + "span": { + "end": 5, + "start": 0, + }, + "type": "Expression", + }, + "span": { + "end": 5, + "start": 0, + }, + "trailing": { + "error": [Error: Unexpected ')' after filter expression], + "expectedAt": { + "end": 6, + "start": 5, + }, + "skipped": [ + { + "end": 6, + "kind": 11, + "start": 5, + "value": ")", + }, + { + "end": 10, + "kind": 0, + "start": 7, + "value": "AND", + }, + { + "end": 12, + "kind": 15, + "start": 11, + "value": "b", + }, + { + "end": 14, + "kind": 3, + "start": 13, + "value": "=", + }, + { + "end": 16, + "kind": 15, + "start": 15, + "value": "2", + }, + ], + "span": { + "end": 16, + "start": 5, + }, + "type": "Error", + }, + "type": "Filter", + }, + "errors": [ + [Error: Unexpected ')' after filter expression], + ], + "ok": false, +} +`; + +exports[`snapshot: parse (tolerant) > unclosed function call 1`] = ` +{ + "cst": { + "expression": { + "sequences": [ + { + "factors": [ + { + "span": { + "end": 7, + "start": 0, + }, + "terms": [ + { + "negated": false, + "simple": { + "arg": null, + "comparable": { + "args": [ + { + "fields": [], + "span": { + "end": 4, + "start": 3, + }, + "type": "Member", + "value": { + "span": { + "end": 4, + "start": 3, + }, + "token": { + "end": 4, + "kind": 15, + "start": 3, + "value": "a", + }, + "type": "Value", + }, + }, + { + "fields": [], + "span": { + "end": 7, + "start": 6, + }, + "type": "Member", + "value": { + "span": { + "end": 7, + "start": 6, + }, + "token": { + "end": 7, + "kind": 15, + "start": 6, + "value": "b", + }, + "type": "Value", + 
}, + }, + ], + "name": [ + { + "span": { + "end": 2, + "start": 0, + }, + "token": { + "end": 2, + "kind": 15, + "start": 0, + "value": "fn", + }, + "type": "Value", + }, + ], + "span": { + "end": 7, + "start": 0, + }, + "type": "FunctionCall", + }, + "comparator": null, + "span": { + "end": 7, + "start": 0, + }, + "type": "Restriction", + }, + "span": { + "end": 7, + "start": 0, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 7, + "start": 0, + }, + "type": "Sequence", + }, + ], + "span": { + "end": 7, + "start": 0, + }, + "type": "Expression", + }, + "span": { + "end": 7, + "start": 0, + }, + "trailing": null, + "type": "Filter", + }, + "errors": [ + [Error: Expected ')' to close function call, found end of input], + ], + "ok": false, +} +`; + +exports[`snapshot: parse (tolerant) > unclosed parenthesis 1`] = ` +{ + "cst": { + "expression": { + "sequences": [ + { + "factors": [ + { + "span": { + "end": 6, + "start": 0, + }, + "terms": [ + { + "negated": false, + "simple": { + "expression": { + "sequences": [ + { + "factors": [ + { + "span": { + "end": 6, + "start": 1, + }, + "terms": [ + { + "negated": false, + "simple": { + "arg": { + "fields": [], + "span": { + "end": 6, + "start": 5, + }, + "type": "Member", + "value": { + "span": { + "end": 6, + "start": 5, + }, + "token": { + "end": 6, + "kind": 15, + "start": 5, + "value": "1", + }, + "type": "Value", + }, + }, + "comparable": { + "fields": [], + "span": { + "end": 2, + "start": 1, + }, + "type": "Member", + "value": { + "span": { + "end": 2, + "start": 1, + }, + "token": { + "end": 2, + "kind": 15, + "start": 1, + "value": "a", + }, + "type": "Value", + }, + }, + "comparator": 3, + "span": { + "end": 6, + "start": 1, + }, + "type": "Restriction", + }, + "span": { + "end": 6, + "start": 1, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 6, + "start": 1, + }, + "type": "Sequence", + }, + ], + "span": { + "end": 6, + "start": 1, + }, + 
"type": "Expression", + }, + "span": { + "end": 6, + "start": 0, + }, + "type": "Composite", + }, + "span": { + "end": 6, + "start": 0, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 6, + "start": 0, + }, + "type": "Sequence", + }, + ], + "span": { + "end": 6, + "start": 0, + }, + "type": "Expression", + }, + "span": { + "end": 6, + "start": 0, + }, + "trailing": null, + "type": "Filter", + }, + "errors": [ + [Error: Unclosed parenthesis: expected ')' to match '(' at position 0], + ], + "ok": false, +} +`; + +exports[`snapshot: parse (tolerant) > unmatched closing paren (insertion recovery) 1`] = ` +{ + "cst": { + "expression": { + "sequences": [ + { + "factors": [ + { + "span": { + "end": 3, + "start": 0, + }, + "terms": [ + { + "negated": false, + "simple": { + "arg": { + "fields": [], + "span": { + "end": 3, + "start": 3, + }, + "type": "Member", + "value": { + "span": { + "end": 3, + "start": 3, + }, + "token": { + "end": 3, + "kind": 15, + "start": 3, + "value": "", + }, + "type": "Value", + }, + }, + "comparable": { + "fields": [], + "span": { + "end": 1, + "start": 0, + }, + "type": "Member", + "value": { + "span": { + "end": 1, + "start": 0, + }, + "token": { + "end": 1, + "kind": 15, + "start": 0, + "value": "a", + }, + "type": "Value", + }, + }, + "comparator": 3, + "span": { + "end": 3, + "start": 0, + }, + "type": "Restriction", + }, + "span": { + "end": 3, + "start": 0, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 3, + "start": 0, + }, + "type": "Sequence", + }, + ], + "span": { + "end": 3, + "start": 0, + }, + "type": "Expression", + }, + "span": { + "end": 3, + "start": 0, + }, + "trailing": { + "error": [Error: Unexpected ')' after filter expression], + "expectedAt": { + "end": 5, + "start": 4, + }, + "skipped": [ + { + "end": 5, + "kind": 11, + "start": 4, + "value": ")", + }, + { + "end": 7, + "kind": 15, + "start": 6, + "value": "b", + }, + { + "end": 9, + "kind": 3, + 
"start": 8, + "value": "=", + }, + { + "end": 11, + "kind": 15, + "start": 10, + "value": "1", + }, + ], + "span": { + "end": 11, + "start": 4, + }, + "type": "Error", + }, + "type": "Filter", + }, + "errors": [ + [Error: Expected a value after '=', found ')'], + [Error: Unexpected ')' after filter expression], + ], + "ok": false, +} +`; + +exports[`snapshot: parse (tolerant) > valid input 1`] = ` +{ + "cst": { + "expression": { + "sequences": [ + { + "factors": [ + { + "span": { + "end": 5, + "start": 0, + }, + "terms": [ + { + "negated": false, + "simple": { + "arg": { + "fields": [], + "span": { + "end": 5, + "start": 4, + }, + "type": "Member", + "value": { + "span": { + "end": 5, + "start": 4, + }, + "token": { + "end": 5, + "kind": 15, + "start": 4, + "value": "1", + }, + "type": "Value", + }, + }, + "comparable": { + "fields": [], + "span": { + "end": 1, + "start": 0, + }, + "type": "Member", + "value": { + "span": { + "end": 1, + "start": 0, + }, + "token": { + "end": 1, + "kind": 15, + "start": 0, + "value": "a", + }, + "type": "Value", + }, + }, + "comparator": 3, + "span": { + "end": 5, + "start": 0, + }, + "type": "Restriction", + }, + "span": { + "end": 5, + "start": 0, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 5, + "start": 0, + }, + "type": "Sequence", + }, + { + "factors": [ + { + "span": { + "end": 15, + "start": 10, + }, + "terms": [ + { + "negated": false, + "simple": { + "arg": { + "fields": [], + "span": { + "end": 15, + "start": 14, + }, + "type": "Member", + "value": { + "span": { + "end": 15, + "start": 14, + }, + "token": { + "end": 15, + "kind": 15, + "start": 14, + "value": "2", + }, + "type": "Value", + }, + }, + "comparable": { + "fields": [], + "span": { + "end": 11, + "start": 10, + }, + "type": "Member", + "value": { + "span": { + "end": 11, + "start": 10, + }, + "token": { + "end": 11, + "kind": 15, + "start": 10, + "value": "b", + }, + "type": "Value", + }, + }, + "comparator": 3, + "span": 
{ + "end": 15, + "start": 10, + }, + "type": "Restriction", + }, + "span": { + "end": 15, + "start": 10, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 15, + "start": 10, + }, + "type": "Sequence", + }, + ], + "span": { + "end": 15, + "start": 0, + }, + "type": "Expression", + }, + "span": { + "end": 15, + "start": 0, + }, + "trailing": null, + "type": "Filter", + }, + "errors": [], + "ok": true, +} +`; + exports[`snapshot: parse > AND expression 1`] = ` { "expression": { @@ -2620,35 +4980,225 @@ exports[`snapshot: parse > qualified function 1`] = ` }, "type": "Value", }, - { - "span": { - "end": 8, - "start": 5, - }, - "token": { - "end": 8, - "kind": 15, - "start": 5, - "value": "mem", - }, - "type": "Value", + { + "span": { + "end": 8, + "start": 5, + }, + "token": { + "end": 8, + "kind": 15, + "start": 5, + "value": "mem", + }, + "type": "Value", + }, + ], + "span": { + "end": 16, + "start": 0, + }, + "type": "FunctionCall", + }, + "comparator": null, + "span": { + "end": 16, + "start": 0, + }, + "type": "Restriction", + }, + "span": { + "end": 16, + "start": 0, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 16, + "start": 0, + }, + "type": "Sequence", + }, + ], + "span": { + "end": 16, + "start": 0, + }, + "type": "Expression", + }, + "span": { + "end": 16, + "start": 0, + }, + "type": "Filter", +} +`; + +exports[`snapshot: parse > simple restriction 1`] = ` +{ + "expression": { + "sequences": [ + { + "factors": [ + { + "span": { + "end": 10, + "start": 0, + }, + "terms": [ + { + "negated": false, + "simple": { + "arg": { + "fields": [], + "span": { + "end": 10, + "start": 9, + }, + "type": "Member", + "value": { + "span": { + "end": 10, + "start": 9, + }, + "token": { + "end": 10, + "kind": 15, + "start": 9, + "value": "5", + }, + "type": "Value", + }, + }, + "comparable": { + "fields": [], + "span": { + "end": 5, + "start": 0, + }, + "type": "Member", + "value": { + "span": { + "end": 
5, + "start": 0, + }, + "token": { + "end": 5, + "kind": 15, + "start": 0, + "value": "power", + }, + "type": "Value", + }, + }, + "comparator": 8, + "span": { + "end": 10, + "start": 0, + }, + "type": "Restriction", + }, + "span": { + "end": 10, + "start": 0, + }, + "type": "Term", + }, + ], + "type": "Factor", + }, + ], + "span": { + "end": 10, + "start": 0, + }, + "type": "Sequence", + }, + ], + "span": { + "end": 10, + "start": 0, + }, + "type": "Expression", + }, + "span": { + "end": 10, + "start": 0, + }, + "type": "Filter", +} +`; + +exports[`snapshot: parse > string comparison 1`] = ` +{ + "expression": { + "sequences": [ + { + "factors": [ + { + "span": { + "end": 15, + "start": 0, + }, + "terms": [ + { + "negated": false, + "simple": { + "arg": { + "fields": [], + "span": { + "end": 15, + "start": 7, + }, + "type": "Member", + "value": { + "span": { + "end": 15, + "start": 7, + }, + "token": { + "end": 15, + "kind": 16, + "start": 7, + "value": "Madoka", }, - ], + "type": "Value", + }, + }, + "comparable": { + "fields": [], "span": { - "end": 16, + "end": 4, "start": 0, }, - "type": "FunctionCall", + "type": "Member", + "value": { + "span": { + "end": 4, + "start": 0, + }, + "token": { + "end": 4, + "kind": 15, + "start": 0, + "value": "name", + }, + "type": "Value", + }, }, - "comparator": null, + "comparator": 3, "span": { - "end": 16, + "end": 15, "start": 0, }, "type": "Restriction", }, "span": { - "end": 16, + "end": 15, "start": 0, }, "type": "Term", @@ -2658,27 +5208,27 @@ exports[`snapshot: parse > qualified function 1`] = ` }, ], "span": { - "end": 16, + "end": 15, "start": 0, }, "type": "Sequence", }, ], "span": { - "end": 16, + "end": 15, "start": 0, }, "type": "Expression", }, "span": { - "end": 16, + "end": 15, "start": 0, }, "type": "Filter", } `; -exports[`snapshot: parse > simple restriction 1`] = ` +exports[`snapshot: toCleanTree > clean tolerant CST narrows to strict FilterNode 1`] = ` { "expression": { "sequences": [ @@ -2686,7 +5236,7 
@@ exports[`snapshot: parse > simple restriction 1`] = ` "factors": [ { "span": { - "end": 10, + "end": 5, "start": 0, }, "terms": [ @@ -2696,20 +5246,20 @@ exports[`snapshot: parse > simple restriction 1`] = ` "arg": { "fields": [], "span": { - "end": 10, - "start": 9, + "end": 5, + "start": 4, }, "type": "Member", "value": { "span": { - "end": 10, - "start": 9, + "end": 5, + "start": 4, }, "token": { - "end": 10, + "end": 5, "kind": 15, - "start": 9, - "value": "5", + "start": 4, + "value": "1", }, "type": "Value", }, @@ -2717,33 +5267,33 @@ exports[`snapshot: parse > simple restriction 1`] = ` "comparable": { "fields": [], "span": { - "end": 5, + "end": 1, "start": 0, }, "type": "Member", "value": { "span": { - "end": 5, + "end": 1, "start": 0, }, "token": { - "end": 5, + "end": 1, "kind": 15, "start": 0, - "value": "power", + "value": "a", }, "type": "Value", }, }, - "comparator": 8, + "comparator": 3, "span": { - "end": 10, + "end": 5, "start": 0, }, "type": "Restriction", }, "span": { - "end": 10, + "end": 5, "start": 0, }, "type": "Term", @@ -2753,36 +5303,17 @@ exports[`snapshot: parse > simple restriction 1`] = ` }, ], "span": { - "end": 10, + "end": 5, "start": 0, }, "type": "Sequence", }, - ], - "span": { - "end": 10, - "start": 0, - }, - "type": "Expression", - }, - "span": { - "end": 10, - "start": 0, - }, - "type": "Filter", -} -`; - -exports[`snapshot: parse > string comparison 1`] = ` -{ - "expression": { - "sequences": [ { "factors": [ { "span": { "end": 15, - "start": 0, + "start": 10, }, "terms": [ { @@ -2792,19 +5323,19 @@ exports[`snapshot: parse > string comparison 1`] = ` "fields": [], "span": { "end": 15, - "start": 7, + "start": 14, }, "type": "Member", "value": { "span": { "end": 15, - "start": 7, + "start": 14, }, "token": { "end": 15, - "kind": 16, - "start": 7, - "value": "Madoka", + "kind": 15, + "start": 14, + "value": "2", }, "type": "Value", }, @@ -2812,20 +5343,20 @@ exports[`snapshot: parse > string comparison 1`] = ` 
"comparable": { "fields": [], "span": { - "end": 4, - "start": 0, + "end": 11, + "start": 10, }, "type": "Member", "value": { "span": { - "end": 4, - "start": 0, + "end": 11, + "start": 10, }, "token": { - "end": 4, + "end": 11, "kind": 15, - "start": 0, - "value": "name", + "start": 10, + "value": "b", }, "type": "Value", }, @@ -2833,13 +5364,13 @@ exports[`snapshot: parse > string comparison 1`] = ` "comparator": 3, "span": { "end": 15, - "start": 0, + "start": 10, }, "type": "Restriction", }, "span": { "end": 15, - "start": 0, + "start": 10, }, "type": "Term", }, @@ -2849,7 +5380,7 @@ exports[`snapshot: parse > string comparison 1`] = ` ], "span": { "end": 15, - "start": 0, + "start": 10, }, "type": "Sequence", }, @@ -2864,10 +5395,173 @@ exports[`snapshot: parse > string comparison 1`] = ` "end": 15, "start": 0, }, + "trailing": null, "type": "Filter", } `; +exports[`snapshot: toCleanTree > clean tree passes through transform 1`] = ` +{ + "children": [ + { + "arg": { + "quoted": false, + "span": { + "end": 10, + "start": 9, + }, + "type": "value", + "value": "5", + }, + "comparable": { + "path": [ + "power", + ], + "span": { + "end": 5, + "start": 0, + }, + "type": "member", + }, + "comparator": ">=", + "span": { + "end": 10, + "start": 0, + }, + "type": "restriction", + }, + { + "arg": { + "quoted": false, + "span": { + "end": 26, + "start": 20, + }, + "type": "value", + "value": "active", + }, + "comparable": { + "path": [ + "name", + ], + "span": { + "end": 19, + "start": 15, + }, + "type": "member", + }, + "comparator": ":", + "span": { + "end": 26, + "start": 15, + }, + "type": "restriction", + }, + ], + "span": { + "end": 26, + "start": 0, + }, + "type": "and", +} +`; + +exports[`snapshot: toCleanTree > dirty tolerant CST returns null 1`] = `null`; + +exports[`snapshot: tokenize (tolerant) > bare ! 
1`] = ` +{ + "errors": [ + [Error: Unexpected character '!'], + ], + "tokens": [ + { + "end": 1, + "kind": 15, + "start": 0, + "value": "a", + }, + { + "end": 5, + "kind": 15, + "start": 4, + "value": "b", + }, + { + "end": 5, + "kind": 17, + "start": 5, + "value": "", + }, + ], +} +`; + +exports[`snapshot: tokenize (tolerant) > unterminated string 1`] = ` +{ + "errors": [ + [Error: Unterminated string], + ], + "tokens": [ + { + "end": 1, + "kind": 15, + "start": 0, + "value": "a", + }, + { + "end": 3, + "kind": 3, + "start": 2, + "value": "=", + }, + { + "end": 10, + "kind": 16, + "start": 4, + "value": "hello", + }, + { + "end": 10, + "kind": 17, + "start": 10, + "value": "", + }, + ], +} +`; + +exports[`snapshot: tokenize (tolerant) > valid input (no errors) 1`] = ` +{ + "errors": [], + "tokens": [ + { + "end": 1, + "kind": 15, + "start": 0, + "value": "a", + }, + { + "end": 3, + "kind": 3, + "start": 2, + "value": "=", + }, + { + "end": 5, + "kind": 15, + "start": 4, + "value": "1", + }, + { + "end": 5, + "kind": 17, + "start": 5, + "value": "", + }, + ], +} +`; + exports[`snapshot: tokenize > complex expression 1`] = ` [ { diff --git a/tests/parser-tolerant.test.ts b/tests/parser-tolerant.test.ts new file mode 100644 index 0000000..00d4fec --- /dev/null +++ b/tests/parser-tolerant.test.ts @@ -0,0 +1,496 @@ +import { describe, expect, it } from "vitest"; +import { + parse, + transform, + evaluate, + hasErrorNodes, + toCleanTree, + tokenize, + type ParseResult, + type ErrorNode, + UnexpectedTokenError, + ExpectedExpressionError, + ExpectedValueError, + EmptyExpressionError, + UnclosedDelimiterError, + InvalidFunctionNameError, + InvalidNegationError, + DepthLimitError, + InputLengthError, + UnexpectedCharacterError, + UnterminatedStringError, +} from "../src"; + +function tolerant(input: string, options?: { maxDepth?: number; maxLength?: number }): ParseResult { + return parse(input, { tolerant: true, ...options }); +} + +describe("tolerant parsing", () => { + 
describe("backward compatibility", () => { + it("strict mode still throws on first error", () => { + expect(() => parse("a AND AND b")).toThrow(ExpectedExpressionError); + }); + + it("strict mode returns FilterNode directly", () => { + const cst = parse("a = 1"); + expect(cst.type).toBe("Filter"); + }); + + it("valid input in tolerant mode returns ok: true", () => { + const result = tolerant("a = 1 AND b = 2"); + expect(result.ok).toBe(true); + expect(result.errors).toHaveLength(0); + expect(result.cst.type).toBe("Filter"); + }); + + it("empty input in tolerant mode returns ok: true", () => { + const result = tolerant(""); + expect(result.ok).toBe(true); + expect(result.errors).toHaveLength(0); + expect(result.cst.expression).toBeNull(); + }); + + it("whitespace-only input in tolerant mode returns ok: true", () => { + const result = tolerant(" "); + expect(result.ok).toBe(true); + expect(result.errors).toHaveLength(0); + expect(result.cst.expression).toBeNull(); + }); + }); + + describe("lexer recovery", () => { + it("recovers from bare !", () => { + const result = tokenize("a ! 
b", { tolerant: true }); + expect(result.errors).toHaveLength(1); + expect(result.errors[0]).toBeInstanceOf(UnexpectedCharacterError); + expect(result.tokens.length).toBeGreaterThanOrEqual(3); + }); + + it("recovers from unterminated string", () => { + const result = tokenize('a = "hello', { tolerant: true }); + expect(result.errors).toHaveLength(1); + expect(result.errors[0]).toBeInstanceOf(UnterminatedStringError); + const stringToken = result.tokens.find((t) => t.value === "hello"); + expect(stringToken).toBeDefined(); + }); + + it("recovers from unterminated string with escape (slow path)", () => { + const result = tokenize('a = "hel\\"lo', { tolerant: true }); + expect(result.errors).toHaveLength(1); + expect(result.errors[0]).toBeInstanceOf(UnterminatedStringError); + const stringToken = result.tokens.find((t) => t.kind === 16); + expect(stringToken).toBeDefined(); + }); + + it("collects multiple lexer errors", () => { + const result = tokenize("a ! b ! c", { tolerant: true }); + expect(result.errors).toHaveLength(2); + }); + + it("lexer errors are included in parse result", () => { + const result = tolerant("a ! 
b"); + expect(result.errors.length).toBeGreaterThanOrEqual(1); + expect(result.errors[0]).toBeInstanceOf(UnexpectedCharacterError); + }); + }); + + describe("parser recovery - trailing content", () => { + it("recovers from unexpected token after expression", () => { + const result = tolerant("a = 1)"); + expect(result.ok).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.errors[0]).toBeInstanceOf(UnexpectedTokenError); + expect(result.cst.expression).not.toBeNull(); + }); + + it("wraps trailing tokens in an ErrorNode on the CST", () => { + const result = tolerant("a = 1) AND b = 2"); + expect(result.ok).toBe(false); + expect(result.cst.trailing).not.toBeNull(); + expect(result.cst.trailing!.type).toBe("Error"); + expect(result.cst.trailing!.skipped.length).toBeGreaterThan(0); + expect(hasErrorNodes(result.cst)).toBe(true); + }); + + it("trailing is null for valid input", () => { + const result = tolerant("a = 1"); + expect(result.cst.trailing).toBeNull(); + }); + }); + + describe("parser recovery - expected expression after operator", () => { + it("recovers from AND AND", () => { + const result = tolerant("a AND AND b"); + expect(result.ok).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.errors[0]).toBeInstanceOf(ExpectedExpressionError); + expect(result.cst.expression).not.toBeNull(); + }); + + it("recovers from OR OR", () => { + const result = tolerant("a OR OR b"); + expect(result.ok).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.errors[0]).toBeInstanceOf(ExpectedExpressionError); + }); + + it("recovers from trailing AND", () => { + const result = tolerant("a AND"); + expect(result.ok).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.errors[0]).toBeInstanceOf(ExpectedExpressionError); + }); + + it("recovers from trailing OR", () => { + const result = tolerant("a OR"); + expect(result.ok).toBe(false); + expect(result.errors).toHaveLength(1); + 
expect(result.errors[0]).toBeInstanceOf(ExpectedExpressionError); + }); + }); + + describe("parser recovery - NOT/- without expression", () => { + it("recovers from NOT followed by AND", () => { + const result = tolerant("NOT AND a"); + expect(result.ok).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.errors[0]).toBeInstanceOf(ExpectedExpressionError); + }); + + it("recovers from dangling NOT at end", () => { + const result = tolerant("a NOT"); + expect(result.ok).toBe(false); + expect(result.errors.length).toBeGreaterThanOrEqual(1); + }); + }); + + describe("parser recovery - composite expressions", () => { + it("recovers from empty parentheses ()", () => { + const result = tolerant("()"); + expect(result.ok).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.errors[0]).toBeInstanceOf(EmptyExpressionError); + expect(result.cst.expression).not.toBeNull(); + }); + + it("recovers from unclosed parenthesis", () => { + const result = tolerant("(a = 1"); + expect(result.ok).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.errors[0]).toBeInstanceOf(UnclosedDelimiterError); + expect(result.cst.expression).not.toBeNull(); + }); + + it("recovers from depth limit exceeded", () => { + const result = tolerant("((a))", { maxDepth: 1 }); + expect(result.ok).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.errors[0]).toBeInstanceOf(DepthLimitError); + }); + }); + + describe("parser recovery - function calls", () => { + it("recovers from string as function name", () => { + const result = tolerant('"fn"()'); + expect(result.ok).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.errors[0]).toBeInstanceOf(InvalidFunctionNameError); + expect(result.cst.expression).not.toBeNull(); + }); + + it("recovers from unclosed function call", () => { + const result = tolerant("fn(a, b"); + expect(result.ok).toBe(false); + expect(result.errors).toHaveLength(1); + 
expect(result.errors[0]).toBeInstanceOf(UnclosedDelimiterError); + }); + }); + + describe("parser recovery - expected value", () => { + it("recovers from missing value after comparator", () => { + const result = tolerant("a = AND b = 1"); + expect(result.ok).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.errors[0]).toBeInstanceOf(ExpectedValueError); + }); + + it("preserves subsequent restrictions after missing value", () => { + const result = tolerant("a = AND b = 1 AND c = 2"); + expect(result.ok).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.errors[0]).toBeInstanceOf(ExpectedValueError); + + const expr = result.cst.expression; + expect(expr).not.toBeNull(); + expect(expr!.type).toBe("Expression"); + if (expr!.type === "Expression") { + expect(expr!.sequences).toHaveLength(3); + } + }); + + it("recovers from trailing comma in function args", () => { + const result = tolerant("fn(a,)"); + expect(result.ok).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.errors[0]).toBeInstanceOf(ExpectedValueError); + }); + + it("recovers from missing field after dot", () => { + const result = tolerant("a. 
= 1"); + expect(result.ok).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.errors[0]).toBeInstanceOf(ExpectedValueError); + }); + }); + + describe("parser recovery - negation errors", () => { + it("recovers from minus before function call", () => { + const result = tolerant("a = -fn()"); + expect(result.ok).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.errors[0]).toBeInstanceOf(InvalidNegationError); + }); + + it("recovers from minus before non-numeric path", () => { + const result = tolerant("a = -b.c"); + expect(result.ok).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.errors[0]).toBeInstanceOf(InvalidNegationError); + }); + }); + + describe("parser recovery - expected identifier", () => { + it("recovers from keyword at value position", () => { + const result = tolerant("= 1"); + expect(result.ok).toBe(false); + expect(result.errors.length).toBeGreaterThanOrEqual(1); + }); + }); + + describe("parser recovery - input length", () => { + it("returns error for input exceeding maxLength", () => { + const result = tolerant("a".repeat(100), { maxLength: 10 }); + expect(result.ok).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.errors[0]).toBeInstanceOf(InputLengthError); + expect(result.cst.expression).toBeNull(); + }); + }); + + describe("multiple errors", () => { + it("collects multiple errors in one pass", () => { + const result = tolerant("(a AND AND b) OR ()"); + expect(result.ok).toBe(false); + expect(result.errors.length).toBeGreaterThanOrEqual(2); + }); + + it("collects errors from nested constructs", () => { + const result = tolerant("() AND ()"); + expect(result.ok).toBe(false); + expect(result.errors.length).toBeGreaterThanOrEqual(2); + }); + + it("respects maxErrors limit", () => { + const result = parse("() () () () ()", { tolerant: true, maxErrors: 10 }); + expect(result.errors.length).toBeGreaterThanOrEqual(5); + + const limited = parse("() () () () ()", { tolerant: 
true, maxErrors: 2 }); + expect(limited.errors).toHaveLength(2); + expect(limited.ok).toBe(false); + expect(limited.cst.type).toBe("Filter"); + }); + }); + + describe("hasErrorNodes", () => { + it("returns false for clean CST", () => { + const result = tolerant("a = 1 AND b = 2"); + expect(hasErrorNodes(result.cst)).toBe(false); + }); + + it("returns true for CST with errors", () => { + const result = tolerant("a AND AND b"); + expect(hasErrorNodes(result.cst)).toBe(true); + }); + + it("returns false for empty filter", () => { + const result = tolerant(""); + expect(hasErrorNodes(result.cst)).toBe(false); + }); + }); + + describe("toCleanTree", () => { + it("returns null for CST with error nodes", () => { + const result = tolerant("a AND AND b"); + expect(toCleanTree(result)).toBeNull(); + }); + + it("returns strict FilterNode for clean tolerant CST", () => { + const result = tolerant("a = 1"); + expect(result.ok).toBe(true); + const clean = toCleanTree(result); + expect(clean).not.toBeNull(); + const ast = transform(clean!); + expect(ast).not.toBeNull(); + }); + + it("returns null when expression is clean but trailing has errors", () => { + const result = tolerant("a = 1)"); + expect(result.cst.expression).not.toBeNull(); + expect(result.cst.trailing).not.toBeNull(); + expect(toCleanTree(result)).toBeNull(); + }); + }); + + describe("error node structure", () => { + it("error nodes have correct type and fields", () => { + const result = tolerant("a AND AND b"); + const errorNode = findErrorNode(result.cst); + expect(errorNode).not.toBeNull(); + expect(errorNode!.type).toBe("Error"); + expect(errorNode!.error).toBeInstanceOf(ExpectedExpressionError); + expect(Array.isArray(errorNode!.skipped)).toBe(true); + expect(errorNode!.span).toBeDefined(); + expect(errorNode!.expectedAt).toBeDefined(); + }); + }); + + describe("expectedAt positions", () => { + it("points to the token after AND when expression is missing", () => { + const result = tolerant("a AND AND b"); + 
const errorNode = findErrorNode(result.cst); + expect(errorNode).not.toBeNull(); + expect(errorNode!.expectedAt.start).toBe(6); + }); + + it("points to the position after comparator for missing value", () => { + const result = tolerant("a = AND b"); + expect(result.errors).toHaveLength(1); + expect(result.errors[0]).toBeInstanceOf(ExpectedValueError); + expect(result.errors[0].span.start).toBeLessThanOrEqual(4); + }); + + it("points inside empty parentheses", () => { + const result = tolerant("()"); + const errorNode = findErrorNode(result.cst); + expect(errorNode).not.toBeNull(); + expect(errorNode!.expectedAt.start).toBe(1); + }); + + it("points at EOF for trailing operator", () => { + const result = tolerant("a AND"); + const errorNode = findErrorNode(result.cst); + expect(errorNode).not.toBeNull(); + expect(errorNode!.expectedAt.start).toBe(5); + }); + }); + + describe("insertion-based recovery", () => { + it("synthesizes placeholder for missing arg value", () => { + const result = tolerant("a = AND b = 1"); + expect(result.ok).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.errors[0]).toBeInstanceOf(ExpectedValueError); + + const expr = result.cst.expression; + expect(expr).not.toBeNull(); + expect(expr!.type).toBe("Expression"); + if (expr!.type === "Expression") { + expect(expr!.sequences).toHaveLength(2); + } + }); + + it("preserves restriction before unmatched paren", () => { + const result = tolerant("a = ) b = 1"); + expect(result.ok).toBe(false); + expect(result.errors.length).toBeGreaterThanOrEqual(1); + expect(result.errors[0]).toBeInstanceOf(ExpectedValueError); + + const expr = result.cst.expression; + expect(expr).not.toBeNull(); + expect(expr!.type).toBe("Expression"); + }); + + it("synthesizes placeholder for missing arg after comma", () => { + const result = tolerant("fn(a,)"); + expect(result.ok).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.errors[0]).toBeInstanceOf(ExpectedValueError); + }); + + 
it("handles multiple consecutive missing values", () => { + const result = tolerant("a = AND b = AND c = AND d = 1"); + expect(result.ok).toBe(false); + expect(result.errors).toHaveLength(3); + result.errors.forEach((e) => expect(e).toBeInstanceOf(ExpectedValueError)); + const expr = result.cst.expression; + expect(expr).not.toBeNull(); + expect(expr!.type).toBe("Expression"); + if (expr!.type === "Expression") { + expect(expr!.sequences).toHaveLength(4); + } + }); + }); + + describe("graceful maxErrors", () => { + it("never throws in tolerant mode regardless of error count", () => { + const result = parse("AND AND AND AND AND AND AND AND", { + tolerant: true, + maxErrors: 3, + }); + expect(result.errors).toHaveLength(3); + expect(result.ok).toBe(false); + expect(result.cst.type).toBe("Filter"); + }); + }); + + describe("toCleanTree rejects all error cases", () => { + it.each([ + ["insertion recovery", "a = AND b = 1"], + ["ErrorNode in tree", "a AND AND b"], + ["unclosed paren", "(a = 1"], + ["empty parens", "()"], + ["trailing content", "a = 1)"], + ["unterminated string", 'a = "hello'], + ])("%s: %s", (_label, input) => { + const result = tolerant(input); + expect(result.ok).toBe(false); + expect(toCleanTree(result)).toBeNull(); + }); + }); + + describe("full pipeline: tolerant -> toCleanTree -> transform -> evaluate", () => { + it("valid tolerant input evaluates correctly", () => { + const result = tolerant('power >= 5 AND name = "Madoka"'); + expect(result.ok).toBe(true); + const clean = toCleanTree(result); + expect(clean).not.toBeNull(); + const ast = transform(clean!); + expect(evaluate(ast, { power: 9, name: "Madoka" })).toBe(true); + expect(evaluate(ast, { power: 2, name: "Madoka" })).toBe(false); + }); + + it("dirty tolerant input is blocked by toCleanTree", () => { + const result = tolerant("a = AND b = 1"); + expect(result.ok).toBe(false); + expect(toCleanTree(result)).toBeNull(); + }); + }); +}); + +function findErrorNode(node: unknown): ErrorNode | 
null { + if (node === null || node === undefined || typeof node !== "object") return null; + const n = node as Record; + if (n["type"] === "Error") return node as ErrorNode; + for (const value of Object.values(n)) { + if (Array.isArray(value)) { + for (const item of value) { + const found = findErrorNode(item); + if (found) return found; + } + } else if (typeof value === "object" && value !== null) { + const found = findErrorNode(value); + if (found) return found; + } + } + return null; +} diff --git a/tests/readme.test.ts b/tests/readme.test.ts index da686a4..47f747e 100644 --- a/tests/readme.test.ts +++ b/tests/readme.test.ts @@ -6,6 +6,7 @@ import { evaluate, filter, parse, + toCleanTree, transform, type ASTNode, } from "../src"; @@ -50,27 +51,44 @@ describe("README examples", () => { expect(f.evaluate({ status: "contracted" })).toBe(true); }); + it("custom evaluation: parse + transform + evaluate", () => { + const ast = transform(parse('status = "contracted" AND grief <= 50')); + expect(evaluate(ast, { status: "contracted", grief: 30 })).toBe(true); + }); + it("custom evaluation: toSQL", () => { + const params: string[] = []; function toSQL(node: ASTNode | null): string { - if (node === null) return "1=1"; - switch (node.type) { - case "and": - return node.children.map(toSQL).join(" AND "); - case "or": - return `(${node.children.map(toSQL).join(" OR ")})`; - case "not": - return `NOT (${toSQL(node.child)})`; - case "restriction": - return node.comparable.type === "member" - ? `${node.comparable.path.join(".")} ${node.comparator} ?` - : `${node.comparable.qualifiedName}() ${node.comparator} ?`; - default: - return "1=1"; + if (!node) return "1=1"; + if (node.type === "and") return node.children.map(toSQL).join(" AND "); + if (node.type === "not") return `NOT (${toSQL(node.child)})`; + if (node.type === "restriction" && node.comparable.type === "member") { + params.push(node.arg?.type === "value" ? 
node.arg.value : ""); + return `${node.comparable.path.join(".")} ${node.comparator} $${params.length}`; } + return "1=1"; } - const result = toSQL(transform(parse('status = "contracted" AND grief <= 50'))); - expect(result).toBe("status = ? AND grief <= ?"); + const ast = transform(parse('status = "contracted" AND grief <= 50')); + expect(toSQL(ast)).toBe("status = $1 AND grief <= $2"); + expect(params).toEqual(["contracted", "50"]); + }); + + it("tolerant parsing: collects errors, returns best-effort CST", () => { + const result = parse("status = AND power >= 3", { tolerant: true }); + expect(result.ok).toBe(false); + expect(result.errors).toHaveLength(1); + expect(result.cst.expression).not.toBeNull(); + expect(toCleanTree(result)).toBeNull(); + }); + + it("tolerant parsing: clean tree round-trips to AST", () => { + const result = parse('status = "contracted" AND power >= 3', { tolerant: true }); + expect(result.ok).toBe(true); + const clean = toCleanTree(result); + expect(clean).not.toBeNull(); + const ast = transform(clean!); + expect(evaluate(ast, { status: "contracted", power: 5 })).toBe(true); }); it("error handling: structured FilterError", () => { @@ -86,10 +104,4 @@ describe("README examples", () => { } } }); - - it("pipeline API", () => { - const cst = parse("grief <= 50"); - const ast = transform(cst); - expect(evaluate(ast, { grief: 30 })).toBe(true); - }); }); diff --git a/tests/snapshots.test.ts b/tests/snapshots.test.ts index 2130fb8..5d0e2c5 100644 --- a/tests/snapshots.test.ts +++ b/tests/snapshots.test.ts @@ -12,6 +12,7 @@ import { FilterError, TokenKind, isComparatorKind, + toCleanTree, } from "../src"; describe("snapshot: tokenize", () => { @@ -418,6 +419,127 @@ describe("snapshot: errors", () => { }); }); +function tolerant( + input: string, + options?: { maxDepth?: number; maxLength?: number; maxErrors?: number }, +) { + return parse(input, { tolerant: true, ...options }); +} + +describe("snapshot: tokenize (tolerant)", () => { + it("bare 
!", () => { + expect(tokenize("a ! b", { tolerant: true })).toMatchSnapshot(); + }); + + it("unterminated string", () => { + expect(tokenize('a = "hello', { tolerant: true })).toMatchSnapshot(); + }); + + it("valid input (no errors)", () => { + expect(tokenize("a = 1", { tolerant: true })).toMatchSnapshot(); + }); +}); + +describe("snapshot: parse (tolerant)", () => { + it("valid input", () => { + expect(tolerant("a = 1 AND b = 2")).toMatchSnapshot(); + }); + + it("empty input", () => { + expect(tolerant("")).toMatchSnapshot(); + }); + + it("trailing content", () => { + expect(tolerant("a = 1) AND b = 2")).toMatchSnapshot(); + }); + + it("AND AND (expected expression)", () => { + expect(tolerant("a AND AND b")).toMatchSnapshot(); + }); + + it("OR OR (expected expression)", () => { + expect(tolerant("a OR OR b")).toMatchSnapshot(); + }); + + it("trailing AND", () => { + expect(tolerant("a AND")).toMatchSnapshot(); + }); + + it("empty parentheses", () => { + expect(tolerant("()")).toMatchSnapshot(); + }); + + it("unclosed parenthesis", () => { + expect(tolerant("(a = 1")).toMatchSnapshot(); + }); + + it("missing value after comparator (insertion recovery)", () => { + expect(tolerant("a = AND b = 1")).toMatchSnapshot(); + }); + + it("missing arg after comma (insertion recovery)", () => { + expect(tolerant("fn(a,)")).toMatchSnapshot(); + }); + + it("unmatched closing paren (insertion recovery)", () => { + expect(tolerant("a = ) b = 1")).toMatchSnapshot(); + }); + + it("quoted function name", () => { + expect(tolerant('"fn"()')).toMatchSnapshot(); + }); + + it("unclosed function call", () => { + expect(tolerant("fn(a, b")).toMatchSnapshot(); + }); + + it("NOT without expression", () => { + expect(tolerant("NOT AND a")).toMatchSnapshot(); + }); + + it("negative field path", () => { + expect(tolerant("a = -b.c")).toMatchSnapshot(); + }); + + it("depth limit exceeded", () => { + expect(tolerant("((a))", { maxDepth: 1 })).toMatchSnapshot(); + }); + + it("input length 
exceeded", () => { + expect(tolerant("a".repeat(100), { maxLength: 10 })).toMatchSnapshot(); + }); + + it("maxErrors stops recovery", () => { + expect(tolerant("() () () () ()", { maxErrors: 2 })).toMatchSnapshot(); + }); + + it("multiple errors", () => { + expect(tolerant("() AND ()")).toMatchSnapshot(); + }); + + it("lexer + parser errors combined", () => { + expect(tolerant('a = "unterminated AND b AND AND c')).toMatchSnapshot(); + }); +}); + +describe("snapshot: toCleanTree", () => { + it("clean tolerant CST narrows to strict FilterNode", () => { + const result = tolerant("a = 1 AND b = 2"); + const clean = toCleanTree(result); + expect(clean).toMatchSnapshot(); + }); + + it("dirty tolerant CST returns null", () => { + const result = tolerant("a AND AND b"); + expect(toCleanTree(result)).toMatchSnapshot(); + }); + + it("clean tree passes through transform", () => { + const clean = toCleanTree(tolerant("power >= 5 AND name:active")); + expect(transform(clean!)).toMatchSnapshot(); + }); +}); + describe("snapshot: TokenKind enum", () => { it("all token kinds", () => { const kinds: Record = {};