From 4164eb73cd15bf7c6b23bda7b9763f80adbcd26d Mon Sep 17 00:00:00 2001 From: Kevin De Porre Date: Wed, 10 Jun 2026 14:31:51 +0200 Subject: [PATCH 1/7] fix: enforce all where conditions when index optimization is partial OR expressions now require every disjunct to be index-optimizable; otherwise the query falls back to a full scan, since rows matched only by a non-optimizable disjunct cannot be recovered from index lookups. AND expressions keep partial index optimization but the optimizer now reports whether the matching keys are exact. When they are a superset (some conjuncts could not use an index, or a compound range was combined with other conditions), currentStateAsChanges re-checks each candidate row against the full where expression. Co-Authored-By: Claude Fable 5 --- .../index-optimization-partial-and-or.md | 9 ++ packages/db/src/collection/change-events.ts | 9 +- packages/db/src/utils/index-optimization.ts | 128 +++++++++++++----- 3 files changed, 108 insertions(+), 38 deletions(-) create mode 100644 .changeset/index-optimization-partial-and-or.md diff --git a/.changeset/index-optimization-partial-and-or.md b/.changeset/index-optimization-partial-and-or.md new file mode 100644 index 000000000..93296a50d --- /dev/null +++ b/.changeset/index-optimization-partial-and-or.md @@ -0,0 +1,9 @@ +--- +'@tanstack/db': patch +--- + +Fix incorrect results from index-optimized `where` clauses that combine indexed and non-indexed conditions. + +- `OR` expressions are now only served from indexes when every disjunct can use an index; otherwise the query falls back to a full scan. Previously, rows matched only by a non-indexed disjunct were missing from the result. +- `AND` expressions still use indexes for the conditions that have them, but the remaining conditions are now enforced by re-checking each candidate row against the full expression. Previously, non-indexed conditions were silently dropped, returning rows that did not match the query. +- Compound range conditions (e.g. `age > 5 AND age < 10`) combined with conditions on other fields no longer ignore those other conditions. diff --git a/packages/db/src/collection/change-events.ts b/packages/db/src/collection/change-events.ts index 6afac412c..3f4977b7b 100644 --- a/packages/db/src/collection/change-events.ts +++ b/packages/db/src/collection/change-events.ts @@ -138,11 +138,16 @@ export function currentStateAsChanges< ) if (optimizationResult.canOptimize) { - // Use index optimization + // Use index optimization. When the index lookup is inexact, the keys + // are a superset of the true result (some conditions could not be + // served by an index), so re-check each row against the full expression. + const filterFn = optimizationResult.isExact + ? undefined + : createFilterFunctionFromExpression(expression) const result: Array, TKey>> = [] for (const key of optimizationResult.matchingKeys) { const value = collection.get(key) - if (value !== undefined) { + if (value !== undefined && (filterFn?.(value) ?? true)) { result.push({ type: `insert`, key, diff --git a/packages/db/src/utils/index-optimization.ts b/packages/db/src/utils/index-optimization.ts index 81b111af5..7bb98ad47 100644 --- a/packages/db/src/utils/index-optimization.ts +++ b/packages/db/src/utils/index-optimization.ts @@ -29,6 +29,13 @@ import type { CollectionLike } from '../types.js' export interface OptimizationResult { canOptimize: boolean matchingKeys: Set + /** + * Whether `matchingKeys` is exactly the set of keys matching the expression. + * When `false`, the keys are a superset of the true result (some conditions + * could not be served by an index) and each row must be re-checked against + * the full expression before being included in the result. + */ + isExact: boolean } /** @@ -134,7 +141,7 @@ function optimizeQueryRecursive( } } - return { canOptimize: false, matchingKeys: new Set() } + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } /** @@ -167,6 +174,14 @@ export function canOptimizeExpression< return false } +/** + * Result of compound range optimization, including which AND arguments + * were covered by the range query so the caller can process the rest. + */ +interface CompoundRangeResult extends OptimizationResult { + coveredArgIndices: Set +} + /** * Optimizes compound range queries on the same field * Example: WHERE age > 5 AND age < 10 @@ -177,9 +192,14 @@ function optimizeCompoundRangeQuery< >( expression: BasicExpression, collection: CollectionLike, -): OptimizationResult { +): CompoundRangeResult { if (expression.type !== `func` || expression.args.length < 2) { - return { canOptimize: false, matchingKeys: new Set() } + return { + canOptimize: false, + matchingKeys: new Set(), + isExact: false, + coveredArgIndices: new Set(), + } } // Group range operations by field @@ -188,11 +208,12 @@ function optimizeCompoundRangeQuery< Array<{ operation: `gt` | `gte` | `lt` | `lte` value: any + argIndex: number }> >() // Collect all range operations from AND arguments - for (const arg of expression.args) { + for (const [argIndex, arg] of expression.args.entries()) { if (arg.type === `func` && [`gt`, `gte`, `lt`, `lte`].includes(arg.name)) { const rangeOp = arg as any if (rangeOp.args.length === 2) { @@ -238,7 +259,7 @@ function optimizeCompoundRangeQuery< if (!fieldOperations.has(fieldKey)) { fieldOperations.set(fieldKey, []) } - fieldOperations.get(fieldKey)!.push({ operation, value }) + fieldOperations.get(fieldKey)!.push({ operation, value, argIndex }) } } } @@ -293,12 +314,22 @@ function optimizeCompoundRangeQuery< toInclusive, }) - return { canOptimize: true, matchingKeys } + return { + canOptimize: true, + matchingKeys, + isExact: true, + coveredArgIndices: new Set(operations.map((op) => op.argIndex)), + } } } } - return { canOptimize: false, matchingKeys: new Set() } + return { + canOptimize: false, + matchingKeys: new Set(), + isExact: false, + coveredArgIndices: new Set(), + } } /** @@ -312,7 +343,7 @@ function optimizeSimpleComparison< collection: CollectionLike, ): OptimizationResult { if (expression.type !== `func` || expression.args.length !== 2) { - return { canOptimize: false, matchingKeys: new Set() } + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } const leftArg = expression.args[0]! @@ -362,15 +393,15 @@ function optimizeSimpleComparison< // Check if the index supports this operation if (!index.supports(indexOperation)) { - return { canOptimize: false, matchingKeys: new Set() } + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } const matchingKeys = index.lookup(indexOperation, queryValue) - return { canOptimize: true, matchingKeys } + return { canOptimize: true, matchingKeys, isExact: true } } } - return { canOptimize: false, matchingKeys: new Set() } + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } /** @@ -412,22 +443,38 @@ function optimizeAndExpression( collection: CollectionLike, ): OptimizationResult { if (expression.type !== `func` || expression.args.length < 2) { - return { canOptimize: false, matchingKeys: new Set() } + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } // First, try to optimize compound range queries on the same field + // (e.g. age > 5 AND age < 10 becomes a single range query) const compoundRangeResult = optimizeCompoundRangeQuery(expression, collection) - if (compoundRangeResult.canOptimize) { - return compoundRangeResult - } + const coveredArgIndices = compoundRangeResult.canOptimize + ? compoundRangeResult.coveredArgIndices + : new Set() const results: Array> = [] + if (compoundRangeResult.canOptimize) { + results.push(compoundRangeResult) + } - // Try to optimize each part, keep the optimizable ones - for (const arg of expression.args) { + // Try to optimize the remaining conjuncts, keep the optimizable ones. + // Conjuncts that cannot use an index make the result inexact: the + // intersection is then a superset of the true result and must be + // re-filtered against the full expression by the caller. + let allConjunctsExact = true + for (const [argIndex, arg] of expression.args.entries()) { + if (coveredArgIndices.has(argIndex)) { + continue + } const result = optimizeQueryRecursive(arg, collection) if (result.canOptimize) { results.push(result) + if (!result.isExact) { + allConjunctsExact = false + } + } else { + allConjunctsExact = false } } @@ -435,10 +482,14 @@ function optimizeAndExpression( // Use intersectSets utility for AND logic const allMatchingSets = results.map((r) => r.matchingKeys) const intersectedKeys = intersectSets(allMatchingSets) - return { canOptimize: true, matchingKeys: intersectedKeys } + return { + canOptimize: true, + matchingKeys: intersectedKeys, + isExact: allConjunctsExact, + } } - return { canOptimize: false, matchingKeys: new Set() } + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } /** @@ -464,27 +515,31 @@ function optimizeOrExpression( collection: CollectionLike, ): OptimizationResult { if (expression.type !== `func` || expression.args.length < 2) { - return { canOptimize: false, matchingKeys: new Set() } + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } const results: Array> = [] - // Try to optimize each part, keep the optimizable ones + // Every disjunct must be optimizable: rows matched only by a disjunct + // that cannot use an index would be missing from the union, and no + // post-filtering can recover them. In that case fall back to a full scan. for (const arg of expression.args) { const result = optimizeQueryRecursive(arg, collection) - if (result.canOptimize) { - results.push(result) + if (!result.canOptimize) { + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } + results.push(result) } - if (results.length > 0) { - // Use unionSets utility for OR logic - const allMatchingSets = results.map((r) => r.matchingKeys) - const unionedKeys = unionSets(allMatchingSets) - return { canOptimize: true, matchingKeys: unionedKeys } + // Use unionSets utility for OR logic + const allMatchingSets = results.map((r) => r.matchingKeys) + const unionedKeys = unionSets(allMatchingSets) + return { + canOptimize: true, + matchingKeys: unionedKeys, + // An inexact (superset) disjunct makes the union a superset as well + isExact: results.every((r) => r.isExact), } - - return { canOptimize: false, matchingKeys: new Set() } } /** @@ -498,8 +553,9 @@ function canOptimizeOrExpression< return false } - // If any argument can be optimized, we can gain some speedup - return expression.args.some((arg) => canOptimizeExpression(arg, collection)) + // Every disjunct must be optimizable, otherwise the union would miss + // rows matched only by the non-optimizable disjuncts + return expression.args.every((arg) => canOptimizeExpression(arg, collection)) } /** @@ -513,7 +569,7 @@ function optimizeInArrayExpression< collection: CollectionLike, ): OptimizationResult { if (expression.type !== `func` || expression.args.length !== 2) { - return { canOptimize: false, matchingKeys: new Set() } + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } const fieldArg = expression.args[0]! @@ -532,7 +588,7 @@ function optimizeInArrayExpression< // Check if the index supports IN operation if (index.supports(`in`)) { const matchingKeys = index.lookup(`in`, values) - return { canOptimize: true, matchingKeys } + return { canOptimize: true, matchingKeys, isExact: true } } else if (index.supports(`eq`)) { // Fallback to multiple equality lookups const matchingKeys = new Set() @@ -542,12 +598,12 @@ function optimizeInArrayExpression< matchingKeys.add(key) } } - return { canOptimize: true, matchingKeys } + return { canOptimize: true, matchingKeys, isExact: true } } } } - return { canOptimize: false, matchingKeys: new Set() } + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } /** From f1e5bb06eb9a21f4685f690ac08cdb16bca08cdb Mon Sep 17 00:00:00 2001 From: Kevin De Porre Date: Wed, 10 Jun 2026 14:31:56 +0200 Subject: [PATCH 2/7] fix: apply strictest bound in compound range queries and fix related range edge cases Compound range conditions sharing a boundary value (gte(x,5) AND gt(x,5)) now keep the strict bound regardless of argument order. Bound values are compared with the same comparator the indexes use so dates and locale strings behave correctly. Two further issues surfaced by the regression tests: - One-sided compound ranges passed an explicit undefined bound to rangeQuery, which treats present-but-undefined as the undefined sentinel and returned an empty result. Bounds are now only passed when they exist. - BTreeIndex's exclusive lower bound check compared the normalized indexed value against the raw query value, so gt on date fields included the boundary row. It now compares against the normalized key. Co-Authored-By: Claude Fable 5 --- .../index-optimization-partial-and-or.md | 3 + packages/db/src/indexes/btree-index.ts | 5 +- packages/db/src/utils/index-optimization.ts | 58 ++++++++++++------- 3 files changed, 44 insertions(+), 22 deletions(-) diff --git a/.changeset/index-optimization-partial-and-or.md b/.changeset/index-optimization-partial-and-or.md index 93296a50d..469975ad8 100644 --- a/.changeset/index-optimization-partial-and-or.md +++ b/.changeset/index-optimization-partial-and-or.md @@ -7,3 +7,6 @@ Fix incorrect results from index-optimized `where` clauses that combine indexed - `OR` expressions are now only served from indexes when every disjunct can use an index; otherwise the query falls back to a full scan. Previously, rows matched only by a non-indexed disjunct were missing from the result. - `AND` expressions still use indexes for the conditions that have them, but the remaining conditions are now enforced by re-checking each candidate row against the full expression. Previously, non-indexed conditions were silently dropped, returning rows that did not match the query. - Compound range conditions (e.g. `age > 5 AND age < 10`) combined with conditions on other fields no longer ignore those other conditions. +- Compound range conditions sharing the same boundary value (e.g. `age >= 5 AND age > 5`) now apply the strictest bound regardless of the order the conditions appear in, using the same value comparison semantics as the indexes (dates, locale strings, ...). +- Compound range conditions that only bound one side (e.g. `age > 5 AND age >= 8`) no longer return an empty result. +- Strict range comparisons (`gt`/`lt`) on BTree-indexed fields holding normalized values such as dates now correctly exclude the boundary value. diff --git a/packages/db/src/indexes/btree-index.ts b/packages/db/src/indexes/btree-index.ts index 17608950a..1d04d928d 100644 --- a/packages/db/src/indexes/btree-index.ts +++ b/packages/db/src/indexes/btree-index.ts @@ -247,7 +247,10 @@ export class BTreeIndex< toKey, toInclusive, (indexedValue, _) => { - if (!fromInclusive && this.compareFn(indexedValue, from) === 0) { + // Compare against the normalized key: indexed values are stored + // normalized (e.g. dates as timestamps), the raw `from` would + // never compare equal to them + if (!fromInclusive && this.compareFn(indexedValue, fromKey) === 0) { // the B+ tree `forRange` method does not support exclusive lower bounds // so we need to exclude it manually return diff --git a/packages/db/src/utils/index-optimization.ts b/packages/db/src/utils/index-optimization.ts index 7bb98ad47..7d88ddc1e 100644 --- a/packages/db/src/utils/index-optimization.ts +++ b/packages/db/src/utils/index-optimization.ts @@ -18,6 +18,7 @@ import { DEFAULT_COMPARE_OPTIONS } from '../utils.js' import { ReverseIndex } from '../indexes/reverse-index.js' import { hasVirtualPropPath } from '../virtual-props.js' +import { makeComparator } from './comparison.js' import type { CompareOptions } from '../query/builder/types.js' import type { IndexInterface, IndexOperation } from '../indexes/base-index.js' import type { BasicExpression } from '../query/ir.js' @@ -272,7 +273,18 @@ function optimizeCompoundRangeQuery< const index = findIndexForField(collection, fieldPath) if (index && index.supports(`gt`) && index.supports(`lt`)) { - // Build range query options + // Compare values with the same semantics the index uses (dates, + // locale strings, ...), in ascending order since bounds are about + // value order regardless of the index direction + const compare = makeComparator({ + ...DEFAULT_COMPARE_OPTIONS, + ...collection.compareOptions, + direction: `asc`, + }) + + // Build range query options, keeping the strictest bound on each + // side: a larger lower bound (or smaller upper bound) wins, and at + // equal values the exclusive operation wins over the inclusive one let from: any = undefined let to: any = undefined let fromInclusive = true @@ -281,38 +293,42 @@ function optimizeCompoundRangeQuery< for (const { operation, value } of operations) { switch (operation) { case `gt`: - if (from === undefined || value > from) { + case `gte`: { + const cmp = from === undefined ? 1 : compare(value, from) + if (cmp > 0) { from = value + fromInclusive = operation === `gte` + } else if (cmp === 0 && operation === `gt`) { fromInclusive = false } break - case `gte`: - if (from === undefined || value > from) { - from = value - fromInclusive = true - } - break + } case `lt`: - if (to === undefined || value < to) { + case `lte`: { + const cmp = to === undefined ? -1 : compare(value, to) + if (cmp < 0) { to = value + toInclusive = operation === `lte` + } else if (cmp === 0 && operation === `lt`) { toInclusive = false } break - case `lte`: - if (to === undefined || value < to) { - to = value - toInclusive = true - } - break + } } } - const matchingKeys = (index as any).rangeQuery({ - from, - to, - fromInclusive, - toInclusive, - }) + // Only pass the bounds that exist: rangeQuery distinguishes an + // absent bound (open-ended) from an explicit undefined value + const rangeOptions: Record = {} + if (from !== undefined) { + rangeOptions.from = from + rangeOptions.fromInclusive = fromInclusive + } + if (to !== undefined) { + rangeOptions.to = to + rangeOptions.toInclusive = toInclusive + } + const matchingKeys = (index as any).rangeQuery(rangeOptions) return { canOptimize: true, From 3df6b01990e0e2a6647d88c1c700beaacf97e177 Mon Sep 17 00:00:00 2001 From: Kevin De Porre Date: Thu, 18 Jun 2026 10:05:24 +0200 Subject: [PATCH 3/7] test: add failing test for exclusive lower bound without a from bound rangeQuery with only an upper bound but fromInclusive: false must not drop the minimum key, as there is no lower bound to exclude against. This regression was introduced when the exclusive lower-bound check started comparing against the normalized fromKey (which defaults to minKey when no from bound is given). This test currently fails. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../tests/btree-index-undefined-values.test.ts | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/packages/db/tests/btree-index-undefined-values.test.ts b/packages/db/tests/btree-index-undefined-values.test.ts index 11e29690c..1510c02e3 100644 --- a/packages/db/tests/btree-index-undefined-values.test.ts +++ b/packages/db/tests/btree-index-undefined-values.test.ts @@ -248,6 +248,24 @@ describe(`BTreeIndex - undefined value handling`, () => { expect(withoutFrom.size).toBe(3) }) + it(`should not drop the minimum key when an upper-only range is exclusive on the (absent) lower bound`, () => { + // When no `from` bound is provided, `fromInclusive` must not cause the + // smallest key to be excluded: there is no lower bound to exclude + // against. Only an explicitly provided exclusive lower bound should + // drop its boundary value. + const index = createIndex(`value`) + index.add(`a`, { value: 1 }) + index.add(`b`, { value: 5 }) + index.add(`c`, { value: 10 }) + + const result = index.rangeQuery({ to: 10, fromInclusive: false }) + + expect(result.size).toBe(3) + expect(result).toContain(`a`) + expect(result).toContain(`b`) + expect(result).toContain(`c`) + }) + it(`should handle range query from undefined to undefined`, () => { const index = createIndex(`value`) index.add(`a`, { value: undefined }) From 25e0896c52e8477ebb62f04394d4746aec532e73 Mon Sep 17 00:00:00 2001 From: Kevin De Porre Date: Thu, 18 Jun 2026 10:12:54 +0200 Subject: [PATCH 4/7] fix: re-filter compound range queries that use a null/undefined bound A comparison against null/undefined is never true, but in an index those values sort as the smallest key, so an index range query cannot represent such a bound. Compound range optimization now tracks selected bounds with explicit hasFromBound/hasToBound flags (separate from the bound values) and marks the result inexact when any bound value is null/undefined, so the caller re-filters against the full expression. The inexactness now also propagates through the AND combiner, which previously ignored the compound range's exactness. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../index-optimization-partial-and-or.md | 1 + packages/db/src/utils/index-optimization.ts | 40 ++++++++++++++----- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/.changeset/index-optimization-partial-and-or.md b/.changeset/index-optimization-partial-and-or.md index 469975ad8..38ee6ab60 100644 --- a/.changeset/index-optimization-partial-and-or.md +++ b/.changeset/index-optimization-partial-and-or.md @@ -10,3 +10,4 @@ Fix incorrect results from index-optimized `where` clauses that combine indexed - Compound range conditions sharing the same boundary value (e.g. `age >= 5 AND age > 5`) now apply the strictest bound regardless of the order the conditions appear in, using the same value comparison semantics as the indexes (dates, locale strings, ...). - Compound range conditions that only bound one side (e.g. `age > 5 AND age >= 8`) no longer return an empty result. - Strict range comparisons (`gt`/`lt`) on BTree-indexed fields holding normalized values such as dates now correctly exclude the boundary value. +- Compound range conditions with a `null`/`undefined` bound (e.g. `gt(score, undefined)`) now re-filter against the full expression instead of returning index-ordered rows, matching the semantics of a full scan (a comparison against `null`/`undefined` is never true). diff --git a/packages/db/src/utils/index-optimization.ts b/packages/db/src/utils/index-optimization.ts index 7d88ddc1e..a5ebaddac 100644 --- a/packages/db/src/utils/index-optimization.ts +++ b/packages/db/src/utils/index-optimization.ts @@ -284,19 +284,33 @@ function optimizeCompoundRangeQuery< // Build range query options, keeping the strictest bound on each // side: a larger lower bound (or smaller upper bound) wins, and at - // equal values the exclusive operation wins over the inclusive one + // equal values the exclusive operation wins over the inclusive one. + // `hasFromBound`/`hasToBound` track whether a bound was selected, + // separately from the bound value (which may legitimately be falsy). let from: any = undefined let to: any = undefined + let hasFromBound = false + let hasToBound = false let fromInclusive = true let toInclusive = true + // A comparison against null/undefined is never true, but in an index + // those values sort as the smallest key, so a range query cannot + // represent such a bound. Track it and force a re-filter instead of + // claiming the result is exact. + let hasNullBound = false for (const { operation, value } of operations) { + if (value == null) { + hasNullBound = true + continue + } switch (operation) { case `gt`: case `gte`: { - const cmp = from === undefined ? 1 : compare(value, from) + const cmp = hasFromBound ? compare(value, from) : 1 if (cmp > 0) { from = value + hasFromBound = true fromInclusive = operation === `gte` } else if (cmp === 0 && operation === `gt`) { fromInclusive = false @@ -305,9 +319,10 @@ function optimizeCompoundRangeQuery< } case `lt`: case `lte`: { - const cmp = to === undefined ? -1 : compare(value, to) + const cmp = hasToBound ? compare(value, to) : -1 if (cmp < 0) { to = value + hasToBound = true toInclusive = operation === `lte` } else if (cmp === 0 && operation === `lt`) { toInclusive = false @@ -317,14 +332,14 @@ function optimizeCompoundRangeQuery< } } - // Only pass the bounds that exist: rangeQuery distinguishes an - // absent bound (open-ended) from an explicit undefined value + // Only pass the bounds that were selected: rangeQuery distinguishes + // an absent bound (open-ended) from an explicitly provided one const rangeOptions: Record = {} - if (from !== undefined) { + if (hasFromBound) { rangeOptions.from = from rangeOptions.fromInclusive = fromInclusive } - if (to !== undefined) { + if (hasToBound) { rangeOptions.to = to rangeOptions.toInclusive = toInclusive } @@ -333,7 +348,9 @@ function optimizeCompoundRangeQuery< return { canOptimize: true, matchingKeys, - isExact: true, + // If a null/undefined bound was present, the range query result is + // a superset of the real matches and must be re-filtered + isExact: !hasNullBound, coveredArgIndices: new Set(operations.map((op) => op.argIndex)), } } @@ -477,8 +494,11 @@ function optimizeAndExpression( // Try to optimize the remaining conjuncts, keep the optimizable ones. // Conjuncts that cannot use an index make the result inexact: the // intersection is then a superset of the true result and must be - // re-filtered against the full expression by the caller. - let allConjunctsExact = true + // re-filtered against the full expression by the caller. The compound + // range result may itself be inexact (e.g. a null/undefined bound). + let allConjunctsExact = !compoundRangeResult.canOptimize + ? true + : compoundRangeResult.isExact for (const [argIndex, arg] of expression.args.entries()) { if (coveredArgIndices.has(argIndex)) { continue From 19dd5cfdbadebcccb9809a4dfcbf74460ff57e3f Mon Sep 17 00:00:00 2001 From: Kevin De Porre Date: Thu, 18 Jun 2026 10:13:04 +0200 Subject: [PATCH 5/7] fix: only exclude exclusive lower bound when a from bound is provided BTreeIndex.rangeQuery dropped the minimum key when called with fromInclusive: false but no from bound, because fromKey defaults to the minimum key and the exclusion check did not verify a lower bound was actually given. The exclusion is now guarded by hasFrom. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/db/src/indexes/btree-index.ts | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/packages/db/src/indexes/btree-index.ts b/packages/db/src/indexes/btree-index.ts index 1d04d928d..b46c70710 100644 --- a/packages/db/src/indexes/btree-index.ts +++ b/packages/db/src/indexes/btree-index.ts @@ -247,10 +247,16 @@ export class BTreeIndex< toKey, toInclusive, (indexedValue, _) => { - // Compare against the normalized key: indexed values are stored - // normalized (e.g. dates as timestamps), the raw `from` would - // never compare equal to them - if (!fromInclusive && this.compareFn(indexedValue, fromKey) === 0) { + // Only exclude the boundary when an exclusive lower bound was + // actually provided. Without a `from` bound, `fromKey` defaults to + // the minimum key and must not be dropped. Compare against the + // normalized key since indexed values are stored normalized + // (e.g. dates as timestamps), so the raw `from` would never match. + if ( + hasFrom && + !fromInclusive && + this.compareFn(indexedValue, fromKey) === 0 + ) { // the B+ tree `forRange` method does not support exclusive lower bounds // so we need to exclude it manually return From bbec9cf64cc700e41ffbb9670e3d9e9b01574957 Mon Sep 17 00:00:00 2001 From: Kevin De Porre Date: Mon, 22 Jun 2026 10:13:40 +0200 Subject: [PATCH 6/7] fix: re-filter index results that can include nullish-keyed rows BTree indexes store and return rows with a null/undefined indexed value (they sort as the smallest key), but a comparison against null/undefined is never true. The simple-comparison, IN, and compound-range optimizers now report such results as inexact so the caller re-checks candidates against the full expression: - eq/gt/gte: inexact when the query value is nullish (gt/gte with a non-null bound stay exact, since the bound excludes the bottom-sorted nullish rows) - lt/lte: conservatively inexact, as the open lower bound includes nullish-keyed rows - IN: inexact when any listed value is nullish - compound range: exact only when a non-null lower bound is present to exclude the nullish rows Co-Authored-By: Claude Opus 4.8 (1M context) --- .../index-optimization-partial-and-or.md | 1 + packages/db/src/utils/index-optimization.ts | 38 ++++++++++++++++--- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/.changeset/index-optimization-partial-and-or.md b/.changeset/index-optimization-partial-and-or.md index 38ee6ab60..a67048c9d 100644 --- a/.changeset/index-optimization-partial-and-or.md +++ b/.changeset/index-optimization-partial-and-or.md @@ -11,3 +11,4 @@ Fix incorrect results from index-optimized `where` clauses that combine indexed - Compound range conditions that only bound one side (e.g. `age > 5 AND age >= 8`) no longer return an empty result. - Strict range comparisons (`gt`/`lt`) on BTree-indexed fields holding normalized values such as dates now correctly exclude the boundary value. - Compound range conditions with a `null`/`undefined` bound (e.g. `gt(score, undefined)`) now re-filter against the full expression instead of returning index-ordered rows, matching the semantics of a full scan (a comparison against `null`/`undefined` is never true). +- Index-optimized `eq`, `IN`, and range queries on a field that has rows with `null`/`undefined` values no longer leak those rows into results. BTree indexes store and return such rows (they sort as the smallest key), but a comparison against `null`/`undefined` is never true, so these results are now re-filtered against the full expression to stay equivalent to a full scan. diff --git a/packages/db/src/utils/index-optimization.ts b/packages/db/src/utils/index-optimization.ts index a5ebaddac..9f8698331 100644 --- a/packages/db/src/utils/index-optimization.ts +++ b/packages/db/src/utils/index-optimization.ts @@ -348,9 +348,13 @@ function optimizeCompoundRangeQuery< return { canOptimize: true, matchingKeys, - // If a null/undefined bound was present, the range query result is - // a superset of the real matches and must be re-filtered - isExact: !hasNullBound, + // The range result is exact only when it cannot include rows with a + // nullish indexed value (which a comparison would reject but the + // index returns, as they sort as the smallest key). That requires a + // non-nullish lower bound to exclude them: without `hasFromBound` + // the range is open at the bottom and captures those rows, and a + // nullish bound value (`hasNullBound`) can never bound them out. + isExact: hasFromBound && !hasNullBound, coveredArgIndices: new Set(operations.map((op) => op.argIndex)), } } @@ -430,7 +434,23 @@ function optimizeSimpleComparison< } const matchingKeys = index.lookup(indexOperation, queryValue) - return { canOptimize: true, matchingKeys, isExact: true } + + // A comparison against null/undefined is never true, but BTree indexes + // store and return rows with a nullish indexed value (they sort as the + // smallest key). Determine whether the index result is exact or a + // superset that the caller must re-filter: + // - eq/gt/gte: a nullish query value matches nothing, while the index + // would still return nullish-keyed rows -> inexact when nullish. A + // non-nullish lower bound (gt/gte) excludes the bottom-sorted nullish + // rows, so those stay exact. + // - lt/lte: the open lower bound always includes nullish-keyed rows, + // so the result is conservatively inexact. + const isExact = + operation === `lt` || operation === `lte` + ? false + : queryValue != null + + return { canOptimize: true, matchingKeys, isExact } } } @@ -620,11 +640,17 @@ function optimizeInArrayExpression< const values = (arrayArg as any).value const index = findIndexForField(collection, fieldPath) + // A nullish member can never be matched by `IN` (a comparison against + // null/undefined is never true), but the index would still return rows + // with a nullish indexed value. When the list contains a nullish member + // the result is a superset that the caller must re-filter. + const isExact = !values.some((value: any) => value == null) + if (index) { // Check if the index supports IN operation if (index.supports(`in`)) { const matchingKeys = index.lookup(`in`, values) - return { canOptimize: true, matchingKeys, isExact: true } + return { canOptimize: true, matchingKeys, isExact } } else if (index.supports(`eq`)) { // Fallback to multiple equality lookups const matchingKeys = new Set() @@ -634,7 +660,7 @@ function optimizeInArrayExpression< matchingKeys.add(key) } } - return { canOptimize: true, matchingKeys, isExact: true } + return { canOptimize: true, matchingKeys, isExact } } } } From f2095a0c0765d4c3d9afe82a270184b992e80c02 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Mon, 22 Jun 2026 08:14:50 +0000 Subject: [PATCH 7/7] ci: apply automated fixes --- packages/db/src/utils/index-optimization.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/packages/db/src/utils/index-optimization.ts b/packages/db/src/utils/index-optimization.ts index 9f8698331..103cd6355 100644 --- a/packages/db/src/utils/index-optimization.ts +++ b/packages/db/src/utils/index-optimization.ts @@ -446,9 +446,7 @@ function optimizeSimpleComparison< // - lt/lte: the open lower bound always includes nullish-keyed rows, // so the result is conservatively inexact. const isExact = - operation === `lt` || operation === `lte` - ? false - : queryValue != null + operation === `lt` || operation === `lte` ? false : queryValue != null return { canOptimize: true, matchingKeys, isExact } }