From 20b5e419e93397d2ff5ac08fe5476e6e7a8e40d9 Mon Sep 17 00:00:00 2001 From: oddkit-150 build agent Date: Tue, 28 Apr 2026 14:07:00 +0000 Subject: [PATCH 1/8] =?UTF-8?q?feat(search,preflight):=20result=5Fgrouping?= =?UTF-8?q?=20parameter=20=E2=80=94=20overlay-first=20ranking=20when=20kno?= =?UTF-8?q?wledge=5Fbase=5Furl=20set=20(#150)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds result_grouping parameter to search and preflight actions: - 'merged' (default when KB unset): pure BM25 score order (no behavior change) - 'overlay_first' (default when KB set): overlay docs ranked above baseline - 'grouped': separate overlay_hits/baseline_hits arrays in response Changes: - orchestrate.ts: ResultGrouping type, partitionBySource utility, runSearch and runPreflight accept resolvedGrouping, conditional default in dispatcher - index.ts: result_grouping in unified oddkit + oddkit_search + oddkit_preflight tool schemas, threaded through handler args - telemetry.ts: parseToolCall extracts result_grouping, blob9 repurposed - CHANGELOG.md: entry under [Unreleased] - workers/test/result-grouping.test.mjs: 24 tests covering partition logic, conditional defaults, grouped shape, preflight partition, telemetry recording - workers/test/telemetry-integration.test.mjs: updated blob count assertion No version bump (orchestrator decides packaging cadence). No changes to workers/src/bm25.ts. No cache-key change. --- CHANGELOG.md | 4 + workers/src/index.ts | 5 + workers/src/orchestrate.ts | 118 ++++- workers/src/telemetry.ts | 21 +- workers/test/result-grouping.test.mjs | 533 ++++++++++++++++++++ workers/test/telemetry-integration.test.mjs | 6 +- 6 files changed, 651 insertions(+), 36 deletions(-) create mode 100644 workers/test/result-grouping.test.mjs diff --git a/CHANGELOG.md b/CHANGELOG.md index 60a424f..75a7faf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- **`result_grouping` parameter for search and preflight** — when `knowledge_base_url` is set, overlay (knowledge-base) docs are ranked above baseline docs by default (`"overlay_first"`). Callers can explicitly choose `"merged"` (pure BM25 score order, the previous default), `"overlay_first"` (overlay before baseline, preserving score order within each partition), or `"grouped"` (separate `overlay_hits`/`baseline_hits` arrays in search, `start_here_overlay`/`start_here_baseline` in preflight). Conditional default: `knowledge_base_url` unset → `"merged"` (no behavior change); `knowledge_base_url` set → `"overlay_first"`. Telemetry records the caller-specified value in blob9 (`result_grouping`). Closes #150. + ## [0.26.0] - 2026-04-26 ### Added diff --git a/workers/src/index.ts b/workers/src/index.ts index 079e139..15150b9 100644 --- a/workers/src/index.ts +++ b/workers/src/index.ts @@ -220,6 +220,7 @@ Use when: "canon-tier-2", "canon-tier-1", "published-essay", ]).optional().describe("Optional mode hint. Epistemic modes (exploration/planning/execution) or writing-lifecycle modes (voice-dump/drafting/peer-review-ready/canon-tier-2/canon-tier-1/published-essay). Sourced from odd/challenge/stakes-calibration."), knowledge_base_url: z.string().optional().describe("Optional GitHub repo URL for your knowledge base. When set, strict mode is automatic: missing files fall through to the bundled governance tier rather than silently substituting from the default knowledge base."), + result_grouping: z.enum(["merged", "overlay_first", "grouped"]).optional().describe("For action='search' or 'preflight': controls how overlay (knowledge_base) and baseline results are ordered. 'merged' = pure score order (default when knowledge_base_url unset). 'overlay_first' = overlay docs ranked above baseline (default when knowledge_base_url set). 'grouped' = separate overlay_hits/baseline_hits arrays in response."), include_metadata: z.boolean().optional().describe("When true, search/get responses include a metadata object with full parsed frontmatter. Default: false."), section: z.string().optional().describe("For action='get': extract only the named ## section from the document. Returns section content or available sections if not found."), sort_by: z.enum(["date", "path"]).optional().describe("For action='catalog': sort articles. 'date' returns newest first (requires frontmatter). 'path' returns all docs alphabetically, including undated."), @@ -241,6 +242,7 @@ Use when: context: args.context, mode: args.mode, knowledge_base_url: args.knowledge_base_url, + result_grouping: args.result_grouping, include_metadata: args.include_metadata, section: args.section, sort_by: args.sort_by, @@ -321,6 +323,7 @@ Use when: schema: { input: z.string().describe("Natural language query or tags to search for."), knowledge_base_url: z.string().optional().describe("Optional: GitHub repo URL for your knowledge base. When set, strict mode is automatic: missing files fall through to the bundled governance tier."), + result_grouping: z.enum(["merged", "overlay_first", "grouped"]).optional().describe("Controls how overlay (knowledge_base) and baseline results are ordered. 'merged' = pure score order (default when knowledge_base_url unset). 'overlay_first' = overlay docs ranked above baseline (default when knowledge_base_url set). 'grouped' = separate overlay_hits/baseline_hits arrays in response."), include_metadata: z.boolean().optional().describe("When true, each hit includes a metadata object with full parsed frontmatter. Default: false."), }, annotations: { readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true }, @@ -386,6 +389,7 @@ Use when: schema: { input: z.string().describe("Description of what you're about to implement."), knowledge_base_url: z.string().optional().describe("Optional: GitHub repo URL for your knowledge base. When set, strict mode is automatic: missing files fall through to the bundled governance tier."), + result_grouping: z.enum(["merged", "overlay_first", "grouped"]).optional().describe("Controls how overlay (knowledge_base) and baseline start_here results are ordered. 'merged' = pure score order (default when knowledge_base_url unset). 'overlay_first' = overlay docs ranked above baseline (default when knowledge_base_url set). 'grouped' = separate start_here_overlay/start_here_baseline arrays."), }, annotations: { readOnlyHint: true, destructiveHint: false, idempotentHint: true, openWorldHint: true }, }, @@ -432,6 +436,7 @@ Use when: context: args.context as string | undefined, mode: args.mode as string | undefined, knowledge_base_url: args.knowledge_base_url as string | undefined, + result_grouping: args.result_grouping as "merged" | "overlay_first" | "grouped" | undefined, include_metadata: args.include_metadata as boolean | undefined, section: args.section as string | undefined, sort_by: args.sort_by as string | undefined, diff --git a/workers/src/orchestrate.ts b/workers/src/orchestrate.ts index c99827f..f7c00ed 100644 --- a/workers/src/orchestrate.ts +++ b/workers/src/orchestrate.ts @@ -225,12 +225,15 @@ let cachedGatePrerequisites: GatePrerequisite[] | null = null; let cachedGatePrerequisitesKnowledgeBaseUrl: string | undefined = undefined; let cachedGatePrerequisitesSource: "knowledge_base" | "minimal" = "minimal"; +export type ResultGrouping = "merged" | "overlay_first" | "grouped"; + export interface UnifiedParams { action: string; input: string; context?: string; mode?: string; knowledge_base_url?: string; + result_grouping?: ResultGrouping; include_metadata?: boolean; section?: string; sort_by?: string; @@ -1320,6 +1323,20 @@ function scoreEntries(entries: IndexEntry[], query: string): Array b.score - a.score); } +// ────────────────────────────────────────────────────────────────────────────── +// Result grouping — stable partition by source (overlay=canon vs baseline) +// Preserves BM25 score order within each partition. Single forward pass — no re-sort. +// ────────────────────────────────────────────────────────────────────────────── + +export function partitionBySource( + arr: T[], +): { overlay: T[]; baseline: T[] } { + const overlay: T[] = []; + const baseline: T[] = []; + for (const h of arr) (h.source === "canon" ? overlay : baseline).push(h); + return { overlay, baseline }; +} + // ────────────────────────────────────────────────────────────────────────────── // Individual action handlers // ────────────────────────────────────────────────────────────────────────────── @@ -1330,6 +1347,7 @@ async function runSearch( knowledgeBaseUrl?: string, state?: OddkitState, includeMetadata?: boolean, + resolvedGrouping: ResultGrouping = "merged", ): Promise { const startMs = Date.now(); const index = await fetcher.getIndex(knowledgeBaseUrl); @@ -1373,12 +1391,22 @@ async function runSearch( }; } + // Apply result_grouping partition (overlay_first / grouped re-order; + // merged preserves BM25 score order). Single forward pass — no re-sort. + let orderedHits = hits; + let isGrouped = false; + if (resolvedGrouping === "overlay_first" || resolvedGrouping === "grouped") { + const { overlay, baseline } = partitionBySource(hits); + orderedHits = [...overlay, ...baseline]; + isGrouped = resolvedGrouping === "grouped"; + } + // Cache for fetched content to avoid redundant fetches when include_metadata is enabled const contentCache = new Map(); - // Fetch excerpts for top results + // Fetch excerpts for top results (uses partitioned order) const evidence: Array<{ quote: string; citation: string; source: string }> = []; - for (const entry of hits.slice(0, 3)) { + for (const entry of orderedHits.slice(0, 3)) { const content = await fetcher.getFile(entry.path, knowledgeBaseUrl); if (content) { contentCache.set(entry.path, content); @@ -1394,17 +1422,18 @@ async function runSearch( } const assistantLines = [ - `Found ${hits.length} result(s) for: "${input}"`, + `Found ${orderedHits.length} result(s) for: "${input}"`, "", ...evidence.map((e) => `> ${e.quote}\n— ${e.citation} (${e.source})`), "", "Results:", - ...hits.map((r) => `- \`${r.path}\` — ${r.title} (score: ${r.score.toFixed(2)}, ${r.source})`), + ...orderedHits.map((r) => `- \`${r.path}\` — ${r.title} (score: ${r.score.toFixed(2)}, ${r.source})`), ]; - // When include_metadata is requested, fetch and parse frontmatter for each hit + // When include_metadata is requested, fetch and parse frontmatter for each hit. + // Iterates orderedHits so metadata-enriched array preserves the partitioned order. const hitsWithMetadata: Array> = []; - for (const h of hits) { + for (const h of orderedHits) { const hit: Record = { uri: h.uri, path: h.path, @@ -1425,14 +1454,27 @@ async function runSearch( hitsWithMetadata.push(hit); } + // Build result object — add overlay_hits / baseline_hits for "grouped" mode + const resultObj: Record = { + status: "FOUND", + hits: hitsWithMetadata, + evidence, + docs_considered: index.entries.length, + }; + + if (isGrouped) { + const overlayHits: Record[] = []; + const baselineHits: Record[] = []; + for (const h of hitsWithMetadata) { + (h.source === "canon" ? overlayHits : baselineHits).push(h); + } + resultObj.overlay_hits = overlayHits; + resultObj.baseline_hits = baselineHits; + } + return { action: "search", - result: { - status: "FOUND", - hits: hitsWithMetadata, - evidence, - docs_considered: index.entries.length, - }, + result: resultObj, state: updatedState, assistant_text: assistantLines.join("\n").trim(), debug: { @@ -2327,22 +2369,32 @@ async function runPreflight( fetcher: KnowledgeBaseFetcher, knowledgeBaseUrl?: string, state?: OddkitState, + resolvedGrouping: ResultGrouping = "merged", ): Promise { const startMs = Date.now(); const index = await fetcher.getIndex(knowledgeBaseUrl); const topic = message.replace(/^preflight:\s*/i, "").trim(); - const results = scoreEntries(index.entries, topic).slice(0, 5); + + // Score all entries, then apply partition before slicing + const allScored = scoreEntries(index.entries, topic); + let orderedScored = allScored; + if (resolvedGrouping === "overlay_first" || resolvedGrouping === "grouped") { + const { overlay, baseline } = partitionBySource(allScored); + orderedScored = [...overlay, ...baseline]; + } + const results = orderedScored.slice(0, 5); const dodEntry = index.entries.find((e) => e.path.toLowerCase().includes("definition-of-done")); const constraints = index.entries .filter((e) => e.path.includes("constraint") || e.authority_band === "governing") .slice(0, 3); + const startHere = results.slice(0, 3); const assistantText = [ `Preflight: ${topic}`, ``, `Start here:`, - ...results.slice(0, 3).map((r) => `- \`${r.path}\` — ${r.title}`), + ...startHere.map((r) => `- \`${r.path}\` — ${r.title}`), ``, `Definition of Done:`, dodEntry ? `- \`${dodEntry.path}\`` : "- Check canon/definition-of-done.md", @@ -2358,15 +2410,25 @@ async function runPreflight( .join("\n") .trim(); + // Build result object + const resultObj: Record = { + topic, + start_here: startHere.map((r) => r.path), + dod: dodEntry?.path, + constraints: constraints.map((c) => c.path), + docs_available: index.stats.total, + }; + + // For "grouped" mode, split start_here into overlay and baseline arrays (each capped at 3) + if (resolvedGrouping === "grouped") { + const { overlay, baseline } = partitionBySource(allScored); + resultObj.start_here_overlay = overlay.slice(0, 3).map((r) => r.path); + resultObj.start_here_baseline = baseline.slice(0, 3).map((r) => r.path); + } + return { action: "preflight", - result: { - topic, - start_here: results.slice(0, 3).map((r) => r.path), - dod: dodEntry?.path, - constraints: constraints.map((c) => c.path), - docs_available: index.stats.total, - }, + result: resultObj, state: state ? initState(state) : undefined, assistant_text: assistantText, debug: { @@ -3251,7 +3313,13 @@ const VALID_ACTIONS = [ ] as const; export async function handleUnifiedAction(params: UnifiedParams): Promise { - const { action, input, context, mode, knowledge_base_url, include_metadata, section, sort_by, limit, offset, filter_epoch, state, env, tracer } = params; + const { action, input, context, mode, knowledge_base_url, result_grouping, include_metadata, section, sort_by, limit, offset, filter_epoch, state, env, tracer } = params; + + // Conditional default: when knowledge_base_url is set and caller didn't + // specify result_grouping, default to "overlay_first" (the fix for #150). + // When KB is unset, default to "merged" (no behavior change). + const resolvedGrouping: ResultGrouping = + result_grouping ?? (knowledge_base_url ? "overlay_first" : "merged"); if (!VALID_ACTIONS.includes(action as (typeof VALID_ACTIONS)[number])) { return { @@ -3283,7 +3351,7 @@ export async function handleUnifiedAction(params: UnifiedParams): Promise; @@ -206,6 +207,7 @@ export function parseToolCall(payload: unknown): { // Extract details from tool arguments let documentUri = ""; let knowledgeBaseUrl = ""; + let resultGrouping = ""; const args = p.arguments; if (typeof args === "object" && args !== null) { const a = args as Record; @@ -217,9 +219,13 @@ export function parseToolCall(payload: unknown): { if (typeof a.knowledge_base_url === "string" && a.knowledge_base_url) { knowledgeBaseUrl = a.knowledge_base_url; } + // Extract result_grouping from tool arguments (#150) + if (typeof a.result_grouping === "string" && a.result_grouping) { + resultGrouping = a.result_grouping; + } } - return { method, toolName, documentUri, knowledgeBaseUrl }; + return { method, toolName, documentUri, knowledgeBaseUrl, resultGrouping }; } // ────────────────────────────────────────────────────────────────────────────── @@ -297,9 +303,10 @@ export function recordTelemetry( toolCall?.knowledgeBaseUrl || env.DEFAULT_KNOWLEDGE_BASE_URL || "", documentUri, env.ODDKIT_VERSION || BUILD_VERSION, - // blob9 retired (was cache_tier). Slot stays free per the - // "no deprecation, nobody uses them yet" rule. Cache effectiveness - // moved to double7/double8. + // blob9: result_grouping (#150). Was retired (cache_tier). + // Repurposed for the caller-specified grouping value; empty string + // when not applicable (non-search/preflight actions). + toolCall?.resultGrouping ?? "", ], doubles: [ 1, // double1: count @@ -354,9 +361,7 @@ const BASELINE_BLOB_SEMANTIC_NAMES = [ "knowledge_base_url", // blob6 "document_uri", // blob7 "worker_version", // blob8 - // blob9 retired (was cache_tier). Slot stays free per the - // "no deprecation, nobody uses them yet" rule. Hit-rate moved to - // double7/double8. + "result_grouping", // blob9 — repurposed from retired cache_tier (#150) ] as const; /** diff --git a/workers/test/result-grouping.test.mjs b/workers/test/result-grouping.test.mjs new file mode 100644 index 0000000..507b950 --- /dev/null +++ b/workers/test/result-grouping.test.mjs @@ -0,0 +1,533 @@ +#!/usr/bin/env node +/** + * Unit + integration tests for the result_grouping feature (#150). + * + * Tests: + * - partitionBySource: stable partition, edge cases, ordering guarantees + * - Conditional default: KB set → overlay_first, KB unset → merged + * - Grouped shape construction: overlay_hits / baseline_hits arrays + * - Preflight partition: start_here_overlay / start_here_baseline + * - Telemetry: blob9 carries result_grouping value + * + * Compiles orchestrate.ts + telemetry.ts via tsc into a temp dir, then + * dynamic-imports the compiled .js. Same pattern as tokenize.test.mjs + * and telemetry-integration.test.mjs. + */ + +import assert from "node:assert/strict"; +import { spawnSync } from "node:child_process"; +import { mkdtempSync, writeFileSync, symlinkSync, existsSync, readdirSync, readFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const WORKERS_ROOT = join(__dirname, ".."); + +// ─── Compile orchestrate.ts + telemetry.ts to temp dir ──────────────────── + +const tmp = mkdtempSync(join(tmpdir(), "oddkit-result-grouping-test-")); +const tsconfig = { + compilerOptions: { + target: "ES2022", + module: "ES2022", + moduleResolution: "bundler", + lib: ["ES2022", "DOM"], + types: ["@cloudflare/workers-types"], + noEmitOnError: false, + strict: false, + skipLibCheck: true, + resolveJsonModule: true, + allowSyntheticDefaultImports: true, + esModuleInterop: true, + rootDir: join(WORKERS_ROOT, "src"), + outDir: join(tmp, "build"), + }, + include: [ + join(WORKERS_ROOT, "src", "orchestrate.ts"), + join(WORKERS_ROOT, "src", "telemetry.ts"), + join(WORKERS_ROOT, "src", "tracing.ts"), + join(WORKERS_ROOT, "src", "zip-baseline-fetcher.ts"), + join(WORKERS_ROOT, "src", "bm25.ts"), + join(WORKERS_ROOT, "src", "markdown-utils.ts"), + ], +}; +const tsconfigPath = join(tmp, "tsconfig.json"); +writeFileSync(tsconfigPath, JSON.stringify(tsconfig, null, 2)); + +const tmpNodeModules = join(tmp, "node_modules"); +if (!existsSync(tmpNodeModules)) { + symlinkSync(join(WORKERS_ROOT, "node_modules"), tmpNodeModules); +} +// orchestrate.ts imports ../package.json +if (!existsSync(join(tmp, "package.json"))) { + symlinkSync(join(WORKERS_ROOT, "package.json"), join(tmp, "package.json")); +} + +const compile = spawnSync("npx", ["--yes", "tsc", "-p", tsconfigPath], { + encoding: "utf8", +}); + +// With noEmitOnError: false, tsc may exit non-zero on type errors in the dep +// graph (zip-baseline-fetcher.ts has workers-types friction) while still +// producing the .js files we need. Only bail if target files weren't emitted. +const buildDir = join(tmp, "build"); +const orchestrateJs = join(buildDir, "orchestrate.js"); +const telemetryJs = join(buildDir, "telemetry.js"); +if (!existsSync(orchestrateJs) || !existsSync(telemetryJs)) { + console.error("TypeScript compile failed (target files not emitted):"); + console.error(compile.stdout); + console.error(compile.stderr); + process.exit(1); +} +if (compile.status !== 0 && process.env.DEBUG) { + console.error("Note: tsc reported errors but target .js files were emitted:"); + console.error(compile.stdout); +} + +// Patch compiled files: JSON import assertions + extensionless local imports +for (const f of readdirSync(buildDir).filter((n) => n.endsWith(".js"))) { + const fpath = join(buildDir, f); + let src = readFileSync(fpath, "utf8"); + src = src.replace( + /from ["']\.\.\/package\.json["'];/g, + 'from "../package.json" with { type: "json" };', + ); + src = src.replace( + /from ["'](\.\/[^"'.]+)["'];/g, + 'from "$1.js";', + ); + writeFileSync(fpath, src); +} + +// Import the compiled module +const { partitionBySource } = await import(orchestrateJs); +const { recordTelemetry, parseToolCall } = await import(telemetryJs); + +// Also import tokenize for telemetry shape tests +const tokenizeJs = join(buildDir, "tokenize.js"); +let measurePayloadShape = null; +if (existsSync(tokenizeJs)) { + const tok = await import(tokenizeJs); + measurePayloadShape = tok.measurePayloadShape; +} + +// ─── Test harness ───────────────────────────────────────────────────────── + +let pass = 0; +let fail = 0; + +async function test(name, fn) { + try { + await fn(); + console.log(` ✓ ${name}`); + pass++; + } catch (err) { + console.log(` ✗ ${name}`); + console.log(` ${err.message}`); + if (err.stack && process.env.DEBUG) console.log(err.stack); + fail++; + } +} + +console.log("result-grouping tests (#150)\n"); + +// ─── Test fixtures ──────────────────────────────────────────────────────── + +// Fixture: mixed entries with interleaving scores +// canon entries have scores 10, 6 (ranked 1st, 3rd in BM25 order) +// baseline entries have scores 8, 4 (ranked 2nd, 4th in BM25 order) +// This ensures partition actually reorders — a fixture where canon always +// outscores baseline would prove nothing. +const mixedHits = [ + { path: "canon/a.md", title: "Canon A", source: "canon", score: 10 }, + { path: "docs/b.md", title: "Baseline B", source: "baseline", score: 8 }, + { path: "canon/c.md", title: "Canon C", source: "canon", score: 6 }, + { path: "docs/d.md", title: "Baseline D", source: "baseline", score: 4 }, +]; + +const canonOnly = [ + { path: "canon/x.md", title: "Canon X", source: "canon", score: 10 }, + { path: "canon/y.md", title: "Canon Y", source: "canon", score: 5 }, +]; + +const baselineOnly = [ + { path: "docs/m.md", title: "Baseline M", source: "baseline", score: 9 }, + { path: "docs/n.md", title: "Baseline N", source: "baseline", score: 3 }, +]; + +// ─── partitionBySource tests ────────────────────────────────────────────── + +console.log("partitionBySource:"); + +await test("splits mixed entries into overlay (canon) and baseline", () => { + const { overlay, baseline } = partitionBySource(mixedHits); + assert.equal(overlay.length, 2, "should have 2 overlay entries"); + assert.equal(baseline.length, 2, "should have 2 baseline entries"); + assert.ok(overlay.every((h) => h.source === "canon"), "all overlay should be canon"); + assert.ok(baseline.every((h) => h.source === "baseline"), "all baseline should be baseline"); +}); + +await test("preserves BM25 score order within each partition (stability)", () => { + const { overlay, baseline } = partitionBySource(mixedHits); + // overlay: canon/a (10) then canon/c (6) + assert.equal(overlay[0].path, "canon/a.md"); + assert.equal(overlay[1].path, "canon/c.md"); + assert.ok(overlay[0].score >= overlay[1].score, "overlay should be descending score"); + // baseline: docs/b (8) then docs/d (4) + assert.equal(baseline[0].path, "docs/b.md"); + assert.equal(baseline[1].path, "docs/d.md"); + assert.ok(baseline[0].score >= baseline[1].score, "baseline should be descending score"); +}); + +await test("overlay_first reorder: all canon before all baseline", () => { + const { overlay, baseline } = partitionBySource(mixedHits); + const ordered = [...overlay, ...baseline]; + // Expected: canon/a(10), canon/c(6), docs/b(8), docs/d(4) + assert.equal(ordered[0].source, "canon"); + assert.equal(ordered[1].source, "canon"); + assert.equal(ordered[2].source, "baseline"); + assert.equal(ordered[3].source, "baseline"); + // Scores within tiers are descending + assert.ok(ordered[0].score >= ordered[1].score); + assert.ok(ordered[2].score >= ordered[3].score); +}); + +await test("canon-only input: overlay = all, baseline = empty", () => { + const { overlay, baseline } = partitionBySource(canonOnly); + assert.equal(overlay.length, 2); + assert.equal(baseline.length, 0); +}); + +await test("baseline-only input: overlay = empty, baseline = all", () => { + const { overlay, baseline } = partitionBySource(baselineOnly); + assert.equal(overlay.length, 0); + assert.equal(baseline.length, 2); +}); + +await test("empty array: both partitions empty", () => { + const { overlay, baseline } = partitionBySource([]); + assert.equal(overlay.length, 0); + assert.equal(baseline.length, 0); +}); + +await test("stability: entries with identical scores retain pre-partition relative order", () => { + const sameScore = [ + { path: "canon/first.md", source: "canon", score: 5 }, + { path: "docs/between.md", source: "baseline", score: 5 }, + { path: "canon/second.md", source: "canon", score: 5 }, + { path: "docs/last.md", source: "baseline", score: 5 }, + ]; + const { overlay, baseline } = partitionBySource(sameScore); + // Within canon: first then second (insertion order preserved) + assert.equal(overlay[0].path, "canon/first.md"); + assert.equal(overlay[1].path, "canon/second.md"); + // Within baseline: between then last (insertion order preserved) + assert.equal(baseline[0].path, "docs/between.md"); + assert.equal(baseline[1].path, "docs/last.md"); +}); + +// ─── Conditional default logic tests ────────────────────────────────────── + +console.log("\nconditional default:"); + +await test("KB unset → default is merged", () => { + const knowledge_base_url = undefined; + const result_grouping = undefined; + const resolved = result_grouping ?? (knowledge_base_url ? "overlay_first" : "merged"); + assert.equal(resolved, "merged"); +}); + +await test("KB set → default is overlay_first", () => { + const knowledge_base_url = "https://github.com/klappy/klappy.dev"; + const result_grouping = undefined; + const resolved = result_grouping ?? (knowledge_base_url ? "overlay_first" : "merged"); + assert.equal(resolved, "overlay_first"); +}); + +await test("explicit merged overrides KB-set default", () => { + const knowledge_base_url = "https://github.com/klappy/klappy.dev"; + const result_grouping = "merged"; + const resolved = result_grouping ?? (knowledge_base_url ? "overlay_first" : "merged"); + assert.equal(resolved, "merged"); +}); + +await test("explicit overlay_first works with KB unset", () => { + const knowledge_base_url = undefined; + const result_grouping = "overlay_first"; + const resolved = result_grouping ?? (knowledge_base_url ? "overlay_first" : "merged"); + assert.equal(resolved, "overlay_first"); +}); + +await test("explicit grouped works regardless of KB", () => { + for (const kb of [undefined, "https://github.com/klappy/klappy.dev"]) { + const result_grouping = "grouped"; + const resolved = result_grouping ?? (kb ? "overlay_first" : "merged"); + assert.equal(resolved, "grouped", `should be grouped when kb=${kb}`); + } +}); + +// ─── Grouped shape construction tests ───────────────────────────────────── + +console.log("\ngrouped shape construction:"); + +await test("grouped search: overlay_hits and baseline_hits arrays present and correct", () => { + // Simulate the grouped shape construction from runSearch + const orderedHits = (() => { + const { overlay, baseline } = partitionBySource(mixedHits); + return [...overlay, ...baseline]; + })(); + + // Simulate metadata enrichment (adds uri field) + const hitsWithMetadata = orderedHits.map((h) => ({ + uri: `klappy://${h.path.replace(".md", "")}`, + path: h.path, + title: h.title, + score: h.score, + source: h.source, + })); + + // Build grouped shape + const overlayHits = []; + const baselineHits = []; + for (const h of hitsWithMetadata) { + (h.source === "canon" ? overlayHits : baselineHits).push(h); + } + + // Assertions + assert.equal(overlayHits.length, 2, "overlay_hits should have 2 items"); + assert.equal(baselineHits.length, 2, "baseline_hits should have 2 items"); + assert.ok(overlayHits.every((h) => h.source === "canon")); + assert.ok(baselineHits.every((h) => h.source === "baseline")); + + // hits (back-compat) is overlay-then-baseline + assert.equal(hitsWithMetadata[0].source, "canon"); + assert.equal(hitsWithMetadata[1].source, "canon"); + assert.equal(hitsWithMetadata[2].source, "baseline"); + assert.equal(hitsWithMetadata[3].source, "baseline"); +}); + +await test("grouped with empty overlay: overlay_hits=[], baseline_hits=[...]", () => { + const { overlay, baseline } = partitionBySource(baselineOnly); + assert.equal(overlay.length, 0); + assert.equal(baseline.length, 2); + + const orderedHits = [...overlay, ...baseline]; + assert.equal(orderedHits.length, 2); + assert.ok(orderedHits.every((h) => h.source === "baseline")); +}); + +await test("grouped with empty baseline: overlay_hits=[...], baseline_hits=[]", () => { + const { overlay, baseline } = partitionBySource(canonOnly); + assert.equal(overlay.length, 2); + assert.equal(baseline.length, 0); + + const orderedHits = [...overlay, ...baseline]; + assert.equal(orderedHits.length, 2); + assert.ok(orderedHits.every((h) => h.source === "canon")); +}); + +// ─── Preflight partition tests ──────────────────────────────────────────── + +console.log("\npreflight partition:"); + +await test("preflight overlay_first: partition applied before slice", () => { + // Simulate scoreEntries output with interleaving scores + const allScored = [ + { path: "docs/high.md", source: "baseline", score: 20 }, + { path: "canon/mid-high.md", source: "canon", score: 18 }, + { path: "docs/mid.md", source: "baseline", score: 15 }, + { path: "canon/mid-low.md", source: "canon", score: 12 }, + { path: "docs/low.md", source: "baseline", score: 8 }, + { path: "canon/lowest.md", source: "canon", score: 3 }, + ]; + + // overlay_first: partition then slice(0, 5) + const { overlay, baseline } = partitionBySource(allScored); + const ordered = [...overlay, ...baseline]; + const results = ordered.slice(0, 5); + const startHere = results.slice(0, 3).map((r) => r.path); + + // First 3 results should be all canon (3 canon entries exist) + assert.equal(startHere[0], "canon/mid-high.md"); + assert.equal(startHere[1], "canon/mid-low.md"); + assert.equal(startHere[2], "canon/lowest.md"); +}); + +await test("preflight grouped: start_here_overlay and start_here_baseline", () => { + const allScored = [ + { path: "docs/high.md", source: "baseline", score: 20 }, + { path: "canon/mid-high.md", source: "canon", score: 18 }, + { path: "docs/mid.md", source: "baseline", score: 15 }, + { path: "canon/mid-low.md", source: "canon", score: 12 }, + ]; + + const { overlay, baseline } = partitionBySource(allScored); + const startHereOverlay = overlay.slice(0, 3).map((r) => r.path); + const startHereBaseline = baseline.slice(0, 3).map((r) => r.path); + + assert.deepEqual(startHereOverlay, ["canon/mid-high.md", "canon/mid-low.md"]); + assert.deepEqual(startHereBaseline, ["docs/high.md", "docs/mid.md"]); +}); + +await test("preflight merged: no partition applied (pure score order)", () => { + const allScored = [ + { path: "docs/high.md", source: "baseline", score: 20 }, + { path: "canon/mid-high.md", source: "canon", score: 18 }, + { path: "docs/mid.md", source: "baseline", score: 15 }, + ]; + + // merged = just use allScored directly + const startHere = allScored.slice(0, 3).map((r) => r.path); + assert.deepEqual(startHere, ["docs/high.md", "canon/mid-high.md", "docs/mid.md"]); +}); + +// ─── Telemetry: parseToolCall extracts result_grouping ──────────────────── + +console.log("\ntelemetry:"); + +await test("parseToolCall extracts result_grouping from oddkit_search arguments", () => { + const payload = { + jsonrpc: "2.0", + id: 1, + method: "tools/call", + params: { + name: "oddkit_search", + arguments: { + input: "test query", + knowledge_base_url: "https://github.com/klappy/klappy.dev", + result_grouping: "overlay_first", + }, + }, + }; + const result = parseToolCall(payload); + assert.ok(result, "should parse tool call"); + assert.equal(result.resultGrouping, "overlay_first"); + assert.equal(result.knowledgeBaseUrl, "https://github.com/klappy/klappy.dev"); +}); + +await test("parseToolCall returns empty result_grouping when not specified", () => { + const payload = { + jsonrpc: "2.0", + id: 2, + method: "tools/call", + params: { + name: "oddkit_search", + arguments: { input: "test query" }, + }, + }; + const result = parseToolCall(payload); + assert.ok(result); + assert.equal(result.resultGrouping, ""); +}); + +await test("parseToolCall returns empty result_grouping for non-search tools", () => { + const payload = { + jsonrpc: "2.0", + id: 3, + method: "tools/call", + params: { + name: "oddkit_orient", + arguments: { input: "exploring" }, + }, + }; + const result = parseToolCall(payload); + assert.ok(result); + assert.equal(result.resultGrouping, ""); +}); + +// ─── Telemetry: recordTelemetry writes result_grouping to blob9 ─────────── + +class MockAnalyticsEngine { + constructor() { this.writes = []; } + writeDataPoint(point) { this.writes.push(point); } +} + +function mockEnv() { + return { + ODDKIT_TELEMETRY: new MockAnalyticsEngine(), + DEFAULT_KNOWLEDGE_BASE_URL: "https://raw.githubusercontent.com/klappy/klappy.dev/main", + ODDKIT_VERSION: "0.test.0", + }; +} + +function mockRequest(consumer = "test") { + return new Request(`https://oddkit.klappy.dev/mcp?consumer=${consumer}`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + }); +} + +await test("recordTelemetry writes result_grouping to blob9", async () => { + const env = mockEnv(); + const requestBody = JSON.stringify({ + jsonrpc: "2.0", + id: 1, + method: "tools/call", + params: { + name: "oddkit_search", + arguments: { + input: "test", + result_grouping: "overlay_first", + }, + }, + }); + const responseBody = JSON.stringify({ jsonrpc: "2.0", id: 1, result: { ok: true } }); + + let shape = null; + if (measurePayloadShape) { + shape = await measurePayloadShape(requestBody, responseBody); + } + recordTelemetry(mockRequest(), requestBody, env, 42, { hits: 0, total: 0 }, shape); + + assert.equal(env.ODDKIT_TELEMETRY.writes.length, 1); + const point = env.ODDKIT_TELEMETRY.writes[0]; + assert.equal(point.blobs.length, 9, `blobs should be 9, got ${point.blobs.length}`); + assert.equal(point.blobs[8], "overlay_first", "blob9 should be result_grouping value"); +}); + +await test("recordTelemetry writes empty blob9 for non-search tools", async () => { + const env = mockEnv(); + const requestBody = JSON.stringify({ + jsonrpc: "2.0", + id: 1, + method: "tools/call", + params: { + name: "oddkit_orient", + arguments: { input: "test" }, + }, + }); + const responseBody = JSON.stringify({ jsonrpc: "2.0", id: 1, result: { ok: true } }); + + let shape = null; + if (measurePayloadShape) { + shape = await measurePayloadShape(requestBody, responseBody); + } + recordTelemetry(mockRequest(), requestBody, env, 10, { hits: 0, total: 0 }, shape); + + const point = env.ODDKIT_TELEMETRY.writes[0]; + assert.equal(point.blobs[8], "", "blob9 should be empty for non-search tools"); +}); + +await test("recordTelemetry writes empty blob9 for non-tool-call requests", async () => { + const env = mockEnv(); + const requestBody = JSON.stringify({ + jsonrpc: "2.0", + id: 1, + method: "tools/list", + }); + + recordTelemetry(mockRequest(), requestBody, env, 5, { hits: 0, total: 0 }, null); + + const point = env.ODDKIT_TELEMETRY.writes[0]; + // Non-tool-call: toolCall is null, so blob9 gets ?? "" fallback + // The blobs array for non-tool-call may be shorter (8) since toolCall is null + // and the resultGrouping path uses toolCall?.resultGrouping ?? "" + assert.equal(point.blobs.length, 9, "blobs should still be 9"); + assert.equal(point.blobs[8], "", "blob9 should be empty for non-tool-call"); +}); + +// ─── Summary ────────────────────────────────────────────────────────────── + +console.log(`\n${pass + fail} tests: ${pass} passed, ${fail} failed`); +if (fail > 0) process.exit(1); diff --git a/workers/test/telemetry-integration.test.mjs b/workers/test/telemetry-integration.test.mjs index cf551f9..911733a 100644 --- a/workers/test/telemetry-integration.test.mjs +++ b/workers/test/telemetry-integration.test.mjs @@ -184,8 +184,8 @@ await test("oddkit_time tool call lands a complete telemetry record", async () = assert.equal(env.ODDKIT_TELEMETRY.writes.length, 1, "should write 1 data point"); const point = env.ODDKIT_TELEMETRY.writes[0]; - // Schema shape — blob9 retired, doubles 7 and 8 added - assert.equal(point.blobs.length, 8, `blobs should be 8 (blob9 retired), got ${point.blobs.length}`); + // Schema shape — blob9 repurposed for result_grouping (#150), doubles 7 and 8 added + assert.equal(point.blobs.length, 9, `blobs should be 9 (blob9 = result_grouping), got ${point.blobs.length}`); assert.equal(point.doubles.length, 8, `doubles should be 8, got ${point.doubles.length}`); assert.equal(point.indexes.length, 1, "indexes should be 1"); @@ -306,7 +306,7 @@ const { const TEST_BLOB_NAMES = [ "event_type", "method", "tool_name", "consumer_label", "consumer_source", "knowledge_base_url", "document_uri", "worker_version", - // blob9 (cache_tier) retired in retire-indexsource-interpreter + "result_grouping", // blob9 — repurposed from retired cache_tier (#150) ]; const TEST_DOUBLE_NAMES = [ "count", "duration_ms", "bytes_in", "bytes_out", "tokens_in", "tokens_out", From 63eb8e01faf819635d4a05479cf0b575335b602a Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 28 Apr 2026 14:18:03 +0000 Subject: [PATCH 2/8] fix(search): include empty overlay_hits/baseline_hits on grouped NO_MATCH --- workers/src/orchestrate.ts | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/workers/src/orchestrate.ts b/workers/src/orchestrate.ts index f7c00ed..a225d9d 100644 --- a/workers/src/orchestrate.ts +++ b/workers/src/orchestrate.ts @@ -1372,13 +1372,18 @@ async function runSearch( : undefined; if (hits.length === 0) { + const noMatchResult: Record = { + status: "NO_MATCH", + docs_considered: index.entries.length, + hits: [], + }; + if (resolvedGrouping === "grouped") { + noMatchResult.overlay_hits = []; + noMatchResult.baseline_hits = []; + } return { action: "search", - result: { - status: "NO_MATCH", - docs_considered: index.entries.length, - hits: [], - }, + result: noMatchResult, state: updatedState, assistant_text: `Searched ${index.stats.total} documents but found no matches for "${input}". Try rephrasing or ask with action "catalog" to see available documentation.`, debug: { From 60fe9b89de3682a6acfe03deff69a4ea3f0ee4a7 Mon Sep 17 00:00:00 2001 From: klappy Date: Tue, 28 Apr 2026 17:22:08 +0000 Subject: [PATCH 3/8] fix(search): widen BM25 candidate pool when grouping; mirror to Node CLI (#150 fix-forward) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Worker (workers/src/orchestrate.ts): runSearch now pulls 50 BM25 candidates when resolvedGrouping !== "merged", partitions, then slices to FINAL_LIMIT (5). The original implementation truncated to 5 before partition, making overlay docs at BM25 position 6+ invisible to the partition logic. - CLI (src/core/actions.js): mirrored — partitionByOrigin helper, conditional default on result_grouping (KB set → overlay_first, else merged), candidate- pool widening, overlay_hits/baseline_hits arrays for grouped mode, NO_MATCH branch includes empty grouped arrays when applicable. - Tests: 2 new regression cases for partitionBySource against widened pools. - CHANGELOG: separate Fixed entry for the candidate-pool widening, plus a CLI parity Added entry. All worker tests (5 files, 26 tests in result-grouping) and CLI smoke tests pass; tsc --noEmit clean. --- CHANGELOG.md | 5 + src/core/actions.js | 136 +++++++++++++++++++------- workers/src/orchestrate.ts | 14 ++- workers/test/result-grouping.test.mjs | 65 ++++++++++++ 4 files changed, 180 insertions(+), 40 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 75a7faf..8ce271d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - **`result_grouping` parameter for search and preflight** — when `knowledge_base_url` is set, overlay (knowledge-base) docs are ranked above baseline docs by default (`"overlay_first"`). Callers can explicitly choose `"merged"` (pure BM25 score order, the previous default), `"overlay_first"` (overlay before baseline, preserving score order within each partition), or `"grouped"` (separate `overlay_hits`/`baseline_hits` arrays in search, `start_here_overlay`/`start_here_baseline` in preflight). Conditional default: `knowledge_base_url` unset → `"merged"` (no behavior change); `knowledge_base_url` set → `"overlay_first"`. Telemetry records the caller-specified value in blob9 (`result_grouping`). Closes #150. +- **`result_grouping` mirrored in Node CLI (`src/core/actions.js`)** — same conditional default and partition logic for parity with the Cloudflare Worker. CLI uses `origin: "local" | "baseline"` (its existing field) where the worker uses `source: "canon" | "baseline"`. Without this, CLI users would see baseline-contaminated rankings even though the worker is fixed. + +### Fixed + +- **Candidate-pool widening for grouping** — when `result_grouping !== "merged"`, both worker `runSearch` and CLI `search` now retrieve 50 BM25 candidates instead of 5, partition, then truncate to the response cap of 5. The original implementation truncated to 5 *before* partitioning, which made overlay docs ranked at BM25 position 6+ invisible to the partition logic. Two regression tests added (`partition surfaces overlay even when overlay is mostly low-score`, `widened pool: 50 candidates partition correctly without losing overlay`). ## [0.26.0] - 2026-04-26 diff --git a/src/core/actions.js b/src/core/actions.js index 689395f..94ca60b 100644 --- a/src/core/actions.js +++ b/src/core/actions.js @@ -88,6 +88,22 @@ function getBM25Index(docs, baselineSha) { return cachedBM25; } +// ────────────────────────────────────────────────────────────────────────────── +// Result grouping — stable partition by origin (local vs baseline). The Node CLI +// mirror of the worker's partitionBySource (which keys on `source`). See +// klappy/oddkit issue #150. +// ────────────────────────────────────────────────────────────────────────────── + +function partitionByOrigin(arr) { + const overlay = []; + const baselineHits = []; + for (const h of arr) { + if ((h.origin || "local") === "local") overlay.push(h); + else baselineHits.push(h); + } + return { overlay, baseline: baselineHits }; +} + // ────────────────────────────────────────────────────────────────────────────── // Response text builders // ────────────────────────────────────────────────────────────────────────────── @@ -160,14 +176,20 @@ export function buildEncodeResponse(taskResult) { * @param {string} [params.baseline] - Baseline override (canon_url takes precedence) * @param {string} [params.repoRoot] - Repository root (defaults to cwd) * @param {Object} [params.state] - Optional state for threading (MCP orchestrator) + * @param {"merged"|"overlay_first"|"grouped"} [params.result_grouping] - Search ranking policy (#150) * @returns {Object} { action, result, assistant_text, debug, state? } */ export async function handleAction(params) { - const { action, input, context, mode, canon_url, state, include_metadata, section, reference, compare } = params; + const { action, input, context, mode, canon_url, state, include_metadata, section, reference, compare, result_grouping } = params; const repoRoot = params.repoRoot || process.cwd(); const baseline = canon_url || params.baseline; const startMs = Date.now(); + // Issue #150: when an overlay (custom canon_url/baseline) is set, default to + // overlay_first so local docs rank above baseline. With no overlay, + // every doc has origin: "local" and the partition is a no-op anyway. + const resolvedGrouping = result_grouping ?? (baseline ? "overlay_first" : "merged"); + // Helper: enrich debug output with baseline SHA for observability function makeDebug(extra = {}) { return { @@ -286,10 +308,16 @@ export async function handleAction(params) { } const bm25 = getBM25Index(index.documents, baselineSha); - const results = searchBM25(bm25, input, 5); + + // Issue #150 fix-forward: when grouping is active, retrieve a wider + // candidate pool from BM25 so overlay (local) docs ranked beyond + // position 5 in raw BM25 are not truncated before partitioning. + const FINAL_LIMIT = 5; + const candidateLimit = resolvedGrouping !== "merged" ? 50 : FINAL_LIMIT; + const results = searchBM25(bm25, input, candidateLimit); const docMap = new Map(index.documents.map((d) => [d.path, d])); - const hits = results + const rawHits = results .map((r) => { const doc = docMap.get(r.id); if (!doc) return null; @@ -297,15 +325,32 @@ export async function handleAction(params) { }) .filter(Boolean); + // Apply result_grouping partition. Single forward pass — no re-sort. + // After partitioning the wider candidate pool, truncate to FINAL_LIMIT. + let hits = rawHits; + let isGrouped = false; + if (resolvedGrouping === "overlay_first" || resolvedGrouping === "grouped") { + const { overlay, baseline: baselineHits } = partitionByOrigin(rawHits); + hits = [...overlay, ...baselineHits].slice(0, FINAL_LIMIT); + isGrouped = resolvedGrouping === "grouped"; + } else { + hits = rawHits.slice(0, FINAL_LIMIT); + } + const updatedState = state ? addCanonRefs(initState(state), hits.map((h) => h.path)) : undefined; if (hits.length === 0) { + const noMatchResult = { status: "NO_MATCH", docs_considered: index.documents.length, hits: [] }; + if (isGrouped) { + noMatchResult.overlay_hits = []; + noMatchResult.baseline_hits = []; + } return { action: "search", - result: { status: "NO_MATCH", docs_considered: index.documents.length, hits: [] }, + result: noMatchResult, state: updatedState, assistant_text: `Searched ${index.documents.length} documents but found no matches for "${input}". Try rephrasing or use action "catalog".`, - debug: makeDebug({ search_index_size: bm25.N }), + debug: makeDebug({ search_index_size: bm25.N, result_grouping: resolvedGrouping }), }; } @@ -322,43 +367,58 @@ export async function handleAction(params) { ...hits.map((r) => `- \`${r.path}\` — ${r.title || "(untitled)"} (score: ${r.score.toFixed(2)})`), ]; - return { - action: "search", - result: { - status: "FOUND", - hits: hits.map((h) => { - const hit = { - uri: h.uri, - path: h.path, - title: h.title, - tags: h.tags, - score: h.score, - snippet: (h.contentPreview || "").slice(0, 200), - source: h.origin || "local", - }; - if (include_metadata) { - if (h.frontmatter) { - hit.metadata = h.frontmatter; - } else if (h.absolutePath && existsSync(h.absolutePath)) { - // Fallback: index predates frontmatter storage — parse from file - try { - const { data } = matter(readFileSync(h.absolutePath, "utf-8")); - if (data && Object.keys(data).length > 0) { - hit.metadata = data; - } - } catch { - // File not readable — omit metadata for this hit - } + const hitObjects = hits.map((h) => { + const hit = { + uri: h.uri, + path: h.path, + title: h.title, + tags: h.tags, + score: h.score, + snippet: (h.contentPreview || "").slice(0, 200), + source: h.origin || "local", + }; + if (include_metadata) { + if (h.frontmatter) { + hit.metadata = h.frontmatter; + } else if (h.absolutePath && existsSync(h.absolutePath)) { + // Fallback: index predates frontmatter storage — parse from file + try { + const { data } = matter(readFileSync(h.absolutePath, "utf-8")); + if (data && Object.keys(data).length > 0) { + hit.metadata = data; } + } catch { + // File not readable — omit metadata for this hit } - return hit; - }), - evidence, - docs_considered: index.documents.length, - }, + } + } + return hit; + }); + + const foundResult = { + status: "FOUND", + hits: hitObjects, + evidence, + docs_considered: index.documents.length, + }; + + if (isGrouped) { + const overlayHits = []; + const baselineHits = []; + for (const h of hitObjects) { + if (h.source === "local") overlayHits.push(h); + else baselineHits.push(h); + } + foundResult.overlay_hits = overlayHits; + foundResult.baseline_hits = baselineHits; + } + + return { + action: "search", + result: foundResult, state: updatedState, assistant_text: assistantLines.join("\n").trim(), - debug: makeDebug({ search_index_size: bm25.N }), + debug: makeDebug({ search_index_size: bm25.N, result_grouping: resolvedGrouping }), }; } diff --git a/workers/src/orchestrate.ts b/workers/src/orchestrate.ts index a225d9d..86b4724 100644 --- a/workers/src/orchestrate.ts +++ b/workers/src/orchestrate.ts @@ -1352,7 +1352,16 @@ async function runSearch( const startMs = Date.now(); const index = await fetcher.getIndex(knowledgeBaseUrl); const bm25 = getBM25Index(index.entries); - const results = searchBM25(bm25, input, 5); + + // Issue #150 fix-forward: when grouping is active, retrieve a wider candidate + // pool from BM25 so overlay docs ranked beyond position 5 in raw BM25 are not + // truncated before partitioning. With the default 5-result cap, an overlay + // doc that scored at position 6 would be invisible even after re-ranking. + // 50 candidates is enough headroom for any realistic overlay; the final slice + // below caps the response at FINAL_LIMIT. + const FINAL_LIMIT = 5; + const candidateLimit = resolvedGrouping !== "merged" ? 50 : FINAL_LIMIT; + const results = searchBM25(bm25, input, candidateLimit); // Map scores back to entries const entryMap = new Map(index.entries.map((e) => [e.path, e])); @@ -1398,11 +1407,12 @@ async function runSearch( // Apply result_grouping partition (overlay_first / grouped re-order; // merged preserves BM25 score order). Single forward pass — no re-sort. + // After partitioning the wider candidate pool, truncate to FINAL_LIMIT. let orderedHits = hits; let isGrouped = false; if (resolvedGrouping === "overlay_first" || resolvedGrouping === "grouped") { const { overlay, baseline } = partitionBySource(hits); - orderedHits = [...overlay, ...baseline]; + orderedHits = [...overlay, ...baseline].slice(0, FINAL_LIMIT); isGrouped = resolvedGrouping === "grouped"; } diff --git a/workers/test/result-grouping.test.mjs b/workers/test/result-grouping.test.mjs index 507b950..864fcae 100644 --- a/workers/test/result-grouping.test.mjs +++ b/workers/test/result-grouping.test.mjs @@ -527,6 +527,71 @@ await test("recordTelemetry writes empty blob9 for non-tool-call requests", asyn assert.equal(point.blobs[8], "", "blob9 should be empty for non-tool-call"); }); +// ─── Candidate-pool widening regression (#150 fix-forward) ──────────────── +// The original PR #152 implementation called searchBM25 with a hard limit of 5. +// In a realistic scenario where overlay and baseline interleave by BM25 score, +// an overlay doc ranked at BM25 position 6+ would be invisible to the partition +// — the partition can only reorder docs already in its input. This regression +// suite validates that partitionBySource correctly prioritizes overlay over +// baseline when the input includes overlay docs at every position. + +await test("partition surfaces overlay even when overlay is mostly low-score", async () => { + // Simulates a realistic candidate pool where 4 baseline docs outrank a single + // overlay doc at position 5. Without widening, this overlay doc would be in + // the input. With widening, more overlay candidates from positions 6+ are + // also pulled in. Either way, partition must correctly surface overlay first. + const wideCandidates = [ + { path: "docs/a.md", title: "Baseline A", source: "baseline", score: 100 }, + { path: "docs/b.md", title: "Baseline B", source: "baseline", score: 90 }, + { path: "docs/c.md", title: "Baseline C", source: "baseline", score: 80 }, + { path: "docs/d.md", title: "Baseline D", source: "baseline", score: 70 }, + { path: "canon/e.md", title: "Canon E", source: "canon", score: 60 }, + { path: "canon/f.md", title: "Canon F", source: "canon", score: 50 }, + { path: "canon/g.md", title: "Canon G", source: "canon", score: 40 }, + { path: "docs/h.md", title: "Baseline H", source: "baseline", score: 30 }, + ]; + const { overlay, baseline } = partitionBySource(wideCandidates); + // All overlay docs first, in original BM25 order. + const merged = [...overlay, ...baseline]; + // Top 5 of merged should now be overlay-first. + assert.equal(merged[0].source, "canon", "rank 1 should be overlay"); + assert.equal(merged[1].source, "canon", "rank 2 should be overlay"); + assert.equal(merged[2].source, "canon", "rank 3 should be overlay"); + // BM25 score within partition preserved. + assert.equal(merged[0].path, "canon/e.md"); + assert.equal(merged[1].path, "canon/f.md"); + assert.equal(merged[2].path, "canon/g.md"); + assert.equal(merged[3].source, "baseline", "rank 4 should be baseline (no more overlay)"); +}); + +await test("widened pool: 50 candidates partition correctly without losing overlay", async () => { + // Simulate a 50-candidate pool where overlay docs are scattered at positions + // 8, 17, 23, 41 — positions that would be invisible at the original limit-5. + const wideCandidates = []; + for (let i = 0; i < 50; i++) { + const isOverlay = i === 7 || i === 16 || i === 22 || i === 40; + wideCandidates.push({ + path: isOverlay ? `canon/${i}.md` : `docs/${i}.md`, + title: `Doc ${i}`, + source: isOverlay ? "canon" : "baseline", + score: 1000 - i, // descending + }); + } + const { overlay, baseline } = partitionBySource(wideCandidates); + assert.equal(overlay.length, 4, "should find all 4 overlay docs in 50-candidate pool"); + assert.equal(baseline.length, 46); + // Overlay docs preserve their BM25 order (positions 7, 16, 22, 40). + assert.deepEqual( + overlay.map((d) => d.path), + ["canon/7.md", "canon/16.md", "canon/22.md", "canon/40.md"], + ); + // After partition + slice(0, 5): all 4 overlay + 1 baseline. + const top5 = [...overlay, ...baseline].slice(0, 5); + assert.equal(top5.filter((d) => d.source === "canon").length, 4); + assert.equal(top5.filter((d) => d.source === "baseline").length, 1); + assert.equal(top5[4].path, "docs/0.md", "first baseline by BM25 score"); +}); + // ─── Summary ────────────────────────────────────────────────────────────── console.log(`\n${pass + fail} tests: ${pass} passed, ${fail} failed`); From ed3dd14e267b03cb4df38c7fb97f16361b69633a Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 28 Apr 2026 17:34:27 +0000 Subject: [PATCH 4/8] fix(workers): align state and grouped arrays with returned results - runSearch: compute updatedState/canon_refs from orderedHits (the truncated returned set), not the wider 50-candidate BM25 pool. Mirrors src/core/actions.js ordering: partition+truncate first, then derive state. - runPreflight: build start_here_overlay / start_here_baseline by partitioning startHere (top 3 of results) instead of the full allScored list, so grouped arrays are subsets of start_here. --- workers/src/orchestrate.ts | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/workers/src/orchestrate.ts b/workers/src/orchestrate.ts index 86b4724..78246de 100644 --- a/workers/src/orchestrate.ts +++ b/workers/src/orchestrate.ts @@ -1373,14 +1373,27 @@ async function runSearch( }) .filter(Boolean) as Array; + // Apply result_grouping partition (overlay_first / grouped re-order; + // merged preserves BM25 score order). Single forward pass — no re-sort. + // After partitioning the wider candidate pool, truncate to FINAL_LIMIT. + let orderedHits = hits.slice(0, FINAL_LIMIT); + let isGrouped = false; + if (resolvedGrouping === "overlay_first" || resolvedGrouping === "grouped") { + const { overlay, baseline } = partitionBySource(hits); + orderedHits = [...overlay, ...baseline].slice(0, FINAL_LIMIT); + isGrouped = resolvedGrouping === "grouped"; + } + + // Compute state from the truncated, returned hits — not the wider candidate + // pool — so canon_refs only tracks documents actually shown to the user. const updatedState = state ? addCanonRefs( initState(state), - hits.map((h) => h.path), + orderedHits.map((h) => h.path), ) : undefined; - if (hits.length === 0) { + if (orderedHits.length === 0) { const noMatchResult: Record = { status: "NO_MATCH", docs_considered: index.entries.length, @@ -1405,17 +1418,6 @@ async function runSearch( }; } - // Apply result_grouping partition (overlay_first / grouped re-order; - // merged preserves BM25 score order). Single forward pass — no re-sort. - // After partitioning the wider candidate pool, truncate to FINAL_LIMIT. - let orderedHits = hits; - let isGrouped = false; - if (resolvedGrouping === "overlay_first" || resolvedGrouping === "grouped") { - const { overlay, baseline } = partitionBySource(hits); - orderedHits = [...overlay, ...baseline].slice(0, FINAL_LIMIT); - isGrouped = resolvedGrouping === "grouped"; - } - // Cache for fetched content to avoid redundant fetches when include_metadata is enabled const contentCache = new Map(); @@ -2436,7 +2438,7 @@ async function runPreflight( // For "grouped" mode, split start_here into overlay and baseline arrays (each capped at 3) if (resolvedGrouping === "grouped") { - const { overlay, baseline } = partitionBySource(allScored); + const { overlay, baseline } = partitionBySource(startHere); resultObj.start_here_overlay = overlay.slice(0, 3).map((r) => r.path); resultObj.start_here_baseline = baseline.slice(0, 3).map((r) => r.path); } From 8cb0b32aa3096bdea5a7909c0b36ffcf8d4994a3 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 28 Apr 2026 17:47:34 +0000 Subject: [PATCH 5/8] fix: thread result_grouping through CLI preflight path The CLI preflight case in handleAction delegated to runOrchestrate without forwarding resolvedGrouping, so runPreflight always received the default merged ordering. This broke parity with the worker, which already threads resolvedGrouping into runPreflight. - src/core/actions.js: pass resolvedGrouping into runOrchestrate for the preflight case. - src/mcp/orchestrate.js: accept result_grouping option and forward it to runPreflight. - src/tasks/preflight.js: partition start_here by origin for overlay_first/grouped, and emit start_here_overlay / start_here_baseline for grouped, mirroring workers/src/orchestrate.ts. --- src/core/actions.js | 1 + src/mcp/orchestrate.js | 3 +++ src/tasks/preflight.js | 38 ++++++++++++++++++++++++++++++++++++-- 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/core/actions.js b/src/core/actions.js index 94ca60b..f37c41a 100644 --- a/src/core/actions.js +++ b/src/core/actions.js @@ -484,6 +484,7 @@ export async function handleAction(params) { repoRoot, baseline, action: "preflight", + result_grouping: resolvedGrouping, }); return { action: "preflight", diff --git a/src/mcp/orchestrate.js b/src/mcp/orchestrate.js index 2236a0a..7b65bae 100644 --- a/src/mcp/orchestrate.js +++ b/src/mcp/orchestrate.js @@ -291,6 +291,7 @@ export function detectAction(message) { * @param {string} [options.baseline_root] - For instruction_sync: filesystem mode baseline path * @param {Object} [options.registry_payload] - For instruction_sync: payload mode registry object * @param {Object} [options.state_payload] - For instruction_sync: payload mode state object + * @param {"merged"|"overlay_first"|"grouped"} [options.result_grouping] - Ranking policy (#150) * @returns {Object} { action, assistant_text, result, debug, suggest_orient } */ export async function runOrchestrate(options) { @@ -303,6 +304,7 @@ export async function runOrchestrate(options) { baseline_root, registry_payload, state_payload, + result_grouping, } = options; // Runtime validation (schema is permissive, runtime enforces) @@ -409,6 +411,7 @@ export async function runOrchestrate(options) { repo: repoRoot || process.cwd(), baseline, message, + result_grouping, }); result.result = taskResult; result.assistant_text = buildPreflightAssistantText(taskResult); diff --git a/src/tasks/preflight.js b/src/tasks/preflight.js index fa37b79..02dfc5f 100644 --- a/src/tasks/preflight.js +++ b/src/tasks/preflight.js @@ -158,10 +158,12 @@ function findPitfallDocs(docs, keywords) { * @param {string} options.repo - Repository root path * @param {string} options.baseline - Baseline override * @param {string} options.message - The preflight message (what the agent is about to do) + * @param {"merged"|"overlay_first"|"grouped"} [options.result_grouping] - Ranking policy (#150) * @returns {Promise} */ export async function runPreflight(options) { - const { repo: repoRoot, baseline: baselineOverride, message } = options; + const { repo: repoRoot, baseline: baselineOverride, message, result_grouping } = options; + const resolvedGrouping = result_grouping || "merged"; // Reuse catalog to get start_here, next_up, canon_by_tag, playbooks const catalogResult = await runCatalog({ @@ -191,10 +193,27 @@ export async function runPreflight(options) { const dod = findDodDoc(docs); const pitfalls = findPitfallDocs(docs, keywords); + // Apply result_grouping to start_here. Mirror of the worker's runPreflight: + // overlay_first/grouped reorder the list so overlay (local) entries come + // first; "grouped" additionally exposes start_here_overlay/start_here_baseline. + // Origin is read from the index (start_here entries don't carry origin themselves). + let startHere = catalogResult.start_here; + if (resolvedGrouping === "overlay_first" || resolvedGrouping === "grouped") { + const originByPath = new Map(docs.map((d) => [d.path, d.origin || "local"])); + const overlay = []; + const baselineHits = []; + for (const entry of startHere) { + const origin = originByPath.get(entry.path) || "local"; + if (origin === "local") overlay.push(entry); + else baselineHits.push(entry); + } + startHere = [...overlay, ...baselineHits]; + } + const result = { status: "SUPPORTED", advisory: false, - start_here: catalogResult.start_here, + start_here: startHere, next_up: catalogResult.next_up, canon_by_tag: catalogResult.canon_by_tag, playbooks: catalogResult.playbooks, @@ -212,9 +231,24 @@ export async function runPreflight(options) { timestamp: new Date().toISOString(), repo_root: repoRoot, keywords_extracted: keywords, + result_grouping: resolvedGrouping, }, }; + // For "grouped" mode, also expose start_here split by origin (parity with worker). + if (resolvedGrouping === "grouped") { + const originByPath = new Map(docs.map((d) => [d.path, d.origin || "local"])); + const overlay = []; + const baselineHits = []; + for (const entry of startHere) { + const origin = originByPath.get(entry.path) || "local"; + if (origin === "local") overlay.push(entry.path); + else baselineHits.push(entry.path); + } + result.start_here_overlay = overlay.slice(0, 3); + result.start_here_baseline = baselineHits.slice(0, 3); + } + writeLast(result); return result; } From 3c8f77f347abef7c4f7cacc20b0c0dbd599c0155 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 28 Apr 2026 17:56:56 +0000 Subject: [PATCH 6/8] fix(preflight): reuse partition for grouped mode, use nullish coalescing --- src/tasks/preflight.js | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/src/tasks/preflight.js b/src/tasks/preflight.js index 02dfc5f..3d41f26 100644 --- a/src/tasks/preflight.js +++ b/src/tasks/preflight.js @@ -163,7 +163,7 @@ function findPitfallDocs(docs, keywords) { */ export async function runPreflight(options) { const { repo: repoRoot, baseline: baselineOverride, message, result_grouping } = options; - const resolvedGrouping = result_grouping || "merged"; + const resolvedGrouping = result_grouping ?? "merged"; // Reuse catalog to get start_here, next_up, canon_by_tag, playbooks const catalogResult = await runCatalog({ @@ -198,16 +198,18 @@ export async function runPreflight(options) { // first; "grouped" additionally exposes start_here_overlay/start_here_baseline. // Origin is read from the index (start_here entries don't carry origin themselves). let startHere = catalogResult.start_here; + let overlayEntries = null; + let baselineEntries = null; if (resolvedGrouping === "overlay_first" || resolvedGrouping === "grouped") { const originByPath = new Map(docs.map((d) => [d.path, d.origin || "local"])); - const overlay = []; - const baselineHits = []; + overlayEntries = []; + baselineEntries = []; for (const entry of startHere) { const origin = originByPath.get(entry.path) || "local"; - if (origin === "local") overlay.push(entry); - else baselineHits.push(entry); + if (origin === "local") overlayEntries.push(entry); + else baselineEntries.push(entry); } - startHere = [...overlay, ...baselineHits]; + startHere = [...overlayEntries, ...baselineEntries]; } const result = { @@ -237,16 +239,8 @@ export async function runPreflight(options) { // For "grouped" mode, also expose start_here split by origin (parity with worker). if (resolvedGrouping === "grouped") { - const originByPath = new Map(docs.map((d) => [d.path, d.origin || "local"])); - const overlay = []; - const baselineHits = []; - for (const entry of startHere) { - const origin = originByPath.get(entry.path) || "local"; - if (origin === "local") overlay.push(entry.path); - else baselineHits.push(entry.path); - } - result.start_here_overlay = overlay.slice(0, 3); - result.start_here_baseline = baselineHits.slice(0, 3); + result.start_here_overlay = overlayEntries.slice(0, 3).map((e) => e.path); + result.start_here_baseline = baselineEntries.slice(0, 3).map((e) => e.path); } writeLast(result); From 96ef432e999ec3377d392dff4bd887234409dba4 Mon Sep 17 00:00:00 2001 From: klappy Date: Tue, 28 Apr 2026 19:57:13 +0000 Subject: [PATCH 7/8] fix(preflight): apply conditional default in runPreflight (validator F-1) The Sonnet 4.6 read-only validator dispatch flagged that runPreflight had 'result_grouping ?? "merged"' which is dead code on every public path (src/core/actions.js pre-resolves before delegating) but would silently return "merged" for any direct importer with baseline set and no explicit result_grouping. Mirror the worker's conditional default for parity. Validator: dispatched per klappy://canon/constraints/release-validation-gate Rule 2; verdict PASS with this finding. Validator report archived in the session ledger. --- src/tasks/preflight.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/tasks/preflight.js b/src/tasks/preflight.js index 3d41f26..4f5abd5 100644 --- a/src/tasks/preflight.js +++ b/src/tasks/preflight.js @@ -163,7 +163,12 @@ function findPitfallDocs(docs, keywords) { */ export async function runPreflight(options) { const { repo: repoRoot, baseline: baselineOverride, message, result_grouping } = options; - const resolvedGrouping = result_grouping ?? "merged"; + // Mirror the worker's conditional default (#150 validator F-1): when a + // baseline override is set, default to overlay_first; otherwise merged. + // src/core/actions.js pre-resolves this before delegating, so the public + // CLI/MCP paths are unaffected. This handles direct importers of + // runPreflight that bypass that pre-resolution. + const resolvedGrouping = result_grouping ?? (baselineOverride ? "overlay_first" : "merged"); // Reuse catalog to get start_here, next_up, canon_by_tag, playbooks const catalogResult = await runCatalog({ From 885fcc9bc3e0ff7f7e45c3c18f83485ccf4083f1 Mon Sep 17 00:00:00 2001 From: klappy Date: Tue, 28 Apr 2026 20:17:36 +0000 Subject: [PATCH 8/8] fix(worker): include result_grouping in search/preflight debug envelopes (Bugbot) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cursor Bugbot finding (Low): worker debug envelopes for runSearch and runPreflight omitted result_grouping while CLI counterparts in src/core/actions.js and src/tasks/preflight.js included it. Closes the parity gap so debug observability is consistent across worker and CLI. The other six Bugbot findings on this PR were point-in-time reviews already addressed by intermediate commits (60fe9b8, ed3dd14, 8cb0b32, 3c8f77f3, 96ef432) — verified by inspecting HEAD bytes against each finding's locator. --- workers/src/orchestrate.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/workers/src/orchestrate.ts b/workers/src/orchestrate.ts index 78246de..2ca7440 100644 --- a/workers/src/orchestrate.ts +++ b/workers/src/orchestrate.ts @@ -1412,6 +1412,7 @@ async function runSearch( baseline_url: index.baseline_url, knowledge_base_url: knowledgeBaseUrl, search_index_size: bm25.N, + result_grouping: resolvedGrouping, duration_ms: Date.now() - startMs, generated_at: new Date().toISOString(), }, @@ -1498,6 +1499,7 @@ async function runSearch( baseline_url: index.baseline_url, knowledge_base_url: knowledgeBaseUrl, search_index_size: bm25.N, + result_grouping: resolvedGrouping, duration_ms: Date.now() - startMs, generated_at: new Date().toISOString(), }, @@ -2451,6 +2453,7 @@ async function runPreflight( debug: { docs_considered: index.entries.length, knowledge_base_url: knowledgeBaseUrl, + result_grouping: resolvedGrouping, duration_ms: Date.now() - startMs, generated_at: new Date().toISOString(), },