diff --git a/odd/ledger/learnings.jsonl b/odd/ledger/learnings.jsonl index 3e4cd07..3294843 100644 --- a/odd/ledger/learnings.jsonl +++ b/odd/ledger/learnings.jsonl @@ -40,3 +40,4 @@ {"type":"D","summary":"E0008 challenge governance refactor: replaced hardcoded detectClaimType logic in runChallengeAction with four governance-driven fetch functions (discoverChallengeTypes, fetchBasePrerequisites, fetchNormativeVocabulary, fetchStakesCalibration). Voice-dump suppression invariant is load-bearing — questionTiers.length === 0 short-circuits all output. Four new caches cleared in runCleanupStorage. tsc clean. PR #100.","rationale":"Hardcoded challenge logic cannot evolve with governance articles; governance-driven extraction means challenge behavior updates when articles update, no code change required. Mirrors PR #96 encode precedent exactly.","context":"workers/src/orchestrate.ts, branch feat/e0008-challenge-governance-driven, commit aa4445c","date":"2026-04-17"} {"date": "2026-04-24", "epoch": "E0008", "task": "feat/telemetry-semantic-names", "summary": "TypeScript bundler moduleResolution omits .js extensions on local imports in compiled output \u2014 Node.js ESM resolver requires explicit .js suffix. When compiling telemetry.ts for integration tests, all compiled .js files in the build dir must be post-processed to add .js to extensionless relative imports. Patch all files in the build dir, not just telemetry.js.", "detail": "telemetry.ts now imports KnowledgeBaseFetcher (a value import, not just a type import) from zip-baseline-fetcher.ts. The existing integration test only compiled tokenize.ts and telemetry.ts. Adding zip-baseline-fetcher.ts to the tsconfig include list is necessary but insufficient \u2014 the compiled JS has extensionless imports (./zip-baseline-fetcher, ./tracing) that Node ESM cannot resolve. Must patch all .js files in the build dir with a regex replace of from \"./foo\" -> from \"./foo.js\".", "pr": "https://github.com/klappy/oddkit/pull/137"} {"date": "2026-04-24", "epoch": "E0008", "task": "feat/telemetry-semantic-names", "summary": "JSDoc block comments must not contain */ sequences \u2014 they terminate the comment prematurely. Patterns like blob*/double* in a JSDoc comment cause TypeScript parse errors. Use blob1..9/double1..6 or similar notation instead.", "detail": "detectRawSlotNames JSDoc had blob*/double* which the TypeScript parser reads as end-of-comment at the first */. tsc reported TS1109 (Expression expected) at line 459. The fix is trivial but the error message is cryptic \u2014 the real cause is invisible until you stare at the raw characters.", "pr": "https://github.com/klappy/oddkit/pull/137"} +{"date": "2026-04-28", "epoch": "E0008", "task": "feat/overlay-first-ranking", "summary": "Issue #150 Option D1 — overlay-first re-ranking when knowledge_base_url is set. Pull a 50-result candidate pool from BM25 instead of the default 5, then sort overlay (source: canon) hits above baseline before truncating to top 5. Without the larger pool, an overlay doc that scored at BM25 position 6 would be invisible after re-ranking. Stable sort preserves BM25 order within each tier.", "detail": "Workers: workers/src/orchestrate.ts adds rerankOverlayFirst helper, applies it in runSearch and runPreflight when knowledgeBaseUrl is provided. Node CLI: src/core/actions.js search case mirrors the logic against origin: local|baseline (different field name from the worker's source: canon|baseline). Debug envelope now reports overlay_hits_in_top_n, baseline_hits_in_top_n, overlay_doc_count, baseline_doc_count, result_grouping for §5.4 telemetry. No cache key changes — index is unchanged, only post-retrieval ranking differs. No required-baseline manifest dependency. Unit test workers/test/overlay-first-rerank.test.mjs covers the happy path, both single-tier no-ops, missing-entry default, and immutability.", "pr": "TBD"} diff --git a/src/core/actions.js b/src/core/actions.js index 689395f..cc8ebb3 100644 --- a/src/core/actions.js +++ b/src/core/actions.js @@ -286,9 +286,31 @@ export async function handleAction(params) { } const bm25 = getBM25Index(index.documents, baselineSha); - const results = searchBM25(bm25, input, 5); + // Issue #150 Option D1: when the index merges local overlay + remote + // baseline docs, baseline volume drowns out the local KB in BM25 + // ranking. Pull a larger candidate pool, then re-rank so origin: + // "local" hits sort above origin: "baseline" hits. BM25 still + // orders within each tier. When the index has only one origin + // (e.g. baseline-only canon_url with no local docs), this is a + // no-op. + const FINAL_LIMIT = 5; const docMap = new Map(index.documents.map((d) => [d.path, d])); + const hasMixedOrigins = + index.documents.some((d) => d.origin === "local") && + index.documents.some((d) => d.origin === "baseline"); + const candidateLimit = hasMixedOrigins ? 50 : FINAL_LIMIT; + const rawResults = searchBM25(bm25, input, candidateLimit); + const orderedResults = hasMixedOrigins + ? [...rawResults].sort((a, b) => { + const ao = docMap.get(a.id)?.origin || "baseline"; + const bo = docMap.get(b.id)?.origin || "baseline"; + if (ao !== bo) return ao === "local" ? -1 : 1; + return b.score - a.score; + }) + : rawResults; + const results = orderedResults.slice(0, FINAL_LIMIT); + const hits = results .map((r) => { const doc = docMap.get(r.id); @@ -297,6 +319,9 @@ export async function handleAction(params) { }) .filter(Boolean); + const overlayHitsInTopN = hits.filter((h) => h.origin === "local").length; + const baselineHitsInTopN = hits.filter((h) => h.origin === "baseline").length; + const updatedState = state ? addCanonRefs(initState(state), hits.map((h) => h.path)) : undefined; if (hits.length === 0) { @@ -305,7 +330,12 @@ export async function handleAction(params) { result: { status: "NO_MATCH", docs_considered: index.documents.length, hits: [] }, state: updatedState, assistant_text: `Searched ${index.documents.length} documents but found no matches for "${input}". Try rephrasing or use action "catalog".`, - debug: makeDebug({ search_index_size: bm25.N }), + debug: makeDebug({ + search_index_size: bm25.N, + overlay_hits_in_top_n: 0, + baseline_hits_in_top_n: 0, + result_grouping: hasMixedOrigins ? "overlay_first" : "default", + }), }; } @@ -358,7 +388,12 @@ export async function handleAction(params) { }, state: updatedState, assistant_text: assistantLines.join("\n").trim(), - debug: makeDebug({ search_index_size: bm25.N }), + debug: makeDebug({ + search_index_size: bm25.N, + overlay_hits_in_top_n: overlayHitsInTopN, + baseline_hits_in_top_n: baselineHitsInTopN, + result_grouping: hasMixedOrigins ? "overlay_first" : "default", + }), }; } diff --git a/workers/src/orchestrate.ts b/workers/src/orchestrate.ts index c99827f..0247705 100644 --- a/workers/src/orchestrate.ts +++ b/workers/src/orchestrate.ts @@ -1320,6 +1320,42 @@ function scoreEntries(entries: IndexEntry[], query: string): Array b.score - a.score); } +// ────────────────────────────────────────────────────────────────────────────── +// Overlay-first re-ranking (Option D1 from issue #150) +// +// When a caller provides knowledge_base_url, the search index merges two +// corpora: the project KB overlay (source: "canon") and the klappy.dev +// baseline (source: "baseline"). The baseline is large (~566 docs) relative +// to typical project overlays (~20 docs), so plain BM25 ranking lets baseline +// content monopolize the top of the result list — the contamination shape +// klappy://canon/principles/scoped-truth names as the anti-pattern. +// +// Fix: when knowledge_base_url is set, re-rank so all overlay hits sort +// above all baseline hits. BM25 still orders within each tier, so a baseline +// doc that uniquely matches the query still surfaces — just below the +// overlay's hits. When knowledge_base_url is NOT set (default canon only), +// every entry has source: "canon" and this is a no-op. +// +// This is purely a ranking-layer change; the index itself is unchanged. +// No cache key changes. No new required-baseline classification. No epoch +// bump per governance-change-discipline. See issue #150 §4 Option D1. +// ────────────────────────────────────────────────────────────────────────────── + +function rerankOverlayFirst( + results: Array<{ id: string; score: number }>, + entryMap: Map, +): Array<{ id: string; score: number }> { + return [...results].sort((a, b) => { + const aSource = entryMap.get(a.id)?.source ?? "baseline"; + const bSource = entryMap.get(b.id)?.source ?? "baseline"; + if (aSource !== bSource) { + // Overlay (canon) wins ties on tier; preserve BM25 order within tier. + return aSource === "canon" ? -1 : 1; + } + return b.score - a.score; + }); +} + // ────────────────────────────────────────────────────────────────────────────── // Individual action handlers // ────────────────────────────────────────────────────────────────────────────── @@ -1334,10 +1370,24 @@ async function runSearch( const startMs = Date.now(); const index = await fetcher.getIndex(knowledgeBaseUrl); const bm25 = getBM25Index(index.entries); - const results = searchBM25(bm25, input, 5); + + // Issue #150 Option D1: when a project KB overlay is present, retrieve a + // larger candidate pool from BM25 then re-rank to put overlay (source: + // "canon") hits above baseline. With the default 5-result cap, an overlay + // doc that scored at position 6 would be invisible even after re-ranking. + // Pulling 50 candidates is enough headroom for any realistic overlay + // without indexing the entire corpus. + const FINAL_LIMIT = 5; + const candidateLimit = knowledgeBaseUrl ? 50 : FINAL_LIMIT; + const rawResults = searchBM25(bm25, input, candidateLimit); // Map scores back to entries const entryMap = new Map(index.entries.map((e) => [e.path, e])); + + const results = knowledgeBaseUrl + ? rerankOverlayFirst(rawResults, entryMap).slice(0, FINAL_LIMIT) + : rawResults.slice(0, FINAL_LIMIT); + const hits = results .map((r) => { const entry = entryMap.get(r.id); @@ -1346,6 +1396,10 @@ async function runSearch( }) .filter(Boolean) as Array; + // Per-tier counts for telemetry / debug observability (issue #150 §5.4). + const overlayHitsInTopN = hits.filter((h) => h.source === "canon").length; + const baselineHitsInTopN = hits.filter((h) => h.source === "baseline").length; + const updatedState = state ? addCanonRefs( initState(state), @@ -1367,6 +1421,11 @@ async function runSearch( baseline_url: index.baseline_url, knowledge_base_url: knowledgeBaseUrl, search_index_size: bm25.N, + overlay_doc_count: index.stats.canon, + baseline_doc_count: index.stats.baseline, + overlay_hits_in_top_n: 0, + baseline_hits_in_top_n: 0, + result_grouping: knowledgeBaseUrl ? "overlay_first" : "default", duration_ms: Date.now() - startMs, generated_at: new Date().toISOString(), }, @@ -1439,6 +1498,11 @@ async function runSearch( baseline_url: index.baseline_url, knowledge_base_url: knowledgeBaseUrl, search_index_size: bm25.N, + overlay_doc_count: index.stats.canon, + baseline_doc_count: index.stats.baseline, + overlay_hits_in_top_n: overlayHitsInTopN, + baseline_hits_in_top_n: baselineHitsInTopN, + result_grouping: knowledgeBaseUrl ? "overlay_first" : "default", duration_ms: Date.now() - startMs, generated_at: new Date().toISOString(), }, @@ -2331,12 +2395,38 @@ async function runPreflight( const startMs = Date.now(); const index = await fetcher.getIndex(knowledgeBaseUrl); const topic = message.replace(/^preflight:\s*/i, "").trim(); - const results = scoreEntries(index.entries, topic).slice(0, 5); + const scored = scoreEntries(index.entries, topic); + + // Issue #150 Option D1: when an overlay is present, surface project KB + // start-here docs above baseline. Stable sort preserves the heuristic + // ranking within each tier. + const orderedScored = knowledgeBaseUrl + ? [...scored].sort((a, b) => { + if (a.source !== b.source) return a.source === "canon" ? -1 : 1; + return b.score - a.score; + }) + : scored; + const results = orderedScored.slice(0, 5); + + // Constraints: prefer overlay constraint docs when an overlay is present — + // otherwise the baseline's generic constraints (ai-voice-cliches, + // author-identity-language, etc.) crowd out project-specific governance. + const allConstraints = index.entries.filter( + (e) => e.path.includes("constraint") || e.authority_band === "governing", + ); + const orderedConstraints = knowledgeBaseUrl + ? [...allConstraints].sort((a, b) => { + if (a.source !== b.source) return a.source === "canon" ? -1 : 1; + return 0; + }) + : allConstraints; + const constraints = orderedConstraints.slice(0, 3); + + // Issue #150 §5.4 — per-tier counts for telemetry/debug observability. + const overlayHitsInTopN = results.filter((r) => r.source === "canon").length; + const baselineHitsInTopN = results.filter((r) => r.source === "baseline").length; const dodEntry = index.entries.find((e) => e.path.toLowerCase().includes("definition-of-done")); - const constraints = index.entries - .filter((e) => e.path.includes("constraint") || e.authority_band === "governing") - .slice(0, 3); const assistantText = [ `Preflight: ${topic}`, @@ -2372,6 +2462,11 @@ async function runPreflight( debug: { docs_considered: index.entries.length, knowledge_base_url: knowledgeBaseUrl, + overlay_doc_count: index.stats.canon, + baseline_doc_count: index.stats.baseline, + overlay_hits_in_top_n: overlayHitsInTopN, + baseline_hits_in_top_n: baselineHitsInTopN, + result_grouping: knowledgeBaseUrl ? "overlay_first" : "default", duration_ms: Date.now() - startMs, generated_at: new Date().toISOString(), }, diff --git a/workers/test/overlay-first-rerank.test.mjs b/workers/test/overlay-first-rerank.test.mjs new file mode 100644 index 0000000..5a33bbb --- /dev/null +++ b/workers/test/overlay-first-rerank.test.mjs @@ -0,0 +1,230 @@ +#!/usr/bin/env node +/** + * Unit test for overlay-first re-ranking when knowledge_base_url is set. + * + * Issue #150 — Option D1: when a project KB overlay is merged with the + * klappy.dev baseline corpus, search/preflight must surface overlay docs + * (source: "canon") above baseline docs (source: "baseline"). BM25 still + * orders within each tier, so a uniquely-relevant baseline doc still + * surfaces — just below the overlay's hits. + * + * The helper under test is `rerankOverlayFirst` in orchestrate.ts. We + * exercise it via small synthetic corpora that mirror the contamination + * shape from the issue: an overlay doc that scored just below a flood of + * baseline hits should bubble to the top after re-ranking. + * + * The compile-then-import approach mirrors tokenize.test.mjs. + */ + +import assert from "node:assert/strict"; +import { spawnSync } from "node:child_process"; +import { mkdtempSync, writeFileSync, symlinkSync, existsSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const WORKERS_ROOT = join(__dirname, ".."); +const SRC_DIR = join(WORKERS_ROOT, "src"); + +// Minimal harness: copy just the helper out of orchestrate.ts so we can +// compile it standalone without dragging in the full Worker dependency +// graph (env types, agents SDK, KV bindings, etc.). +const HARNESS_TS = ` +export interface Entry { path: string; source: "canon" | "baseline"; } + +export function rerankOverlayFirst( + results: Array<{ id: string; score: number }>, + entryMap: Map, +): Array<{ id: string; score: number }> { + return [...results].sort((a, b) => { + const aSource = entryMap.get(a.id)?.source ?? "baseline"; + const bSource = entryMap.get(b.id)?.source ?? "baseline"; + if (aSource !== bSource) { + return aSource === "canon" ? -1 : 1; + } + return b.score - a.score; + }); +} +`; + +const tmp = mkdtempSync(join(tmpdir(), "oddkit-rerank-test-")); +const srcDir = join(tmp, "src"); +const outDir = join(tmp, "out"); +const { mkdirSync } = await import("node:fs"); +mkdirSync(srcDir, { recursive: true }); +mkdirSync(outDir, { recursive: true }); +const harnessPath = join(srcDir, "rerank.ts"); +writeFileSync(harnessPath, HARNESS_TS); + +const tsconfig = { + compilerOptions: { + target: "ES2022", + module: "ES2022", + moduleResolution: "bundler", + lib: ["ES2022", "DOM"], + types: [], + strict: false, + skipLibCheck: true, + rootDir: srcDir, + outDir, + }, + include: [harnessPath], +}; +const tsconfigPath = join(tmp, "tsconfig.json"); +writeFileSync(tsconfigPath, JSON.stringify(tsconfig, null, 2)); + +const tmpNodeModules = join(tmp, "node_modules"); +if (!existsSync(tmpNodeModules)) { + symlinkSync(join(WORKERS_ROOT, "node_modules"), tmpNodeModules); +} + +// Verify the canonical helper text in orchestrate.ts matches the harness so +// that this test stays meaningful when the helper is edited. +const { readFileSync } = await import("node:fs"); +const orchestrateSrc = readFileSync(join(SRC_DIR, "orchestrate.ts"), "utf8"); +assert.ok( + /function rerankOverlayFirst\(/.test(orchestrateSrc), + "orchestrate.ts must export rerankOverlayFirst (otherwise this test is testing a stale shape)", +); +assert.ok( + /aSource === "canon" \? -1 : 1/.test(orchestrateSrc), + "orchestrate.ts rerankOverlayFirst must promote canon over baseline", +); + +const compile = spawnSync("npx", ["--yes", "tsc", "-p", tsconfigPath], { + encoding: "utf8", +}); +if (compile.status !== 0) { + console.error("TypeScript compile failed:"); + console.error(compile.stdout); + console.error(compile.stderr); + process.exit(1); +} + +const compiledPath = join(outDir, "rerank.js"); +const { rerankOverlayFirst } = await import(compiledPath); + +let pass = 0; +let fail = 0; + +async function test(name, fn) { + try { + await fn(); + console.log(` \u2713 ${name}`); + pass++; + } catch (err) { + console.log(` \u2717 ${name}`); + console.log(` ${err.message}`); + fail++; + } +} + +console.log("overlay-first re-rank unit tests"); + +await test("overlay hit bubbles above higher-scored baseline hits", async () => { + // Mirrors issue #150 §1: baseline docs dominate BM25 even when an + // overlay doc has the better answer. Pre-rerank order is BM25 score: + // baseline:7, baseline:6, overlay:4.5, baseline:3. + const entries = new Map([ + ["baseline/a", { path: "baseline/a", source: "baseline" }], + ["baseline/b", { path: "baseline/b", source: "baseline" }], + ["overlay/spec", { path: "overlay/spec", source: "canon" }], + ["baseline/c", { path: "baseline/c", source: "baseline" }], + ]); + const input = [ + { id: "baseline/a", score: 7 }, + { id: "baseline/b", score: 6 }, + { id: "overlay/spec", score: 4.5 }, + { id: "baseline/c", score: 3 }, + ]; + const out = rerankOverlayFirst(input, entries); + assert.equal(out[0].id, "overlay/spec", "overlay must rank first"); + assert.equal(out[1].id, "baseline/a", "baseline order preserved within tier"); + assert.equal(out[2].id, "baseline/b"); + assert.equal(out[3].id, "baseline/c"); +}); + +await test("multiple overlay hits keep their BM25 order", async () => { + const entries = new Map([ + ["overlay/a", { path: "overlay/a", source: "canon" }], + ["overlay/b", { path: "overlay/b", source: "canon" }], + ["baseline/x", { path: "baseline/x", source: "baseline" }], + ]); + const input = [ + { id: "baseline/x", score: 9 }, + { id: "overlay/a", score: 5 }, + { id: "overlay/b", score: 6 }, + ]; + const out = rerankOverlayFirst(input, entries); + // Overlay tier first, ordered by score within: b (6) > a (5). + assert.deepEqual( + out.map((r) => r.id), + ["overlay/b", "overlay/a", "baseline/x"], + ); +}); + +await test("baseline-only corpus is a no-op (preserves BM25 order)", async () => { + const entries = new Map([ + ["baseline/a", { path: "baseline/a", source: "baseline" }], + ["baseline/b", { path: "baseline/b", source: "baseline" }], + ]); + const input = [ + { id: "baseline/b", score: 8 }, + { id: "baseline/a", score: 4 }, + ]; + const out = rerankOverlayFirst(input, entries); + assert.deepEqual( + out.map((r) => r.id), + ["baseline/b", "baseline/a"], + ); +}); + +await test("overlay-only corpus is a no-op (preserves BM25 order)", async () => { + const entries = new Map([ + ["overlay/a", { path: "overlay/a", source: "canon" }], + ["overlay/b", { path: "overlay/b", source: "canon" }], + ]); + const input = [ + { id: "overlay/a", score: 7 }, + { id: "overlay/b", score: 2 }, + ]; + const out = rerankOverlayFirst(input, entries); + assert.deepEqual( + out.map((r) => r.id), + ["overlay/a", "overlay/b"], + ); +}); + +await test("missing entry is treated as baseline (defensive default)", async () => { + const entries = new Map([ + ["overlay/spec", { path: "overlay/spec", source: "canon" }], + ]); + const input = [ + { id: "ghost", score: 99 }, + { id: "overlay/spec", score: 1 }, + ]; + const out = rerankOverlayFirst(input, entries); + // Unknown entries default to baseline tier, so overlay/spec wins + // despite the lower BM25 score. This protects against an entry being + // deleted from the index between BM25 ranking and the re-rank step. + assert.equal(out[0].id, "overlay/spec"); + assert.equal(out[1].id, "ghost"); +}); + +await test("does not mutate the input array", async () => { + const entries = new Map([ + ["overlay/a", { path: "overlay/a", source: "canon" }], + ["baseline/x", { path: "baseline/x", source: "baseline" }], + ]); + const input = [ + { id: "baseline/x", score: 9 }, + { id: "overlay/a", score: 1 }, + ]; + const snapshot = input.map((r) => ({ ...r })); + rerankOverlayFirst(input, entries); + assert.deepEqual(input, snapshot, "input array must not be mutated"); +}); + +console.log(`\n${pass} passed, ${fail} failed`); +process.exit(fail > 0 ? 1 : 0);