Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions odd/ledger/learnings.jsonl
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,4 @@
{"type":"D","summary":"E0008 challenge governance refactor: replaced hardcoded detectClaimType logic in runChallengeAction with four governance-driven fetch functions (discoverChallengeTypes, fetchBasePrerequisites, fetchNormativeVocabulary, fetchStakesCalibration). Voice-dump suppression invariant is load-bearing — questionTiers.length === 0 short-circuits all output. Four new caches cleared in runCleanupStorage. tsc clean. PR #100.","rationale":"Hardcoded challenge logic cannot evolve with governance articles; governance-driven extraction means challenge behavior updates when articles update, no code change required. Mirrors PR #96 encode precedent exactly.","context":"workers/src/orchestrate.ts, branch feat/e0008-challenge-governance-driven, commit aa4445c","date":"2026-04-17"}
{"date": "2026-04-24", "epoch": "E0008", "task": "feat/telemetry-semantic-names", "summary": "TypeScript bundler moduleResolution omits .js extensions on local imports in compiled output \u2014 Node.js ESM resolver requires explicit .js suffix. When compiling telemetry.ts for integration tests, all compiled .js files in the build dir must be post-processed to add .js to extensionless relative imports. Patch all files in the build dir, not just telemetry.js.", "detail": "telemetry.ts now imports KnowledgeBaseFetcher (a value import, not just a type import) from zip-baseline-fetcher.ts. The existing integration test only compiled tokenize.ts and telemetry.ts. Adding zip-baseline-fetcher.ts to the tsconfig include list is necessary but insufficient \u2014 the compiled JS has extensionless imports (./zip-baseline-fetcher, ./tracing) that Node ESM cannot resolve. Must patch all .js files in the build dir with a regex replace of from \"./foo\" -> from \"./foo.js\".", "pr": "https://github.com/klappy/oddkit/pull/137"}
{"date": "2026-04-24", "epoch": "E0008", "task": "feat/telemetry-semantic-names", "summary": "JSDoc block comments must not contain */ sequences \u2014 they terminate the comment prematurely. Patterns like blob*/double* in a JSDoc comment cause TypeScript parse errors. Use blob1..9/double1..6 or similar notation instead.", "detail": "detectRawSlotNames JSDoc had blob*/double* which the TypeScript parser reads as end-of-comment at the first */. tsc reported TS1109 (Expression expected) at line 459. The fix is trivial but the error message is cryptic \u2014 the real cause is invisible until you stare at the raw characters.", "pr": "https://github.com/klappy/oddkit/pull/137"}
{"date": "2026-04-28", "epoch": "E0008", "task": "feat/overlay-first-ranking", "summary": "Issue #150 Option D1 — overlay-first re-ranking when knowledge_base_url is set. Pull a 50-result candidate pool from BM25 instead of the default 5, then sort overlay (source: canon) hits above baseline before truncating to top 5. Without the larger pool, an overlay doc that scored at BM25 position 6 would be invisible after re-ranking. Stable sort preserves BM25 order within each tier.", "detail": "Workers: workers/src/orchestrate.ts adds rerankOverlayFirst helper, applies it in runSearch and runPreflight when knowledgeBaseUrl is provided. Node CLI: src/core/actions.js search case mirrors the logic against origin: local|baseline (different field name from the worker's source: canon|baseline). Debug envelope now reports overlay_hits_in_top_n, baseline_hits_in_top_n, overlay_doc_count, baseline_doc_count, result_grouping for §5.4 telemetry. No cache key changes — index is unchanged, only post-retrieval ranking differs. No required-baseline manifest dependency. Unit test workers/test/overlay-first-rerank.test.mjs covers the happy path, both single-tier no-ops, missing-entry default, and immutability.", "pr": "TBD"}
41 changes: 38 additions & 3 deletions src/core/actions.js
Original file line number Diff line number Diff line change
Expand Up @@ -286,9 +286,31 @@ export async function handleAction(params) {
}

const bm25 = getBM25Index(index.documents, baselineSha);
const results = searchBM25(bm25, input, 5);

// Issue #150 Option D1: when the index merges local overlay + remote
// baseline docs, baseline volume drowns out the local KB in BM25
// ranking. Pull a larger candidate pool, then re-rank so origin:
// "local" hits sort above origin: "baseline" hits. BM25 still
// orders within each tier. When the index has only one origin
// (e.g. baseline-only canon_url with no local docs), this is a
// no-op.
const FINAL_LIMIT = 5;
const docMap = new Map(index.documents.map((d) => [d.path, d]));
const hasMixedOrigins =
index.documents.some((d) => d.origin === "local") &&
index.documents.some((d) => d.origin === "baseline");
const candidateLimit = hasMixedOrigins ? 50 : FINAL_LIMIT;
const rawResults = searchBM25(bm25, input, candidateLimit);
const orderedResults = hasMixedOrigins
? [...rawResults].sort((a, b) => {
const ao = docMap.get(a.id)?.origin || "baseline";
const bo = docMap.get(b.id)?.origin || "baseline";
if (ao !== bo) return ao === "local" ? -1 : 1;
return b.score - a.score;
})
: rawResults;
const results = orderedResults.slice(0, FINAL_LIMIT);

const hits = results
.map((r) => {
const doc = docMap.get(r.id);
Expand All @@ -297,6 +319,9 @@ export async function handleAction(params) {
})
.filter(Boolean);

const overlayHitsInTopN = hits.filter((h) => h.origin === "local").length;
const baselineHitsInTopN = hits.filter((h) => h.origin === "baseline").length;

const updatedState = state ? addCanonRefs(initState(state), hits.map((h) => h.path)) : undefined;

if (hits.length === 0) {
Expand All @@ -305,7 +330,12 @@ export async function handleAction(params) {
result: { status: "NO_MATCH", docs_considered: index.documents.length, hits: [] },
state: updatedState,
assistant_text: `Searched ${index.documents.length} documents but found no matches for "${input}". Try rephrasing or use action "catalog".`,
debug: makeDebug({ search_index_size: bm25.N }),
debug: makeDebug({
search_index_size: bm25.N,
overlay_hits_in_top_n: 0,
baseline_hits_in_top_n: 0,
result_grouping: hasMixedOrigins ? "overlay_first" : "default",
}),
};
}

Expand Down Expand Up @@ -358,7 +388,12 @@ export async function handleAction(params) {
},
state: updatedState,
assistant_text: assistantLines.join("\n").trim(),
debug: makeDebug({ search_index_size: bm25.N }),
debug: makeDebug({
search_index_size: bm25.N,
overlay_hits_in_top_n: overlayHitsInTopN,
baseline_hits_in_top_n: baselineHitsInTopN,
result_grouping: hasMixedOrigins ? "overlay_first" : "default",
}),
};
}

Expand Down
105 changes: 100 additions & 5 deletions workers/src/orchestrate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1320,6 +1320,42 @@ function scoreEntries(entries: IndexEntry[], query: string): Array<IndexEntry &
.sort((a, b) => b.score - a.score);
}

// ──────────────────────────────────────────────────────────────────────────────
// Overlay-first re-ranking (Option D1 from issue #150)
//
// When a caller provides knowledge_base_url, the search index merges two
// corpora: the project KB overlay (source: "canon") and the klappy.dev
// baseline (source: "baseline"). The baseline is large (~566 docs) relative
// to typical project overlays (~20 docs), so plain BM25 ranking lets baseline
// content monopolize the top of the result list — the contamination shape
// klappy://canon/principles/scoped-truth names as the anti-pattern.
//
// Fix: when knowledge_base_url is set, re-rank so all overlay hits sort
// above all baseline hits. BM25 still orders within each tier, so a baseline
// doc that uniquely matches the query still surfaces — just below the
// overlay's hits. When knowledge_base_url is NOT set (default canon only),
// every entry has source: "canon" and this is a no-op.
//
// This is purely a ranking-layer change; the index itself is unchanged.
// No cache key changes. No new required-baseline classification. No epoch
// bump per governance-change-discipline. See issue #150 §4 Option D1.
// ──────────────────────────────────────────────────────────────────────────────

function rerankOverlayFirst(
results: Array<{ id: string; score: number }>,
entryMap: Map<string, IndexEntry>,
): Array<{ id: string; score: number }> {
return [...results].sort((a, b) => {
const aSource = entryMap.get(a.id)?.source ?? "baseline";
const bSource = entryMap.get(b.id)?.source ?? "baseline";
if (aSource !== bSource) {
// Overlay (canon) wins ties on tier; preserve BM25 order within tier.
return aSource === "canon" ? -1 : 1;
}
return b.score - a.score;
});
}

// ──────────────────────────────────────────────────────────────────────────────
// Individual action handlers
// ──────────────────────────────────────────────────────────────────────────────
Expand All @@ -1334,10 +1370,24 @@ async function runSearch(
const startMs = Date.now();
const index = await fetcher.getIndex(knowledgeBaseUrl);
const bm25 = getBM25Index(index.entries);
const results = searchBM25(bm25, input, 5);

// Issue #150 Option D1: when a project KB overlay is present, retrieve a
// larger candidate pool from BM25 then re-rank to put overlay (source:
// "canon") hits above baseline. With the default 5-result cap, an overlay
// doc that scored at position 6 would be invisible even after re-ranking.
// Pulling 50 candidates is enough headroom for any realistic overlay
// without indexing the entire corpus.
const FINAL_LIMIT = 5;
const candidateLimit = knowledgeBaseUrl ? 50 : FINAL_LIMIT;
const rawResults = searchBM25(bm25, input, candidateLimit);

// Map scores back to entries
const entryMap = new Map(index.entries.map((e) => [e.path, e]));

const results = knowledgeBaseUrl
? rerankOverlayFirst(rawResults, entryMap).slice(0, FINAL_LIMIT)
: rawResults.slice(0, FINAL_LIMIT);

const hits = results
.map((r) => {
const entry = entryMap.get(r.id);
Expand All @@ -1346,6 +1396,10 @@ async function runSearch(
})
.filter(Boolean) as Array<IndexEntry & { score: number }>;

// Per-tier counts for telemetry / debug observability (issue #150 §5.4).
const overlayHitsInTopN = hits.filter((h) => h.source === "canon").length;
const baselineHitsInTopN = hits.filter((h) => h.source === "baseline").length;

Comment thread
cursor[bot] marked this conversation as resolved.
const updatedState = state
? addCanonRefs(
initState(state),
Expand All @@ -1367,6 +1421,11 @@ async function runSearch(
baseline_url: index.baseline_url,
knowledge_base_url: knowledgeBaseUrl,
search_index_size: bm25.N,
overlay_doc_count: index.stats.canon,
baseline_doc_count: index.stats.baseline,
overlay_hits_in_top_n: 0,
baseline_hits_in_top_n: 0,
result_grouping: knowledgeBaseUrl ? "overlay_first" : "default",
duration_ms: Date.now() - startMs,
generated_at: new Date().toISOString(),
},
Expand Down Expand Up @@ -1439,6 +1498,11 @@ async function runSearch(
baseline_url: index.baseline_url,
knowledge_base_url: knowledgeBaseUrl,
search_index_size: bm25.N,
overlay_doc_count: index.stats.canon,
baseline_doc_count: index.stats.baseline,
overlay_hits_in_top_n: overlayHitsInTopN,
baseline_hits_in_top_n: baselineHitsInTopN,
result_grouping: knowledgeBaseUrl ? "overlay_first" : "default",
duration_ms: Date.now() - startMs,
generated_at: new Date().toISOString(),
},
Expand Down Expand Up @@ -2331,12 +2395,38 @@ async function runPreflight(
const startMs = Date.now();
const index = await fetcher.getIndex(knowledgeBaseUrl);
const topic = message.replace(/^preflight:\s*/i, "").trim();
const results = scoreEntries(index.entries, topic).slice(0, 5);
const scored = scoreEntries(index.entries, topic);

// Issue #150 Option D1: when an overlay is present, surface project KB
// start-here docs above baseline. Stable sort preserves the heuristic
// ranking within each tier.
const orderedScored = knowledgeBaseUrl
? [...scored].sort((a, b) => {
if (a.source !== b.source) return a.source === "canon" ? -1 : 1;
return b.score - a.score;
})
: scored;
const results = orderedScored.slice(0, 5);

// Constraints: prefer overlay constraint docs when an overlay is present —
// otherwise the baseline's generic constraints (ai-voice-cliches,
// author-identity-language, etc.) crowd out project-specific governance.
const allConstraints = index.entries.filter(
(e) => e.path.includes("constraint") || e.authority_band === "governing",
);
const orderedConstraints = knowledgeBaseUrl
? [...allConstraints].sort((a, b) => {
if (a.source !== b.source) return a.source === "canon" ? -1 : 1;
return 0;
})
: allConstraints;
const constraints = orderedConstraints.slice(0, 3);

// Issue #150 §5.4 — per-tier counts for telemetry/debug observability.
const overlayHitsInTopN = results.filter((r) => r.source === "canon").length;
const baselineHitsInTopN = results.filter((r) => r.source === "baseline").length;

const dodEntry = index.entries.find((e) => e.path.toLowerCase().includes("definition-of-done"));
const constraints = index.entries
.filter((e) => e.path.includes("constraint") || e.authority_band === "governing")
.slice(0, 3);

const assistantText = [
`Preflight: ${topic}`,
Expand Down Expand Up @@ -2372,6 +2462,11 @@ async function runPreflight(
debug: {
docs_considered: index.entries.length,
knowledge_base_url: knowledgeBaseUrl,
overlay_doc_count: index.stats.canon,
baseline_doc_count: index.stats.baseline,
overlay_hits_in_top_n: overlayHitsInTopN,
baseline_hits_in_top_n: baselineHitsInTopN,
result_grouping: knowledgeBaseUrl ? "overlay_first" : "default",
duration_ms: Date.now() - startMs,
generated_at: new Date().toISOString(),
},
Expand Down
Loading