diff --git a/docs/oddkit/tools/oddkit_preflight.md b/docs/oddkit/tools/oddkit_preflight.md index e36a7358..bc59be5d 100644 --- a/docs/oddkit/tools/oddkit_preflight.md +++ b/docs/oddkit/tools/oddkit_preflight.md @@ -44,6 +44,15 @@ For document deliverables, preflight includes the Writing Canon (`klappy://canon "input": { "type": "string", "description": "Description of what you are about to implement. Be specific — the more detail provided, the more precisely relevant the returned docs and constraints will be." + }, + "knowledge_base_url": { + "type": "string", + "description": "Optional. GitHub repo URL for canon override. Defaults to the configured baseline." + }, + "result_grouping": { + "type": "string", + "enum": ["merged", "overlay_first", "grouped"], + "description": "Optional. Ranking policy for start_here and constraints when an overlay (knowledge_base_url) is set. \"merged\" preserves pure relevance ranking. \"overlay_first\" promotes overlay (canon) docs above baseline docs. \"grouped\" ranks like \"overlay_first\" and additionally returns separate start_here_overlay and start_here_baseline arrays. Conditional default: knowledge_base_url unset → \"merged\"; knowledge_base_url set → \"overlay_first\"." 
} }, "required": ["input"] @@ -60,6 +69,8 @@ For document deliverables, preflight includes the Writing Canon (`klappy://canon "start_here": [ "string — file paths to docs ranked by relevance to the described task" ], + "start_here_overlay": "array — only present when result_grouping is \"grouped\"; subset of start_here restricted to overlay (source=\"canon\") docs", + "start_here_baseline": "array — only present when result_grouping is \"grouped\"; subset of start_here restricted to baseline docs", "dod": "string — path to the definition of done document", "constraints": [ "string — paths to constraint documents applicable to this task" @@ -69,6 +80,16 @@ For document deliverables, preflight includes the Writing Canon (`klappy://canon } ``` +## Result Grouping (Knowledge Base Overlay) + +When `knowledge_base_url` is set, `start_here` and `constraints` are partitioned the same way `oddkit_search` partitions hits: overlay (`source: "canon"`) docs surface above baseline docs by default. Without this, project-specific governance can be displaced from the top of `start_here` by larger baseline corpora — the contamination shape that `klappy://canon/principles/scoped-truth` names. + +`result_grouping` accepts the same three values as `oddkit_search`: + +- **`"merged"`** — Pure relevance ranking, no partition. Default when `knowledge_base_url` is unset. +- **`"overlay_first"`** — Overlay docs ranked above baseline docs in `start_here` and `constraints`, BM25 ordering preserved within each tier. **Default when `knowledge_base_url` is set.** +- **`"grouped"`** — Same ranking as `overlay_first`, plus explicit `start_here_overlay` and `start_here_baseline` arrays so callers can render or reason about the tiers separately. + ## Behavioral Rules 1. **Return start_here docs ranked by relevance.** The most relevant documents appear first. Ranking is based on semantic similarity to the described task, not alphabetical order or recency. 
@@ -117,3 +138,4 @@ The Progressive Disclosure Failure incident (February 2026) proved that agents w - `klappy://canon/constraints/README` — Index of all constraints, returned when broadly applicable - `klappy://canon/meta/writing-canon` — Progressive disclosure checklist, included for document deliverables - `klappy://canon/epistemic-modes` — Mode obligations that inform what "ready to implement" means +- `klappy://canon/principles/scoped-truth` — The contamination shape that motivated `result_grouping`; ranking precedence between overlay and baseline applies to start_here and constraints diff --git a/docs/oddkit/tools/oddkit_search.md b/docs/oddkit/tools/oddkit_search.md index 9d8fdbd9..e8a4d9d2 100644 --- a/docs/oddkit/tools/oddkit_search.md +++ b/docs/oddkit/tools/oddkit_search.md @@ -48,6 +48,11 @@ Results include both hits (ranked document summaries with scores) and evidence ( "knowledge_base_url": { "type": "string", "description": "Optional. GitHub repo URL for canon override. Defaults to the configured baseline." + }, + "result_grouping": { + "type": "string", + "enum": ["merged", "overlay_first", "grouped"], + "description": "Optional. Ranking policy for results when an overlay (knowledge_base_url) is set. \"merged\" preserves pure BM25 score order. \"overlay_first\" promotes overlay (canon) hits above baseline hits while preserving BM25 score order within each tier. \"grouped\" additionally returns separate overlay_hits and baseline_hits arrays. Conditional default: knowledge_base_url unset → \"merged\" (no behavior change); knowledge_base_url set → \"overlay_first\"." 
} }, "required": ["input"] @@ -69,14 +74,16 @@ Results include both hits (ranked document summaries with scores) and evidence ( "tags": ["string — tags from frontmatter"], "score": "number — relevance score (higher is more relevant)", "snippet": "string — excerpt from the document", - "source": "baseline" + "source": "canon | baseline" } ], + "overlay_hits": "array — only present when result_grouping is \"grouped\"; same shape as hits, restricted to source=\"canon\"", + "baseline_hits": "array — only present when result_grouping is \"grouped\"; same shape as hits, restricted to source=\"baseline\"", "evidence": [ { "quote": "string — direct quote from the document", "citation": "string — path#Section Name", - "source": "baseline" + "source": "canon | baseline" } ], "docs_considered": "number — total documents in the search index" @@ -84,6 +91,18 @@ Results include both hits (ranked document summaries with scores) and evidence ( } ``` +## Result Grouping (Knowledge Base Overlay) + +When `knowledge_base_url` is set, the search index merges the project's overlay docs (`source: "canon"`) with the configured baseline (`source: "baseline"`). Without ranking guidance, baseline content can outrank project-specific docs simply because the baseline is larger — a contamination shape `klappy://canon/principles/scoped-truth` names as the anti-pattern. + +`result_grouping` controls how this is resolved: + +- **`"merged"`** — Pure BM25 score order. No partition. The previous default for all calls; remains the default when `knowledge_base_url` is unset. +- **`"overlay_first"`** — Stable partition: all `source: "canon"` hits precede all `source: "baseline"` hits. BM25 score order is preserved within each tier, so a uniquely relevant baseline doc can still surface — just below the overlay's hits — whenever the overlay hits do not fill the response cap on their own. 
**This is the default when `knowledge_base_url` is set.** +- **`"grouped"`** — Same ranking as `overlay_first`, plus the response carries explicit `overlay_hits` and `baseline_hits` arrays so callers can render the tiers separately. + +The candidate pool is widened to 50 BM25 results when `result_grouping !== "merged"`, partitioned, then truncated to the response cap of 5. This ensures overlay docs ranked at BM25 position 6+ are visible to the partition rather than truncated before it. + ## Behavioral Rules 1. **Return ranked results by relevance score.** Higher scores indicate stronger semantic match to the query. Results are ordered by descending score. @@ -96,5 +115,6 @@ Results include both hits (ranked document summaries with scores) and evidence ( ## Canon References - `klappy://canon/values/axioms` — Axiom 1 (Reality Is Sovereign) requires retrieval over fabrication +- `klappy://canon/principles/scoped-truth` — Names the contamination shape that motivated `result_grouping`; ranking overlay hits above baseline hits is how search implements that principle - `klappy://canon/constraints/definition-of-done` — Evidence standards that search helps satisfy - `klappy://docs/agents/librarian/trusted-sources` — Citation rules governing how search results should be used