From 9d41359a8ec0f745331aa4a0b27528d847b799f1 Mon Sep 17 00:00:00 2001 From: critesjosh Date: Mon, 4 May 2026 16:11:49 +0000 Subject: [PATCH 1/2] fix(error-lookup): filter content-thin semantic chunks + suppress weak catalog when semantic ran MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Companion to docsgpt's apiref-empty-chunk filter (critesjosh/docsgpt-aztec#66). Two layered changes: 1. **Client-side defense-in-depth filter** (`isUsefulSemanticChunk` in `src/tools/error-lookup.ts`). Mirrors the Python helper `_is_empty_apiref_chunk` in docsgpt's `/api/search`: drops chunks whose body — after stripping the rendered file-path heading — is empty or path-only (every line contains `/` and no whitespace). Defense-in-depth because the MCP server can be pointed at any DocsGPT deployment via `API_URL`; a fork or older instance may not have the server-side filter, and a future ingest regression could reintroduce path-only chunks. Critically: legitimate signature-only chunks survive. Filter inspects content shape (whitespace presence in remaining lines), not length — `pub fn poseidon(input: [Field; N]) -> Field` has spaces, so it never trips the path-only test. When all returned chunks are path-only, `lookupAztecError` now reports `semanticHealth: "no_results"` (semantically accurate: the backend ran cleanly but didn't return anything useful) rather than "ok" with three useless paths. 2. **Suppress weak catalog hints when semantic was useful** (`formatErrorLookupResult` in `src/utils/format.ts`). The user- reported anchoring failure: when semantic returns content-bearing chunks AND every catalog match is below the strong-match threshold, the catalog hits are pure noise — the user keeps reading them as "the primary answer" even though semantic gave us the actual answer. New `suppressWeakCatalog` flag hides the catalog section entirely from rendered output in that case. They remain in `result.catalogMatches` for programmatic consumers needing every signal. When semantic was unhelpful (no_results / failed / version mismatch / no client) the weak catalog is KEPT — it's the user's only signal. The "Lower-Confidence Catalog Hints" header + neutral "treat as low-confidence cues only" note frame it honestly. Tests: 282/282 (was 264, +18 across error-lookup + format suites). - `isUsefulSemanticChunk` regression cases: path-only / md-heading- only / completely empty / signature-bearing / doc-comment-bearing / multi-line path re-exports. - `lookupAztecError` integration: all-path-only chunks → no_results, mixed chunks → only useful ones surface. - Suppression matrix: weak + semantic-ok hides catalog; weak + every other state keeps it visible. - Strong catalog matches always render normally regardless of semantic state. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/tools/error-lookup.ts | 60 ++++++++++++++- src/utils/format.ts | 28 +++++-- tests/tools/error-lookup.test.ts | 126 ++++++++++++++++++++++++++++++- tests/utils/format.test.ts | 124 ++++++++++++++++++++++++++++++ 4 files changed, 329 insertions(+), 9 deletions(-) diff --git a/src/tools/error-lookup.ts b/src/tools/error-lookup.ts index 6d706c3..a9ce1f2 100644 --- a/src/tools/error-lookup.ts +++ b/src/tools/error-lookup.ts @@ -31,6 +31,56 @@ import { checkVersionGate, formatMismatchMessage } from "../utils/version-check. */ const STRONG_MATCH_THRESHOLD = 70; +/** + * Drop semantic chunks whose body is empty or just the file path. + * + * Why this exists client-side even though docsgpt's ``/api/search`` + * has its own equivalent guard: defense-in-depth. The MCP server is + * shipped to end users on whatever DocsGPT instance ``API_URL`` + * points at — that backend may not have the latest filter applied, + * may be a self-hosted fork, or may reintroduce the bug in a future + * regression. Filtering on this side keeps the MCP UX safe regardless. + * + * Mirrors the Python helper in ``application/api/answer/routes/search.py`` + * (``_is_empty_apiref_chunk``) — same content-shape predicate. + */ +function isUsefulSemanticChunk(match: SemanticSearchResult): boolean { + const text = (match.text ?? "").trim(); + if (!text) return false; + + const lines = text + .split("\n") + .map((l) => l.trim()) + .filter((l) => l.length > 0); + if (lines.length === 0) return false; + + // Sourceish: any of the strings the rendered file-heading might have + // matched (source path / filename-equivalent title). Strip a leading + // "#" / "##" markdown heading marker before comparing. Deliberately + // does NOT include match.text — that would create a fixed point + // (text === sourceish[0]) that filters single-line legitimate + // chunks that happen to be one line long. + const sourceish = new Set( + [match.source, match.title] + .map((s) => (s ?? "").trim()) + .filter(Boolean) + ); + const firstStripped = lines[0].replace(/^#+\s*/, "").trim(); + let body = lines; + if (sourceish.has(firstStripped) || sourceish.has(lines[0])) { + body = body.slice(1); + } + + if (body.length === 0) return false; + + // Body still looks like a file path: every remaining line is path- + // shaped (contains "/" and no whitespace). A real signature line + // ("pub fn ..., struct Foo, ...") always has whitespace. + if (body.every((l) => l.includes("/") && !/\s/.test(l))) return false; + + return true; +} + export type SemanticHealth = | "ok" // semantic returned results | "no_results" // semantic ran cleanly, returned empty @@ -143,11 +193,19 @@ export async function lookupAztecError( } try { - const semanticResults = await docsgptClient.search( + const rawResults = await docsgptClient.search( `Aztec error: ${query}`, 3 ); + // Filter content-thin / path-only chunks. If the server-side guard + // is in place these will be empty already, but defense-in-depth + // protects against older docsgpt deployments and any future + // regression in the apiref ingest. "Returned 3 chunks but all + // were just file paths" is functionally equivalent to "returned + // nothing useful" and we report it as such. + const semanticResults = rawResults.filter(isUsefulSemanticChunk); + if (semanticResults.length > 0) { return { success: true, diff --git a/src/utils/format.ts b/src/utils/format.ts index 53bd962..2bff566 100644 --- a/src/utils/format.ts +++ b/src/utils/format.ts @@ -187,17 +187,27 @@ export function formatErrorLookupResult(result: ErrorLookupToolResult): string { const { catalogMatches, codeMatches } = result.result; // When semantic results exist AND every catalog match is below the - // strong-match threshold, the catalog hits are low-confidence cues - // that shouldn't visually dominate the response. Render semantic - // first under "## Related Documentation", and the catalog after - // under "## Lower-Confidence Catalog Hints" so the LLM consumer - // doesn't anchor on a misleading top hit (e.g. "note already - // nullified" matching "Contract already initialized" with score 54). + // strong-match threshold, the catalog hits are low-confidence cues. + // Two cases: + // + // semanticHasResults = true → semantic returned content-bearing + // chunks (the lookupAztecError filter only sets semanticResults + // when at least one chunk passed isUsefulSemanticChunk). The + // weak catalog hint is now actively misleading — the user keeps + // anchoring on it as the "primary answer" even though semantic + // gave us better context. SUPPRESS the catalog section entirely. + // + // semanticHasResults = false → semantic ran but produced nothing + // useful (or didn't run: no client, version mismatch, backend + // failed). The user has no other signal. KEEP the weak catalog + // with a clear "Lower-Confidence Catalog Hints" header so they + // have *something* to look at, framed honestly. const semanticHasResults = !!result.semanticResults && result.semanticResults.length > 0; const catalogIsWeakOnly = catalogMatches.length > 0 && catalogMatches.every((m) => m.score < 70); + const suppressWeakCatalog = catalogIsWeakOnly && semanticHasResults; const renderSemanticFirst = semanticHasResults && catalogIsWeakOnly; function renderSemantic() { @@ -221,6 +231,12 @@ export function formatErrorLookupResult(result: ErrorLookupToolResult): string { function renderCatalog() { if (catalogMatches.length === 0) return; + // Phase 2 suppression: when semantic returned content-bearing + // chunks AND the catalog is weak-only, the catalog hits are + // pure noise that the user keeps anchoring on. Hide them + // entirely. They remain in `result.catalogMatches` for + // programmatic consumers that need every signal. + if (suppressWeakCatalog) return; lines.push( catalogIsWeakOnly ? "## Lower-Confidence Catalog Hints" diff --git a/tests/tools/error-lookup.test.ts b/tests/tools/error-lookup.test.ts index ed2e785..4123079 100644 --- a/tests/tools/error-lookup.test.ts +++ b/tests/tools/error-lookup.test.ts @@ -292,6 +292,128 @@ describe("lookupAztecError — semantic fallback", () => { }); }); +describe("lookupAztecError — content-thin chunk filter", () => { + /** + * Defense-in-depth filter: even if docsgpt's `/api/search` regresses + * and starts returning path-only / empty-body apiref chunks, + * `isUsefulSemanticChunk` drops them before they're surfaced to the + * LLM consumer. Mirrors the server-side + * `_is_empty_apiref_chunk` helper. + */ + function chunk(text: string, source = "aztec-nr/aztec/src/foo.nr") { + return { text, title: "foo.nr", source }; + } + + it("treats raw output of all path-only chunks as 'no_results'", async () => { + const client = makeClient({ + search: vi.fn().mockResolvedValue([ + chunk("\n\naztec-nr/aztec/src/context/note_existence_request.nr\n\n", + "aztec-nr/aztec/src/context/note_existence_request.nr"), + chunk("\n\naztec-nr/aztec/src/note/hinted_note.nr\n", + "aztec-nr/aztec/src/note/hinted_note.nr"), + ]), + }); + const result = await lookupAztecError({ query: "obscure" }, client); + expect(result.semanticHealth).toBe("no_results"); + expect(result.semanticResults).toBeUndefined(); + }); + + it("keeps mixed results when at least one chunk has substantive body", async () => { + const client = makeClient({ + search: vi.fn().mockResolvedValue([ + chunk("\n\naztec-nr/aztec/src/empty.nr\n", + "aztec-nr/aztec/src/empty.nr"), + chunk( + "# aztec-nr/aztec/src/hash.nr\npub fn poseidon(input: [Field; N]) -> Field", + "aztec-nr/aztec/src/hash.nr" + ), + chunk("\n\naztec-nr/aztec/src/utils.nr\n", + "aztec-nr/aztec/src/utils.nr"), + ]), + }); + const result = await lookupAztecError({ query: "poseidon" }, client); + expect(result.semanticHealth).toBe("ok"); + expect(result.semanticResults).toHaveLength(1); + expect(result.semanticResults![0].text).toContain("poseidon"); + }); +}); + +describe("lookupAztecError — weak catalog suppression when semantic is useful", () => { + /** + * The user-reported "bogus result still appears" failure mode: weak + * catalog hits visible alongside semantic results lets the LLM + * consumer anchor on the wrong answer. When semantic returned + * useful (post-filter) chunks, the weak catalog is now suppressed + * from the rendered output entirely (still present in + * `result.catalogMatches` for programmatic consumers). + * + * This tests the data-shape that the formatter consumes; the + * formatter test (`tests/utils/format.test.ts`) verifies the + * suppression actually happens at render time. + */ + it("returns semanticHealth='ok' with weak catalog still in result.catalogMatches", async () => { + mockLookupError.mockReturnValue({ + query: "note already nullified", + catalogMatches: [ + catalogHit(54, "Contract already initialized", "word-overlap"), + ], + codeMatches: [], + }); + + const client = makeClient({ + search: vi.fn().mockResolvedValue([ + { + text: "Notes in Aztec are nullified by emitting a nullifier...", + title: "Note Lifecycle", + source: "docs/notes.md", + }, + ]), + }); + + const result = await lookupAztecError( + { query: "note already nullified" }, + client + ); + expect(result.semanticHealth).toBe("ok"); + expect(result.semanticResults).toHaveLength(1); + // The weak catalog hit is preserved in the data — the formatter + // is responsible for hiding it. Programmatic consumers can still + // see all signals. + expect(result.result.catalogMatches).toHaveLength(1); + expect(result.result.catalogMatches[0].score).toBe(54); + }); + + it("when semantic is filtered out (all path-only) AND catalog is weak, keeps catalog", async () => { + mockLookupError.mockReturnValue({ + query: "note already nullified", + catalogMatches: [ + catalogHit(54, "Contract already initialized", "word-overlap"), + ], + codeMatches: [], + }); + + const client = makeClient({ + search: vi.fn().mockResolvedValue([ + // Path-only chunks that the filter will drop + { text: "\n\naztec-nr/aztec/src/foo.nr\n", + title: "foo.nr", + source: "aztec-nr/aztec/src/foo.nr" }, + ]), + }); + + const result = await lookupAztecError( + { query: "note already nullified" }, + client + ); + // semantic returned empty (after filter) → no_results + expect(result.semanticHealth).toBe("no_results"); + // Weak catalog stays in the result so the user has *some* signal + expect(result.result.catalogMatches).toHaveLength(1); + expect(result.message).toContain("low-confidence"); + expect(result.message).toMatch(/no relevant documentation|Semantic search/i); + }); +}); + describe("lookupAztecError — semantic failure (sanitized message)", () => { it("sets semanticHealth='failed' and returns sanitized message on 401", async () => { const client = makeClient({ @@ -325,7 +447,7 @@ describe("lookupAztecError — version-mismatch gate", () => { it("blocks semantic fallback when local clone diverges from corpus", async () => { mockGetRepoTag.mockResolvedValue("v4.1.0"); const client = makeClient({ - search: vi.fn().mockResolvedValue([{ text: "x", title: "x", source: "x" }]), + search: vi.fn().mockResolvedValue([{ text: "Some prose body content here.", title: "T", source: "x" }]), getCorpusVersion: vi.fn().mockResolvedValue({ aztec_corpus_version: "v4.2.0" }), }); @@ -340,7 +462,7 @@ describe("lookupAztecError — version-mismatch gate", () => { mockGetRepoTag.mockResolvedValue("v4.1.0"); const client = makeClient({ search: vi.fn().mockResolvedValue([ - { text: "x", title: "x", source: "x" }, + { text: "Some prose body content here.", title: "T", source: "x" }, ]), getCorpusVersion: vi.fn().mockResolvedValue({ aztec_corpus_version: "v4.2.0" }), }); diff --git a/tests/utils/format.test.ts b/tests/utils/format.test.ts index f61293c..82f0825 100644 --- a/tests/utils/format.test.ts +++ b/tests/utils/format.test.ts @@ -6,7 +6,9 @@ import { formatExamplesList, formatExampleContent, formatFileContent, + formatErrorLookupResult, } from "../../src/utils/format.js"; +import type { ErrorLookupToolResult } from "../../src/tools/error-lookup.js"; import { MCP_VERSION } from "../../src/version.js"; import { setUpgradeInfo, @@ -257,6 +259,128 @@ describe("formatExampleContent", () => { }); }); +describe("formatErrorLookupResult", () => { + function buildWeak( + semanticResults: { text: string; title: string; source: string }[] | undefined, + health: ErrorLookupToolResult["semanticHealth"] + ): ErrorLookupToolResult { + return { + success: true, + semanticHealth: health, + semanticResults, + message: "msg", + result: { + query: "note already nullified", + codeMatches: [], + catalogMatches: [ + { + entry: { + id: "contract-already-initialized", + name: "Contract already initialized", + patterns: ["already initialized"], + cause: "c", + fix: "f", + category: "contract", + source: "s", + }, + matchType: "word-overlap", + score: 54, + }, + ], + }, + }; + } + + it("suppresses weak catalog hints when semantic returned useful results", () => { + /** + * The user-reported anchoring failure: weak catalog hits visible + * alongside semantic results lets the LLM consumer focus on the + * wrong answer. With useful semantic results, we hide the weak + * catalog from the rendered output entirely. + */ + const result = formatErrorLookupResult( + buildWeak( + [ + { + text: "Notes in Aztec are nullified by emitting a nullifier...", + title: "Note Lifecycle", + source: "docs/notes.md", + }, + ], + "ok" + ) + ); + expect(result).toContain("## Related Documentation"); + expect(result).toContain("Note Lifecycle"); + // Weak catalog: HIDDEN + expect(result).not.toContain("Contract already initialized"); + expect(result).not.toContain("Lower-Confidence Catalog Hints"); + }); + + it("keeps weak catalog when semantic produced nothing useful (no_results)", () => { + /** + * Don't hide the user's only signal. Without semantic content, + * keep the weak catalog visible with a "low-confidence" header. + */ + const result = formatErrorLookupResult( + buildWeak(undefined, "no_results") + ); + expect(result).toContain("## Lower-Confidence Catalog Hints"); + expect(result).toContain("Contract already initialized"); + expect(result).not.toContain("## Related Documentation"); + }); + + it("keeps weak catalog when semantic backend failed", () => { + const result = formatErrorLookupResult( + buildWeak(undefined, "failed") + ); + expect(result).toContain("## Lower-Confidence Catalog Hints"); + expect(result).toContain("Contract already initialized"); + }); + + it("keeps weak catalog when no client (semantic skipped)", () => { + const result = formatErrorLookupResult( + buildWeak(undefined, "skipped") + ); + expect(result).toContain("## Lower-Confidence Catalog Hints"); + expect(result).toContain("Contract already initialized"); + }); + + it("renders strong catalog matches normally regardless of semantic state", () => { + const result = formatErrorLookupResult({ + success: true, + semanticHealth: "ok", + semanticResults: [ + { text: "Some doc text", title: "T", source: "docs/x.md" }, + ], + message: "Found one", + result: { + query: "x", + codeMatches: [], + catalogMatches: [ + { + entry: { + id: "y", + name: "Strong Match", + patterns: ["x"], + cause: "c", + fix: "f", + category: "contract", + source: "s", + }, + matchType: "exact-name", + score: 100, + }, + ], + }, + }); + // Strong catalog → normal "## Known Errors" header, NOT suppressed. + expect(result).toContain("## Known Errors"); + expect(result).toContain("Strong Match"); + expect(result).not.toContain("Lower-Confidence Catalog Hints"); + }); +}); + describe("formatFileContent", () => { it("returns message on failure", () => { const result = formatFileContent({ From 607fbfdcd49c8a19a8d39c41f4ceb84189347090 Mon Sep 17 00:00:00 2001 From: critesjosh Date: Mon, 4 May 2026 16:19:15 +0000 Subject: [PATCH 2/2] =?UTF-8?q?fix(error-lookup):=20simplify=20isUsefulSem?= =?UTF-8?q?anticChunk=20=E2=80=94=20shape-only=20check?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex review feedback. Two related issues: 1. Sourceish set used `match.source` and `match.title` to detect a rendered file-path heading line. But `/api/search` rewrites `source` to a public URL (`_aztec_source_url` produces e.g. `https://github.com/.../foo.nr`), so the bare-path heading `aztec-nr/.../foo.nr` never matched the URL — the heading was never stripped, the chunk fell through to the path-shape check which also missed because `# foo/bar.nr` contains whitespace from the markdown marker. Result: a class of empty chunks slipping through both gates. 2. The mitigation — strip a leading `#+ ` from each line before the path-shape predicate — makes the metadata coupling unnecessary. Drop the sourceish comparison entirely. New helper `lineIsPathShaped` strips heading markers, then checks "contains `/` and no whitespace". Real signature lines always have whitespace (`pub fn ...`, `struct ...`, `pub use a::b;`), so they never trip the predicate. Equivalent fix on the docsgpt side: critesjosh/docsgpt-aztec#66 gets the same shape-only simplification. New regression test: chunk with `#`-prefixed heading body and a URL-rewritten source field — the exact failure mode codex described — is correctly identified as "no useful results". 283/283 tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/tools/error-lookup.ts | 47 ++++++++++++++++---------------- tests/tools/error-lookup.test.ts | 24 ++++++++++++++++ 2 files changed, 48 insertions(+), 23 deletions(-) diff --git a/src/tools/error-lookup.ts b/src/tools/error-lookup.ts index a9ce1f2..a848220 100644 --- a/src/tools/error-lookup.ts +++ b/src/tools/error-lookup.ts @@ -31,6 +31,20 @@ import { checkVersionGate, formatMismatchMessage } from "../utils/version-check. */ const STRONG_MATCH_THRESHOLD = 70; +/** + * A line is "path-shaped" if it looks like a filesystem path rather + * than a code/docs line. Strips a leading markdown heading marker so + * ``# aztec-nr/.../foo.nr`` is recognized as path-shaped just like + * the bare ``aztec-nr/.../foo.nr``. Path-shaped means: contains ``/`` + * and has no whitespace. Real signature lines (``pub fn foo(...)``, + * ``struct Bar { ... }``, ``pub use a::b;``) always have whitespace, + * so they never trip this predicate. + */ +function lineIsPathShaped(line: string): boolean { + const cleaned = line.replace(/^#+\s*/, "").trim(); + return cleaned.length > 0 && cleaned.includes("/") && !/\s/.test(cleaned); +} + /** * Drop semantic chunks whose body is empty or just the file path. * @@ -43,6 +57,14 @@ const STRONG_MATCH_THRESHOLD = 70; * * Mirrors the Python helper in ``application/api/answer/routes/search.py`` * (``_is_empty_apiref_chunk``) — same content-shape predicate. + * + * The predicate is deliberately metadata-free. An earlier draft used + * ``match.source`` / ``match.title`` as a "heading-equivalent" set + * to strip a rendered file heading before checking the rest, but + * docsgpt's ``/api/search`` rewrites ``source`` to a public URL via + * ``_aztec_source_url`` — so the heading string never matches the + * post-rewrite source field. The shape-only check below works + * regardless of metadata transformations. */ function isUsefulSemanticChunk(match: SemanticSearchResult): boolean { const text = (match.text ?? "").trim(); @@ -54,29 +76,8 @@ function isUsefulSemanticChunk(match: SemanticSearchResult): boolean { .filter((l) => l.length > 0); if (lines.length === 0) return false; - // Sourceish: any of the strings the rendered file-heading might have - // matched (source path / filename-equivalent title). Strip a leading - // "#" / "##" markdown heading marker before comparing. Deliberately - // does NOT include match.text — that would create a fixed point - // (text === sourceish[0]) that filters single-line legitimate - // chunks that happen to be one line long. - const sourceish = new Set( - [match.source, match.title] - .map((s) => (s ?? "").trim()) - .filter(Boolean) - ); - const firstStripped = lines[0].replace(/^#+\s*/, "").trim(); - let body = lines; - if (sourceish.has(firstStripped) || sourceish.has(lines[0])) { - body = body.slice(1); - } - - if (body.length === 0) return false; - - // Body still looks like a file path: every remaining line is path- - // shaped (contains "/" and no whitespace). A real signature line - // ("pub fn ..., struct Foo, ...") always has whitespace. - if (body.every((l) => l.includes("/") && !/\s/.test(l))) return false; + // All non-empty lines are path-shaped → no real API content. + if (lines.every(lineIsPathShaped)) return false; return true; } diff --git a/tests/tools/error-lookup.test.ts b/tests/tools/error-lookup.test.ts index 4123079..0fb8d4b 100644 --- a/tests/tools/error-lookup.test.ts +++ b/tests/tools/error-lookup.test.ts @@ -304,6 +304,30 @@ describe("lookupAztecError — content-thin chunk filter", () => { return { text, title: "foo.nr", source }; } + it("drops chunks with `#`-prefixed path heading even when source field is a public URL", async () => { + /** + * Regression for codex review: `/api/search` rewrites the chunk's + * `source` field to a public URL via `_aztec_source_url`. A chunk + * whose body is `# aztec-nr/.../foo.nr` (path heading only) won't + * match the URL-rewritten source field by string equality. The + * earlier filter would fail to strip the heading, then fall through + * to the path-shape check — which also failed because `# ...` has + * whitespace from the markdown marker. The new shape-only filter + * catches this directly. + */ + const client = makeClient({ + search: vi.fn().mockResolvedValue([ + { + text: "# aztec-nr/aztec/src/context/foo.nr\n", + title: "foo.nr", + source: "https://github.com/AztecProtocol/aztec-packages/blob/v4.2.0/noir-projects/aztec-nr/aztec/src/context/foo.nr", + }, + ]), + }); + const result = await lookupAztecError({ query: "obscure" }, client); + expect(result.semanticHealth).toBe("no_results"); + }); + it("treats raw output of all path-only chunks as 'no_results'", async () => { const client = makeClient({ search: vi.fn().mockResolvedValue([