diff --git a/src/tools/error-lookup.ts b/src/tools/error-lookup.ts index 6d706c3..a848220 100644 --- a/src/tools/error-lookup.ts +++ b/src/tools/error-lookup.ts @@ -31,6 +31,57 @@ import { checkVersionGate, formatMismatchMessage } from "../utils/version-check. */ const STRONG_MATCH_THRESHOLD = 70; +/** + * A line is "path-shaped" if it looks like a filesystem path rather + * than a code/docs line. Strips a leading markdown heading marker so + * ``# aztec-nr/.../foo.nr`` is recognized as path-shaped just like + * the bare ``aztec-nr/.../foo.nr``. Path-shaped means: contains ``/`` + * and has no whitespace. Real signature lines (``pub fn foo(...)``, + * ``struct Bar { ... }``, ``pub use a::b;``) always have whitespace, + * so they never trip this predicate. + */ +function lineIsPathShaped(line: string): boolean { + const cleaned = line.replace(/^#+\s*/, "").trim(); + return cleaned.length > 0 && cleaned.includes("/") && !/\s/.test(cleaned); +} + +/** + * Drop semantic chunks whose body is empty or just the file path. + * + * Why this exists client-side even though docsgpt's ``/api/search`` + * has its own equivalent guard: defense-in-depth. The MCP server is + * shipped to end users on whatever DocsGPT instance ``API_URL`` + * points at — that backend may not have the latest filter applied, + * may be a self-hosted fork, or may reintroduce the bug in a future + * regression. Filtering on this side keeps the MCP UX safe regardless. + * + * Mirrors the Python helper in ``application/api/answer/routes/search.py`` + * (``_is_empty_apiref_chunk``) — same content-shape predicate. + * + * The predicate is deliberately metadata-free. An earlier draft used + * ``match.source`` / ``match.title`` as a "heading-equivalent" set + * to strip a rendered file heading before checking the rest, but + * docsgpt's ``/api/search`` rewrites ``source`` to a public URL via + * ``_aztec_source_url`` — so the heading string never matches the + * post-rewrite source field. The shape-only check below works + * regardless of metadata transformations. + */ +function isUsefulSemanticChunk(match: SemanticSearchResult): boolean { + const text = (match.text ?? "").trim(); + if (!text) return false; + + const lines = text + .split("\n") + .map((l) => l.trim()) + .filter((l) => l.length > 0); + if (lines.length === 0) return false; + + // All non-empty lines are path-shaped → no real API content. + if (lines.every(lineIsPathShaped)) return false; + + return true; +} + export type SemanticHealth = | "ok" // semantic returned results | "no_results" // semantic ran cleanly, returned empty @@ -143,11 +194,19 @@ export async function lookupAztecError( } try { - const semanticResults = await docsgptClient.search( + const rawResults = await docsgptClient.search( `Aztec error: ${query}`, 3 ); + // Filter content-thin / path-only chunks. If the server-side guard + // is in place these will be empty already, but defense-in-depth + // protects against older docsgpt deployments and any future + // regression in the apiref ingest. "Returned 3 chunks but all + // were just file paths" is functionally equivalent to "returned + // nothing useful" and we report it as such. + const semanticResults = rawResults.filter(isUsefulSemanticChunk); + if (semanticResults.length > 0) { return { success: true, diff --git a/src/utils/format.ts b/src/utils/format.ts index 53bd962..2bff566 100644 --- a/src/utils/format.ts +++ b/src/utils/format.ts @@ -187,17 +187,27 @@ export function formatErrorLookupResult(result: ErrorLookupToolResult): string { const { catalogMatches, codeMatches } = result.result; // When semantic results exist AND every catalog match is below the - // strong-match threshold, the catalog hits are low-confidence cues - // that shouldn't visually dominate the response. Render semantic - // first under "## Related Documentation", and the catalog after - // under "## Lower-Confidence Catalog Hints" so the LLM consumer - // doesn't anchor on a misleading top hit (e.g. "note already - // nullified" matching "Contract already initialized" with score 54). + // strong-match threshold, the catalog hits are low-confidence cues. + // Two cases: + // + // semanticHasResults = true → semantic returned content-bearing + // chunks (the lookupAztecError filter only sets semanticResults + // when at least one chunk passed isUsefulSemanticChunk). The + // weak catalog hint is now actively misleading — the user keeps + // anchoring on it as the "primary answer" even though semantic + // gave us better context. SUPPRESS the catalog section entirely. + // + // semanticHasResults = false → semantic ran but produced nothing + // useful (or didn't run: no client, version mismatch, backend + // failed). The user has no other signal. KEEP the weak catalog + // with a clear "Lower-Confidence Catalog Hints" header so they + // have *something* to look at, framed honestly. const semanticHasResults = !!result.semanticResults && result.semanticResults.length > 0; const catalogIsWeakOnly = catalogMatches.length > 0 && catalogMatches.every((m) => m.score < 70); + const suppressWeakCatalog = catalogIsWeakOnly && semanticHasResults; const renderSemanticFirst = semanticHasResults && catalogIsWeakOnly; function renderSemantic() { @@ -221,6 +231,12 @@ export function formatErrorLookupResult(result: ErrorLookupToolResult): string { function renderCatalog() { if (catalogMatches.length === 0) return; + // Phase 2 suppression: when semantic returned content-bearing + // chunks AND the catalog is weak-only, the catalog hits are + // pure noise that the user keeps anchoring on. Hide them + // entirely. They remain in `result.catalogMatches` for + // programmatic consumers that need every signal. + if (suppressWeakCatalog) return; lines.push( catalogIsWeakOnly ? "## Lower-Confidence Catalog Hints" diff --git a/tests/tools/error-lookup.test.ts b/tests/tools/error-lookup.test.ts index ed2e785..0fb8d4b 100644 --- a/tests/tools/error-lookup.test.ts +++ b/tests/tools/error-lookup.test.ts @@ -292,6 +292,152 @@ describe("lookupAztecError — semantic fallback", () => { }); }); +describe("lookupAztecError — content-thin chunk filter", () => { + /** + * Defense-in-depth filter: even if docsgpt's `/api/search` regresses + * and starts returning path-only / empty-body apiref chunks, + * `isUsefulSemanticChunk` drops them before they're surfaced to the + * LLM consumer. Mirrors the server-side + * `_is_empty_apiref_chunk` helper. + */ + function chunk(text: string, source = "aztec-nr/aztec/src/foo.nr") { + return { text, title: "foo.nr", source }; + } + + it("drops chunks with `#`-prefixed path heading even when source field is a public URL", async () => { + /** + * Regression for codex review: `/api/search` rewrites the chunk's + * `source` field to a public URL via `_aztec_source_url`. A chunk + * whose body is `# aztec-nr/.../foo.nr` (path heading only) won't + * match the URL-rewritten source field by string equality. The + * earlier filter would fail to strip the heading, then fall through + * to the path-shape check — which also failed because `# ...` has + * whitespace from the markdown marker. The new shape-only filter + * catches this directly. + */ + const client = makeClient({ + search: vi.fn().mockResolvedValue([ + { + text: "# aztec-nr/aztec/src/context/foo.nr\n", + title: "foo.nr", + source: "https://github.com/AztecProtocol/aztec-packages/blob/v4.2.0/noir-projects/aztec-nr/aztec/src/context/foo.nr", + }, + ]), + }); + const result = await lookupAztecError({ query: "obscure" }, client); + expect(result.semanticHealth).toBe("no_results"); + }); + + it("treats raw output of all path-only chunks as 'no_results'", async () => { + const client = makeClient({ + search: vi.fn().mockResolvedValue([ + chunk("\n\naztec-nr/aztec/src/context/note_existence_request.nr\n\n", + "aztec-nr/aztec/src/context/note_existence_request.nr"), + chunk("\n\naztec-nr/aztec/src/note/hinted_note.nr\n", + "aztec-nr/aztec/src/note/hinted_note.nr"), + ]), + }); + const result = await lookupAztecError({ query: "obscure" }, client); + expect(result.semanticHealth).toBe("no_results"); + expect(result.semanticResults).toBeUndefined(); + }); + + it("keeps mixed results when at least one chunk has substantive body", async () => { + const client = makeClient({ + search: vi.fn().mockResolvedValue([ + chunk("\n\naztec-nr/aztec/src/empty.nr\n", + "aztec-nr/aztec/src/empty.nr"), + chunk( + "# aztec-nr/aztec/src/hash.nr\npub fn poseidon(input: [Field; N]) -> Field", + "aztec-nr/aztec/src/hash.nr" + ), + chunk("\n\naztec-nr/aztec/src/utils.nr\n", + "aztec-nr/aztec/src/utils.nr"), + ]), + }); + const result = await lookupAztecError({ query: "poseidon" }, client); + expect(result.semanticHealth).toBe("ok"); + expect(result.semanticResults).toHaveLength(1); + expect(result.semanticResults![0].text).toContain("poseidon"); + }); +}); + +describe("lookupAztecError — weak catalog suppression when semantic is useful", () => { + /** + * The user-reported "bogus result still appears" failure mode: weak + * catalog hits visible alongside semantic results lets the LLM + * consumer anchor on the wrong answer. When semantic returned + * useful (post-filter) chunks, the weak catalog is now suppressed + * from the rendered output entirely (still present in + * `result.catalogMatches` for programmatic consumers). + * + * This tests the data-shape that the formatter consumes; the + * formatter test (`tests/utils/format.test.ts`) verifies the + * suppression actually happens at render time. + */ + it("returns semanticHealth='ok' with weak catalog still in result.catalogMatches", async () => { + mockLookupError.mockReturnValue({ + query: "note already nullified", + catalogMatches: [ + catalogHit(54, "Contract already initialized", "word-overlap"), + ], + codeMatches: [], + }); + + const client = makeClient({ + search: vi.fn().mockResolvedValue([ + { + text: "Notes in Aztec are nullified by emitting a nullifier...", + title: "Note Lifecycle", + source: "docs/notes.md", + }, + ]), + }); + + const result = await lookupAztecError( + { query: "note already nullified" }, + client + ); + expect(result.semanticHealth).toBe("ok"); + expect(result.semanticResults).toHaveLength(1); + // The weak catalog hit is preserved in the data — the formatter + // is responsible for hiding it. Programmatic consumers can still + // see all signals. + expect(result.result.catalogMatches).toHaveLength(1); + expect(result.result.catalogMatches[0].score).toBe(54); + }); + + it("when semantic is filtered out (all path-only) AND catalog is weak, keeps catalog", async () => { + mockLookupError.mockReturnValue({ + query: "note already nullified", + catalogMatches: [ + catalogHit(54, "Contract already initialized", "word-overlap"), + ], + codeMatches: [], + }); + + const client = makeClient({ + search: vi.fn().mockResolvedValue([ + // Path-only chunks that the filter will drop + { text: "\n\naztec-nr/aztec/src/foo.nr\n", + title: "foo.nr", + source: "aztec-nr/aztec/src/foo.nr" }, + ]), + }); + + const result = await lookupAztecError( + { query: "note already nullified" }, + client + ); + // semantic returned empty (after filter) → no_results + expect(result.semanticHealth).toBe("no_results"); + // Weak catalog stays in the result so the user has *some* signal + expect(result.result.catalogMatches).toHaveLength(1); + expect(result.message).toContain("low-confidence"); + expect(result.message).toMatch(/no relevant documentation|Semantic search/i); + }); +}); + describe("lookupAztecError — semantic failure (sanitized message)", () => { it("sets semanticHealth='failed' and returns sanitized message on 401", async () => { const client = makeClient({ @@ -325,7 +471,7 @@ describe("lookupAztecError — version-mismatch gate", () => { it("blocks semantic fallback when local clone diverges from corpus", async () => { mockGetRepoTag.mockResolvedValue("v4.1.0"); const client = makeClient({ - search: vi.fn().mockResolvedValue([{ text: "x", title: "x", source: "x" }]), + search: vi.fn().mockResolvedValue([{ text: "Some prose body content here.", title: "T", source: "x" }]), getCorpusVersion: vi.fn().mockResolvedValue({ aztec_corpus_version: "v4.2.0" }), }); @@ -340,7 +486,7 @@ describe("lookupAztecError — version-mismatch gate", () => { mockGetRepoTag.mockResolvedValue("v4.1.0"); const client = makeClient({ search: vi.fn().mockResolvedValue([ - { text: "x", title: "x", source: "x" }, + { text: "Some prose body content here.", title: "T", source: "x" }, ]), getCorpusVersion: vi.fn().mockResolvedValue({ aztec_corpus_version: "v4.2.0" }), }); diff --git a/tests/utils/format.test.ts b/tests/utils/format.test.ts index f61293c..82f0825 100644 --- a/tests/utils/format.test.ts +++ b/tests/utils/format.test.ts @@ -6,7 +6,9 @@ import { formatExamplesList, formatExampleContent, formatFileContent, + formatErrorLookupResult, } from "../../src/utils/format.js"; +import type { ErrorLookupToolResult } from "../../src/tools/error-lookup.js"; import { MCP_VERSION } from "../../src/version.js"; import { setUpgradeInfo, @@ -257,6 +259,128 @@ describe("formatExampleContent", () => { }); }); +describe("formatErrorLookupResult", () => { + function buildWeak( + semanticResults: { text: string; title: string; source: string }[] | undefined, + health: ErrorLookupToolResult["semanticHealth"] + ): ErrorLookupToolResult { + return { + success: true, + semanticHealth: health, + semanticResults, + message: "msg", + result: { + query: "note already nullified", + codeMatches: [], + catalogMatches: [ + { + entry: { + id: "contract-already-initialized", + name: "Contract already initialized", + patterns: ["already initialized"], + cause: "c", + fix: "f", + category: "contract", + source: "s", + }, + matchType: "word-overlap", + score: 54, + }, + ], + }, + }; + } + + it("suppresses weak catalog hints when semantic returned useful results", () => { + /** + * The user-reported anchoring failure: weak catalog hits visible + * alongside semantic results lets the LLM consumer focus on the + * wrong answer. With useful semantic results, we hide the weak + * catalog from the rendered output entirely. + */ + const result = formatErrorLookupResult( + buildWeak( + [ + { + text: "Notes in Aztec are nullified by emitting a nullifier...", + title: "Note Lifecycle", + source: "docs/notes.md", + }, + ], + "ok" + ) + ); + expect(result).toContain("## Related Documentation"); + expect(result).toContain("Note Lifecycle"); + // Weak catalog: HIDDEN + expect(result).not.toContain("Contract already initialized"); + expect(result).not.toContain("Lower-Confidence Catalog Hints"); + }); + + it("keeps weak catalog when semantic produced nothing useful (no_results)", () => { + /** + * Don't hide the user's only signal. Without semantic content, + * keep the weak catalog visible with a "low-confidence" header. + */ + const result = formatErrorLookupResult( + buildWeak(undefined, "no_results") + ); + expect(result).toContain("## Lower-Confidence Catalog Hints"); + expect(result).toContain("Contract already initialized"); + expect(result).not.toContain("## Related Documentation"); + }); + + it("keeps weak catalog when semantic backend failed", () => { + const result = formatErrorLookupResult( + buildWeak(undefined, "failed") + ); + expect(result).toContain("## Lower-Confidence Catalog Hints"); + expect(result).toContain("Contract already initialized"); + }); + + it("keeps weak catalog when no client (semantic skipped)", () => { + const result = formatErrorLookupResult( + buildWeak(undefined, "skipped") + ); + expect(result).toContain("## Lower-Confidence Catalog Hints"); + expect(result).toContain("Contract already initialized"); + }); + + it("renders strong catalog matches normally regardless of semantic state", () => { + const result = formatErrorLookupResult({ + success: true, + semanticHealth: "ok", + semanticResults: [ + { text: "Some doc text", title: "T", source: "docs/x.md" }, + ], + message: "Found one", + result: { + query: "x", + codeMatches: [], + catalogMatches: [ + { + entry: { + id: "y", + name: "Strong Match", + patterns: ["x"], + cause: "c", + fix: "f", + category: "contract", + source: "s", + }, + matchType: "exact-name", + score: 100, + }, + ], + }, + }); + // Strong catalog → normal "## Known Errors" header, NOT suppressed. + expect(result).toContain("## Known Errors"); + expect(result).toContain("Strong Match"); + expect(result).not.toContain("Lower-Confidence Catalog Hints"); + }); +}); + describe("formatFileContent", () => { it("returns message on failure", () => { const result = formatFileContent({