Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 60 additions & 1 deletion src/tools/error-lookup.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,57 @@ import { checkVersionGate, formatMismatchMessage } from "../utils/version-check.
*/
const STRONG_MATCH_THRESHOLD = 70;

/**
 * Heuristic: does this line look like a bare filesystem path?
 *
 * Any leading markdown heading marker (``#``, ``##``, …) is removed
 * first, so a rendered heading such as ``# aztec-nr/.../foo.nr`` is
 * classified exactly like the unadorned ``aztec-nr/.../foo.nr``.
 *
 * "Path-shaped" then means: non-empty, contains a ``/``, and carries
 * no whitespace at all. Genuine code or docs lines — ``pub fn foo(...)``,
 * ``struct Bar { ... }``, ``pub use a::b;`` — always contain whitespace,
 * so they never trip this predicate.
 */
function lineIsPathShaped(line: string): boolean {
  const stripped = line.replace(/^#+\s*/, "").trim();
  if (stripped.length === 0) return false;
  if (/\s/.test(stripped)) return false;
  return stripped.includes("/");
}

/**
 * Keep only semantic chunks whose body carries real content — drop
 * those that are empty or consist purely of the file path.
 *
 * This guard lives client-side even though docsgpt's ``/api/search``
 * applies an equivalent one: defense-in-depth. The MCP server runs
 * against whatever DocsGPT instance ``API_URL`` points at, and that
 * backend may lag behind the latest filter, be a self-hosted fork, or
 * regress in the future. Filtering here keeps the MCP UX safe no
 * matter which backend is on the other end.
 *
 * Client-side twin of the Python helper in
 * ``application/api/answer/routes/search.py``
 * (``_is_empty_apiref_chunk``) — identical content-shape predicate.
 *
 * Deliberately metadata-free: an earlier draft compared against
 * ``match.source`` / ``match.title`` to strip a rendered file heading,
 * but ``/api/search`` rewrites ``source`` to a public URL via
 * ``_aztec_source_url``, so the heading string never equals the
 * post-rewrite source field. Shape-only checking works regardless of
 * any metadata transformation.
 */
function isUsefulSemanticChunk(match: SemanticSearchResult): boolean {
  const body = (match.text ?? "").trim();
  if (!body) return false;

  // Useful ⇔ at least one non-empty line is NOT path-shaped, i.e. the
  // chunk contains something beyond bare file paths.
  return body
    .split("\n")
    .map((line) => line.trim())
    .some((line) => line.length > 0 && !lineIsPathShaped(line));
}

export type SemanticHealth =
| "ok" // semantic returned results
| "no_results" // semantic ran cleanly, returned empty
Expand Down Expand Up @@ -143,11 +194,19 @@ export async function lookupAztecError(
}

try {
const semanticResults = await docsgptClient.search(
const rawResults = await docsgptClient.search(
`Aztec error: ${query}`,
3
);

// Filter content-thin / path-only chunks. If the server-side guard
// is in place these will be empty already, but defense-in-depth
// protects against older docsgpt deployments and any future
// regression in the apiref ingest. "Returned 3 chunks but all
// were just file paths" is functionally equivalent to "returned
// nothing useful" and we report it as such.
const semanticResults = rawResults.filter(isUsefulSemanticChunk);

if (semanticResults.length > 0) {
return {
success: true,
Expand Down
28 changes: 22 additions & 6 deletions src/utils/format.ts
Original file line number Diff line number Diff line change
Expand Up @@ -187,17 +187,27 @@ export function formatErrorLookupResult(result: ErrorLookupToolResult): string {
const { catalogMatches, codeMatches } = result.result;

// When semantic results exist AND every catalog match is below the
// strong-match threshold, the catalog hits are low-confidence cues
// that shouldn't visually dominate the response. Render semantic
// first under "## Related Documentation", and the catalog after
// under "## Lower-Confidence Catalog Hints" so the LLM consumer
// doesn't anchor on a misleading top hit (e.g. "note already
// nullified" matching "Contract already initialized" with score 54).
// strong-match threshold, the catalog hits are low-confidence cues.
// Two cases:
//
// semanticHasResults = true → semantic returned content-bearing
// chunks (the lookupAztecError filter only sets semanticResults
// when at least one chunk passed isUsefulSemanticChunk). The
// weak catalog hint is now actively misleading — the user keeps
// anchoring on it as the "primary answer" even though semantic
// gave us better context. SUPPRESS the catalog section entirely.
//
// semanticHasResults = false → semantic ran but produced nothing
// useful (or didn't run: no client, version mismatch, backend
// failed). The user has no other signal. KEEP the weak catalog
// with a clear "Lower-Confidence Catalog Hints" header so they
// have *something* to look at, framed honestly.
const semanticHasResults =
!!result.semanticResults && result.semanticResults.length > 0;
const catalogIsWeakOnly =
catalogMatches.length > 0 &&
catalogMatches.every((m) => m.score < 70);
const suppressWeakCatalog = catalogIsWeakOnly && semanticHasResults;
const renderSemanticFirst = semanticHasResults && catalogIsWeakOnly;

function renderSemantic() {
Expand All @@ -221,6 +231,12 @@ export function formatErrorLookupResult(result: ErrorLookupToolResult): string {

function renderCatalog() {
if (catalogMatches.length === 0) return;
// Phase 2 suppression: when semantic returned content-bearing
// chunks AND the catalog is weak-only, the catalog hits are
// pure noise that the user keeps anchoring on. Hide them
// entirely. They remain in `result.catalogMatches` for
// programmatic consumers that need every signal.
if (suppressWeakCatalog) return;
lines.push(
catalogIsWeakOnly
? "## Lower-Confidence Catalog Hints"
Expand Down
150 changes: 148 additions & 2 deletions tests/tools/error-lookup.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,152 @@ describe("lookupAztecError — semantic fallback", () => {
});
});

describe("lookupAztecError — content-thin chunk filter", () => {
  /**
   * Defense-in-depth: should docsgpt's `/api/search` regress and start
   * emitting path-only / empty-body apiref chunks again,
   * `isUsefulSemanticChunk` discards them before they reach the LLM
   * consumer. Client-side twin of the server-side
   * `_is_empty_apiref_chunk` helper.
   */
  const makeChunk = (text: string, source = "aztec-nr/aztec/src/foo.nr") => ({
    text,
    title: "foo.nr",
    source,
  });

  it("drops chunks with `#`-prefixed path heading even when source field is a public URL", async () => {
    /**
     * Regression for codex review: `/api/search` rewrites the chunk's
     * `source` field to a public URL via `_aztec_source_url`. A chunk
     * whose body is `# aztec-nr/.../foo.nr` (path heading only) won't
     * match the URL-rewritten source field by string equality. The
     * earlier filter would fail to strip the heading, then fall through
     * to the path-shape check — which also failed because `# ...` has
     * whitespace from the markdown marker. The new shape-only filter
     * catches this directly.
     */
    const headingOnlyChunk = {
      text: "# aztec-nr/aztec/src/context/foo.nr\n",
      title: "foo.nr",
      source: "https://github.com/AztecProtocol/aztec-packages/blob/v4.2.0/noir-projects/aztec-nr/aztec/src/context/foo.nr",
    };
    const client = makeClient({
      search: vi.fn().mockResolvedValue([headingOnlyChunk]),
    });

    const result = await lookupAztecError({ query: "obscure" }, client);

    expect(result.semanticHealth).toBe("no_results");
  });

  it("treats raw output of all path-only chunks as 'no_results'", async () => {
    const pathOnlyChunks = [
      makeChunk(
        "\n\naztec-nr/aztec/src/context/note_existence_request.nr\n\n",
        "aztec-nr/aztec/src/context/note_existence_request.nr"
      ),
      makeChunk(
        "\n\naztec-nr/aztec/src/note/hinted_note.nr\n",
        "aztec-nr/aztec/src/note/hinted_note.nr"
      ),
    ];
    const client = makeClient({
      search: vi.fn().mockResolvedValue(pathOnlyChunks),
    });

    const result = await lookupAztecError({ query: "obscure" }, client);

    expect(result.semanticHealth).toBe("no_results");
    expect(result.semanticResults).toBeUndefined();
  });

  it("keeps mixed results when at least one chunk has substantive body", async () => {
    const mixedChunks = [
      makeChunk("\n\naztec-nr/aztec/src/empty.nr\n", "aztec-nr/aztec/src/empty.nr"),
      makeChunk(
        "# aztec-nr/aztec/src/hash.nr\npub fn poseidon(input: [Field; N]) -> Field",
        "aztec-nr/aztec/src/hash.nr"
      ),
      makeChunk("\n\naztec-nr/aztec/src/utils.nr\n", "aztec-nr/aztec/src/utils.nr"),
    ];
    const client = makeClient({
      search: vi.fn().mockResolvedValue(mixedChunks),
    });

    const result = await lookupAztecError({ query: "poseidon" }, client);

    expect(result.semanticHealth).toBe("ok");
    expect(result.semanticResults).toHaveLength(1);
    expect(result.semanticResults![0].text).toContain("poseidon");
  });
});

describe("lookupAztecError — weak catalog suppression when semantic is useful", () => {
  /**
   * Covers the user-reported "bogus result still appears" failure mode:
   * rendering weak catalog hits next to semantic results lets the LLM
   * consumer anchor on the wrong answer. When semantic returned useful
   * (post-filter) chunks, the rendered output now suppresses the weak
   * catalog entirely — while `result.catalogMatches` still carries it
   * for programmatic consumers.
   *
   * These tests pin the data-shape the formatter consumes; the
   * formatter test (`tests/utils/format.test.ts`) verifies the
   * suppression actually happens at render time.
   */
  it("returns semanticHealth='ok' with weak catalog still in result.catalogMatches", async () => {
    mockLookupError.mockReturnValue({
      query: "note already nullified",
      catalogMatches: [
        catalogHit(54, "Contract already initialized", "word-overlap"),
      ],
      codeMatches: [],
    });

    // A content-bearing semantic chunk — survives the thin-chunk filter.
    const semanticDoc = {
      text: "Notes in Aztec are nullified by emitting a nullifier...",
      title: "Note Lifecycle",
      source: "docs/notes.md",
    };
    const client = makeClient({
      search: vi.fn().mockResolvedValue([semanticDoc]),
    });

    const result = await lookupAztecError(
      { query: "note already nullified" },
      client
    );

    expect(result.semanticHealth).toBe("ok");
    expect(result.semanticResults).toHaveLength(1);
    // The data layer keeps every signal; hiding the weak hit is the
    // formatter's responsibility, not the lookup's.
    expect(result.result.catalogMatches).toHaveLength(1);
    expect(result.result.catalogMatches[0].score).toBe(54);
  });

  it("when semantic is filtered out (all path-only) AND catalog is weak, keeps catalog", async () => {
    mockLookupError.mockReturnValue({
      query: "note already nullified",
      catalogMatches: [
        catalogHit(54, "Contract already initialized", "word-overlap"),
      ],
      codeMatches: [],
    });

    // A path-only chunk that the content-thin filter will drop.
    const pathOnlyChunk = {
      text: "\n\naztec-nr/aztec/src/foo.nr\n",
      title: "foo.nr",
      source: "aztec-nr/aztec/src/foo.nr",
    };
    const client = makeClient({
      search: vi.fn().mockResolvedValue([pathOnlyChunk]),
    });

    const result = await lookupAztecError(
      { query: "note already nullified" },
      client
    );

    // Post-filter semantic output is empty → reported as no_results.
    expect(result.semanticHealth).toBe("no_results");
    // The weak catalog survives so the user still has *some* signal.
    expect(result.result.catalogMatches).toHaveLength(1);
    expect(result.message).toContain("low-confidence");
    expect(result.message).toMatch(/no relevant documentation|Semantic search/i);
  });
});

describe("lookupAztecError — semantic failure (sanitized message)", () => {
it("sets semanticHealth='failed' and returns sanitized message on 401", async () => {
const client = makeClient({
Expand Down Expand Up @@ -325,7 +471,7 @@ describe("lookupAztecError — version-mismatch gate", () => {
it("blocks semantic fallback when local clone diverges from corpus", async () => {
mockGetRepoTag.mockResolvedValue("v4.1.0");
const client = makeClient({
search: vi.fn().mockResolvedValue([{ text: "x", title: "x", source: "x" }]),
search: vi.fn().mockResolvedValue([{ text: "Some prose body content here.", title: "T", source: "x" }]),
getCorpusVersion: vi.fn().mockResolvedValue({ aztec_corpus_version: "v4.2.0" }),
});

Expand All @@ -340,7 +486,7 @@ describe("lookupAztecError — version-mismatch gate", () => {
mockGetRepoTag.mockResolvedValue("v4.1.0");
const client = makeClient({
search: vi.fn().mockResolvedValue([
{ text: "x", title: "x", source: "x" },
{ text: "Some prose body content here.", title: "T", source: "x" },
]),
getCorpusVersion: vi.fn().mockResolvedValue({ aztec_corpus_version: "v4.2.0" }),
});
Expand Down
Loading
Loading