/// Issue #626: steer agents toward codedb_outline when they pull a whole large
/// file with no line range — the grep+cat habit reads entire files and burns
/// tokens.
///
/// Returns a static, newline-terminated one-line hint when `content` holds at
/// least 400 lines, or null for anything smaller. A trailing '\n' terminates
/// the last line instead of opening a new empty one, so "a\nb\n" is two lines
/// and empty content is zero lines. The returned slice is a string literal;
/// callers must not free it.
pub fn fullFileReadHint(content: []const u8) ?[]const u8 {
    const hint_min_lines: usize = 400;

    var lines: usize = 0;
    for (content) |c| {
        if (c == '\n') lines += 1;
    }
    // A final line that lacks its terminating newline is still a line.
    if (content.len > 0 and content[content.len - 1] != '\n') lines += 1;

    if (lines < hint_min_lines) return null;
    return "↪ whole-file read: codedb_outline maps this file 4-15x smaller — get the structure, pick a line range, then codedb_read just that range.\n";
}
Use to orient in an unfamiliar project.","inputSchema":{"type":"object","properties":{"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}}, \\{"name":"codedb_outline","description":"Symbol outline of one file: functions, structs, enums, imports, consts with line numbers. 4-15x smaller than reading the raw file. Run before codedb_read to find the lines you actually need. Pass skeleton=true for a signature view — each symbol's declaration line with its body elided as '{ … N lines }', so a 2,000-line file collapses to ~one line per symbol.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path relative to project root"},"compact":{"type":"boolean","description":"Condensed format without detail comments (default: false)"},"skeleton":{"type":"boolean","description":"Signature view: each symbol's declaration line with its body elided as '{ … N lines }'. Lossless at the API surface; codedb_read the range to expand a body (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}}, - \\{"name":"codedb_symbol","description":"Find symbol definitions across the index — exact name, prefix, glob pattern, fuzzy match, or kind filter. Returns file, line, kind, and score. Pass format=json for structured output.","inputSchema":{"type":"object","properties":{"name":{"type":"string","description":"Exact symbol name"},"prefix":{"type":"string","description":"Prefix match (e.g. parse_)"},"pattern":{"type":"string","description":"Glob pattern on symbol name (e.g. 
*Manager)"},"kind":{"type":"string","description":"Filter by kind: function, struct, interface, class, method, enum"},"fuzzy":{"type":"boolean","description":"Fuzzy/typo-tolerant match when name is set (default: false)"},"body":{"type":"boolean","description":"Include source body for each symbol (default: false)"},"max_results":{"type":"integer","description":"Max results (default: 50, cap 200)"},"format":{"type":"string","description":"Set to json for structured JSON output"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}}, - \\{"name":"codedb_search","description":"Substring full-text search across the index (regex if regex=true). For one identifier prefer codedb_word; for a definition prefer codedb_symbol. Pass format=json for structured output with search provenance meta.","inputSchema":{"type":"object","properties":{"query":{"type":"string","description":"Text to search for (substring match, or regex if regex=true)"},"max_results":{"type":"integer","description":"Page size (default: 20, raise to 50 for broad surveys)"},"offset":{"type":"integer","description":"Pagination offset into the ranked results (default: 0). When more results exist, the response ends with a 'more results ... offset=N' line; pass that offset to get the next page."},"scope":{"type":"boolean","description":"Annotate results with enclosing symbol scope (default: false)"},"compact":{"type":"boolean","description":"Skip comment and blank lines in results (default: false)"},"paths_only":{"type":"boolean","description":"Return path:line per result without the matching line text — ~50% fewer tokens per call, useful for broad surveys or for budget-conscious agents (default: false)"},"regex":{"type":"boolean","description":"Treat query as regex pattern (default: false)"},"path_glob":{"type":"string","description":"Filter results to paths matching this glob, e.g. '*.zig', 'src/**/*.zig', or '**/*.{yaml,yml}'. 
Bare patterns like '*.zig' are auto-promoted to '**/*.zig' to match nested files."},"format":{"type":"string","description":"Set to json for structured JSON output with provenance meta"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["query"]}}, + \\{"name":"codedb_symbol","description":"PRIMARY tool for locating a definition — reach for this FIRST when you know or can guess a symbol name, instead of codedb_search. Finds symbol definitions across the index — exact name, prefix, glob pattern, fuzzy match, or kind filter. Returns file, line, kind, and score. Pass format=json for structured output.","inputSchema":{"type":"object","properties":{"name":{"type":"string","description":"Exact symbol name"},"prefix":{"type":"string","description":"Prefix match (e.g. parse_)"},"pattern":{"type":"string","description":"Glob pattern on symbol name (e.g. *Manager)"},"kind":{"type":"string","description":"Filter by kind: function, struct, interface, class, method, enum"},"fuzzy":{"type":"boolean","description":"Fuzzy/typo-tolerant match when name is set (default: false)"},"body":{"type":"boolean","description":"Include source body for each symbol (default: false)"},"max_results":{"type":"integer","description":"Max results (default: 50, cap 200)"},"format":{"type":"string","description":"Set to json for structured JSON output"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}}, + \\{"name":"codedb_search","description":"Exploratory substring/phrase search — use ONLY when you do NOT know the exact symbol name. If you know a symbol name, do NOT use this: codedb_symbol returns its definition, codedb_callers its call sites, codedb_word its every occurrence — each in one call. Substring full-text across the index (regex if regex=true). 
Pass format=json for structured output with search provenance meta.","inputSchema":{"type":"object","properties":{"query":{"type":"string","description":"Text to search for (substring match, or regex if regex=true)"},"max_results":{"type":"integer","description":"Page size (default: 20, raise to 50 for broad surveys)"},"offset":{"type":"integer","description":"Pagination offset into the ranked results (default: 0). When more results exist, the response ends with a 'more results ... offset=N' line; pass that offset to get the next page."},"scope":{"type":"boolean","description":"Annotate results with enclosing symbol scope (default: false)"},"compact":{"type":"boolean","description":"Skip comment and blank lines in results (default: false)"},"paths_only":{"type":"boolean","description":"Return path:line per result without the matching line text — ~50% fewer tokens per call, useful for broad surveys or for budget-conscious agents (default: false)"},"regex":{"type":"boolean","description":"Treat query as regex pattern (default: false)"},"path_glob":{"type":"string","description":"Filter results to paths matching this glob, e.g. '*.zig', 'src/**/*.zig', or '**/*.{yaml,yml}'. Bare patterns like '*.zig' are auto-promoted to '**/*.zig' to match nested files."},"format":{"type":"string","description":"Set to json for structured JSON output with provenance meta"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["query"]}}, \\{"name":"codedb_word","description":"Exact-identifier lookup via inverted index — every occurrence of one word, O(1). 
Use for single identifiers; use codedb_search for substrings or phrases.","inputSchema":{"type":"object","properties":{"word":{"type":"string","description":"Exact word/identifier to look up"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["word"]}}, - \\{"name":"codedb_callers","description":"Find every call site of a named symbol — fuses word-index occurrences with outline scope info. One round-trip vs codedb_word + codedb_outline-per-file. Returns {path, line, snippet, scope_name, scope_kind, scope_lines}. Excludes the symbol's own definition site.","inputSchema":{"type":"object","properties":{"name":{"type":"string","description":"Symbol name (exact identifier match)"},"max_results":{"type":"integer","description":"Maximum call sites to return (default: 30, raise for hot symbols)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["name"]}}, + \\{"name":"codedb_callers","description":"PRIMARY tool for finding usages — reach for this FIRST when you need who calls or uses a symbol, instead of grepping with codedb_search. Finds every call site of a named symbol — fuses word-index occurrences with outline scope info. One round-trip vs codedb_word + codedb_outline-per-file. Returns {path, line, snippet, scope_name, scope_kind, scope_lines}. Excludes the symbol's own definition site.","inputSchema":{"type":"object","properties":{"name":{"type":"string","description":"Symbol name (exact identifier match)"},"max_results":{"type":"integer","description":"Maximum call sites to return (default: 30, raise for hot symbols)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["name"]}}, \\{"name":"codedb_callpath","description":"Shortest resolved call chain between two symbols via the local call graph (A→…→B). 
Use after codedb_callers when you need how execution reaches a callee. Returns each hop as path:name@line.","inputSchema":{"type":"object","properties":{"from":{"type":"string","description":"Source symbol name (exact identifier)"},"to":{"type":"string","description":"Target symbol name (exact identifier)"},"max_hops":{"type":"integer","description":"Max call hops to search (default: 12)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["from","to"]}}, \\{"name":"codedb_context","description":"Task-shaped composer: pass a natural-language task; returns ONE tight block (keywords used + symbol definitions + ranked files + top file:line snippets). Replaces 3-5 sequential search/word/symbol calls — use for first-touch orientation on a new task. For narrow follow-ups stick with codedb_search/codedb_symbol.","inputSchema":{"type":"object","properties":{"task":{"type":"string","description":"Natural-language task description (3-1024 chars). Include candidate identifiers (camelCase / snake_case) or \"quoted strings\" so the composer can extract keywords."},"max_tokens":{"type":"integer","description":"Approximate response token budget (~4 chars/token, min 256). Sections are packed by value — files, symbol definitions, callers, calls, snippets — and omitted ones leave a one-line marker."},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["task"]}}, \\{"name":"codedb_diagnostics","description":"Fetch the latest linter diagnostics for a file, produced off the edit path (ruff/biome/etc.) after a recent codedb_edit. Call right after an edit to surface real errors the change may have introduced (undefined names, type/lint issues) on top of codedb's built-in checks. 
Returns 'no diagnostics available yet' when none are cached or external linters are disabled.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path to fetch diagnostics for"}},"required":["path"]}}, - \\{"name":"codedb_hot","description":"Most recently modified files in the project, newest first.","inputSchema":{"type":"object","properties":{"limit":{"type":"integer","description":"Number of files to return (default: 10)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}}, - \\{"name":"codedb_deps","description":"Dependency graph: who imports a file (default) or what a file imports (direction=depends_on). Set transitive=true for the full BFS blast radius.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path to check dependencies for"},"direction":{"type":"string","enum":["imported_by","depends_on"],"description":"imported_by (default): who imports this file. 
depends_on: what this file imports."},"transitive":{"type":"boolean","description":"Follow dependency chain transitively (default: false)"},"max_depth":{"type":"integer","description":"Max traversal depth for transitive queries (default: unlimited)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}}, + \\{"name":"codedb_hot","description":"Recently modified files, newest first — reach for this to see WHERE work is happening before searching an unfamiliar or mid-sprint codebase.","inputSchema":{"type":"object","properties":{"limit":{"type":"integer","description":"Number of files to return (default: 10)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}}, + \\{"name":"codedb_deps","description":"PRIMARY tool for impact/blast-radius — use this instead of grepping import lines. Dependency graph: who imports a file (default) or what a file imports (direction=depends_on). Set transitive=true for the full BFS blast radius.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path to check dependencies for"},"direction":{"type":"string","enum":["imported_by","depends_on"],"description":"imported_by (default): who imports this file. depends_on: what this file imports."},"transitive":{"type":"boolean","description":"Follow dependency chain transitively (default: false)"},"max_depth":{"type":"integer","description":"Max traversal depth for transitive queries (default: unlimited)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}}, \\{"name":"codedb_read","description":"Read file contents, optionally a line range. Run codedb_outline first to pick the range — large files burn tokens fast. 
Pass if_hash to skip re-reads when the file is unchanged.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path relative to project root"},"line_start":{"type":"integer","description":"Start line (1-indexed, inclusive). Omit for full file."},"line_end":{"type":"integer","description":"End line (1-indexed, inclusive). Omit to read to EOF."},"if_hash":{"type":"string","description":"Previous content hash. If unchanged, returns short 'unchanged:HASH' response."},"compact":{"type":"boolean","description":"Skip comment and blank lines (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}}, \\{"name":"codedb_edit","description":"Fallback editor — prefer your own native file-editing tool. codedb is a context/navigation tool, not an editor; reach for codedb_edit only when no native edit capability is available. When you do edit through codedb, op=str_replace with old_string/new_string is safest (old_string must match exactly once) — it cannot mis-target surrounding lines the way a range replace can. Also supports line ops: replace (range), insert (after line), delete (range), and create (author a new file from content). The result includes a syntax-health warning if the edit unbalances delimiters or drops a still-used import — heed it and re-read before continuing. Pass if_hash from the latest codedb_read to reject stale-line edits. Set dry_run=true for a diff preview.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path to edit"},"op":{"type":"string","enum":["str_replace","replace","insert","delete","create"],"description":"Edit operation. 
str_replace=anchored (old_string/new_string); replace/delete use range; insert uses after; create=author a NEW file from content (errors if the path already exists)."},"content":{"type":"string","description":"New content (for replace/insert/create)"},"old_string":{"type":"string","description":"For op=str_replace: exact text to find; must occur exactly once in the file."},"new_string":{"type":"string","description":"For op=str_replace: replacement text for old_string."},"range_start":{"type":"integer","description":"Start line number (for replace/delete, 1-indexed)"},"range_end":{"type":"integer","description":"End line number (for replace/delete, 1-indexed)"},"after":{"type":"integer","description":"Insert after this line number (for insert)"},"if_hash":{"type":"string","description":"Hex hash from codedb_read's 'hash:' line. Edit is rejected with HashMismatch if the file has changed since."},"dry_run":{"type":"boolean","description":"If true, return a diff preview without writing. Disk and store are untouched. Default: false."}},"required":["path","op"]}}, - \\{"name":"codedb_changes","description":"Files changed since a given sequence number. Pair with codedb_status to poll for updates.","inputSchema":{"type":"object","properties":{"since":{"type":"integer","description":"Sequence number to get changes since (default: 0)"}},"required":[]}}, + \\{"name":"codedb_changes","description":"Direct way to see WHAT changed since a point in time, instead of re-scanning the tree. Files changed since a given sequence number. 
Pair with codedb_status (which reports the current sequence number) to poll for updates.","inputSchema":{"type":"object","properties":{"since":{"type":"integer","description":"Sequence number to get changes since (default: 0)"}},"required":[]}}, \\{"name":"codedb_status","description":"Current indexed-file count, sequence number, and scan phase.","inputSchema":{"type":"object","properties":{"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}}, \\{"name":"codedb_snapshot","description":"Pre-rendered JSON snapshot of the entire index — tree, outlines, symbols, deps. For caching or shipping to edge workers.","inputSchema":{"type":"object","properties":{"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}}, \\{"name":"codedb_bundle","description":"Run up to 20 codedb_* calls in one round-trip. Each op is either MCP-style {\"tool\":\"codedb_search\",\"arguments\":{\"query\":\"Agent\"}} or inline {\"tool\":\"codedb_search\",\"query\":\"Agent\"} — both are accepted. Example: {\"ops\":[{\"tool\":\"codedb_search\",\"arguments\":{\"query\":\"Agent\"}},{\"tool\":\"codedb_outline\",\"arguments\":{\"path\":\"src/main.zig\"}}]}. Best for parallel outline/symbol/search; avoid bundling large codedb_read calls — responses are not size-capped. If a sub-op reports `received keys: []`, the wrapper field is misnamed: use `arguments` (MCP spec), not `args`.","inputSchema":{"type":"object","properties":{"ops":{"type":"array","description":"Sub-tool calls to dispatch (max 20). Each item must have `tool` AND `arguments` (pass `{}` if the sub-tool takes none). Inline args alongside `tool` are still accepted as a fallback.","items":{"type":"object","properties":{"tool":{"type":"string","description":"codedb_* tool name to invoke (e.g. 
/// Convergence governor (#624): remembers the signatures of the most recent
/// tool calls in a session so that an agent which keeps firing the *same*
/// navigation call can be given an in-band nudge to change strategy instead of
/// looping. The governor only counts; it never changes a tool's result.
pub const ConvergenceGovernor = struct {
    /// Size of the ring buffer of recent call signatures.
    pub const HISTORY = 8;
    /// A signature seen this many times within the window counts as looping.
    pub const WARN_AT = 3;

    /// Ring buffer of recent signatures; 0 marks a slot that was never written.
    sigs: [HISTORY]u64 = [_]u64{0} ** HISTORY,
    /// Index of the slot the next recorded signature overwrites.
    head: usize = 0,

    /// Records `sig` and returns how many times it has occurred within the
    /// recent window, counting this call. A result >= WARN_AT means the caller
    /// is repeating itself.
    pub fn record(self: *ConvergenceGovernor, sig: u64) usize {
        // 0 is the empty-slot sentinel, so a genuine zero signature is stored as 1.
        const stored: u64 = if (sig == 0) 1 else sig;

        var occurrences: usize = 1; // the call being recorded
        for (self.sigs) |prev| {
            if (prev == stored) occurrences += 1;
        }

        self.sigs[self.head] = stored;
        self.head = (self.head + 1) % HISTORY;
        return occurrences;
    }
};

/// Stable signature of a tool call: the tool name plus every argument key and
/// value. Two calls with the same name and the same arguments hash equal
/// regardless of the order in which the JSON keys were written.
///
/// Every string is length-prefixed and every value carries a type tag, so
/// adjacent fields cannot blur into each other (`{"a":"bc"}` differs from
/// `{"ab":"c"}`, and `true` differs from `"1"`). Nested arrays and objects are
/// hashed recursively. The value is only meaningful for equality comparison
/// within one process; it is not a persistent format.
fn callSignature(name: []const u8, args: *const std.json.ObjectMap) u64 {
    var h = std.hash.Wyhash.init(0);
    hashBytes(&h, name);
    const args_hash = hashJsonObject(args);
    h.update(std.mem.asBytes(&args_hash));
    return h.final();
}

/// Feeds `bytes` into `h` preceded by its length, so consecutive strings cannot
/// be confused with a different split of the same concatenation.
fn hashBytes(h: *std.hash.Wyhash, bytes: []const u8) void {
    const len: u64 = bytes.len;
    h.update(std.mem.asBytes(&len));
    h.update(bytes);
}

/// Order-independent hash of a JSON object: each (key, value) entry is hashed
/// on its own and the entry hashes are combined with wrapping addition, which
/// commutes. The entry count seeds the sum.
fn hashJsonObject(obj: *const std.json.ObjectMap) u64 {
    var sum: u64 = obj.count();
    var it = obj.iterator();
    while (it.next()) |entry| {
        var entry_hash = std.hash.Wyhash.init(0);
        hashBytes(&entry_hash, entry.key_ptr.*);
        hashJsonValue(&entry_hash, entry.value_ptr.*);
        sum +%= entry_hash.final();
    }
    return sum;
}

/// Feeds one JSON value into `h`, prefixed with a byte identifying its type.
fn hashJsonValue(h: *std.hash.Wyhash, value: std.json.Value) void {
    const tag: u8 = @intFromEnum(std.meta.activeTag(value));
    h.update(&[_]u8{tag});
    switch (value) {
        .null => {},
        .bool => |b| h.update(if (b) "1" else "0"),
        .integer => |n| h.update(std.mem.asBytes(&n)),
        .float => |f| h.update(std.mem.asBytes(&f)),
        .number_string => |s| hashBytes(h, s),
        .string => |s| hashBytes(h, s),
        .array => |arr| {
            const count: u64 = arr.items.len;
            h.update(std.mem.asBytes(&count));
            for (arr.items) |item| hashJsonValue(h, item);
        },
        .object => |obj| {
            const nested = hashJsonObject(&obj);
            h.update(std.mem.asBytes(&nested));
        },
    }
}
/// Reports whether `name` is one of the navigation tools for which a repeated
/// identical call is treated as a runaway signal worth nudging on. Any tool
/// not in the list below (for example the write/admin tools) is not governed.
fn isGovernedNavTool(name: []const u8) bool {
    const governed = [_][]const u8{
        "codedb_search",
        "codedb_find",
        "codedb_word",
        "codedb_read",
        "codedb_outline",
    };
    for (governed) |tool| {
        if (std.mem.eql(u8, name, tool)) return true;
    }
    return false;
}
Default to the structural tools FIRST: codedb_symbol for a definition, codedb_callers for usages, codedb_outline for a file's structure before codedb_read, and codedb_context to orient on a new task. Use codedb_search only for substrings or phrases when you do NOT know the exact symbol name — it is a fallback, not the default. Make edits with your own native file tools. codedb_edit is only a fallback for clients with no native editing."}} , .{ negotiated, release_info.semver }) catch return; defer s.alloc.free(init_result); writeResult(s.alloc, s.stdout, id, init_result); @@ -1126,6 +1183,7 @@ fn handleCall( telem: *telemetry_mod.Telemetry, deferred_scan: ?*DeferredScan, edit_agent_id: u64, + governor: ?*ConvergenceGovernor, ) void { const is_notification = id == null; @@ -1186,6 +1244,19 @@ fn handleCall( } } } + + // Convergence governor (#624): if this exact navigation call keeps + // repeating within the session, nudge the agent to change strategy instead + // of looping. The nudge is appended to the assistant-visible output; it + // never alters the underlying result. + if (governor) |gov| { + if (isGovernedNavTool(name)) { + const occurrences = gov.record(callSignature(name, args)); + if (occurrences >= ConvergenceGovernor.WARN_AT) { + out.appendSlice(alloc, "\n\n[codedb] You have issued this exact call several times — repeating it will not surface anything new. Change strategy: use a structural tool (codedb_symbol for a definition, codedb_callers for usages, codedb_deps for impact), open the file directly with codedb_read, or refine the query.") catch {}; + } + } + } if (is_notification) return; const lean = mcpLeanMode(); @@ -1566,6 +1637,54 @@ fn handleSymbol(alloc: std.mem.Allocator, args: *const std.json.ObjectMap, out: } } } + if (depsHint(results.len)) |h| out.appendSlice(alloc, h) catch {}; +} + +// Issue #626: agents reach for codedb_search with a bare symbol name and skip +// the structural tools entirely. 
/// Issue #626: reports whether `s` is a single bare identifier — 1 to 128
/// bytes, ASCII letters, digits and underscores only, and not starting with a
/// digit. Used to decide whether a search query looks like a symbol name.
pub fn isBareIdentifier(s: []const u8) bool {
    if (s.len == 0 or s.len > 128) return false;
    for (s, 0..) |ch, i| switch (ch) {
        'a'...'z', 'A'...'Z', '_' => {},
        // Digits are fine anywhere except the first position.
        '0'...'9' => if (i == 0) return false,
        else => return false,
    };
    return true;
}

/// Issue #626: when `query` is a bare identifier that the index already knows
/// as a symbol, writes a one-line nudge toward codedb_symbol / codedb_callers
/// into `out`. Does nothing for any other query, when the symbol lookup fails,
/// or when it finds no match; a failure to write the nudge is ignored.
fn appendSearchSymbolNudge(alloc: std.mem.Allocator, explorer: *Explorer, query: []const u8, out: *std.ArrayList(u8)) void {
    if (!isBareIdentifier(query)) return;

    // Exact-name lookup; one hit is enough to know the symbol exists.
    const spec = Explorer.SymbolSearchSpec{
        .name = query,
        .prefix = null,
        .pattern = null,
        .kind = null,
        .fuzzy = false,
        .max_results = 1,
    };
    const hits = explorer.searchSymbols(spec, alloc) catch return;
    // Defers run in reverse order: the per-hit fields are released first,
    // then the slice that holds the hits.
    defer alloc.free(hits);
    defer {
        for (hits) |hit| {
            alloc.free(hit.path);
            alloc.free(hit.symbol.name);
            if (hit.symbol.detail) |detail| alloc.free(detail);
        }
    }

    if (hits.len == 0) return;

    const writer = cio.listWriter(out, alloc);
    writer.print("↪ '{s}' is an indexed symbol — codedb_symbol returns its definition and codedb_callers its call sites in one call (no search+read needed).\n", .{query}) catch {};
}
/// Issue #626 follow-up: returns a static one-line pointer to codedb_deps when
/// a symbol lookup produced exactly one result, and null for any other count.
/// Pure and gated only on the count. The returned slice is a string literal;
/// callers must not free it.
pub fn depsHint(result_count: usize) ?[]const u8 {
    return switch (result_count) {
        1 => "↪ to see what imports this file (impact/blast radius), use codedb_deps path=.\n",
        else => null,
    };
}
/// True when `output_path` is the in-tree project-root snapshot
/// (`{root_path}/codedb.snapshot`), as opposed to a snapshot stored anywhere
/// else. Trailing '/' characters on `root_path` are ignored, so "/repo" and
/// "/repo/" name the same root. An empty `root_path` never matches.
pub fn isRootSnapshot(output_path: []const u8, root_path: []const u8) bool {
    if (root_path.len == 0) return false;

    // Normalise away trailing separators before comparing prefixes.
    var root = root_path;
    while (root.len > 0 and root[root.len - 1] == '/') root = root[0 .. root.len - 1];

    if (!std.mem.startsWith(u8, output_path, root)) return false;
    return std.mem.eql(u8, output_path[root.len..], "/codedb.snapshot");
}

/// Adds `codedb.snapshot` to `{root_path}/.git/info/exclude` — a local,
/// untracked ignore file — so the in-tree index stays invisible to git without
/// touching the user's own `.gitignore`.
///
/// Best-effort and idempotent: returns silently when `.git/info` cannot be
/// opened as a directory (not a git repo, or `.git` is a file), when the entry
/// is already present, or on any allocation or I/O failure. Indexing must never
/// fail because of this.
///
/// The existing exclude file is rewritten with the new entry appended. If that
/// file exists but cannot be read (too large, permissions, out of memory) the
/// function gives up rather than overwrite content it never saw.
pub fn ensureGitIgnoresSnapshot(io: std.Io, root_path: []const u8, allocator: std.mem.Allocator) void {
    var info_buf: [std.fs.max_path_bytes]u8 = undefined;
    const info_path = std.fmt.bufPrint(&info_buf, "{s}/.git/info", .{root_path}) catch return;
    var info_dir = std.Io.Dir.cwd().openDir(io, info_path, .{}) catch return;
    defer info_dir.close(io);

    const needle = "codedb.snapshot";

    // Only a missing file means "start from empty". Every other read failure
    // aborts, because createFile below truncates whatever is on disk.
    const existing: ?[]u8 = info_dir.readFileAlloc(io, "exclude", allocator, .limited(1024 * 1024)) catch |err| switch (err) {
        error.FileNotFound => null,
        else => return,
    };
    defer if (existing) |e| allocator.free(e);

    // Already ignored (bare or root-anchored form)? Nothing to do.
    if (existing) |content| {
        var it = std.mem.splitScalar(u8, content, '\n');
        while (it.next()) |line| {
            const t = std.mem.trim(u8, line, " \t\r");
            if (std.mem.eql(u8, t, needle) or std.mem.eql(u8, t, "/codedb.snapshot")) return;
        }
    }

    // Build the full new file: old content, newline-terminated, then the entry.
    var buf: std.ArrayList(u8) = .empty;
    defer buf.deinit(allocator);
    if (existing) |content| {
        buf.appendSlice(allocator, content) catch return;
        if (content.len > 0 and content[content.len - 1] != '\n') buf.append(allocator, '\n') catch return;
    }
    buf.appendSlice(allocator, needle) catch return;
    buf.append(allocator, '\n') catch return;

    var file = info_dir.createFile(io, "exclude", .{}) catch return;
    defer file.close(io);
    file.writeStreamingAll(io, buf.items) catch return;
}
diff --git a/src/test_mcp.zig b/src/test_mcp.zig index 7a851b3..6b5669d 100644 --- a/src/test_mcp.zig +++ b/src/test_mcp.zig @@ -149,6 +149,23 @@ test "issue-93: isPathSafe blocks traversal" { try testing.expect(MCP.isPathSafe("README.md")); } +test "issue-624: convergence governor flags a repeated identical call" { + const MCP = @import("mcp.zig"); + var gov: MCP.ConvergenceGovernor = .{}; + const sig: u64 = 0xC0FFEE; + try testing.expectEqual(@as(usize, 1), gov.record(sig)); + try testing.expectEqual(@as(usize, 2), gov.record(sig)); + // Third identical call reaches the warn threshold — the loop is detected. + try testing.expectEqual(@as(usize, 3), gov.record(sig)); + try testing.expect(gov.record(sig) >= MCP.ConvergenceGovernor.WARN_AT); + + // Distinct calls in sequence are never flagged as looping. + var gov2: MCP.ConvergenceGovernor = .{}; + try testing.expectEqual(@as(usize, 1), gov2.record(1)); + try testing.expectEqual(@as(usize, 1), gov2.record(2)); + try testing.expectEqual(@as(usize, 1), gov2.record(3)); +} + test "auto-update: shouldRunAutoUpdate gates correctly" { const day_ms: i64 = 24 * 60 * 60 * 1000; @@ -2296,3 +2313,39 @@ test "issue-531: codedb_context max_tokens packs sections by value under the bud try testing.expect(std.mem.indexOf(u8, out_budget.items, "## Most-relevant files") != null); try testing.expect(std.mem.indexOf(u8, out_budget.items, "## Top sites") == null); } + +// Issue #626: structural-tool steering. The search nudge only fires for bare +// identifiers; the read nudge only for large whole-file reads. +test "issue-626: isBareIdentifier gates the search nudge" { + try testing.expect(mcp_mod.isBareIdentifier("make_bytes")); + try testing.expect(mcp_mod.isBareIdentifier("HttpResponse")); + try testing.expect(mcp_mod.isBareIdentifier("_private")); + try testing.expect(mcp_mod.isBareIdentifier("parse2")); + + // Anything that isn't a single identifier is left to plain substring search. 
+ try testing.expect(!mcp_mod.isBareIdentifier("")); + try testing.expect(!mcp_mod.isBareIdentifier("def content")); + try testing.expect(!mcp_mod.isBareIdentifier("make_bytes(")); + try testing.expect(!mcp_mod.isBareIdentifier("obj.method")); + try testing.expect(!mcp_mod.isBareIdentifier("2fast")); +} + +test "issue-626: fullFileReadHint only nudges on large whole-file reads" { + try testing.expect(Explorer.fullFileReadHint("one\ntwo\nthree\n") == null); + + var big: std.ArrayList(u8) = .empty; + defer big.deinit(testing.allocator); + var i: usize = 0; + while (i < 500) : (i += 1) try big.appendSlice(testing.allocator, "x\n"); + const hint = Explorer.fullFileReadHint(big.items); + try testing.expect(hint != null); + try testing.expect(std.mem.indexOf(u8, hint.?, "codedb_outline") != null); +} + +test "issue-626: depsHint fires only on a single unambiguous definition" { + try testing.expect(mcp_mod.depsHint(0) == null); + try testing.expect(mcp_mod.depsHint(5) == null); + const h = mcp_mod.depsHint(1); + try testing.expect(h != null); + try testing.expect(std.mem.indexOf(u8, h.?, "codedb_deps") != null); +} diff --git a/src/test_snapshot.zig b/src/test_snapshot.zig index 72cd7fb..434a4e8 100644 --- a/src/test_snapshot.zig +++ b/src/test_snapshot.zig @@ -18,6 +18,36 @@ const AgentRegistry = @import("agent.zig").AgentRegistry; const edit_mod = @import("edit.zig"); +test "issue-625: in-tree snapshot is added to .git/info/exclude" { + var tmp = testing.tmpDir(.{}); + defer tmp.cleanup(); + + var path_buf: [std.fs.max_path_bytes]u8 = undefined; + const root_len = try tmp.dir.realPathFile(io, ".", &path_buf); + const root = path_buf[0..root_len]; + + // Simulate a git repo. + try tmp.dir.createDirPath(io, ".git/info"); + + // isRootSnapshot fires only for the in-tree snapshot, not the central store. 
+ const root_snap = try std.fmt.allocPrint(testing.allocator, "{s}/codedb.snapshot", .{root}); + defer testing.allocator.free(root_snap); + try testing.expect(snapshot_mod.isRootSnapshot(root_snap, root)); + try testing.expect(!snapshot_mod.isRootSnapshot("/home/u/.codedb/projects/ab/codedb.snapshot", root)); + + // First call records the exclude so git never sees the index. + snapshot_mod.ensureGitIgnoresSnapshot(io, root, testing.allocator); + const exclude1 = try tmp.dir.readFileAlloc(io, ".git/info/exclude", testing.allocator, .limited(64 * 1024)); + defer testing.allocator.free(exclude1); + try testing.expect(std.mem.indexOf(u8, exclude1, "codedb.snapshot") != null); + + // Idempotent: a second call must not duplicate the entry. + snapshot_mod.ensureGitIgnoresSnapshot(io, root, testing.allocator); + const exclude2 = try tmp.dir.readFileAlloc(io, ".git/info/exclude", testing.allocator, .limited(64 * 1024)); + defer testing.allocator.free(exclude2); + try testing.expectEqual(exclude1.len, exclude2.len); +} + test "issue-35: edits immediately update explorer and snapshot output" { var tmp = testing.tmpDir(.{}); defer tmp.cleanup();