diff --git a/src/cli_args.zig b/src/cli_args.zig index 5fbc376..a3abe48 100644 --- a/src/cli_args.zig +++ b/src/cli_args.zig @@ -204,7 +204,7 @@ pub fn isValidMcpFlag(arg: []const u8) bool { fn isCommand(arg: []const u8) bool { // cli_query_cmds is the shared query-command table (see its doc); only the // non-query commands are listed here. - const commands = cli_query_cmds ++ [_][]const u8{ "snapshot", "serve", "mcp", "update", "nuke", "cli-daemon" }; + const commands = cli_query_cmds ++ [_][]const u8{ "snapshot", "serve", "mcp", "update", "nuke", "cli-daemon", "index" }; for (commands) |c| { if (std.mem.eql(u8, arg, c)) return true; } diff --git a/src/main.zig b/src/main.zig index 389f2e1..309d5b3 100644 --- a/src/main.zig +++ b/src/main.zig @@ -467,6 +467,19 @@ fn mainImpl() !void { try commands.runServe(&ctx); } else if (std.mem.eql(u8, cmd, "mcp")) { try commands.runMcp(&ctx); + } else if (std.mem.eql(u8, cmd, "index")) { + // #633: `index` is a first-class command. coldLoadOrScan above already + // scanned + persisted the on-disk index for this cmd; confirm and exit + // cleanly. It used to fall through to "unknown command: index" + exit 1 + // even though the index had been built. + explorer.mu.lockShared(); + const file_count = explorer.outlines.count(); + explorer.mu.unlockShared(); + out.p("{s}\xe2\x9c\x93{s} {s}index ready{s} {s}{d} files{s}\n", .{ + s.green, s.reset, s.bold, s.reset, s.dim, file_count, s.reset, + }); + out.flush(); + std.process.exit(0); } else { out.p("{s}\xe2\x9c\x97{s} unknown command: {s}{s}{s}\n", .{ s.red, s.reset, s.bold, cmd, s.reset, diff --git a/src/mcp.zig b/src/mcp.zig index 2318b57..99397e0 100644 --- a/src/mcp.zig +++ b/src/mcp.zig @@ -627,7 +627,7 @@ pub const tools_list = \\{"name":"codedb_diagnostics","description":"Fetch the latest linter diagnostics for a file, produced off the edit path (ruff/biome/etc.) after a recent codedb_edit. 
Call right after an edit to surface real errors the change may have introduced (undefined names, type/lint issues) on top of codedb's built-in checks. Returns 'no diagnostics available yet' when none are cached or external linters are disabled.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path to fetch diagnostics for"}},"required":["path"]}}, \\{"name":"codedb_hot","description":"Recently modified files, newest first — reach for this to see WHERE work is happening before searching an unfamiliar or mid-sprint codebase.","inputSchema":{"type":"object","properties":{"limit":{"type":"integer","description":"Number of files to return (default: 10)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}}, \\{"name":"codedb_deps","description":"PRIMARY tool for impact/blast-radius — use this instead of grepping import lines. Dependency graph: who imports a file (default) or what a file imports (direction=depends_on). Set transitive=true for the full BFS blast radius.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path to check dependencies for"},"direction":{"type":"string","enum":["imported_by","depends_on"],"description":"imported_by (default): who imports this file. depends_on: what this file imports."},"transitive":{"type":"boolean","description":"Follow dependency chain transitively (default: false)"},"max_depth":{"type":"integer","description":"Max traversal depth for transitive queries (default: unlimited)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}}, - \\{"name":"codedb_read","description":"Read file contents, optionally a line range. Run codedb_outline first to pick the range — large files burn tokens fast. 
Pass if_hash to skip re-reads when the file is unchanged.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path relative to project root"},"line_start":{"type":"integer","description":"Start line (1-indexed, inclusive). Omit for full file."},"line_end":{"type":"integer","description":"End line (1-indexed, inclusive). Omit to read to EOF."},"if_hash":{"type":"string","description":"Previous content hash. If unchanged, returns short 'unchanged:HASH' response."},"compact":{"type":"boolean","description":"Skip comment and blank lines (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}}, + \\{"name":"codedb_read","description":"Read file contents, optionally a line range. Run codedb_outline first to pick the range — large files burn tokens fast. Pass if_hash to skip re-reads when the file is unchanged.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path relative to project root"},"line_start":{"type":"integer","description":"Start line (1-indexed, inclusive). Omit for full file."},"line_end":{"type":"integer","description":"End line (1-indexed, inclusive). Omit to read to EOF."},"if_hash":{"type":"string","description":"Previous content hash. If unchanged, returns short 'unchanged:HASH' response."},"compact":{"type":"boolean","description":"Skip comment and blank lines (default: false)"},"raw":{"type":"boolean","description":"Byte-exact output: no line-number prefixes and no hash header, so the result can feed an exact-string edit (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}}, \\{"name":"codedb_edit","description":"Fallback editor — prefer your own native file-editing tool. 
codedb is a context/navigation tool, not an editor; reach for codedb_edit only when no native edit capability is available. When you do edit through codedb, op=str_replace with old_string/new_string is safest (old_string must match exactly once) — it cannot mis-target surrounding lines the way a range replace can. Also supports line ops: replace (range), insert (after line), delete (range), and create (author a new file from content). The result includes a syntax-health warning if the edit unbalances delimiters or drops a still-used import — heed it and re-read before continuing. Pass if_hash from the latest codedb_read to reject stale-line edits. Set dry_run=true for a diff preview.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path to edit"},"op":{"type":"string","enum":["str_replace","replace","insert","delete","create"],"description":"Edit operation. str_replace=anchored (old_string/new_string); replace/delete use range; insert uses after; create=author a NEW file from content (errors if the path already exists)."},"content":{"type":"string","description":"New content (for replace/insert/create)"},"old_string":{"type":"string","description":"For op=str_replace: exact text to find; must occur exactly once in the file."},"new_string":{"type":"string","description":"For op=str_replace: replacement text for old_string."},"range_start":{"type":"integer","description":"Start line number (for replace/delete, 1-indexed)"},"range_end":{"type":"integer","description":"End line number (for replace/delete, 1-indexed)"},"after":{"type":"integer","description":"Insert after this line number (for insert)"},"if_hash":{"type":"string","description":"Hex hash from codedb_read's 'hash:' line. Edit is rejected with HashMismatch if the file has changed since."},"dry_run":{"type":"boolean","description":"If true, return a diff preview without writing. Disk and store are untouched. 
Default: false."}},"required":["path","op"]}}, \\{"name":"codedb_changes","description":"Direct way to see WHAT changed since a point in time, instead of re-scanning the tree. Files changed since a given sequence number. Pair with codedb_status (which reports the current sequence number) to poll for updates.","inputSchema":{"type":"object","properties":{"since":{"type":"integer","description":"Sequence number to get changes since (default: 0)"}},"required":[]}}, \\{"name":"codedb_status","description":"Current indexed-file count, sequence number, and scan phase.","inputSchema":{"type":"object","properties":{"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}}, @@ -2956,6 +2956,9 @@ fn handleRead(io: std.Io, alloc: std.mem.Allocator, args: *const std.json.Object const line_start_raw = getInt(args, "line_start"); const line_end_raw = getInt(args, "line_end"); const compact = getBool(args, "compact"); + // #632: byte-exact ranged read — no line-number prefixes, no hash header — so + // the output can feed an exact-string editor instead of forcing a native read. + const raw = getBool(args, "raw"); const has_range = line_start_raw != null or line_end_raw != null; // Bug 6: validate line range explicitly. Pre-fix: invalid ranges silently @@ -3038,22 +3041,27 @@ fn handleRead(io: std.Io, alloc: std.mem.Allocator, args: *const std.json.Object } } - // Always prepend hash - const w = cio.listWriter(out, alloc); - w.print("hash:{s}\n", .{hash_str}) catch {}; + // Prepend a content-hash ETag header — but NOT in raw mode (#632), where the + // caller wants byte-exact bytes it can feed to an exact-string edit. 
+ if (!raw) { + const w = cio.listWriter(out, alloc); + w.print("hash:{s}\n", .{hash_str}) catch {}; + } if (has_range or compact) { const start: u32 = if (line_start_raw) |n| @intCast(@min(@max(1, n), std.math.maxInt(u32))) else 1; const end: u32 = if (line_end_raw) |n| @intCast(@min(@max(1, n), std.math.maxInt(u32))) else std.math.maxInt(u32); const lang = explore_mod.detectLanguage(path); - const extracted = explore_mod.extractLines(content, start, end, true, compact, lang, alloc) catch { + const extracted = explore_mod.extractLines(content, start, end, !raw, compact, lang, alloc) catch { out.appendSlice(alloc, "error: line extraction failed") catch {}; return; }; defer alloc.free(extracted); out.appendSlice(alloc, extracted) catch {}; } else { - if (Explorer.fullFileReadHint(content)) |hint| out.appendSlice(alloc, hint) catch {}; + if (!raw) { + if (Explorer.fullFileReadHint(content)) |hint| out.appendSlice(alloc, hint) catch {}; + } out.appendSlice(alloc, content) catch {}; } } diff --git a/src/test_mcp.zig b/src/test_mcp.zig index e9b2dc2..0f1fee0 100644 --- a/src/test_mcp.zig +++ b/src/test_mcp.zig @@ -2523,3 +2523,142 @@ test "issue-624: convergence nudge is suppressed for format=json" { try testing.expect(MCP.convergenceNudge(warn - 1, false) == null); try testing.expect(MCP.convergenceNudge(warn - 1, true) == null); } + +test "issue-632: codedb_read raw mode returns byte-exact range without line-number prefixes" { + // #632: a ranged codedb_read emits line-number-prefixed output (handleRead + // hardcodes extractLines line_numbers=true), so its bytes are NOT a verbatim + // copy of the source. Agents therefore can't feed it to an exact-string + // editor and fall back to a native read for the pre-edit span — defeating + // codedb on the read path (see justrach/codegraff#66). A `raw` mode should + // return the exact lines so codedb can serve read+edit, not just locate. 
+ var tmp_dir = testing.tmpDir(.{}); + defer tmp_dir.cleanup(); + + var dir_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp_dir.dir.realPathFile(io, ".", &dir_buf); + const dir_path = dir_buf[0..dir_path_len]; + + const rel = "small.txt"; + const full = try std.fmt.allocPrint(testing.allocator, "{s}/{s}", .{ dir_path, rel }); + defer testing.allocator.free(full); + { + const f = try std.Io.Dir.cwd().createFile(io, full, .{ .truncate = true }); + defer f.close(io); + try f.writePositionalAll(io, "alpha\nbeta\ngamma\n", 0); + } + + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + explorer.setRoot(io, dir_path); + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, dir_path, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + // raw=true + a line range: expect the exact source bytes for lines 1-2, + // with NO "N | " line-number prefixes (so it can feed an exact-match edit). + const args_json = try std.fmt.allocPrint(testing.allocator, "{{\"path\":\"{s}\",\"line_start\":1,\"line_end\":2,\"raw\":true}}", .{rel}); + defer testing.allocator.free(args_json); + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + bench_ctx.runDispatch(io, testing.allocator, .codedb_read, &parsed.value.object, &out, &store, &explorer, &agents); + + // The exact source lines must be present verbatim... 
+ try testing.expect(std.mem.indexOf(u8, out.items, "alpha\nbeta") != null); + // ...and there must be no line-number prefix separator ("N | "), which would + // make the output non-byte-exact and unusable for an exact-string edit. + try testing.expect(std.mem.indexOf(u8, out.items, " | ") == null); +} + +test "issue-633: `index` is a recognized command (not a usage/unknown error)" { + // `codedb <root> index` scanned + persisted (the cold-load path keys on + // cmd=="index") but then fell through the dispatch with no `index` branch → + // "unknown command: index" + exit 1; and `codedb index` (no root) was a + // usage error because isCommand() never listed it. `index` must parse as a + // first-class command. + // `codedb index` (no explicit root) must parse as cmd=index, root=".". + const p = main_mod.parsePositional(&[_][]const u8{ "codedb", "index" }); + try testing.expect(!p.usage_exit); + try testing.expectEqualStrings("index", p.cmd); + try testing.expectEqualStrings(".", p.root); + + // `codedb <root> index` (explicit root) also resolves cmd=index. + const p2 = main_mod.parsePositional(&[_][]const u8{ "codedb", "/proj", "index" }); + try testing.expectEqualStrings("index", p2.cmd); + try testing.expectEqualStrings("/proj", p2.root); +} + +test "issue-632: codedb_read raw mode coverage — full-file byte-exact, default unchanged" { + // Broader coverage for #632: (a) raw full-file read is byte-exact (no hash + // header, no line-number prefix, no full-file hint); (b) the default ranged + // read still has BOTH the hash header and the "N | " prefix (regression + // guard); (c) a raw ranged read drops both. 
+ var tmp_dir = testing.tmpDir(.{}); + defer tmp_dir.cleanup(); + + var dir_buf: [std.fs.max_path_bytes]u8 = undefined; + const dir_path_len = try tmp_dir.dir.realPathFile(io, ".", &dir_buf); + const dir_path = dir_buf[0..dir_path_len]; + + const rel = "small.txt"; + const content = "alpha\nbeta\ngamma\n"; + const full = try std.fmt.allocPrint(testing.allocator, "{s}/{s}", .{ dir_path, rel }); + defer testing.allocator.free(full); + { + const f = try std.Io.Dir.cwd().createFile(io, full, .{ .truncate = true }); + defer f.close(io); + try f.writePositionalAll(io, content, 0); + } + + var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer explorer.deinit(); + explorer.setRoot(io, dir_path); + var store = Store.init(testing.allocator); + defer store.deinit(); + var agents = AgentRegistry.init(testing.allocator); + defer agents.deinit(); + _ = try agents.register("__filesystem__"); + + var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, dir_path, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY); + defer bench_ctx.deinit(); + + const Run = struct { + fn call(ctx: *mcp_mod.BenchContext, st: *Store, ex: *Explorer, ag: *AgentRegistry, args_json: []const u8, out: *std.ArrayList(u8)) !void { + const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{}); + defer parsed.deinit(); + ctx.runDispatch(io, testing.allocator, .codedb_read, &parsed.value.object, out, st, ex, ag); + } + }; + + // (a) raw full-file read → byte-exact copy of the source, nothing prepended. + { + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + try Run.call(&bench_ctx, &store, &explorer, &agents, "{\"path\":\"small.txt\",\"raw\":true}", &out); + try testing.expectEqualStrings(content, out.items); + } + // (b) default ranged read → unchanged: hash header present AND "N | " prefix. 
+ { + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + try Run.call(&bench_ctx, &store, &explorer, &agents, "{\"path\":\"small.txt\",\"line_start\":1,\"line_end\":2}", &out); + try testing.expect(std.mem.indexOf(u8, out.items, "hash:") != null); + try testing.expect(std.mem.indexOf(u8, out.items, " | ") != null); + } + // (c) raw ranged read → exact line, no hash header, no line-number prefix. + { + var out: std.ArrayList(u8) = .empty; + defer out.deinit(testing.allocator); + try Run.call(&bench_ctx, &store, &explorer, &agents, "{\"path\":\"small.txt\",\"line_start\":2,\"line_end\":2,\"raw\":true}", &out); + try testing.expect(std.mem.indexOf(u8, out.items, "beta") != null); + try testing.expect(std.mem.indexOf(u8, out.items, " | ") == null); + try testing.expect(std.mem.indexOf(u8, out.items, "hash:") == null); + } +}