justrach · justrach · Jun 21, 2026 · Jun 21, 2026 · Jun 21, 2026 · Jun 21, 2026
diff --git a/src/cli_args.zig b/src/cli_args.zig
@@ -204,7 +204,7 @@ pub fn isValidMcpFlag(arg: []const u8) bool {
 fn isCommand(arg: []const u8) bool {
     // cli_query_cmds is the shared query-command table (see its doc); only the
     // non-query commands are listed here.
-    const commands = cli_query_cmds ++ [_][]const u8{ "snapshot", "serve", "mcp", "update", "nuke", "cli-daemon" };
+    const commands = cli_query_cmds ++ [_][]const u8{ "snapshot", "serve", "mcp", "update", "nuke", "cli-daemon", "index" };
     for (commands) |c| {
         if (std.mem.eql(u8, arg, c)) return true;
     }

diff --git a/src/main.zig b/src/main.zig
@@ -467,6 +467,19 @@ fn mainImpl() !void {
         try commands.runServe(&ctx);
     } else if (std.mem.eql(u8, cmd, "mcp")) {
         try commands.runMcp(&ctx);
+    } else if (std.mem.eql(u8, cmd, "index")) {
+        // #633: `index` is a first-class command. coldLoadOrScan above already
+        // scanned + persisted the on-disk index for this cmd; confirm and exit
+        // cleanly. It used to fall through to "unknown command: index" + exit 1
+        // even though the index had been built.
+        explorer.mu.lockShared();
+        const file_count = explorer.outlines.count();
+        explorer.mu.unlockShared();
+        out.p("{s}\xe2\x9c\x93{s} {s}index ready{s}  {s}{d} files{s}\n", .{
+            s.green, s.reset, s.bold, s.reset, s.dim, file_count, s.reset,
+        });
+        out.flush();
+        std.process.exit(0);
     } else {
         out.p("{s}\xe2\x9c\x97{s} unknown command: {s}{s}{s}\n", .{
             s.red, s.reset, s.bold, cmd, s.reset,

diff --git a/src/mcp.zig b/src/mcp.zig
@@ -627,7 +627,7 @@ pub const tools_list =
     \\{"name":"codedb_diagnostics","description":"Fetch the latest linter diagnostics for a file, produced off the edit path (ruff/biome/etc.) after a recent codedb_edit. Call right after an edit to surface real errors the change may have introduced (undefined names, type/lint issues) on top of codedb's built-in checks. Returns 'no diagnostics available yet' when none are cached or external linters are disabled.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path to fetch diagnostics for"}},"required":["path"]}},
     \\{"name":"codedb_hot","description":"Recently modified files, newest first — reach for this to see WHERE work is happening before searching an unfamiliar or mid-sprint codebase.","inputSchema":{"type":"object","properties":{"limit":{"type":"integer","description":"Number of files to return (default: 10)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}},
     \\{"name":"codedb_deps","description":"PRIMARY tool for impact/blast-radius — use this instead of grepping import lines. Dependency graph: who imports a file (default) or what a file imports (direction=depends_on). Set transitive=true for the full BFS blast radius.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path to check dependencies for"},"direction":{"type":"string","enum":["imported_by","depends_on"],"description":"imported_by (default): who imports this file. depends_on: what this file imports."},"transitive":{"type":"boolean","description":"Follow dependency chain transitively (default: false)"},"max_depth":{"type":"integer","description":"Max traversal depth for transitive queries (default: unlimited)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}},
-    \\{"name":"codedb_read","description":"Read file contents, optionally a line range. Run codedb_outline first to pick the range — large files burn tokens fast. Pass if_hash to skip re-reads when the file is unchanged.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path relative to project root"},"line_start":{"type":"integer","description":"Start line (1-indexed, inclusive). Omit for full file."},"line_end":{"type":"integer","description":"End line (1-indexed, inclusive). Omit to read to EOF."},"if_hash":{"type":"string","description":"Previous content hash. If unchanged, returns short 'unchanged:HASH' response."},"compact":{"type":"boolean","description":"Skip comment and blank lines (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}},
+    \\{"name":"codedb_read","description":"Read file contents, optionally a line range. Run codedb_outline first to pick the range — large files burn tokens fast. Pass if_hash to skip re-reads when the file is unchanged.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path relative to project root"},"line_start":{"type":"integer","description":"Start line (1-indexed, inclusive). Omit for full file."},"line_end":{"type":"integer","description":"End line (1-indexed, inclusive). Omit to read to EOF."},"if_hash":{"type":"string","description":"Previous content hash. If unchanged, returns short 'unchanged:HASH' response."},"compact":{"type":"boolean","description":"Skip comment and blank lines (default: false)"},"raw":{"type":"boolean","description":"Byte-exact output: no line-number prefixes and no hash header, so the result can feed an exact-string edit (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}},
     \\{"name":"codedb_edit","description":"Fallback editor — prefer your own native file-editing tool. codedb is a context/navigation tool, not an editor; reach for codedb_edit only when no native edit capability is available. When you do edit through codedb, op=str_replace with old_string/new_string is safest (old_string must match exactly once) — it cannot mis-target surrounding lines the way a range replace can. Also supports line ops: replace (range), insert (after line), delete (range), and create (author a new file from content). The result includes a syntax-health warning if the edit unbalances delimiters or drops a still-used import — heed it and re-read before continuing. Pass if_hash from the latest codedb_read to reject stale-line edits. Set dry_run=true for a diff preview.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path to edit"},"op":{"type":"string","enum":["str_replace","replace","insert","delete","create"],"description":"Edit operation. str_replace=anchored (old_string/new_string); replace/delete use range; insert uses after; create=author a NEW file from content (errors if the path already exists)."},"content":{"type":"string","description":"New content (for replace/insert/create)"},"old_string":{"type":"string","description":"For op=str_replace: exact text to find; must occur exactly once in the file."},"new_string":{"type":"string","description":"For op=str_replace: replacement text for old_string."},"range_start":{"type":"integer","description":"Start line number (for replace/delete, 1-indexed)"},"range_end":{"type":"integer","description":"End line number (for replace/delete, 1-indexed)"},"after":{"type":"integer","description":"Insert after this line number (for insert)"},"if_hash":{"type":"string","description":"Hex hash from codedb_read's 'hash:' line. Edit is rejected with HashMismatch if the file has changed since."},"dry_run":{"type":"boolean","description":"If true, return a diff preview without writing. Disk and store are untouched. Default: false."}},"required":["path","op"]}},
     \\{"name":"codedb_changes","description":"Direct way to see WHAT changed since a point in time, instead of re-scanning the tree. Files changed since a given sequence number. Pair with codedb_status (which reports the current sequence number) to poll for updates.","inputSchema":{"type":"object","properties":{"since":{"type":"integer","description":"Sequence number to get changes since (default: 0)"}},"required":[]}},
     \\{"name":"codedb_status","description":"Current indexed-file count, sequence number, and scan phase.","inputSchema":{"type":"object","properties":{"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}},
@@ -2956,6 +2956,9 @@ fn handleRead(io: std.Io, alloc: std.mem.Allocator, args: *const std.json.Object
     const line_start_raw = getInt(args, "line_start");
     const line_end_raw = getInt(args, "line_end");
     const compact = getBool(args, "compact");
+    // #632: byte-exact ranged read — no line-number prefixes, no hash header — so
+    // the output can feed an exact-string editor instead of forcing a native read.
+    const raw = getBool(args, "raw");
     const has_range = line_start_raw != null or line_end_raw != null;
 
     // Bug 6: validate line range explicitly. Pre-fix: invalid ranges silently
@@ -3038,22 +3041,27 @@ fn handleRead(io: std.Io, alloc: std.mem.Allocator, args: *const std.json.Object
         }
     }
 
-    // Always prepend hash
-    const w = cio.listWriter(out, alloc);
-    w.print("hash:{s}\n", .{hash_str}) catch {};
+    // Prepend a content-hash ETag header — but NOT in raw mode (#632), where the
+    // caller wants byte-exact bytes it can feed to an exact-string edit.
+    if (!raw) {
+        const w = cio.listWriter(out, alloc);
+        w.print("hash:{s}\n", .{hash_str}) catch {};
+    }
 
     if (has_range or compact) {
         const start: u32 = if (line_start_raw) |n| @intCast(@min(@max(1, n), std.math.maxInt(u32))) else 1;
         const end: u32 = if (line_end_raw) |n| @intCast(@min(@max(1, n), std.math.maxInt(u32))) else std.math.maxInt(u32);
         const lang = explore_mod.detectLanguage(path);
-        const extracted = explore_mod.extractLines(content, start, end, true, compact, lang, alloc) catch {
+        const extracted = explore_mod.extractLines(content, start, end, !raw, compact, lang, alloc) catch {
             out.appendSlice(alloc, "error: line extraction failed") catch {};
             return;
         };
         defer alloc.free(extracted);
         out.appendSlice(alloc, extracted) catch {};
     } else {
-        if (Explorer.fullFileReadHint(content)) |hint| out.appendSlice(alloc, hint) catch {};
+        if (!raw) {
+            if (Explorer.fullFileReadHint(content)) |hint| out.appendSlice(alloc, hint) catch {};
+        }
         out.appendSlice(alloc, content) catch {};
     }
 }

diff --git a/src/test_mcp.zig b/src/test_mcp.zig
@@ -2523,3 +2523,142 @@ test "issue-624: convergence nudge is suppressed for format=json" {
     try testing.expect(MCP.convergenceNudge(warn - 1, false) == null);
     try testing.expect(MCP.convergenceNudge(warn - 1, true) == null);
 }
+
+test "issue-632: codedb_read raw mode returns byte-exact range without line-number prefixes" {
+    // #632: by default a ranged codedb_read prefixes every line with "N | " and
+    // prepends a "hash:" header, so its bytes are NOT a verbatim copy of the
+    // source and cannot feed an exact-string editor — agents fall back to a
+    // native read for the pre-edit span (see justrach/codegraff#66). raw=true
+    // must return the exact lines so codedb can serve read+edit, not just locate.
+    var tmp_dir = testing.tmpDir(.{});
+    defer tmp_dir.cleanup();
+
+    var dir_buf: [std.fs.max_path_bytes]u8 = undefined;
+    const dir_path_len = try tmp_dir.dir.realPathFile(io, ".", &dir_buf);
+    const dir_path = dir_buf[0..dir_path_len];
+
+    const rel = "small.txt";
+    const full = try std.fmt.allocPrint(testing.allocator, "{s}/{s}", .{ dir_path, rel });
+    defer testing.allocator.free(full);
+    {
+        const f = try std.Io.Dir.cwd().createFile(io, full, .{ .truncate = true });
+        defer f.close(io);
+        try f.writePositionalAll(io, "alpha\nbeta\ngamma\n", 0);
+    }
+
+    var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
+    defer explorer.deinit();
+    explorer.setRoot(io, dir_path);
+    var store = Store.init(testing.allocator);
+    defer store.deinit();
+    var agents = AgentRegistry.init(testing.allocator);
+    defer agents.deinit();
+    _ = try agents.register("__filesystem__");
+
+    var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, dir_path, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
+    defer bench_ctx.deinit();
+
+    // raw=true + a line range: expect the exact source bytes for lines 1-2,
+    // with NO "N | " prefixes and NO "hash:" header (exact-match-edit ready).
+    const args_json = try std.fmt.allocPrint(testing.allocator, "{{\"path\":\"{s}\",\"line_start\":1,\"line_end\":2,\"raw\":true}}", .{rel});
+    defer testing.allocator.free(args_json);
+    const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{});
+    defer parsed.deinit();
+
+    var out: std.ArrayList(u8) = .empty;
+    defer out.deinit(testing.allocator);
+    bench_ctx.runDispatch(io, testing.allocator, .codedb_read, &parsed.value.object, &out, &store, &explorer, &agents);
+
+    // The exact source lines must be present verbatim...
+    try testing.expect(std.mem.indexOf(u8, out.items, "alpha\nbeta") != null);
+    // ...with no "N | " line-number prefix and no "hash:" ETag header: either
+    // one makes the output non-byte-exact and unusable for an exact-string edit.
+    try testing.expect(std.mem.indexOf(u8, out.items, " | ") == null);
+    try testing.expect(std.mem.indexOf(u8, out.items, "hash:") == null);
+}
+
+test "issue-633: `index` is a recognized command (not a usage/unknown error)" {
+    // `codedb <root> index` scanned + persisted (the cold-load path keys on
+    // cmd=="index") but then fell through the dispatch with no `index` branch →
+    // "unknown command: index" + exit 1; and `codedb index` (no root) was a
+    // usage error because isCommand() never listed it. `index` must parse as a
+    // first-class command: with no explicit root it is cmd=index, root=".".
+    const p = main_mod.parsePositional(&[_][]const u8{ "codedb", "index" });
+    try testing.expect(!p.usage_exit);
+    try testing.expectEqualStrings("index", p.cmd);
+    try testing.expectEqualStrings(".", p.root);
+
+    // `codedb <root> index` (explicit root) also resolves cmd=index, no usage error.
+    const p2 = main_mod.parsePositional(&[_][]const u8{ "codedb", "/proj", "index" });
+    try testing.expect(!p2.usage_exit);
+    try testing.expectEqualStrings("index", p2.cmd);
+    try testing.expectEqualStrings("/proj", p2.root);
+}
+
+test "issue-632: codedb_read raw mode coverage — full-file byte-exact, default unchanged" {
+    // Three #632 scenarios against one three-line fixture file:
+    //   (a) raw, no range → output equals the file bytes, nothing prepended;
+    //   (b) range, no raw → still carries the hash header and "N | " prefixes;
+    //   (c) raw + range   → the requested line with neither decoration.
+    var scratch_dir = testing.tmpDir(.{});
+    defer scratch_dir.cleanup();
+
+    var root_buf: [std.fs.max_path_bytes]u8 = undefined;
+    const root_len = try scratch_dir.dir.realPathFile(io, ".", &root_buf);
+    const root_path = root_buf[0..root_len];
+
+    const file_rel = "small.txt";
+    const fixture_text = "alpha\nbeta\ngamma\n";
+    const abs_path = try std.fmt.allocPrint(testing.allocator, "{s}/{s}", .{ root_path, file_rel });
+    defer testing.allocator.free(abs_path);
+    {
+        const handle = try std.Io.Dir.cwd().createFile(io, abs_path, .{ .truncate = true });
+        defer handle.close(io);
+        try handle.writePositionalAll(io, fixture_text, 0);
+    }
+
+    var file_explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
+    defer file_explorer.deinit();
+    file_explorer.setRoot(io, root_path);
+    var edit_store = Store.init(testing.allocator);
+    defer edit_store.deinit();
+    var agent_registry = AgentRegistry.init(testing.allocator);
+    defer agent_registry.deinit();
+    _ = try agent_registry.register("__filesystem__");
+
+    var dispatcher = mcp_mod.BenchContext.init(testing.allocator, root_path, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
+    defer dispatcher.deinit();
+
+    const ReadCall = struct {
+        fn invoke(ctx: *mcp_mod.BenchContext, st: *Store, ex: *Explorer, ag: *AgentRegistry, json_text: []const u8, sink: *std.ArrayList(u8)) !void {
+            const tool_args = try std.json.parseFromSlice(std.json.Value, testing.allocator, json_text, .{});
+            defer tool_args.deinit();
+            ctx.runDispatch(io, testing.allocator, .codedb_read, &tool_args.value.object, sink, st, ex, ag);
+        }
+    };
+
+    // (a) raw, whole file: the sink must hold exactly the fixture bytes.
+    {
+        var whole: std.ArrayList(u8) = .empty;
+        defer whole.deinit(testing.allocator);
+        try ReadCall.invoke(&dispatcher, &edit_store, &file_explorer, &agent_registry, "{\"path\":\"small.txt\",\"raw\":true}", &whole);
+        try testing.expectEqualStrings(fixture_text, whole.items);
+    }
+    // (b) default ranged read: regression guard that both decorations survive.
+    {
+        var decorated: std.ArrayList(u8) = .empty;
+        defer decorated.deinit(testing.allocator);
+        try ReadCall.invoke(&dispatcher, &edit_store, &file_explorer, &agent_registry, "{\"path\":\"small.txt\",\"line_start\":1,\"line_end\":2}", &decorated);
+        try testing.expect(std.mem.indexOf(u8, decorated.items, "hash:") != null);
+        try testing.expect(std.mem.indexOf(u8, decorated.items, " | ") != null);
+    }
+    // (c) raw ranged read: line 2 is present, with no prefix and no hash header.
+    {
+        var bare: std.ArrayList(u8) = .empty;
+        defer bare.deinit(testing.allocator);
+        try ReadCall.invoke(&dispatcher, &edit_store, &file_explorer, &agent_registry, "{\"path\":\"small.txt\",\"line_start\":2,\"line_end\":2,\"raw\":true}", &bare);
+        try testing.expect(std.mem.indexOf(u8, bare.items, "beta") != null);
+        try testing.expect(std.mem.indexOf(u8, bare.items, " | ") == null);
+        try testing.expect(std.mem.indexOf(u8, bare.items, "hash:") == null);
+    }
+}