Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/cli_args.zig
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ pub fn isValidMcpFlag(arg: []const u8) bool {
fn isCommand(arg: []const u8) bool {
// cli_query_cmds is the shared query-command table (see its doc); only the
// non-query commands are listed here.
const commands = cli_query_cmds ++ [_][]const u8{ "snapshot", "serve", "mcp", "update", "nuke", "cli-daemon" };
const commands = cli_query_cmds ++ [_][]const u8{ "snapshot", "serve", "mcp", "update", "nuke", "cli-daemon", "index" };
for (commands) |c| {
if (std.mem.eql(u8, arg, c)) return true;
}
Expand Down
13 changes: 13 additions & 0 deletions src/main.zig
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,19 @@ fn mainImpl() !void {
try commands.runServe(&ctx);
} else if (std.mem.eql(u8, cmd, "mcp")) {
try commands.runMcp(&ctx);
} else if (std.mem.eql(u8, cmd, "index")) {
// #633: `index` is a first-class command. coldLoadOrScan above already
// scanned + persisted the on-disk index for this cmd; confirm and exit
// cleanly. It used to fall through to "unknown command: index" + exit 1
// even though the index had been built.
explorer.mu.lockShared();
const file_count = explorer.outlines.count();
explorer.mu.unlockShared();
out.p("{s}\xe2\x9c\x93{s} {s}index ready{s} {s}{d} files{s}\n", .{
s.green, s.reset, s.bold, s.reset, s.dim, file_count, s.reset,
});
out.flush();
std.process.exit(0);
} else {
out.p("{s}\xe2\x9c\x97{s} unknown command: {s}{s}{s}\n", .{
s.red, s.reset, s.bold, cmd, s.reset,
Expand Down
20 changes: 14 additions & 6 deletions src/mcp.zig
Original file line number Diff line number Diff line change
Expand Up @@ -627,7 +627,7 @@ pub const tools_list =
\\{"name":"codedb_diagnostics","description":"Fetch the latest linter diagnostics for a file, produced off the edit path (ruff/biome/etc.) after a recent codedb_edit. Call right after an edit to surface real errors the change may have introduced (undefined names, type/lint issues) on top of codedb's built-in checks. Returns 'no diagnostics available yet' when none are cached or external linters are disabled.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path to fetch diagnostics for"}},"required":["path"]}},
\\{"name":"codedb_hot","description":"Recently modified files, newest first — reach for this to see WHERE work is happening before searching an unfamiliar or mid-sprint codebase.","inputSchema":{"type":"object","properties":{"limit":{"type":"integer","description":"Number of files to return (default: 10)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}},
\\{"name":"codedb_deps","description":"PRIMARY tool for impact/blast-radius — use this instead of grepping import lines. Dependency graph: who imports a file (default) or what a file imports (direction=depends_on). Set transitive=true for the full BFS blast radius.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path to check dependencies for"},"direction":{"type":"string","enum":["imported_by","depends_on"],"description":"imported_by (default): who imports this file. depends_on: what this file imports."},"transitive":{"type":"boolean","description":"Follow dependency chain transitively (default: false)"},"max_depth":{"type":"integer","description":"Max traversal depth for transitive queries (default: unlimited)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}},
\\{"name":"codedb_read","description":"Read file contents, optionally a line range. Run codedb_outline first to pick the range — large files burn tokens fast. Pass if_hash to skip re-reads when the file is unchanged.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path relative to project root"},"line_start":{"type":"integer","description":"Start line (1-indexed, inclusive). Omit for full file."},"line_end":{"type":"integer","description":"End line (1-indexed, inclusive). Omit to read to EOF."},"if_hash":{"type":"string","description":"Previous content hash. If unchanged, returns short 'unchanged:HASH' response."},"compact":{"type":"boolean","description":"Skip comment and blank lines (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}},
\\{"name":"codedb_read","description":"Read file contents, optionally a line range. Run codedb_outline first to pick the range — large files burn tokens fast. Pass if_hash to skip re-reads when the file is unchanged.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path relative to project root"},"line_start":{"type":"integer","description":"Start line (1-indexed, inclusive). Omit for full file."},"line_end":{"type":"integer","description":"End line (1-indexed, inclusive). Omit to read to EOF."},"if_hash":{"type":"string","description":"Previous content hash. If unchanged, returns short 'unchanged:HASH' response."},"compact":{"type":"boolean","description":"Skip comment and blank lines (default: false)"},"raw":{"type":"boolean","description":"Byte-exact output: no line-number prefixes and no hash header, so the result can feed an exact-string edit (default: false)"},"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":["path"]}},
\\{"name":"codedb_edit","description":"Fallback editor — prefer your own native file-editing tool. codedb is a context/navigation tool, not an editor; reach for codedb_edit only when no native edit capability is available. When you do edit through codedb, op=str_replace with old_string/new_string is safest (old_string must match exactly once) — it cannot mis-target surrounding lines the way a range replace can. Also supports line ops: replace (range), insert (after line), delete (range), and create (author a new file from content). The result includes a syntax-health warning if the edit unbalances delimiters or drops a still-used import — heed it and re-read before continuing. Pass if_hash from the latest codedb_read to reject stale-line edits. Set dry_run=true for a diff preview.","inputSchema":{"type":"object","properties":{"path":{"type":"string","description":"File path to edit"},"op":{"type":"string","enum":["str_replace","replace","insert","delete","create"],"description":"Edit operation. str_replace=anchored (old_string/new_string); replace/delete use range; insert uses after; create=author a NEW file from content (errors if the path already exists)."},"content":{"type":"string","description":"New content (for replace/insert/create)"},"old_string":{"type":"string","description":"For op=str_replace: exact text to find; must occur exactly once in the file."},"new_string":{"type":"string","description":"For op=str_replace: replacement text for old_string."},"range_start":{"type":"integer","description":"Start line number (for replace/delete, 1-indexed)"},"range_end":{"type":"integer","description":"End line number (for replace/delete, 1-indexed)"},"after":{"type":"integer","description":"Insert after this line number (for insert)"},"if_hash":{"type":"string","description":"Hex hash from codedb_read's 'hash:' line. 
Edit is rejected with HashMismatch if the file has changed since."},"dry_run":{"type":"boolean","description":"If true, return a diff preview without writing. Disk and store are untouched. Default: false."}},"required":["path","op"]}},
\\{"name":"codedb_changes","description":"Direct way to see WHAT changed since a point in time, instead of re-scanning the tree. Files changed since a given sequence number. Pair with codedb_status (which reports the current sequence number) to poll for updates.","inputSchema":{"type":"object","properties":{"since":{"type":"integer","description":"Sequence number to get changes since (default: 0)"}},"required":[]}},
\\{"name":"codedb_status","description":"Current indexed-file count, sequence number, and scan phase.","inputSchema":{"type":"object","properties":{"project":{"type":"string","description":"Optional absolute path to a different project (must have codedb.snapshot)"}},"required":[]}},
Expand Down Expand Up @@ -2956,6 +2956,9 @@ fn handleRead(io: std.Io, alloc: std.mem.Allocator, args: *const std.json.Object
const line_start_raw = getInt(args, "line_start");
const line_end_raw = getInt(args, "line_end");
const compact = getBool(args, "compact");
// #632: byte-exact ranged read — no line-number prefixes, no hash header — so
// the output can feed an exact-string editor instead of forcing a native read.
const raw = getBool(args, "raw");

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Honor raw mode on cached reads

When the requested file is already in Explorer.contents (the normal post-scan MCP path, since indexed files are stored in the content cache), this new raw flag is ignored because renderCachedRead returns before the raw-aware rendering below and ReadRenderOptions has no raw field. In that scenario codedb_read with raw:true still emits the hash: header and N | line prefixes, so the byte-exact mode only works on the disk fallback path; the added tests miss this because they never index the file before reading it.

Useful? React with 👍 / 👎.

const has_range = line_start_raw != null or line_end_raw != null;

// Bug 6: validate line range explicitly. Pre-fix: invalid ranges silently
Expand Down Expand Up @@ -3038,22 +3041,27 @@ fn handleRead(io: std.Io, alloc: std.mem.Allocator, args: *const std.json.Object
}
}

// Always prepend hash
const w = cio.listWriter(out, alloc);
w.print("hash:{s}\n", .{hash_str}) catch {};
// Prepend a content-hash ETag header — but NOT in raw mode (#632), where the
// caller wants byte-exact bytes it can feed to an exact-string edit.
if (!raw) {
const w = cio.listWriter(out, alloc);
w.print("hash:{s}\n", .{hash_str}) catch {};
}

if (has_range or compact) {
const start: u32 = if (line_start_raw) |n| @intCast(@min(@max(1, n), std.math.maxInt(u32))) else 1;
const end: u32 = if (line_end_raw) |n| @intCast(@min(@max(1, n), std.math.maxInt(u32))) else std.math.maxInt(u32);
const lang = explore_mod.detectLanguage(path);
const extracted = explore_mod.extractLines(content, start, end, true, compact, lang, alloc) catch {
const extracted = explore_mod.extractLines(content, start, end, !raw, compact, lang, alloc) catch {

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Preserve EOF without a final newline

For raw ranged reads that end at a file without a trailing newline, this still goes through extractLines, whose unnumbered path reprints every selected line with an added \n; for example, contents alpha\nbeta with line_start=1,line_end=2,raw=true returns alpha\nbeta\n. That extra byte makes the advertised byte-exact output unusable for exact-string edits at EOF, so raw mode should slice the original bytes for the requested range instead of formatting lines.

Useful? React with 👍 / 👎.

out.appendSlice(alloc, "error: line extraction failed") catch {};
return;
};
defer alloc.free(extracted);
out.appendSlice(alloc, extracted) catch {};
} else {
if (Explorer.fullFileReadHint(content)) |hint| out.appendSlice(alloc, hint) catch {};
if (!raw) {
if (Explorer.fullFileReadHint(content)) |hint| out.appendSlice(alloc, hint) catch {};
}
out.appendSlice(alloc, content) catch {};
}
}
Expand Down
139 changes: 139 additions & 0 deletions src/test_mcp.zig
Original file line number Diff line number Diff line change
Expand Up @@ -2523,3 +2523,142 @@ test "issue-624: convergence nudge is suppressed for format=json" {
try testing.expect(MCP.convergenceNudge(warn - 1, false) == null);
try testing.expect(MCP.convergenceNudge(warn - 1, true) == null);
}

test "issue-632: codedb_read raw mode returns byte-exact range without line-number prefixes" {
    // #632: a ranged codedb_read emits line-number-prefixed output (handleRead
    // hardcodes extractLines line_numbers=true), so its bytes are NOT a verbatim
    // copy of the source. Agents therefore can't feed it to an exact-string
    // editor and fall back to a native read for the pre-edit span — defeating
    // codedb on the read path (see justrach/codegraff#66). A `raw` mode should
    // return the exact lines so codedb can serve read+edit, not just locate.
    //
    // NOTE(review): the fixture file is written to disk but never indexed into
    // the explorer, so this presumably exercises only the read-from-disk path;
    // a cached-content read path is not covered here — confirm and extend.

    // Fixture: a throwaway directory holding a three-line file.
    var tmp_dir = testing.tmpDir(.{});
    defer tmp_dir.cleanup();

    var dir_buf: [std.fs.max_path_bytes]u8 = undefined;
    const dir_path_len = try tmp_dir.dir.realPathFile(io, ".", &dir_buf);
    const dir_path = dir_buf[0..dir_path_len];

    const rel = "small.txt";
    const full = try std.fmt.allocPrint(testing.allocator, "{s}/{s}", .{ dir_path, rel });
    defer testing.allocator.free(full);
    {
        const f = try std.Io.Dir.cwd().createFile(io, full, .{ .truncate = true });
        defer f.close(io);
        try f.writePositionalAll(io, "alpha\nbeta\ngamma\n", 0);
    }

    // Collaborators handed to the dispatcher, all rooted at the temp directory.
    var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
    defer explorer.deinit();
    explorer.setRoot(io, dir_path);
    var store = Store.init(testing.allocator);
    defer store.deinit();
    var agents = AgentRegistry.init(testing.allocator);
    defer agents.deinit();
    _ = try agents.register("__filesystem__");

    var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, dir_path, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
    defer bench_ctx.deinit();

    // raw=true + a line range: expect the exact source bytes for lines 1-2,
    // with NO "N | " line-number prefixes (so it can feed an exact-match edit).
    const args_json = try std.fmt.allocPrint(testing.allocator, "{{\"path\":\"{s}\",\"line_start\":1,\"line_end\":2,\"raw\":true}}", .{rel});
    defer testing.allocator.free(args_json);
    const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{});
    defer parsed.deinit();

    var out: std.ArrayList(u8) = .empty;
    defer out.deinit(testing.allocator);
    bench_ctx.runDispatch(io, testing.allocator, .codedb_read, &parsed.value.object, &out, &store, &explorer, &agents);

    // The exact source lines must be present verbatim...
    try testing.expect(std.mem.indexOf(u8, out.items, "alpha\nbeta") != null);
    // ...and there must be no line-number prefix separator ("N | "), which would
    // make the output non-byte-exact and unusable for an exact-string edit.
    try testing.expect(std.mem.indexOf(u8, out.items, " | ") == null);
    // Raw output must also omit the "hash:" header line; any prepended header
    // breaks a verbatim copy just as a line-number prefix does.
    try testing.expect(std.mem.indexOf(u8, out.items, "hash:") == null);
    // The requested range is lines 1-2, so line 3 must not leak into the result.
    // Without this, a raw read that ignored the range would still pass.
    try testing.expect(std.mem.indexOf(u8, out.items, "gamma") == null);
}

test "issue-633: `index` is a recognized command (not a usage/unknown error)" {
    // `codedb <root> index` scanned + persisted (the cold-load path keys on
    // cmd=="index") but then fell through the dispatch with no `index` branch →
    // "unknown command: index" + exit 1; and `codedb index` (no root) was a
    // usage error because isCommand() never listed it. `index` must parse as a
    // first-class command.

    // `codedb index` (no explicit root) must parse as cmd=index, root=".".
    const p = main_mod.parsePositional(&[_][]const u8{ "codedb", "index" });
    try testing.expect(!p.usage_exit);
    try testing.expectEqualStrings("index", p.cmd);
    try testing.expectEqualStrings(".", p.root);

    // `codedb <root> index` (explicit root) also resolves cmd=index, and — like
    // the no-root form above — must not be flagged as a usage error. Checking
    // the flag here keeps both forms held to the same contract.
    const p2 = main_mod.parsePositional(&[_][]const u8{ "codedb", "/proj", "index" });
    try testing.expect(!p2.usage_exit);
    try testing.expectEqualStrings("index", p2.cmd);
    try testing.expectEqualStrings("/proj", p2.root);
}

test "issue-632: codedb_read raw mode coverage — full-file byte-exact, default unchanged" {
    // Broader coverage for #632: (a) raw full-file read is byte-exact (no hash
    // header, no line-number prefix, no full-file hint); (b) the default ranged
    // read still has BOTH the hash header and the "N | " prefix (regression
    // guard); (c) a raw ranged read drops both.
    //
    // NOTE(review): the fixture file is never indexed into the explorer before
    // being read, so these cases presumably cover only the read-from-disk path
    // — confirm and add a post-index variant if a cached path exists.

    // Fixture: a throwaway directory holding a three-line file.
    var tmp_dir = testing.tmpDir(.{});
    defer tmp_dir.cleanup();

    var dir_buf: [std.fs.max_path_bytes]u8 = undefined;
    const dir_path_len = try tmp_dir.dir.realPathFile(io, ".", &dir_buf);
    const dir_path = dir_buf[0..dir_path_len];

    const rel = "small.txt";
    const content = "alpha\nbeta\ngamma\n";
    const full = try std.fmt.allocPrint(testing.allocator, "{s}/{s}", .{ dir_path, rel });
    defer testing.allocator.free(full);
    {
        const f = try std.Io.Dir.cwd().createFile(io, full, .{ .truncate = true });
        defer f.close(io);
        try f.writePositionalAll(io, content, 0);
    }

    // Collaborators handed to the dispatcher, all rooted at the temp directory.
    var explorer = Explorer.init(testing.allocator, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
    defer explorer.deinit();
    explorer.setRoot(io, dir_path);
    var store = Store.init(testing.allocator);
    defer store.deinit();
    var agents = AgentRegistry.init(testing.allocator);
    defer agents.deinit();
    _ = try agents.register("__filesystem__");

    var bench_ctx = mcp_mod.BenchContext.init(testing.allocator, dir_path, Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);
    defer bench_ctx.deinit();

    // Helper: parse a JSON argument object and dispatch one codedb_read call,
    // appending the tool output to `out`. Only JSON parsing can fail here.
    const Run = struct {
        fn call(ctx: *mcp_mod.BenchContext, st: *Store, ex: *Explorer, ag: *AgentRegistry, args_json: []const u8, out: *std.ArrayList(u8)) !void {
            const parsed = try std.json.parseFromSlice(std.json.Value, testing.allocator, args_json, .{});
            defer parsed.deinit();
            ctx.runDispatch(io, testing.allocator, .codedb_read, &parsed.value.object, out, st, ex, ag);
        }
    };

    // (a) raw full-file read → byte-exact copy of the source, nothing prepended.
    {
        var out: std.ArrayList(u8) = .empty;
        defer out.deinit(testing.allocator);
        try Run.call(&bench_ctx, &store, &explorer, &agents, "{\"path\":\"small.txt\",\"raw\":true}", &out);
        try testing.expectEqualStrings(content, out.items);
    }
    // (b) default ranged read → unchanged: hash header present AND "N | " prefix.
    {
        var out: std.ArrayList(u8) = .empty;
        defer out.deinit(testing.allocator);
        try Run.call(&bench_ctx, &store, &explorer, &agents, "{\"path\":\"small.txt\",\"line_start\":1,\"line_end\":2}", &out);
        try testing.expect(std.mem.indexOf(u8, out.items, "hash:") != null);
        try testing.expect(std.mem.indexOf(u8, out.items, " | ") != null);
        // The range is lines 1-2, so line 3 must stay out of the default output too.
        try testing.expect(std.mem.indexOf(u8, out.items, "gamma") == null);
    }
    // (c) raw ranged read → exact line, no hash header, no line-number prefix.
    {
        var out: std.ArrayList(u8) = .empty;
        defer out.deinit(testing.allocator);
        try Run.call(&bench_ctx, &store, &explorer, &agents, "{\"path\":\"small.txt\",\"line_start\":2,\"line_end\":2,\"raw\":true}", &out);
        try testing.expect(std.mem.indexOf(u8, out.items, "beta") != null);
        try testing.expect(std.mem.indexOf(u8, out.items, " | ") == null);
        try testing.expect(std.mem.indexOf(u8, out.items, "hash:") == null);
        // Only line 2 was requested: neither neighbour may appear, otherwise a
        // raw read that ignored the range would still satisfy this case.
        try testing.expect(std.mem.indexOf(u8, out.items, "alpha") == null);
        try testing.expect(std.mem.indexOf(u8, out.items, "gamma") == null);
    }
}
Loading