From ca5be1e94992d9b8b24fa1c2d12b4c1b5e591bc5 Mon Sep 17 00:00:00 2001 From: justrach <54503978+justrach@users.noreply.github.com> Date: Fri, 19 Jun 2026 11:37:11 +0800 Subject: [PATCH 1/2] fix(mcp): accept absolute paths inside the project root (#629) isPathSafe rejected every absolute path with a blanket leading-`/` check, so codedb_read / codedb_edit returned "path traversal not allowed" for a file that lives *inside* the indexed project. Agents hold absolute paths, hit the terse error, and abandon codedb for bash (the #626 trajectory shows codedb!,codedb! then seven bash calls). Add projectRelPath(path, root): a safe relative path passes through; an absolute path is accepted only when it is exact-or-child of the project root and is rewritten to its relative form; out-of-root absolutes, `..` traversal, nulls and backslashes stay rejected. handleRead/handleEdit resolve the path through it using cwd's realpath as the root. isExactOrChild is now pub for reuse. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/mcp.zig | 40 ++++++++++++++++++++++++++++++++++------ src/root_policy.zig | 2 +- src/test_mcp.zig | 19 +++++++++++++++++++ 3 files changed, 54 insertions(+), 7 deletions(-) diff --git a/src/mcp.zig b/src/mcp.zig index 8a9c81bb..10b446e0 100644 --- a/src/mcp.zig +++ b/src/mcp.zig @@ -2798,15 +2798,17 @@ fn handleDepsPathOnly(alloc: std.mem.Allocator, path: []const u8, out: *std.Arra } fn handleRead(io: std.Io, alloc: std.mem.Allocator, args: *const std.json.ObjectMap, out: *std.ArrayList(u8), explorer: *Explorer) void { - const path = getStr(args, "path") orelse { + const path_arg = getStr(args, "path") orelse { out.appendSlice(alloc, "error: missing 'path' argument") catch {}; appendBundleArgKeysDiagnostic(alloc, out, args); return; }; - if (!isPathSafe(path)) { + var root_buf: [std.fs.max_path_bytes]u8 = undefined; + const root: []const u8 = if (std.Io.Dir.cwd().realPathFile(io, ".", &root_buf)) |n| root_buf[0..n] else |_| ""; + const path = projectRelPath(path_arg, root) orelse { out.appendSlice(alloc, "error: path traversal not allowed") catch {}; return; - } + }; if (watcher.isSensitivePath(path)) { out.appendSlice(alloc, "error: access to sensitive file blocked") catch {}; return; @@ -2918,14 +2920,16 @@ fn handleRead(io: std.Io, alloc: std.mem.Allocator, args: *const std.json.Object } fn handleEdit(io: std.Io, alloc: std.mem.Allocator, args: *const std.json.ObjectMap, out: *std.ArrayList(u8), store: *Store, explorer: *Explorer, agents: *AgentRegistry, cache: *ProjectCache, edit_agent_id: u64) void { - const path = getStr(args, "path") orelse { + const path_arg = getStr(args, "path") orelse { out.appendSlice(alloc, "error: missing 'path'") catch {}; return; }; - if (!isPathSafe(path)) { + var root_buf: [std.fs.max_path_bytes]u8 = undefined; + const root: []const u8 = if (std.Io.Dir.cwd().realPathFile(io, ".", &root_buf)) |n| root_buf[0..n] else |_| ""; + const path = projectRelPath(path_arg, root) orelse { out.appendSlice(alloc, "error: path traversal not allowed") catch {}; return; - } + }; if (watcher.isSensitivePath(path)) { out.appendSlice(alloc, "error: access to sensitive file blocked") catch {}; return; @@ -5034,6 +5038,30 @@ pub fn isPathSafe(path: []const u8) bool { return true; } +/// Map a read/edit `path` argument to a project-relative path that is safe to +/// resolve, given the project's absolute `root`. A safe relative path passes +/// through unchanged; an absolute path is accepted only when it lives inside +/// `root` and is rewritten to its relative form. Everything else (out-of-root +/// absolutes, `..` traversal, null bytes, backslashes) returns null. +/// +/// Without this, isPathSafe rejects every absolute path as traversal, so agents +/// that pass absolute paths get "path traversal not allowed" and abandon codedb +/// for bash (issue #629). +pub fn projectRelPath(path: []const u8, root: []const u8) ?[]const u8 { + // Already a safe relative path — pass through. + if (isPathSafe(path)) return path; + // Only absolute paths get the in-root rescue; anything else stays rejected. + if (path.len == 0 or path[0] != '/') return null; + if (root.len == 0) return null; + if (!root_policy.isExactOrChild(path, root)) return null; + var rel = path[root.len..]; + while (rel.len > 0 and rel[0] == '/') rel = rel[1..]; + if (rel.len == 0) return null; // the root directory itself, not a file + // Re-validate the stripped remainder (blocks `/root/../escape`, nulls, etc). + if (!isPathSafe(rel)) return null; + return rel; +} + fn writeResult(alloc: std.mem.Allocator, stdout: cio.File, id: ?std.json.Value, result: []const u8) void { var buf: std.ArrayList(u8) = .empty; defer buf.deinit(alloc); diff --git a/src/root_policy.zig b/src/root_policy.zig index 3abe1403..698a8795 100644 --- a/src/root_policy.zig +++ b/src/root_policy.zig @@ -1,7 +1,7 @@ const std = @import("std"); const cio = @import("cio.zig"); -fn isExactOrChild(path: []const u8, prefix: []const u8) bool { +pub fn isExactOrChild(path: []const u8, prefix: []const u8) bool { if (!std.mem.startsWith(u8, path, prefix)) return false; return path.len == prefix.len or path[prefix.len] == '/'; } diff --git a/src/test_mcp.zig b/src/test_mcp.zig index 7a851b3d..b70beebc 100644 --- a/src/test_mcp.zig +++ b/src/test_mcp.zig @@ -149,6 +149,25 @@ test "issue-93: isPathSafe blocks traversal" { try testing.expect(MCP.isPathSafe("README.md")); } +test "issue-629: projectRelPath accepts absolute paths inside the project root" { + const MCP = @import("mcp.zig"); + const root = "/Users/dev/project"; + + // Regression: codedb returned "path traversal not allowed" for ANY absolute + // path, so agents that hold absolute paths abandoned codedb for bash (see + // the #626 trajectory: codedb!,codedb! then seven bash calls). An absolute + // path inside the project root must resolve to its project-relative form. + try testing.expectEqualStrings("src/main.zig", MCP.projectRelPath("/Users/dev/project/src/main.zig", root).?); + // A safe relative path passes through unchanged. + try testing.expectEqualStrings("src/main.zig", MCP.projectRelPath("src/main.zig", root).?); + + // Security must still hold: + try testing.expect(MCP.projectRelPath("/etc/passwd", root) == null); // outside the root + try testing.expect(MCP.projectRelPath("/Users/dev/project/../secret", root) == null); // escapes via .. + try testing.expect(MCP.projectRelPath("../../../etc/passwd", root) == null); // relative traversal + try testing.expect(MCP.projectRelPath("/Users/dev/projectile/secret", root) == null); // sibling prefix, not a child +} + test "auto-update: shouldRunAutoUpdate gates correctly" { const day_ms: i64 = 24 * 60 * 60 * 1000; From 31233f95ff768fb74000a0d0992702ed66016aad Mon Sep 17 00:00:00 2001 From: justrach <54503978+justrach@users.noreply.github.com> Date: Fri, 19 Jun 2026 11:37:11 +0800 Subject: [PATCH 2/2] fix(index): alternation with a no-trigram branch must scan all (#628) In `mode=regex`, a top-level alternation like `xy|createGateway` (or `.*|foo`) where one branch yields no trigrams was prefiltered down to only the trigram-bearing branches' candidate files. Files matching the no-trigram branch were silently dropped, so the search returned 0 matches and read as an authoritative "not found". decomposeRegex now detects any branch that produces zero trigrams and returns an unconstrained query, so candidatesRegex returns null and the search scans every file. Alternations where every branch has trigrams still prefilter as before. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/index.zig | 24 ++++++++++++++++++++++++ src/test_index.zig | 22 ++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/src/index.zig b/src/index.zig index 330dfdca..35f95a9e 100644 --- a/src/index.zig +++ b/src/index.zig @@ -2757,14 +2757,24 @@ pub fn decomposeRegex(pattern: []const u8, allocator: std.mem.Allocator) !RegexQ if (top_pipes.items.len > 0) { // Top-level alternation: merge all branch trigrams into a single OR group. // A file matching ANY branch's trigrams is a valid candidate. + // + // Soundness (#628): if ANY branch yields no trigrams (too short, or built + // from regex metachars like `.`/`.*`), that branch can match a line that + // contains none of the other branches' trigrams. Constraining candidates + // to only the trigram-bearing branches would then silently drop real + // matches — exactly the `a|b` "returns 0 and looks authoritative" bug. + // When that happens we cannot prefilter at all, so fall back to an + // unconstrained query (candidatesRegex returns null -> scan everything). var all_tris: std.ArrayList(Trigram) = .empty; errdefer all_tris.deinit(allocator); + var any_branch_empty = false; var start: usize = 0; for (top_pipes.items) |pipe_pos| { const branch = pattern[start..pipe_pos]; const branch_tris = try extractLiteralTrigrams(branch, allocator); defer allocator.free(branch_tris); + if (branch_tris.len == 0) any_branch_empty = true; for (branch_tris) |tri| { try all_tris.append(allocator, tri); } @@ -2774,10 +2784,24 @@ pub fn decomposeRegex(pattern: []const u8, allocator: std.mem.Allocator) !RegexQ const last_branch = pattern[start..]; const last_tris = try extractLiteralTrigrams(last_branch, allocator); defer allocator.free(last_tris); + if (last_tris.len == 0) any_branch_empty = true; for (last_tris) |tri| { try all_tris.append(allocator, tri); } + if (any_branch_empty) { + // Un-prefilterable alternation: scan everything rather than miss matches. + all_tris.deinit(allocator); + all_tris = .empty; + const e_and = try allocator.alloc(Trigram, 0); + const e_or = try allocator.alloc([]Trigram, 0); + return RegexQuery{ + .and_trigrams = e_and, + .or_groups = e_or, + .allocator = allocator, + }; + } + const empty_and = try allocator.alloc(Trigram, 0); var or_groups: std.ArrayList([]Trigram) = .empty; errdefer or_groups.deinit(allocator); diff --git a/src/test_index.zig b/src/test_index.zig index 8ae6ea86..6b606921 100644 --- a/src/test_index.zig +++ b/src/test_index.zig @@ -1273,6 +1273,28 @@ test "decomposeRegex: alternation creates OR groups" { try testing.expectEqual(@as(usize, 2), q.or_groups[0].len); } +test "issue-628: alternation with a no-trigram branch falls back to scan-all" { + // "xy" is too short to yield a trigram. Pre-fix, decomposeRegex constrained + // candidate files to "createGateway"'s trigrams, dropping any file that + // matched only the "xy" branch — a silent 0-match false negative. The query + // must instead be fully unconstrained so the search scans every file. + var q = try decomposeRegex("xy|createGateway", testing.allocator); + defer q.deinit(); + try testing.expectEqual(@as(usize, 0), q.and_trigrams.len); + try testing.expectEqual(@as(usize, 0), q.or_groups.len); + + // A metachar-only branch (matches anywhere) must also force scan-all. + var q2 = try decomposeRegex(".*|createGateway", testing.allocator); + defer q2.deinit(); + try testing.expectEqual(@as(usize, 0), q2.and_trigrams.len); + try testing.expectEqual(@as(usize, 0), q2.or_groups.len); + + // Sanity: when every branch yields trigrams, prefiltering is still used. + var q3 = try decomposeRegex("generateText|streamText", testing.allocator); + defer q3.deinit(); + try testing.expectEqual(@as(usize, 1), q3.or_groups.len); +} + test "decomposeRegex: quantifier removes preceding char" { var q = try decomposeRegex("hel+o", testing.allocator);