diff --git a/src/index.zig b/src/index.zig index 330dfdc..35f95a9 100644 --- a/src/index.zig +++ b/src/index.zig @@ -2757,14 +2757,24 @@ pub fn decomposeRegex(pattern: []const u8, allocator: std.mem.Allocator) !RegexQ if (top_pipes.items.len > 0) { // Top-level alternation: merge all branch trigrams into a single OR group. // A file matching ANY branch's trigrams is a valid candidate. + // + // Soundness (#628): if ANY branch yields no trigrams (too short, or built + // from regex metachars like `.`/`.*`), that branch can match a line that + // contains none of the other branches' trigrams. Constraining candidates + // to only the trigram-bearing branches would then silently drop real + // matches — exactly the `a|b` "returns 0 and looks authoritative" bug. + // When that happens we cannot prefilter at all, so fall back to an + // unconstrained query (candidatesRegex returns null -> scan everything). var all_tris: std.ArrayList(Trigram) = .empty; errdefer all_tris.deinit(allocator); + var any_branch_empty = false; var start: usize = 0; for (top_pipes.items) |pipe_pos| { const branch = pattern[start..pipe_pos]; const branch_tris = try extractLiteralTrigrams(branch, allocator); defer allocator.free(branch_tris); + if (branch_tris.len == 0) any_branch_empty = true; for (branch_tris) |tri| { try all_tris.append(allocator, tri); } @@ -2774,10 +2784,24 @@ pub fn decomposeRegex(pattern: []const u8, allocator: std.mem.Allocator) !RegexQ const last_branch = pattern[start..]; const last_tris = try extractLiteralTrigrams(last_branch, allocator); defer allocator.free(last_tris); + if (last_tris.len == 0) any_branch_empty = true; for (last_tris) |tri| { try all_tris.append(allocator, tri); } + if (any_branch_empty) { + // Un-prefilterable alternation: scan everything rather than miss matches. + all_tris.deinit(allocator); + all_tris = .empty; + const e_and = try allocator.alloc(Trigram, 0); + const e_or = try allocator.alloc([]Trigram, 0); + return RegexQuery{ + .and_trigrams = e_and, + .or_groups = e_or, + .allocator = allocator, + }; + } + const empty_and = try allocator.alloc(Trigram, 0); var or_groups: std.ArrayList([]Trigram) = .empty; errdefer or_groups.deinit(allocator); diff --git a/src/mcp.zig b/src/mcp.zig index 8a9c81b..10b446e 100644 --- a/src/mcp.zig +++ b/src/mcp.zig @@ -2798,15 +2798,17 @@ fn handleDepsPathOnly(alloc: std.mem.Allocator, path: []const u8, out: *std.Arra } fn handleRead(io: std.Io, alloc: std.mem.Allocator, args: *const std.json.ObjectMap, out: *std.ArrayList(u8), explorer: *Explorer) void { - const path = getStr(args, "path") orelse { + const path_arg = getStr(args, "path") orelse { out.appendSlice(alloc, "error: missing 'path' argument") catch {}; appendBundleArgKeysDiagnostic(alloc, out, args); return; }; - if (!isPathSafe(path)) { + var root_buf: [std.fs.max_path_bytes]u8 = undefined; + const root: []const u8 = if (std.Io.Dir.cwd().realPathFile(io, ".", &root_buf)) |n| root_buf[0..n] else |_| ""; + const path = projectRelPath(path_arg, root) orelse { out.appendSlice(alloc, "error: path traversal not allowed") catch {}; return; - } + }; if (watcher.isSensitivePath(path)) { out.appendSlice(alloc, "error: access to sensitive file blocked") catch {}; return; @@ -2918,14 +2920,16 @@ fn handleRead(io: std.Io, alloc: std.mem.Allocator, args: *const std.json.Object } fn handleEdit(io: std.Io, alloc: std.mem.Allocator, args: *const std.json.ObjectMap, out: *std.ArrayList(u8), store: *Store, explorer: *Explorer, agents: *AgentRegistry, cache: *ProjectCache, edit_agent_id: u64) void { - const path = getStr(args, "path") orelse { + const path_arg = getStr(args, "path") orelse { out.appendSlice(alloc, "error: missing 'path'") catch {}; return; }; - if (!isPathSafe(path)) { + var root_buf: [std.fs.max_path_bytes]u8 = undefined; + const root: []const u8 = if (std.Io.Dir.cwd().realPathFile(io, ".", &root_buf)) |n| root_buf[0..n] else |_| ""; + const path = projectRelPath(path_arg, root) orelse { out.appendSlice(alloc, "error: path traversal not allowed") catch {}; return; - } + }; if (watcher.isSensitivePath(path)) { out.appendSlice(alloc, "error: access to sensitive file blocked") catch {}; return; @@ -5034,6 +5038,30 @@ pub fn isPathSafe(path: []const u8) bool { return true; } +/// Map a read/edit `path` argument to a project-relative path that is safe to +/// resolve, given the project's absolute `root`. A safe relative path passes +/// through unchanged; an absolute path is accepted only when it lives inside +/// `root` and is rewritten to its relative form. Everything else (out-of-root +/// absolutes, `..` traversal, null bytes, backslashes) returns null. +/// +/// Without this, isPathSafe rejects every absolute path as traversal, so agents +/// that pass absolute paths get "path traversal not allowed" and abandon codedb +/// for bash (issue #629). +pub fn projectRelPath(path: []const u8, root: []const u8) ?[]const u8 { + // Already a safe relative path — pass through. + if (isPathSafe(path)) return path; + // Only absolute paths get the in-root rescue; anything else stays rejected. + if (path.len == 0 or path[0] != '/') return null; + if (root.len == 0) return null; + if (!root_policy.isExactOrChild(path, root)) return null; + var rel = path[root.len..]; + while (rel.len > 0 and rel[0] == '/') rel = rel[1..]; + if (rel.len == 0) return null; // the root directory itself, not a file + // Re-validate the stripped remainder (blocks `/root/../escape`, nulls, etc). + if (!isPathSafe(rel)) return null; + return rel; +} + fn writeResult(alloc: std.mem.Allocator, stdout: cio.File, id: ?std.json.Value, result: []const u8) void { var buf: std.ArrayList(u8) = .empty; defer buf.deinit(alloc); diff --git a/src/root_policy.zig b/src/root_policy.zig index 3abe140..698a879 100644 --- a/src/root_policy.zig +++ b/src/root_policy.zig @@ -1,7 +1,7 @@ const std = @import("std"); const cio = @import("cio.zig"); -fn isExactOrChild(path: []const u8, prefix: []const u8) bool { +pub fn isExactOrChild(path: []const u8, prefix: []const u8) bool { if (!std.mem.startsWith(u8, path, prefix)) return false; return path.len == prefix.len or path[prefix.len] == '/'; } diff --git a/src/test_index.zig b/src/test_index.zig index 8ae6ea8..6b60692 100644 --- a/src/test_index.zig +++ b/src/test_index.zig @@ -1273,6 +1273,28 @@ test "decomposeRegex: alternation creates OR groups" { try testing.expectEqual(@as(usize, 2), q.or_groups[0].len); } +test "issue-628: alternation with a no-trigram branch falls back to scan-all" { + // "xy" is too short to yield a trigram. Pre-fix, decomposeRegex constrained + // candidate files to "createGateway"'s trigrams, dropping any file that + // matched only the "xy" branch — a silent 0-match false negative. The query + // must instead be fully unconstrained so the search scans every file. + var q = try decomposeRegex("xy|createGateway", testing.allocator); + defer q.deinit(); + try testing.expectEqual(@as(usize, 0), q.and_trigrams.len); + try testing.expectEqual(@as(usize, 0), q.or_groups.len); + + // A metachar-only branch (matches anywhere) must also force scan-all. + var q2 = try decomposeRegex(".*|createGateway", testing.allocator); + defer q2.deinit(); + try testing.expectEqual(@as(usize, 0), q2.and_trigrams.len); + try testing.expectEqual(@as(usize, 0), q2.or_groups.len); + + // Sanity: when every branch yields trigrams, prefiltering is still used. + var q3 = try decomposeRegex("generateText|streamText", testing.allocator); + defer q3.deinit(); + try testing.expectEqual(@as(usize, 1), q3.or_groups.len); +} + test "decomposeRegex: quantifier removes preceding char" { var q = try decomposeRegex("hel+o", testing.allocator); diff --git a/src/test_mcp.zig b/src/test_mcp.zig index 7a851b3..b70beeb 100644 --- a/src/test_mcp.zig +++ b/src/test_mcp.zig @@ -149,6 +149,25 @@ test "issue-93: isPathSafe blocks traversal" { try testing.expect(MCP.isPathSafe("README.md")); } +test "issue-629: projectRelPath accepts absolute paths inside the project root" { + const MCP = @import("mcp.zig"); + const root = "/Users/dev/project"; + + // Regression: codedb returned "path traversal not allowed" for ANY absolute + // path, so agents that hold absolute paths abandoned codedb for bash (see + // the #626 trajectory: codedb!,codedb! then seven bash calls). An absolute + // path inside the project root must resolve to its project-relative form. + try testing.expectEqualStrings("src/main.zig", MCP.projectRelPath("/Users/dev/project/src/main.zig", root).?); + // A safe relative path passes through unchanged. + try testing.expectEqualStrings("src/main.zig", MCP.projectRelPath("src/main.zig", root).?); + + // Security must still hold: + try testing.expect(MCP.projectRelPath("/etc/passwd", root) == null); // outside the root + try testing.expect(MCP.projectRelPath("/Users/dev/project/../secret", root) == null); // escapes via .. + try testing.expect(MCP.projectRelPath("../../../etc/passwd", root) == null); // relative traversal + try testing.expect(MCP.projectRelPath("/Users/dev/projectile/secret", root) == null); // sibling prefix, not a child +} + test "auto-update: shouldRunAutoUpdate gates correctly" { const day_ms: i64 = 24 * 60 * 60 * 1000;