Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ the query hot path.

Beyond the graph, this cut also lands a **warm CLI daemon** (near-MCP latency
from the plain `codedb` CLI), a **faster fuzzy `find`**, **hardened CLI** parsing
and exit codes (#529), and **ReScript** `.res`/`.resi` support (#532).
and exit codes (#529), **ReScript** `.res`/`.resi` support (#532), and **OCaml** `.ml`/`.mli` support.

### Graph-aware ranking (call-graph centrality)

Expand Down
202 changes: 202 additions & 0 deletions src/explore.zig
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ pub const Language = enum(u8) {
mlir,
tablegen,
rescript,
ocaml,
};

pub fn detectLanguage(path: []const u8) Language {
Expand Down Expand Up @@ -232,6 +233,7 @@ pub fn detectLanguage(path: []const u8) Language {
if (std.mem.endsWith(u8, path, ".mlir")) return .mlir;
if (std.mem.endsWith(u8, path, ".td")) return .tablegen;
if (std.mem.endsWith(u8, path, ".res") or std.mem.endsWith(u8, path, ".resi")) return .rescript;
if (std.mem.endsWith(u8, path, ".ml") or std.mem.endsWith(u8, path, ".mli")) return .ocaml;
return .unknown;
}

Expand Down Expand Up @@ -1838,6 +1840,7 @@ pub const Explorer = struct {
var php_state: PhpParseState = .{};
var in_py_docstring = false;
var in_block_comment = false;
var ocaml_comment_depth: u32 = 0;
var in_go_import_block = false;
var c_brace_depth: u32 = 0;
var lines = std.mem.splitScalar(u8, content, '\n');
Expand Down Expand Up @@ -1908,6 +1911,82 @@ pub const Explorer = struct {
}
}

// OCaml nested (* ... *) block comments. Comment delimiters are ignored
// inside string and char literals so that `let pattern = "(*"` does not
// throw the scanner out of sync (PR review).
if (outline.language == .ocaml) {
if (ocaml_comment_depth > 0) {
// We're inside a multi-line comment — scan for closers
var i: usize = 0;
var in_string: u8 = 0;
while (i + 1 < trimmed.len) : (i += 1) {
const c = trimmed[i];
if (in_string != 0) {
if (c == '\\' and i + 1 < trimmed.len) {
i += 1;
} else if (c == in_string) {
in_string = 0;
}
continue;
}
if (c == '"' or c == '\'') {
in_string = c;
continue;
}
if (std.mem.startsWith(u8, trimmed[i..], "(*")) {
ocaml_comment_depth += 1;
i += 1;
} else if (std.mem.startsWith(u8, trimmed[i..], "*)")) {
if (ocaml_comment_depth > 0) ocaml_comment_depth -= 1;
i += 1;
}
}
if (ocaml_comment_depth > 0) continue;
// Depth reached zero mid-line — keep text after the last *)
// Find the last *) in the line
if (std.mem.lastIndexOf(u8, trimmed, "*)")) |close_pos| {
const after = std.mem.trimStart(u8, trimmed[close_pos + 2 ..], " \t");
if (after.len == 0) continue;
trimmed = after;
} else continue;
} else {
// Not currently in a comment — scan for openers
var i: usize = 0;
var in_string: u8 = 0;
while (i + 1 < trimmed.len) : (i += 1) {
const c = trimmed[i];
if (in_string != 0) {
if (c == '\\' and i + 1 < trimmed.len) {
i += 1;
} else if (c == in_string) {
in_string = 0;
}
continue;
}
if (c == '"' or c == '\'') {
in_string = c;
continue;
}
if (std.mem.startsWith(u8, trimmed[i..], "(*")) {
ocaml_comment_depth += 1;
Comment on lines +1970 to +1971

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Skip OCaml comment markers inside strings

For valid OCaml such as let pattern = "(*", this scan treats the string contents as a real comment opener, leaves ocaml_comment_depth nonzero, and then skips all subsequent definitions until a later *). OCaml comment delimiters are ignored inside string/char literals, so files containing these characters in strings lose most of their outline; the comment scanner should ignore quoted literals before updating the depth.

Useful? React with 👍 / 👎.

i += 1;
} else if (std.mem.startsWith(u8, trimmed[i..], "*)")) {
if (ocaml_comment_depth > 0) ocaml_comment_depth -= 1;
i += 1;
}
}
if (ocaml_comment_depth > 0) {
// Comment opened but not closed on this line.
// Check if there's material before the first (*.
if (std.mem.indexOf(u8, trimmed, "(*")) |open_pos| {
const before = std.mem.trimEnd(u8, trimmed[0..open_pos], " \t");
if (before.len == 0) continue;
trimmed = before;
} else continue;
}
}
}

if (outline.language == .zig) {
try parser.parseZigLine(trimmed, line_num, &outline);
} else if (outline.language == .python) {
Expand Down Expand Up @@ -1967,6 +2046,8 @@ pub const Explorer = struct {
try parser.parseTableGenLine(trimmed, line_num, &outline);
} else if (outline.language == .rescript) {
try parser.parseRescriptLine(trimmed, line_num, &outline);
} else if (outline.language == .ocaml) {
try parser.parseOcamlLine(trimmed, line_num, &outline);
}

prev_line_trimmed = trimmed;
Expand Down Expand Up @@ -6489,6 +6570,87 @@ pub const Explorer = struct {
try appendOutlineSymbol(a, outline, name, kind, line_num, line);
}

/// Parse one OCaml (.ml / .mli) source line and append any symbol it declares to `outline`.
///
/// Leading attributes ([@@...], [@...], [@@@...]) are stripped first. The remaining text is
/// then tested against these keyword prefixes, in this order:
///   `open` / `include` → import (the module path is also passed to `appendImportPath`)
///   `module`           → struct_def; `module type` → interface_def
///   `type`             → type_alias
///   `external`         → function
///   `exception`        → enum_def
///   `val`              → function (.mli value signature)
///   `let` / `and`      → function when the line contains `->` or the text after the first
///                        `=` starts with `fun ` / `function`; otherwise constant
/// A line that matches none of these, or from which no name can be extracted, adds nothing.
/// Errors from `appendImportPath` / `appendOutlineSymbol` are propagated to the caller.
fn parseOcamlLine(self: *Explorer, line: []const u8, line_num: u32, outline: *FileOutline) !void {
const a = self.allocator;
const code = stripLeadingOcamlAttributes(line);
if (code.len == 0) return;

// `open M.N` — record the dotted module path as an import.
if (startsWith(code, "open ")) {
const name = ocamlModulePath(std.mem.trimStart(u8, code[5..], " \t"));
if (name.len > 0) {
try appendImportPath(a, outline, name);
try appendOutlineSymbol(a, outline, name, .import, line_num, null);
}
return;
}
// `include M.N` — handled exactly like `open`.
if (startsWith(code, "include ")) {
const name = ocamlModulePath(std.mem.trimStart(u8, code[8..], " \t"));
if (name.len > 0) {
try appendImportPath(a, outline, name);
try appendOutlineSymbol(a, outline, name, .import, line_num, null);
}
return;
}
// `module X` is a struct_def; `module type X` is an interface_def.
if (startsWith(code, "module ")) {
var rest = std.mem.trimStart(u8, code[7..], " \t");
var kind: SymbolKind = .struct_def;
if (startsWith(rest, "type ")) {
kind = .interface_def;
rest = std.mem.trimStart(u8, rest[5..], " \t");
}
const name = ocamlIdent(rest);
if (name.len > 0) try appendOutlineSymbol(a, outline, name, kind, line_num, line);
return;
}
// NOTE(review): a parameterized type such as `type 'a t = ...` yields no symbol here,
// because ocamlIdent returns "" when the text starts with `'`.
if (startsWith(code, "type ")) {
var rest = std.mem.trimStart(u8, code[5..], " \t");
// NOTE(review): OCaml's modifier on type definitions appears to be `nonrec`, not `rec` —
// confirm whether this branch is intended.
if (startsWith(rest, "rec ")) rest = std.mem.trimStart(u8, rest[4..], " \t");
const name = ocamlIdent(rest);
if (name.len > 0) try appendOutlineSymbol(a, outline, name, .type_alias, line_num, line);
return;
}
if (startsWith(code, "external ")) {
const name = ocamlIdent(std.mem.trimStart(u8, code[9..], " \t"));
if (name.len > 0) try appendOutlineSymbol(a, outline, name, .function, line_num, line);
return;
}
if (startsWith(code, "exception ")) {
const name = ocamlIdent(std.mem.trimStart(u8, code[10..], " \t"));
if (name.len > 0) try appendOutlineSymbol(a, outline, name, .enum_def, line_num, line);
return;
}
if (startsWith(code, "val ")) {
// .mli value signature
const name = ocamlIdent(std.mem.trimStart(u8, code[4..], " \t"));
if (name.len > 0) try appendOutlineSymbol(a, outline, name, .function, line_num, line);
return;
}
// Only `let` / `and` bindings remain. `rest` is assigned on both paths that continue past
// this chain; every other line returns before it is read.
var rest: []const u8 = undefined;
if (startsWith(code, "let ")) {
rest = std.mem.trimStart(u8, code[4..], " \t");
if (startsWith(rest, "rec ")) rest = std.mem.trimStart(u8, rest[4..], " \t");
} else if (startsWith(code, "and ")) {
rest = std.mem.trimStart(u8, code[4..], " \t");
} else return;
const name = ocamlIdent(rest);
if (name.len == 0) return;
// Heuristic: if the line contains `->` anywhere, or the text after the first `=` begins with
// `fun ` / `function`, treat the binding as a function; everything else is a constant.
// Curried definitions such as `let f x = ...` therefore come out as constants.
const kind: SymbolKind = if (std.mem.indexOf(u8, code, "->") != null) .function else blk: {
if (std.mem.indexOfScalar(u8, code, '=')) |eq_pos| {
const rhs = std.mem.trimStart(u8, code[eq_pos + 1 ..], " \t");
if (startsWith(rhs, "fun ") or startsWith(rhs, "function")) break :blk .function;
}
break :blk .constant;
Comment on lines +6644 to +6649

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Classify curried OCaml lets as functions

In OCaml, the common function form is let make name age = ... or let rec fib n = ..., but this heuristic only marks bindings as functions when the line contains -> or the RHS starts with fun/function. As a result, most ordinary OCaml functions are indexed as constants, which makes the new language support report misleading symbol kinds for normal .ml files; after parsing the binding name, check whether another identifier/pattern precedes the = before falling back to .constant.

Useful? React with 👍 / 👎.

};
try appendOutlineSymbol(a, outline, name, kind, line_num, line);
}

fn rebuildDepsFor(self: *Explorer, path: []const u8, outline: *FileOutline) !void {
var deps: std.ArrayList([]const u8) = .empty;
errdefer deps.deinit(self.allocator);
Expand Down Expand Up @@ -6910,6 +7072,7 @@ pub fn isCommentOrBlank(line: []const u8, language: Language) bool {
.fortran => std.mem.startsWith(u8, trimmed, "!"),
.llvm_ir => std.mem.startsWith(u8, trimmed, ";"),
.rescript => std.mem.startsWith(u8, trimmed, "//") or std.mem.startsWith(u8, trimmed, "/*") or std.mem.startsWith(u8, trimmed, "*"),
.ocaml => std.mem.startsWith(u8, trimmed, "(*"),
else => false,
};
}
Expand Down Expand Up @@ -7383,6 +7546,45 @@ fn stripLeadingResDecorators(line: []const u8) []const u8 {
return s;
}

/// True when `c` can begin an identifier as recognized by the OCaml outline
/// parser: an ASCII letter or an underscore.
inline fn ocamlIsIdentStart(c: u8) bool {
    return switch (c) {
        'a'...'z', 'A'...'Z', '_' => true,
        else => false,
    };
}
/// True when `c` can continue an identifier: anything `ocamlIsIdentStart`
/// accepts, plus ASCII digits and the prime character (`'`).
inline fn ocamlIsIdentChar(c: u8) bool {
    return switch (c) {
        '0'...'9', '\'' => true,
        else => ocamlIsIdentStart(c),
    };
}
/// Return the identifier at the very start of `s` — one start character
/// (letter or `_`) followed by any run of letters, digits, `_` or `'` — or ""
/// when `s` is empty or does not begin with a start character.
/// A non-empty result is a sub-slice of `s`.
fn ocamlIdent(s: []const u8) []const u8 {
    if (s.len == 0) return "";
    if (!ocamlIsIdentStart(s[0])) return "";
    // Stop at the first character that cannot continue an identifier.
    for (s[1..], 1..) |c, idx| {
        if (!ocamlIsIdentChar(c)) return s[0..idx];
    }
    return s;
}
/// Return the dotted module path (e.g. `Foo.Bar.Baz`) at the very start of
/// `s`, with any trailing dots removed, or "" when `s` is empty or does not
/// begin with an identifier start character.
/// A non-empty result is a sub-slice of `s`.
fn ocamlModulePath(s: []const u8) []const u8 {
    if (s.len == 0 or !ocamlIsIdentStart(s[0])) return "";
    // Extend over identifier characters and dots.
    var stop: usize = 1;
    while (stop < s.len) : (stop += 1) {
        const c = s[stop];
        if (c != '.' and !ocamlIsIdentChar(c)) break;
    }
    // The first character is never a dot, so trimming cannot empty the slice.
    return std.mem.trimEnd(u8, s[0..stop], ".");
}
/// Strip leading OCaml attributes (`[@...]`, `[@@...]`, `[@@@...]`) from `line`,
/// together with the spaces/tabs before and after each one, and return what
/// remains.
///
/// Square brackets nested inside an attribute payload are balanced, and
/// brackets inside a double-quoted string in the payload are ignored, so
/// `[@@deprecated "use [bar]"] let x = 1` yields `let x = 1`.
/// If an attribute is never closed, stripping stops and the text from that
/// attribute onward is returned unchanged.
/// The result is a sub-slice of `line`; nothing is allocated.
fn stripLeadingOcamlAttributes(line: []const u8) []const u8 {
    var rest = std.mem.trimStart(u8, line, " \t");
    // An attribute is `[` immediately followed by at least one `@`.
    while (rest.len >= 2 and rest[0] == '[' and rest[1] == '@') {
        var depth: usize = 1; // the opening `[` at index 0
        var in_string = false;
        var i: usize = 2;
        while (i < rest.len) : (i += 1) {
            const c = rest[i];
            if (in_string) {
                if (c == '\\') {
                    i += 1; // skip the escaped character
                } else if (c == '"') {
                    in_string = false;
                }
                continue;
            }
            switch (c) {
                '"' => in_string = true,
                '[' => depth += 1,
                ']' => {
                    depth -= 1;
                    // Leaves the scan loop with `i` on the closing bracket.
                    if (depth == 0) break;
                },
                else => {},
            }
        }
        if (i >= rest.len) break; // unclosed — bail out
        rest = std.mem.trimStart(u8, rest[i + 1 ..], " \t");
    }
    return rest;
}

fn appendImportSymbol(
allocator: std.mem.Allocator,
outline: *FileOutline,
Expand Down
67 changes: 67 additions & 0 deletions src/test_parser.zig
Original file line number Diff line number Diff line change
Expand Up @@ -1872,6 +1872,73 @@ test "issue-532: ReScript parser" {
try expectOutlineImport(&outline, "Belt");
}

test "ocaml: parser" {
    // The explorer allocates from an arena that is torn down when the test exits.
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    var explorer = Explorer.init(arena_state.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);

    // One small .ml file covering the declaration forms asserted on below.
    const path = "src/User.ml";
    const source =
        \\open Base
        \\
        \\module User = struct
        \\ type t = { name : string; age : int }
        \\end
        \\
        \\type status = Active | Inactive
        \\
        \\let make name age = { name; age }
        \\
        \\let rec fib n = if n < 2 then n else fib (n - 1) + fib (n - 2)
        \\
        \\external get_env : string -> string = "c_get_env"
        \\
        \\include Core
    ;
    try explorer.indexFile(path, source);

    var outline = (try explorer.getOutline(path, testing.allocator)) orelse return error.TestUnexpectedResult;
    defer outline.deinit();

    // The .ml extension selects the OCaml parser.
    try testing.expectEqualStrings("ocaml", @tagName(outline.language));

    // `open` and `include` are both recorded as imports.
    try expectOutlineImport(&outline, "Base");
    try expectOutlineImport(&outline, "Core");

    // Modules and type definitions.
    try expectOutlineSymbol(&outline, "User", .struct_def);
    try expectOutlineSymbol(&outline, "t", .type_alias);
    try expectOutlineSymbol(&outline, "status", .type_alias);

    // `let` bindings are classified by a line-based heuristic:
    // make has no `->` and its RHS is `{ name; age }` → constant
    try expectOutlineSymbol(&outline, "make", .constant);
    // fib has no `->` and its RHS starts with `if` → constant
    try expectOutlineSymbol(&outline, "fib", .constant);

    // get_env is an `external` whose line contains `->` → function
    try expectOutlineSymbol(&outline, "get_env", .function);
}

test "ocaml: comment delimiters inside strings are ignored" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    var explorer = Explorer.init(arena_state.allocator(), Explorer.DEFAULT_CONTENT_CACHE_CAPACITY);

    // The first binding holds "(*" inside a string literal. If the scanner took
    // it for a comment opener, the definitions after it would be skipped.
    const path = "src/str_test.ml";
    const source =
        \\let pattern = "(* not a comment *)"
        \\
        \\let greet name = "hello " ^ name
        \\
        \\(* real comment *)
        \\
        \\let x = 42
    ;
    try explorer.indexFile(path, source);

    var outline = (try explorer.getOutline(path, testing.allocator)) orelse return error.TestUnexpectedResult;
    defer outline.deinit();

    try testing.expectEqualStrings("ocaml", @tagName(outline.language));

    // All three bindings must be present, including those after the string
    // and after the real comment.
    try expectOutlineSymbol(&outline, "pattern", .constant);
    try expectOutlineSymbol(&outline, "greet", .constant);
    try expectOutlineSymbol(&outline, "x", .constant);
}

// ─── audit (2026-06-09): latent-issue sweep — parser/deps fixes ───

Expand Down