Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 101 additions & 7 deletions libs/openant-core/parsers/zig/call_graph_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,14 +152,20 @@ def build_call_graph(self) -> None:

name_to_ids = self._build_name_index()

# Build per-file simple const fn-alias bindings (`const f = handler;`)
# so that a later `f()` resolves to `handler`.
alias_to_target = self._build_alias_index(name_to_ids)

for func_id, func_info in self.functions.items():
code = func_info.get("code", "")
file_path = func_info.get("file_path", "")

calls = self._find_calls_in_code(code)
calls = self._find_calls_in_code(code, file_path)

for call_name in calls:
resolved_ids = self._resolve_call(call_name, file_path, name_to_ids)
resolved_ids = self._resolve_call(
call_name, file_path, name_to_ids, alias_to_target
)
for resolved_id in resolved_ids:
if resolved_id != func_id: # No self-calls
if resolved_id not in call_graph[func_id]:
Expand Down Expand Up @@ -261,7 +267,59 @@ def _build_name_index(self) -> Dict[str, List[str]]:

return name_to_ids

def _find_calls_in_code(self, code: str) -> Set[str]:
def _build_alias_index(
self, name_to_ids: Dict[str, List[str]]
) -> Dict[str, Dict[str, str]]:
"""Index simple const fn-aliases per file: `const f = handler;` -> {f: handler}.

Only bindings whose right-hand side is a bare identifier naming a known
function are tracked (a genuine fn alias), so arbitrary const dataflow
(`const x = 1;`) is ignored. Scoped per file to avoid cross-file leaks.
"""
alias_to_target: Dict[str, Dict[str, str]] = defaultdict(dict)

for func_info in self.functions.values():
file_path = func_info.get("file_path", "")
code = func_info.get("code", "")
if not code:
continue
try:
tree = self.parser.parse(code.encode("utf-8"))
except Exception:
continue
self._collect_aliases_from_node(
tree.root_node,
code.encode("utf-8"),
name_to_ids,
alias_to_target[file_path],
)

return alias_to_target

def _collect_aliases_from_node(
self,
node: Node,
source: bytes,
name_to_ids: Dict[str, List[str]],
aliases: Dict[str, str],
) -> None:
"""Collect `const <alias> = <known-fn>;` bindings from a parse tree."""
if node.type in ("variable_declaration", "VarDecl"):
ident_children = [
c for c in node.children if c.type in ("identifier", "IDENTIFIER")
]
# A simple alias is exactly: const <alias> = <target-identifier>;
if len(ident_children) == 2:
alias_name = self._get_node_text(ident_children[0], source)
target_name = self._get_node_text(ident_children[1], source)
# Only record when the target is a known function name.
if alias_name and target_name in name_to_ids:
aliases[alias_name] = target_name

for child in node.children:
self._collect_aliases_from_node(child, source, name_to_ids, aliases)

def _find_calls_in_code(self, code: str, caller_file: str = "") -> Set[str]:
"""Find all function calls in a code snippet."""
calls = set()

Expand All @@ -272,11 +330,32 @@ def _find_calls_in_code(self, code: str) -> Set[str]:
# Fallback to regex-based extraction
calls = self._find_calls_with_regex(code)

# Filter out builtins
calls = {c for c in calls if c not in self.ZIG_BUILTINS and not c.startswith("@")}
# Filter out builtins, but NEVER filter a name that a same-file user
# function actually defines. A user fn whose name collides with a
# ZIG_BUILTINS entry (e.g. `expect`) must keep its edge. Scope the
# shadow check to the caller's own file so a builtin call is not
# spuriously linked to an unrelated same-named user fn elsewhere.
shadowing = self._same_file_function_names(caller_file)
calls = {
c
for c in calls
if c in shadowing or (c not in self.ZIG_BUILTINS and not c.startswith("@"))
}

return calls

def _same_file_function_names(self, caller_file: str) -> Set[str]:
"""Names of user functions defined in `caller_file` (same-file scope)."""
if not caller_file:
return set()
names: Set[str] = set()
for func_info in self.functions.values():
if func_info.get("file_path") == caller_file:
name = func_info.get("name", "")
if name:
names.add(name)
return names

def _extract_calls_from_node(
self, node: Node, source: bytes, calls: Set[str]
) -> None:
Expand All @@ -288,9 +367,16 @@ def _extract_calls_from_node(
if callee is not None and callee.type in ("identifier", "IDENTIFIER"):
calls.add(self._get_node_text(callee, source))
elif callee is not None and callee.type in ("field_expression", "field_access"):
# The method name is the trailing identifier child. Prefer that
# over text-splitting, which is brittle when the receiver itself
# contains punctuation (e.g. `C{}.m`).
method_name = None
for sub in callee.children:
if sub.type in ("identifier", "IDENTIFIER"):
method_name = self._get_node_text(sub, source)
text = self._get_node_text(callee, source)
calls.add(text.split(".")[-1]) # trailing member (method / func name)
calls.add(text) # also the full dotted form
calls.add(method_name if method_name else text.split(".")[-1])
calls.add(text) # also the full dotted form
elif node.type == "builtin_function":
# @call(.modifier, realFn, argsTuple): the wrapped function is the real call target;
# other @builtins are filtered out downstream.
Expand Down Expand Up @@ -352,6 +438,7 @@ def _resolve_call(
call_name: str,
caller_file: str,
name_to_ids: Dict[str, List[str]],
alias_to_target: Dict[str, Dict[str, str]] | None = None,
) -> List[str]:
"""
Resolve a call name to function ID(s).
Expand All @@ -361,6 +448,13 @@ def _resolve_call(
2. Imported files
3. Unique name match
"""
# Resolve a same-file const fn-alias (`const f = handler; f()`) to its
# target function name before looking up candidates.
if alias_to_target is not None:
target = alias_to_target.get(caller_file, {}).get(call_name)
if target is not None:
call_name = target

candidates = name_to_ids.get(call_name, [])

if not candidates:
Expand Down
10 changes: 7 additions & 3 deletions libs/openant-core/parsers/zig/function_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,8 @@ def _extract_struct_from_var_decl(
if child.type in ("identifier", "IDENTIFIER"):
if name is None:
name = self._get_node_text(child, source)
elif child.type == "struct_declaration":
elif child.type in ("struct_declaration", "enum_declaration",
"union_declaration", "opaque_declaration"):
is_struct = True

if name and is_struct:
Expand Down Expand Up @@ -260,8 +261,11 @@ def _classify_function(self, name: str, file_path: str) -> str:
"""Classify the function type based on name and context."""
name_lower = name.lower()

# Test functions
if name_lower.startswith("test") or "_test" in name_lower:
# Test functions. Anchor on the underscore-delimited test convention
# (`test_foo`, `foo_test`, or a bare `test`). A camelCase identifier
# that merely starts with "test" (e.g. `testConnection`) is an ordinary
# function, not a zig `test "..." {}` block.
if name_lower == "test" or name_lower.startswith("test_") or name_lower.endswith("_test"):
return "test"

# Init/constructor patterns
Expand Down
132 changes: 132 additions & 0 deletions libs/openant-core/tests/parsers/zig/test_call_graph_builder_u13.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
"""Regression tests for the Zig call graph builder (u13).

Three confirmed call-graph recall bugs, each reproduced through the REAL
extractor -> builder pipeline (FunctionExtractor.extract() feeding
CallGraphBuilder(...).build()), asserting the dropped edge is present.

- [BUG 3] local-type dispatch: `const o = Foo{}; o.method()` (and the
direct `Foo{}.method()`) produces no `caller -> method` edge,
because call-name extraction never recognises a tree-sitter
`field_expression` callee, so the method name is never emitted.
- [BUG 17] builtin-filter leak: a user-defined fn whose name collides with
a ZIG_BUILTINS entry (e.g. `expect`) is dropped by the builtin
filter before resolution, even though a same-file user function
of that name exists.
- [BUG 41] const-alias dataflow: `const f = handler; f()` loses the edge to
`handler`, because the name index maps only fn-decl names, never
the simple const alias binding.
"""

import os
import sys
import tempfile
from pathlib import Path

_CORE_ROOT = Path(__file__).resolve().parents[3]
sys.path.insert(0, str(_CORE_ROOT))

from parsers.zig.function_extractor import FunctionExtractor
from parsers.zig.call_graph_builder import CallGraphBuilder


def _run_pipeline(src: str) -> dict:
    """Run the real extractor -> builder pipeline on a single zig source file.

    The source is written as ``m.zig`` into a scratch directory, extracted
    with ``FunctionExtractor`` and fed to ``CallGraphBuilder``.

    Args:
        src: Zig source text to analyse.

    Returns:
        Whatever ``CallGraphBuilder(...).build()`` returns for that file.
    """
    # TemporaryDirectory removes the scratch tree on exit (mkdtemp leaked one
    # directory per test). The whole pipeline runs inside the block so the
    # file still exists for as long as any stage might want to read it.
    with tempfile.TemporaryDirectory() as workdir:
        file_path = os.path.join(workdir, "m.zig")
        # Explicit encoding keeps the fixture independent of the locale.
        with open(file_path, "w", encoding="utf-8") as fh:
            fh.write(src)
        scan_results = {"files": [{"path": "m.zig"}]}
        extractor_output = FunctionExtractor(workdir, scan_results).extract()
        return CallGraphBuilder(extractor_output).build()


def test_bug3_local_type_dispatch_method_call_edge():
    """A method call through a local instance (`const o = C{}; o.m()`) links `f` to `C.m`.

    The expected target id is the QUALIFIED `m.zig:C.m`. Before the u14
    [BUG 37] fix, struct methods were (incorrectly) emitted under their bare
    name and this assertion read `m.zig:m`; the edge itself is unchanged.
    """
    zig_source = "".join(
        (
            "const C = struct { fn m(self: C) i32 { _ = self; return 1; } };\n",
            "fn f() i32 { const o = C{}; return o.m(); }\n",
        )
    )
    call_graph = _run_pipeline(zig_source)["call_graph"]
    callees_of_f = call_graph.get("m.zig:f", [])
    assert "m.zig:C.m" in callees_of_f, (
        f"Expected f -> C.m method-call edge, got call_graph={call_graph}"
    )


def test_bug3_direct_struct_init_method_call_edge():
    """Calling a method directly on a struct literal (`C{}.m()`) links `f` to `C.m`.

    The target is asserted under its qualified id `m.zig:C.m`.
    """
    zig_source = "".join(
        (
            "const C = struct { fn m(self: C) i32 { _ = self; return 1; } };\n",
            "fn f() i32 { return C{}.m(); }\n",
        )
    )
    call_graph = _run_pipeline(zig_source)["call_graph"]
    callees_of_f = call_graph.get("m.zig:f", [])
    assert "m.zig:C.m" in callees_of_f, (
        f"Expected f -> C.m direct-init method-call edge, got call_graph={call_graph}"
    )


def test_bug17_user_fn_shadowing_builtin_is_not_filtered():
    """The edge to a user-defined `expect` (a ZIG_BUILTINS name) must survive filtering."""
    zig_lines = (
        "fn expect(ok: bool) void {\n",
        " _ = ok;\n",
        "}\n",
        "\n",
        "fn main() void {\n",
        " expect(true);\n",
        "}\n",
    )
    call_graph = _run_pipeline("".join(zig_lines))["call_graph"]
    callees_of_main = call_graph.get("m.zig:main", [])
    assert "m.zig:expect" in callees_of_main, (
        f"Expected main -> expect edge (user fn shadows builtin), got call_graph={call_graph}"
    )


def test_bug17_genuine_builtin_call_is_still_filtered():
    """Scope guard: a builtin call with no same-file user fn produces no edge.

    The source only calls `@import`, and no user function of that name is
    defined, so `main` must end up with an empty callee list. Builtins are
    un-filtered only when a same-file user definition shadows them.
    """
    zig_lines = (
        "fn main() void {\n",
        " const std = @import(\"std\");\n",
        " _ = std;\n",
        "}\n",
    )
    call_graph = _run_pipeline("".join(zig_lines))["call_graph"]
    # Nothing named @import / import is defined by the user, so nothing resolves.
    callees_of_main = call_graph.get("m.zig:main", [])
    assert callees_of_main == [], (
        f"Genuine builtin call should not produce an edge, got call_graph={call_graph}"
    )


def test_bug41_const_alias_call_edge():
    """Calling through a const alias (`const f = handler; f()`) links `viaAlias` to `handler`."""
    zig_lines = (
        "fn handler() void {}\n",
        "fn viaAlias() void {\n",
        " const f = handler;\n",
        " f();\n",
        "}\n",
        "fn direct() void {\n",
        " handler();\n",
        "}\n",
    )
    call_graph = _run_pipeline("".join(zig_lines))["call_graph"]
    handler_id = "m.zig:handler"

    callees_via_alias = call_graph.get("m.zig:viaAlias", [])
    assert handler_id in callees_via_alias, (
        f"Expected viaAlias -> handler alias edge, got call_graph={call_graph}"
    )

    # Control: a plain direct call must still resolve.
    callees_direct = call_graph.get("m.zig:direct", [])
    assert handler_id in callees_direct, (
        f"Direct call edge regressed, got call_graph={call_graph}"
    )
Loading
Loading