From e66d5887786bb02d2345a6c60d32e7f5eca10a62 Mon Sep 17 00:00:00 2001 From: Gadi Evron Date: Wed, 17 Jun 2026 13:58:18 +0300 Subject: [PATCH 1/2] fix(zig,php parsers): attribute generic-container & anonymous-class methods correctly Zig: methods inside the generic-container idiom 'fn List(comptime T: type) type { return struct {...} }' were emitted as bare top-level functions (class_name None) and distinct containers' same-named methods collided. Add _returns_type() and thread the container fn name as struct context so they qualify as List.method. PHP: methods inside 'new class {}' anonymous classes had class_name None and collided across distinct anon classes (data loss via overwrite). Add an anonymous_class branch synthesizing a stable class@anonymous:{line}:{col} identity (line+col so multiple anon classes on one line stay distinct). Tests: 5 new across both parsers (qualified-name + collision regressions, incl. same-line). zig+php suites: 73 passed. Independent + judge verified, no regression. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../parsers/php/function_extractor.py | 46 +++++++++++ .../parsers/zig/function_extractor.py | 26 +++++++ .../php/test_function_extractor_anon_class.py | 78 +++++++++++++++++++ ...st_function_extractor_generic_container.py | 65 ++++++++++++++++ 4 files changed, 215 insertions(+) create mode 100644 libs/openant-core/tests/parsers/php/test_function_extractor_anon_class.py create mode 100644 libs/openant-core/tests/parsers/zig/test_function_extractor_generic_container.py diff --git a/libs/openant-core/parsers/php/function_extractor.py b/libs/openant-core/parsers/php/function_extractor.py index 582a689..236e06a 100644 --- a/libs/openant-core/parsers/php/function_extractor.py +++ b/libs/openant-core/parsers/php/function_extractor.py @@ -437,6 +437,52 @@ def _extract_functions_from_tree(self, tree, source: bytes, file_path: Path, stack.append((child, class_name, new_namespace_name)) continue # Don't walk children again + elif node.type == 'anonymous_class': + # `new class { ... }` (PHP 7+) has no source name. Without a synthetic + # identity its methods fall through the catch-all else with the OUTER + # class_name (None at top level), so they're keyed as bare functions and + # two distinct anonymous classes that both define e.g. handle() collide on + # one id (the later silently overwrites the earlier). Synthesize a stable, + # location-based name so each anonymous class is distinct and its methods + # are qualified (class@anonymous:{line}:{col}.method). Line AND column are + # both needed: two `new class {}` on one physical line share a start line, + # so column is what keeps them distinct (else they'd still collide). 
+ anon_name = ( + f"class@anonymous:{node.start_point[0] + 1}:{node.start_point[1]}" + ) + body_node = None + for child in node.children: + if child.type == 'declaration_list': + body_node = child + break + + if body_node: + methods = [] + for child in body_node.children: + if child.type == 'method_declaration': + mname = self._get_function_name(child, source) + if mname: + if self._is_static_method(child, source): + methods.append(f"static:{mname}") + else: + methods.append(mname) + + self.classes[f"{relative_path}:{anon_name}"] = { + 'name': anon_name, + 'file_path': relative_path, + 'start_line': node.start_point[0] + 1, + 'end_line': node.end_point[0] + 1, + 'methods': methods, + 'superclass': None, + 'interfaces': [], + 'namespace_name': namespace_name, + } + self.stats['total_classes'] += 1 + + for child in reversed(body_node.children): + stack.append((child, anon_name, namespace_name)) + continue # Don't walk children again + else: for child in reversed(node.children): stack.append((child, class_name, namespace_name)) diff --git a/libs/openant-core/parsers/zig/function_extractor.py b/libs/openant-core/parsers/zig/function_extractor.py index 7e5c2f5..10b7521 100644 --- a/libs/openant-core/parsers/zig/function_extractor.py +++ b/libs/openant-core/parsers/zig/function_extractor.py @@ -131,6 +131,15 @@ def _walk_node( if func_info: func_id = f"{file_path}:{func_info['qualified_name']}" functions[func_id] = func_info + # Zig's generic-container idiom is a type-returning function: + # `fn List(comptime T: type) type { return struct { fn push() ... }; }`. + # The returned struct is anonymous in the AST (not a `const Name = + # struct {...}` variable_declaration), so without this its methods would + # recurse with current_struct unchanged and be emitted as bare top-level + # functions. Thread the function name as the struct context so they + # qualify as List.push and distinct containers' methods don't collide. 
+ if self._returns_type(node, source): + child_struct = func_info["name"] elif node.type == "variable_declaration": # `const Foo = struct { ... };` -- a named struct/enum definition. @@ -206,6 +215,23 @@ def _extract_function( "unit_type": unit_type, } + def _returns_type(self, node: Node, source: bytes) -> bool: + """True if a function_declaration's return type is the builtin `type` — Zig's + generic-container idiom (`fn Foo(...) type { return struct {...} }`). + + The return type is the function_declaration's direct child that follows the + `parameters` node (a `builtin_type`). This deliberately inspects only direct + children, so the `type` inside a `comptime T: type` parameter (nested under + `parameters`) is not mistaken for the return type. + """ + seen_params = False + for child in node.children: + if child.type in ("parameters", "ParamDeclList"): + seen_params = True + elif seen_params and child.type == "builtin_type": + return self._get_node_text(child, source).strip() == "type" + return False + def _extract_parameters(self, node: Node, source: bytes) -> List[str]: """Extract parameter names from a parameter list node.""" params = [] diff --git a/libs/openant-core/tests/parsers/php/test_function_extractor_anon_class.py b/libs/openant-core/tests/parsers/php/test_function_extractor_anon_class.py new file mode 100644 index 0000000..a4db9e4 --- /dev/null +++ b/libs/openant-core/tests/parsers/php/test_function_extractor_anon_class.py @@ -0,0 +1,78 @@ +"""Regression test for the PHP anonymous-class method-attribution bug. + +`new class { ... }` (PHP 7+) produces a tree-sitter `anonymous_class` node, which had +no handler in _extract_functions_from_tree and fell through the catch-all `else` — so +its methods were emitted with class_name=None (bare top-level functions). Two distinct +anonymous classes that both define e.g. handle() then collided on one unit id and the +later silently overwrote the earlier (data loss). 
+ +Driven through the REAL extractor (FunctionExtractor.extract_all) on a temp .php file. +""" + +import os +import sys +import tempfile +from pathlib import Path + +_CORE_ROOT = Path(__file__).resolve().parents[3] +sys.path.insert(0, str(_CORE_ROOT)) + +from parsers.php.function_extractor import FunctionExtractor + + +def _extract(php_source: str, filename: str = "anon.php") -> dict: + repo = tempfile.mkdtemp() + with open(os.path.join(repo, filename), "w") as fh: + fh.write(php_source) + return FunctionExtractor(repo).extract_all([filename]) + + +def test_anon_class_method_attributed_to_synthetic_class(): + src = ( + " dict: + workdir = tempfile.mkdtemp() + with open(os.path.join(workdir, "m.zig"), "w") as fh: + fh.write(src) + return FunctionExtractor(workdir, {"files": [{"path": "m.zig"}]}).extract() + + +def test_generic_container_method_qualified_to_container(): + src = ( + "pub fn List(comptime T: type) type {\n" + " return struct {\n" + " pub fn push(self: *@This(), x: T) void { _ = self; _ = x; }\n" + " };\n" + "}\n" + "fn ordinary() void {}\n" + ) + out = _extract(src) + funcs = out["functions"] + assert "m.zig:List.push" in funcs, f"List.push missing; keys = {sorted(funcs)}" + info = funcs["m.zig:List.push"] + assert info["class_name"] == "List" + assert info["qualified_name"] == "List.push" + assert info["unit_type"] == "method" + # The method must NOT leak as a bare top-level function. + assert "m.zig:push" not in funcs, f"unqualified push leaked: {sorted(funcs)}" + # The plain function is unaffected. 
+ assert "m.zig:ordinary" in funcs, sorted(funcs) + + +def test_two_generic_containers_methods_no_collision(): + src = ( + "pub fn List(comptime T: type) type {\n" + " return struct { pub fn len(self: *@This()) usize { _ = self; return 0; } };\n" + "}\n" + "pub fn Ring(comptime T: type) type {\n" + " return struct { pub fn len(self: *@This()) usize { _ = self; return 1; } };\n" + "}\n" + ) + funcs = _extract(src)["functions"] + assert "m.zig:List.len" in funcs, f"keys = {sorted(funcs)}" + assert "m.zig:Ring.len" in funcs, f"silent collision/data-loss; keys = {sorted(funcs)}" From 4058a1c3c7d54cd95db201987864cd464d9e464c Mon Sep 17 00:00:00 2001 From: Gadi Evron Date: Wed, 17 Jun 2026 15:26:59 +0300 Subject: [PATCH 2/2] docs(zig,php parsers): surface grammar-alignment prerequisite in regression tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the cross-PR dependency self-documenting so a human reviewer OR an agent running the suite on the wrong base sees WHY, not a cryptic failure: - zig test: skipif-guard that probes the PREREQUISITE behavior (a named struct's method extracting — provided by the tree-sitter-zig>=1.1.2 grammar-alignment PRs, not by this fix). On a stale-grammar base the tests skip with an explanatory message instead of failing. Non-circular: the probe uses a named struct, the tests cover the distinct generic-container case. - php test: module-docstring note stating the dependency on the reworked _extract_functions_from_tree traversal. This change is NOT landable on raw upstream master; see Depends-on: PR-87/PR-110/322920e in the PR description. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../php/test_function_extractor_anon_class.py | 6 ++++ ...st_function_extractor_generic_container.py | 30 +++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/libs/openant-core/tests/parsers/php/test_function_extractor_anon_class.py b/libs/openant-core/tests/parsers/php/test_function_extractor_anon_class.py index a4db9e4..5fc1418 100644 --- a/libs/openant-core/tests/parsers/php/test_function_extractor_anon_class.py +++ b/libs/openant-core/tests/parsers/php/test_function_extractor_anon_class.py @@ -7,6 +7,12 @@ later silently overwrote the earlier (data loss). Driven through the REAL extractor (FunctionExtractor.extract_all) on a temp .php file. + +DEPENDENCY (human reviewers + agents): this fix assumes the reworked +`_extract_functions_from_tree` traversal added by upstream PR #111 (PHP parser). On raw +`master` the PHP extractor has a materially different shape and these tests fail — this +change is NOT landable on master standalone. Depends-on: #111. Base this on +staging/parser-fix-stack (which already contains #111) to run it green. """ import os diff --git a/libs/openant-core/tests/parsers/zig/test_function_extractor_generic_container.py b/libs/openant-core/tests/parsers/zig/test_function_extractor_generic_container.py index 8affd12..372c775 100644 --- a/libs/openant-core/tests/parsers/zig/test_function_extractor_generic_container.py +++ b/libs/openant-core/tests/parsers/zig/test_function_extractor_generic_container.py @@ -16,6 +16,8 @@ import tempfile from pathlib import Path +import pytest + _CORE_ROOT = Path(__file__).resolve().parents[3] sys.path.insert(0, str(_CORE_ROOT)) @@ -29,6 +31,34 @@ def _extract(src: str) -> dict: return FunctionExtractor(workdir, {"files": [{"path": "m.zig"}]}).extract() +def _zig_parser_is_grammar_aligned() -> bool: + """Probe the PREREQUISITE behavior (not this fix's): does a *named* struct's method + extract as Container.method? 
That capability is provided by the tree-sitter-zig + grammar-alignment work (>=1.1.2 node names struct_declaration/variable_declaration; + PRs 87/110, commit 322920e), independent of the generic-container fix under test. + On a base whose parser still matches stale node names (VarDecl/container_decl), no + struct methods extract at all, so these tests cannot pass for reasons unrelated to + the fix.""" + probe = "const _Probe = struct {\n pub fn _m(self: _Probe) void { _ = self; }\n};\n" + return "m.zig:_Probe._m" in _extract(probe)["functions"] + + +# Skip (not fail) with an explanatory message when run on a base that lacks the +# grammar-alignment prerequisite — so a human or agent running this on raw master sees +# *why* instead of a cryptic assertion failure. Supported base: staging/parser-fix-stack, +# which carries upstream PR #110 (Zig parser realignment) AND the tree-sitter-zig>=1.1.2 +# grammar pin. This is NOT landable on master standalone. +pytestmark = pytest.mark.skipif( + not _zig_parser_is_grammar_aligned(), + reason=( + "Zig parser not grammar-aligned (needs tree-sitter-zig>=1.1.2 node names " + "struct_declaration/variable_declaration, from upstream PR #110 + the grammar " + "pin). On such a base no struct methods extract, so the generic-container fix " + "cannot pass. Supported base: staging/parser-fix-stack — not landable on master." + ), +) + + def test_generic_container_method_qualified_to_container(): src = ( "pub fn List(comptime T: type) type {\n"