diff --git a/libs/openant-core/parsers/php/call_graph_builder.py b/libs/openant-core/parsers/php/call_graph_builder.py index 4354631..4e3184c 100644 --- a/libs/openant-core/parsers/php/call_graph_builder.py +++ b/libs/openant-core/parsers/php/call_graph_builder.py @@ -174,19 +174,27 @@ def _extract_calls_from_code(self, code: str, caller_id: str) -> Set[str]: caller_class = caller_func.get('class_name') caller_namespace = caller_func.get('namespace_name') + # The extractor stores each function/method body as a raw PHP fragment + # WITHOUT a leading " Set[str]: def _resolve_call_node(self, node, source: bytes, caller_file: str, caller_class: Optional[str], - caller_namespace: Optional[str] = None) -> Optional[str]: + caller_namespace: Optional[str] = None, + root=None) -> Optional[str]: """Resolve a tree-sitter call node to a function ID.""" if node.type == 'function_call_expression': return self._resolve_function_call(node, source, caller_file, caller_class, - caller_namespace) + caller_namespace, root) elif node.type == 'member_call_expression': return self._resolve_member_call(node, source, caller_file, caller_class) elif node.type == 'scoped_call_expression': @@ -210,7 +219,8 @@ def _resolve_call_node(self, node, source: bytes, caller_file: str, def _resolve_function_call(self, node, source: bytes, caller_file: str, caller_class: Optional[str], - caller_namespace: Optional[str] = None) -> Optional[str]: + caller_namespace: Optional[str] = None, + root=None) -> Optional[str]: """Resolve a simple function call like func().""" func_name = None @@ -224,6 +234,12 @@ def _resolve_function_call(self, node, source: bytes, caller_file: str, if '\\' in func_name: func_name = func_name.rsplit('\\', 1)[-1] break + elif child.type == 'variable_name': + # Variable-function call like $f(). Follow a single + # string-literal binding ($f = 'helper';) to recover the name. + var_name = source[child.start_byte:child.end_byte].decode('utf-8', errors='replace') + func_name = self._resolve_variable_function(var_name, root, source) + break if not func_name: return None @@ -292,6 +308,56 @@ def _resolve_new(self, node, source: bytes, caller_file: str, return None return self._resolve_class_call(class_name, '__construct', caller_file) + def _resolve_variable_function(self, var_name: str, root, + source: bytes) -> Optional[str]: + """Follow a single string-literal binding for a $var() callee. + + Scans the enclosing function body for assignments to ``var_name``. + Only a single, unambiguous string-literal binding + (``$f = 'helper';``) is followed; if the variable is assigned more + than once, or from a non-literal, resolution is declined for + precision (no guessing). + """ + if root is None: + return None + literal_names: Set[str] = set() + non_literal = False + + stack = [root] + while stack: + n = stack.pop() + if n.type == 'assignment_expression': + children = [c for c in n.children if c.type not in ('=',)] + # Shape: = + if len(children) >= 2 and children[0].type == 'variable_name': + lhs = source[children[0].start_byte:children[0].end_byte].decode( + 'utf-8', errors='replace') + if lhs == var_name: + rhs = children[1] + literal = self._string_literal_value(rhs, source) + if literal is not None: + literal_names.add(literal) + else: + non_literal = True + stack.extend(n.children) + + # Single unambiguous string binding only. + if non_literal or len(literal_names) != 1: + return None + return next(iter(literal_names)) + + @staticmethod + def _string_literal_value(node, source: bytes) -> Optional[str]: + """Return the content of a string-literal node, else None.""" + if node.type != 'string': + return None + for child in node.children: + if child.type == 'string_content': + return source[child.start_byte:child.end_byte].decode( + 'utf-8', errors='replace') + # Empty string literal ('') has no string_content child. + return '' + def _resolve_member_call(self, node, source: bytes, caller_file: str, caller_class: Optional[str]) -> Optional[str]: """Resolve a member call like $obj->method().""" @@ -348,27 +414,18 @@ def _resolve_scoped_call(self, node, source: bytes, caller_file: str, if scope in ('self', 'static') and caller_class: return self._resolve_self_call(method_name, caller_file, caller_class) - # parent::method() - resolve in the superclass (extends), not the caller's own class. + # parent::method() - the method is inherited from the parent class, + # which may be defined in a different file. Resolve via the + # class->parent index, then a cross-file class-method lookup. if scope == 'parent' and caller_class: - superclass = self._superclass_of(caller_file, caller_class) - if not superclass: - return None - if '\\' in superclass: - superclass = superclass.rsplit('\\', 1)[-1] # `extends App\Base` -> match `Base` - # Resolve in the superclass only; do NOT fall back to the caller's own class, which would - # mis-link an overriding child's parent:: call to the child's own method. - return self._resolve_class_call(superclass, method_name, caller_file) + parent_class = self._resolve_parent_class(caller_file, caller_class) + if parent_class: + return self._resolve_class_call(parent_class, method_name, caller_file) + return None # ClassName::method() return self._resolve_class_call(scope, method_name, caller_file) - def _superclass_of(self, caller_file: str, caller_class: str) -> Optional[str]: - """Return the superclass (extends) name of caller_class defined in caller_file, or None.""" - for class_data in self.classes.values(): - if class_data.get('name') == caller_class and class_data.get('file_path') == caller_file: - return class_data.get('superclass') - return None - def _resolve_simple_call(self, func_name: str, caller_file: str, caller_class: Optional[str], caller_namespace: Optional[str] = None) -> Optional[str]: @@ -461,6 +518,33 @@ def _resolve_class_call(self, class_name: str, method_name: str, return None + def _resolve_parent_class(self, caller_file: str, + caller_class: str) -> Optional[str]: + """Return the parent (superclass) name of caller_class, if known. + + The class index records each class's ``superclass`` (the ``extends`` + target). The parent class may be defined in a different file, so a + same-file lookup is tried first, then any file declaring the class. + """ + # Same-file class declaration first (most precise). + class_data = self.classes.get(f"{caller_file}:{caller_class}") + if class_data and class_data.get('superclass'): + return self._strip_namespace(class_data['superclass']) + + # Fall back to any class with this name across files. + for key, data in self.classes.items(): + if key.endswith(f":{caller_class}") and data.get('superclass'): + return self._strip_namespace(data['superclass']) + + return None + + @staticmethod + def _strip_namespace(name: str) -> str: + """Reduce a possibly namespace-qualified class name to its last segment.""" + if '\\' in name: + return name.rsplit('\\', 1)[-1] + return name + def _extract_calls_regex(self, code: str, caller_id: str) -> Set[str]: """Fallback regex-based call extraction for unparseable code.""" calls = set() diff --git a/libs/openant-core/parsers/php/function_extractor.py b/libs/openant-core/parsers/php/function_extractor.py index 582a689..3d3cc65 100644 --- a/libs/openant-core/parsers/php/function_extractor.py +++ b/libs/openant-core/parsers/php/function_extractor.py @@ -420,6 +420,50 @@ def _extract_functions_from_tree(self, tree, source: bytes, file_path: Path, stack.append((child, new_trait_name, namespace_name)) continue + elif node.type == 'enum_declaration': + # PHP 8.1+ enums are class-like callable containers; register + # the enum as a class so its methods get qualified ids + # (Enum.method) and class context, mirroring trait/interface. + name_node = node.child_by_field_name('name') + new_enum_name = self._node_text(name_node, source) if name_node else None + + if new_enum_name: + class_id = f"{relative_path}:{new_enum_name}" + methods = [] + body_node = node.child_by_field_name('body') + if body_node is None: + for child in node.children: + if child.type == 'enum_declaration_list': + body_node = child + break + + if body_node: + for child in body_node.children: + if child.type == 'method_declaration': + mname = self._get_function_name(child, source) + if mname: + if self._is_static_method(child, source): + methods.append(f"static:{mname}") + else: + methods.append(mname) + + self.classes[class_id] = { + 'name': new_enum_name, + 'file_path': relative_path, + 'start_line': node.start_point[0] + 1, + 'end_line': node.end_point[0] + 1, + 'methods': methods, + 'superclass': None, + 'interfaces': [], + 'namespace_name': namespace_name, + } + self.stats['total_classes'] += 1 + + if body_node: + for child in reversed(body_node.children): + stack.append((child, new_enum_name, namespace_name)) + continue + elif node.type == 'namespace_definition': # Extract namespace name name_node = node.child_by_field_name('name') @@ -428,18 +472,38 @@ def _extract_functions_from_tree(self, tree, source: bytes, file_path: Path, # Recurse into namespace body body_node = node.child_by_field_name('body') if body_node is None: - # Namespace without braces covers rest of file; recurse children directly - for child in reversed(node.children): - if child.type not in ('namespace', 'name', ';'): - stack.append((child, class_name, new_namespace_name)) + # Braceless 'namespace App\Svc;': the declarations it covers + # are SIBLINGS of this node, not children -- they are pushed + # by the container's traversal (see _push_children, which + # carries the braceless namespace forward to siblings). The + # node's own children (namespace/name/;) have nothing to + # extract, so there is nothing to recurse here. + pass else: for child in reversed(body_node.children): stack.append((child, class_name, new_namespace_name)) continue # Don't walk children again else: - for child in reversed(node.children): - stack.append((child, class_name, namespace_name)) + self._push_children(stack, node.children, source, class_name, namespace_name) + + def _push_children(self, stack, children, source: bytes, + class_name: Optional[str], namespace_name: Optional[str]) -> None: + """Push a node's children onto the traversal stack, honoring a braceless + ``namespace App\\X;`` declaration: such a node has no body, and the + declarations it governs are its FOLLOWING SIBLINGS (until the next + namespace declaration). Propagate that namespace to those siblings.""" + current_ns = namespace_name + ns_overrides = {} + for child in children: + if child.type == 'namespace_definition' and child.child_by_field_name('body') is None: + name_node = child.child_by_field_name('name') + if name_node is not None: + current_ns = self._node_text(name_node, source) + continue + ns_overrides[id(child)] = current_ns + for child in reversed(children): + stack.append((child, class_name, ns_overrides.get(id(child), namespace_name))) def _process_function_node(self, node, source: bytes, relative_path: str, class_name: Optional[str], namespace_name: Optional[str], @@ -450,7 +514,16 @@ def _process_function_node(self, node, source: bytes, relative_path: str, return code = self._node_text(node, source) - start_line = node.start_point[0] + 1 # tree-sitter is 0-indexed + # tree-sitter is 0-indexed. The method_declaration node spans any + # leading PHP8 attribute_list (e.g. #[Route(...)]), so node.start_point + # would point at the attribute line. Anchor start_line at the actual + # declaration (first non-attribute child) instead. + decl_node = node + for child in node.children: + if child.type != 'attribute_list': + decl_node = child + break + start_line = decl_node.start_point[0] + 1 end_line = node.end_point[0] + 1 parameters = self._get_parameters(node, source) diff --git a/libs/openant-core/parsers/php/unit_generator.py b/libs/openant-core/parsers/php/unit_generator.py index 63f9fff..c787f71 100644 --- a/libs/openant-core/parsers/php/unit_generator.py +++ b/libs/openant-core/parsers/php/unit_generator.py @@ -160,7 +160,11 @@ def create_unit(self, func_id: str, func_data: Dict) -> Dict: file_path = func_data.get('file_path', '') func_name = func_data.get('name', '') class_name = func_data.get('class_name') - namespace = func_data.get('namespace') + # The extractor (function_extractor.py) writes the declared namespace + # under 'namespace_name'; read that canonical key so it reaches the + # unit instead of always being None (key-drift bug). + namespace = func_data.get('namespace_name') + is_static = func_data.get('is_static', False) unit_type = func_data.get('unit_type', 'function') # Get upstream dependencies (functions this calls) @@ -239,6 +243,7 @@ def create_unit(self, func_id: str, func_data: Dict) -> Dict: 'metadata': { 'visibility': func_data.get('visibility', 'public'), 'namespace': namespace, + 'is_static': is_static, 'parameters': func_data.get('parameters', []), 'generator': 'php_unit_generator.py', 'direct_calls': direct_calls, @@ -307,7 +312,8 @@ def generate_analyzer_output(self) -> Dict: 'endLine': func_data.get('end_line', 0), 'visibility': func_data.get('visibility', 'public'), 'isExported': True, # PHP doesn't have explicit exports - 'namespace': func_data.get('namespace'), + 'namespace': func_data.get('namespace_name'), + 'isStatic': func_data.get('is_static', False), 'parameters': func_data.get('parameters', []), 'className': func_data.get('class_name'), } diff --git a/libs/openant-core/tests/parsers/php/test_call_graph_builder_u5.py b/libs/openant-core/tests/parsers/php/test_call_graph_builder_u5.py new file mode 100644 index 0000000..0da7342 --- /dev/null +++ b/libs/openant-core/tests/parsers/php/test_call_graph_builder_u5.py @@ -0,0 +1,166 @@ +"""Regression tests for two confirmed PHP CallGraphBuilder edge-resolution bugs. + +Both drive the FULL real pipeline (FunctionExtractor -> CallGraphBuilder) on +real PHP source and assert that the expected call-graph *edge* is present, +matching the way the bugs were reproduced on the live parser. + +[BUG 20] variable-function dataflow loss + ``$f = 'helper'; $f();`` -- the call node is a ``function_call_expression`` + whose callee child is a ``variable_name`` (``$f``), not a ``name`` / + ``qualified_name``. ``_resolve_function_call`` only matched ``name`` / + ``identifier`` / ``qualified_name`` callees, so ``func_name`` stayed None + and the call was dropped -- the edge caller -> helper was never emitted. + Fix: when the callee is a ``variable_name``, follow a single unconditional + string-literal binding (``$f = 'helper';``) in the caller's body and resolve + the bound name. + +[BUG 52] parent:: scoped-call dispatch + ``parent::greet()`` -- the scope child is a ``relative_scope`` node (text + ``parent``), not a ``name`` / ``qualified_name``. ``_resolve_scoped_call`` + only populated ``scope`` from ``name`` / ``qualified_name``, so both + ``scope`` and ``method_name`` stayed None and the call was dropped. The + pre-existing ``parent`` branch was unreachable dead code. ``parent`` + additionally cannot be resolved by the same-class self-call resolver: the + method lives on the PARENT class, possibly in another file. Fix: read the + ``relative_scope``; for ``parent`` look up the caller class's ``superclass`` + (class->parent index) and resolve the method on that parent class via the + cross-file class-method resolver. +""" + +import sys +import tempfile +from pathlib import Path + +_CORE_ROOT = Path(__file__).resolve().parents[3] +sys.path.insert(0, str(_CORE_ROOT)) + +from parsers.php.function_extractor import FunctionExtractor +from parsers.php.call_graph_builder import CallGraphBuilder + + +def _build_graph(files: dict) -> dict: + """Run the real PHP pipeline on {filename: source}; return the call_graph.""" + repo = tempfile.mkdtemp() + for name, src in files.items(): + Path(repo, name).write_text(src) + + extractor = FunctionExtractor(repo) + extractor_output = extractor.extract_all(list(files.keys())) + + builder = CallGraphBuilder(extractor_output) + builder.build_call_graph() + return builder.call_graph + + +# --- BUG 20: variable-function dataflow ------------------------------------ + +_VARFN_SOURCE = ( + " helper edge.""" + graph = _build_graph({"main.php": _VARFN_SOURCE}) + edges = graph.get("main.php:caller", []) + assert "main.php:helper" in edges, ( + "variable-function edge dropped: $f='helper';$f() lost the edge to " + "helper (callee is a variable_name; string binding not followed).\n" + f" caller edges: {edges!r}" + ) + + +def test_variable_function_conditional_binding_not_followed(): + """Precision guard: a non-unique / reassigned binding must NOT be resolved.""" + src = ( + "parent cross-file lookup).\n" + f" Child.run edges: {edges!r}" + ) + + +def test_parent_scoped_call_same_file_emits_edge(): + """`parent::greet()` must also resolve when parent is in the same file.""" + src = ( + " dict: + """Run the real extractor on a source string; return the full export dict.""" + repo = tempfile.mkdtemp() + Path(repo, filename).write_text(php_source) + extractor = FunctionExtractor(repo) + return extractor.extract_all([filename]) + + +# --- BUG 7: nested function never extracted ------------------------------- + +_NESTED_SOURCE = ( + " how to read the mapped value off the assembled unit. +# Each entry is (extractor_func_data_key, unit_location_reader). Extend this map +# whenever the extractor grows an analysis-relevant field that the unit exposes. +_CONTRACT = { + "name": lambda u: u["code"]["primary_origin"]["function_name"], + "class_name": lambda u: u["code"]["primary_origin"]["class_name"], + "unit_type": lambda u: u["unit_type"], + "start_line": lambda u: u["code"]["primary_origin"]["start_line"], + "end_line": lambda u: u["code"]["primary_origin"]["end_line"], + "parameters": lambda u: u["metadata"]["parameters"], + # The two field-drift legs this guard exists for: + "namespace_name": lambda u: u["metadata"]["namespace"], # BUG 43 + "is_static": lambda u: u["metadata"]["is_static"], # BUG 28 +} + + +def test_extractor_to_unit_field_contract(): + """Every contracted producer field must reach its mapped unit location.""" + functions, units = _run_pipeline(_SOURCE) + assert functions, "extractor produced no functions" + + failures = [] + for func_id, func_data in functions.items(): + unit = units.get(func_id) + assert unit is not None, f"no unit generated for {func_id}" + for producer_key, reader in _CONTRACT.items(): + produced = func_data.get(producer_key) + exposed = reader(unit) + if exposed != produced: + failures.append( + f"{func_id}: key {producer_key!r} produced={produced!r} " + f"but unit exposes {exposed!r}" + ) + + assert not failures, "Field-contract drift detected:\n " + "\n ".join(failures) + + +def test_visibility_is_a_known_producer_gap(): + """KNOWN PRODUCER GAP (out of scope here, recorded as a regression marker). + + The extractor computes ``_get_visibility`` but ``_process_function_node`` + never stores a ``visibility`` key in ``func_data``; the unit_generator then + fabricates a default ``'public'`` via ``func_data.get('visibility', + 'public')``. So a ``private`` method is reported as ``public``. This is a + PRODUCER omission (function_extractor.py), distinct from the two CONSUMER + field-drift bugs fixed in this run -- left for a separate fix. When the + extractor starts emitting ``visibility``, this test will fail, prompting a + move of ``visibility`` into the ``_CONTRACT`` map above. + """ + functions, units = _run_pipeline(_SOURCE) + # The private method's true visibility is 'private', but the producer never + # emits the key, so the unit currently mis-reports it as the default. + priv = units["m.php:C.p"] + assert "visibility" not in functions["m.php:C.p"], ( + "Extractor now emits 'visibility' -- promote it into _CONTRACT and " + "drop this gap marker." + ) + assert priv["metadata"]["visibility"] == "public", ( + "Unit still defaults visibility; gap unchanged." + ) + + +def test_static_and_namespace_present_in_contract(): + """Self-check: the two bug-driven keys are actually in the contract map. + + Guards against someone silently deleting the drift-prone entries and + leaving a green-but-toothless test. + """ + assert "is_static" in _CONTRACT + assert "namespace_name" in _CONTRACT diff --git a/libs/openant-core/tests/parsers/php/test_unit_generator_u7.py b/libs/openant-core/tests/parsers/php/test_unit_generator_u7.py new file mode 100644 index 0000000..50445e4 --- /dev/null +++ b/libs/openant-core/tests/parsers/php/test_unit_generator_u7.py @@ -0,0 +1,120 @@ +"""Regression tests for the PHP unit generator's metadata field-contract. + +Two confirmed producer->consumer field-drift bugs between +``parsers/php/function_extractor.py`` (PRODUCER, writes ``func_data``) and +``parsers/php/unit_generator.py`` (CONSUMER, ``UnitGenerator.create_unit``): + +[BUG 28] ``is_static`` + The extractor computes ``func_data["is_static"]`` (function_extractor.py, + ``_process_function_node``), but ``create_unit`` never copies it into the + assembled unit's ``metadata``. The unit cannot tell static methods apart. + +[BUG 43] ``namespace`` key-drift + The extractor writes the declared namespace under the key + ``func_data["namespace_name"]``, but ``create_unit`` reads + ``func_data.get("namespace")`` -- a key-name mismatch -- so the unit's + ``metadata["namespace"]`` is ALWAYS ``None``. + +These tests drive the FULL real pipeline (FunctionExtractor -> +CallGraphBuilder -> UnitGenerator) on real PHP source, matching the way the +bugs were reproduced on the live parser. + +Construct-isolation note (BUG 43): + A *non-braced* ``namespace App\\Foo;`` declaration is, in the tree-sitter + grammar, a sibling of the following ``class_declaration`` rather than its + parent, and the extractor's traversal does NOT propagate the namespace to + those siblings -- a SEPARATE extractor traversal bug that also yields a + ``None`` namespace. To test the key-drift in ``create_unit`` in isolation + (and avoid confounding it with that traversal bug), these tests use a + *braced* ``namespace App\\Foo { ... }`` block, for which the extractor + demonstrably produces a correct ``namespace_name``. See the module-level + note in the report for the separate non-braced finding. +""" + +import sys +import tempfile +from pathlib import Path + +_CORE_ROOT = Path(__file__).resolve().parents[3] +sys.path.insert(0, str(_CORE_ROOT)) + +from parsers.php.function_extractor import FunctionExtractor +from parsers.php.call_graph_builder import CallGraphBuilder +from parsers.php.unit_generator import UnitGenerator + + +def _generate_units(php_source: str, filename: str = "m.php") -> dict: + """Run the real PHP pipeline on a source string; return units keyed by id.""" + repo = tempfile.mkdtemp() + Path(repo, filename).write_text(php_source) + + extractor = FunctionExtractor(repo) + extractor_output = extractor.extract_all([filename]) + + builder = CallGraphBuilder(extractor_output) + builder.build_call_graph() + + result = UnitGenerator(builder.export()).generate_units() + return {u["id"]: u for u in result["units"]} + + +# --- BUG 43: namespace key-drift ------------------------------------------ + +_NS_SOURCE = ( + "