Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 105 additions & 21 deletions libs/openant-core/parsers/php/call_graph_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,19 +174,27 @@ def _extract_calls_from_code(self, code: str, caller_id: str) -> Set[str]:
caller_class = caller_func.get('class_name')
caller_namespace = caller_func.get('namespace_name')

# The extractor stores each function/method body as a raw PHP fragment
# WITHOUT a leading "<?php" open tag. tree-sitter-php treats untagged
# input as inline HTML 'text' and yields no call nodes, so prepend an
# open tag before re-parsing. All node byte offsets used below are
# relative to this tagged buffer, so resolution stays consistent.
if not code.lstrip().startswith('<?'):
code = '<?php ' + code
code_bytes = code.encode('utf-8', errors='replace')
try:
tree = self.php_parser.parse(code_bytes)
except Exception:
return self._extract_calls_regex(code, caller_id)

stack = [tree.root_node]
root = tree.root_node
stack = [root]
while stack:
node = stack.pop()
if node.type in ('function_call_expression', 'member_call_expression',
'scoped_call_expression', 'object_creation_expression'):
resolved = self._resolve_call_node(node, code_bytes, caller_file,
caller_class, caller_namespace)
caller_class, caller_namespace, root)
if resolved:
calls.add(resolved)
stack.extend(reversed(node.children))
Expand All @@ -195,11 +203,12 @@ def _extract_calls_from_code(self, code: str, caller_id: str) -> Set[str]:

def _resolve_call_node(self, node, source: bytes, caller_file: str,
caller_class: Optional[str],
caller_namespace: Optional[str] = None) -> Optional[str]:
caller_namespace: Optional[str] = None,
root=None) -> Optional[str]:
"""Resolve a tree-sitter call node to a function ID."""
if node.type == 'function_call_expression':
return self._resolve_function_call(node, source, caller_file, caller_class,
caller_namespace)
caller_namespace, root)
elif node.type == 'member_call_expression':
return self._resolve_member_call(node, source, caller_file, caller_class)
elif node.type == 'scoped_call_expression':
Expand All @@ -210,7 +219,8 @@ def _resolve_call_node(self, node, source: bytes, caller_file: str,

def _resolve_function_call(self, node, source: bytes, caller_file: str,
caller_class: Optional[str],
caller_namespace: Optional[str] = None) -> Optional[str]:
caller_namespace: Optional[str] = None,
root=None) -> Optional[str]:
"""Resolve a simple function call like func()."""
func_name = None

Expand All @@ -224,6 +234,12 @@ def _resolve_function_call(self, node, source: bytes, caller_file: str,
if '\\' in func_name:
func_name = func_name.rsplit('\\', 1)[-1]
break
elif child.type == 'variable_name':
# Variable-function call like $f(). Follow a single
# string-literal binding ($f = 'helper';) to recover the name.
var_name = source[child.start_byte:child.end_byte].decode('utf-8', errors='replace')
func_name = self._resolve_variable_function(var_name, root, source)
break

if not func_name:
return None
Expand Down Expand Up @@ -292,6 +308,56 @@ def _resolve_new(self, node, source: bytes, caller_file: str,
return None
return self._resolve_class_call(class_name, '__construct', caller_file)

def _resolve_variable_function(self, var_name: str, root,
source: bytes) -> Optional[str]:
"""Follow a single string-literal binding for a $var() callee.

Scans the enclosing function body for assignments to ``var_name``.
Only a single, unambiguous string-literal binding
(``$f = 'helper';``) is followed; if the variable is assigned more
than once, or from a non-literal, resolution is declined for
precision (no guessing).
"""
if root is None:
return None
literal_names: Set[str] = set()
non_literal = False

stack = [root]
while stack:
n = stack.pop()
if n.type == 'assignment_expression':
children = [c for c in n.children if c.type not in ('=',)]
# Shape: <variable_name> = <rhs>
if len(children) >= 2 and children[0].type == 'variable_name':
lhs = source[children[0].start_byte:children[0].end_byte].decode(
'utf-8', errors='replace')
if lhs == var_name:
rhs = children[1]
literal = self._string_literal_value(rhs, source)
if literal is not None:
literal_names.add(literal)
else:
non_literal = True
stack.extend(n.children)

# Single unambiguous string binding only.
if non_literal or len(literal_names) != 1:
return None
return next(iter(literal_names))

@staticmethod
def _string_literal_value(node, source: bytes) -> Optional[str]:
"""Return the content of a string-literal node, else None."""
if node.type != 'string':
return None
for child in node.children:
if child.type == 'string_content':
return source[child.start_byte:child.end_byte].decode(
'utf-8', errors='replace')
# Empty string literal ('') has no string_content child.
return ''

def _resolve_member_call(self, node, source: bytes, caller_file: str,
caller_class: Optional[str]) -> Optional[str]:
"""Resolve a member call like $obj->method()."""
Expand Down Expand Up @@ -348,27 +414,18 @@ def _resolve_scoped_call(self, node, source: bytes, caller_file: str,
if scope in ('self', 'static') and caller_class:
return self._resolve_self_call(method_name, caller_file, caller_class)

# parent::method() - resolve in the superclass (extends), not the caller's own class.
# parent::method() - the method is inherited from the parent class,
# which may be defined in a different file. Resolve via the
# class->parent index, then a cross-file class-method lookup.
if scope == 'parent' and caller_class:
superclass = self._superclass_of(caller_file, caller_class)
if not superclass:
return None
if '\\' in superclass:
superclass = superclass.rsplit('\\', 1)[-1] # `extends App\Base` -> match `Base`
# Resolve in the superclass only; do NOT fall back to the caller's own class, which would
# mis-link an overriding child's parent:: call to the child's own method.
return self._resolve_class_call(superclass, method_name, caller_file)
parent_class = self._resolve_parent_class(caller_file, caller_class)
if parent_class:
return self._resolve_class_call(parent_class, method_name, caller_file)
return None

# ClassName::method()
return self._resolve_class_call(scope, method_name, caller_file)

def _superclass_of(self, caller_file: str, caller_class: str) -> Optional[str]:
"""Return the superclass (extends) name of caller_class defined in caller_file, or None."""
for class_data in self.classes.values():
if class_data.get('name') == caller_class and class_data.get('file_path') == caller_file:
return class_data.get('superclass')
return None

def _resolve_simple_call(self, func_name: str, caller_file: str,
caller_class: Optional[str],
caller_namespace: Optional[str] = None) -> Optional[str]:
Expand Down Expand Up @@ -461,6 +518,33 @@ def _resolve_class_call(self, class_name: str, method_name: str,

return None

def _resolve_parent_class(self, caller_file: str,
caller_class: str) -> Optional[str]:
"""Return the parent (superclass) name of caller_class, if known.

The class index records each class's ``superclass`` (the ``extends``
target). The parent class may be defined in a different file, so a
same-file lookup is tried first, then any file declaring the class.
"""
# Same-file class declaration first (most precise).
class_data = self.classes.get(f"{caller_file}:{caller_class}")
if class_data and class_data.get('superclass'):
return self._strip_namespace(class_data['superclass'])

# Fall back to any class with this name across files.
for key, data in self.classes.items():
if key.endswith(f":{caller_class}") and data.get('superclass'):
return self._strip_namespace(data['superclass'])

return None

@staticmethod
def _strip_namespace(name: str) -> str:
"""Reduce a possibly namespace-qualified class name to its last segment."""
if '\\' in name:
return name.rsplit('\\', 1)[-1]
return name

def _extract_calls_regex(self, code: str, caller_id: str) -> Set[str]:
"""Fallback regex-based call extraction for unparseable code."""
calls = set()
Expand Down
87 changes: 80 additions & 7 deletions libs/openant-core/parsers/php/function_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,50 @@ def _extract_functions_from_tree(self, tree, source: bytes, file_path: Path,
stack.append((child, new_trait_name, namespace_name))
continue

elif node.type == 'enum_declaration':
# PHP 8.1+ enums are class-like callable containers; register
# the enum as a class so its methods get qualified ids
# (Enum.method) and class context, mirroring trait/interface.
name_node = node.child_by_field_name('name')
new_enum_name = self._node_text(name_node, source) if name_node else None

if new_enum_name:
class_id = f"{relative_path}:{new_enum_name}"
methods = []
body_node = node.child_by_field_name('body')
if body_node is None:
for child in node.children:
if child.type == 'enum_declaration_list':
body_node = child
break

if body_node:
for child in body_node.children:
if child.type == 'method_declaration':
mname = self._get_function_name(child, source)
if mname:
if self._is_static_method(child, source):
methods.append(f"static:{mname}")
else:
methods.append(mname)

self.classes[class_id] = {
'name': new_enum_name,
'file_path': relative_path,
'start_line': node.start_point[0] + 1,
'end_line': node.end_point[0] + 1,
'methods': methods,
'superclass': None,
'interfaces': [],
'namespace_name': namespace_name,
}
self.stats['total_classes'] += 1

if body_node:
for child in reversed(body_node.children):
stack.append((child, new_enum_name, namespace_name))
continue

elif node.type == 'namespace_definition':
# Extract namespace name
name_node = node.child_by_field_name('name')
Expand All @@ -428,18 +472,38 @@ def _extract_functions_from_tree(self, tree, source: bytes, file_path: Path,
# Recurse into namespace body
body_node = node.child_by_field_name('body')
if body_node is None:
# Namespace without braces covers rest of file; recurse children directly
for child in reversed(node.children):
if child.type not in ('namespace', 'name', ';'):
stack.append((child, class_name, new_namespace_name))
# Braceless 'namespace App\Svc;': the declarations it covers
# are SIBLINGS of this node, not children -- they are pushed
# by the container's traversal (see _push_children, which
# carries the braceless namespace forward to siblings). The
# node's own children (namespace/name/;) have nothing to
# extract, so there is nothing to recurse here.
pass
else:
for child in reversed(body_node.children):
stack.append((child, class_name, new_namespace_name))
continue # Don't walk children again

else:
for child in reversed(node.children):
stack.append((child, class_name, namespace_name))
self._push_children(stack, node.children, source, class_name, namespace_name)

def _push_children(self, stack, children, source: bytes,
class_name: Optional[str], namespace_name: Optional[str]) -> None:
"""Push a node's children onto the traversal stack, honoring a braceless
``namespace App\\X;`` declaration: such a node has no body, and the
declarations it governs are its FOLLOWING SIBLINGS (until the next
namespace declaration). Propagate that namespace to those siblings."""
current_ns = namespace_name
ns_overrides = {}
for child in children:
if child.type == 'namespace_definition' and child.child_by_field_name('body') is None:
name_node = child.child_by_field_name('name')
if name_node is not None:
current_ns = self._node_text(name_node, source)
continue
ns_overrides[id(child)] = current_ns
for child in reversed(children):
stack.append((child, class_name, ns_overrides.get(id(child), namespace_name)))

def _process_function_node(self, node, source: bytes, relative_path: str,
class_name: Optional[str], namespace_name: Optional[str],
Expand All @@ -450,7 +514,16 @@ def _process_function_node(self, node, source: bytes, relative_path: str,
return

code = self._node_text(node, source)
start_line = node.start_point[0] + 1 # tree-sitter is 0-indexed
# tree-sitter is 0-indexed. The method_declaration node spans any
# leading PHP8 attribute_list (e.g. #[Route(...)]), so node.start_point
# would point at the attribute line. Anchor start_line at the actual
# declaration (first non-attribute child) instead.
decl_node = node
for child in node.children:
if child.type != 'attribute_list':
decl_node = child
break
start_line = decl_node.start_point[0] + 1
end_line = node.end_point[0] + 1
parameters = self._get_parameters(node, source)

Expand Down
10 changes: 8 additions & 2 deletions libs/openant-core/parsers/php/unit_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,11 @@ def create_unit(self, func_id: str, func_data: Dict) -> Dict:
file_path = func_data.get('file_path', '')
func_name = func_data.get('name', '')
class_name = func_data.get('class_name')
namespace = func_data.get('namespace')
# The extractor (function_extractor.py) writes the declared namespace
# under 'namespace_name'; read that canonical key so it reaches the
# unit instead of always being None (key-drift bug).
namespace = func_data.get('namespace_name')
is_static = func_data.get('is_static', False)
unit_type = func_data.get('unit_type', 'function')

# Get upstream dependencies (functions this calls)
Expand Down Expand Up @@ -239,6 +243,7 @@ def create_unit(self, func_id: str, func_data: Dict) -> Dict:
'metadata': {
'visibility': func_data.get('visibility', 'public'),
'namespace': namespace,
'is_static': is_static,
'parameters': func_data.get('parameters', []),
'generator': 'php_unit_generator.py',
'direct_calls': direct_calls,
Expand Down Expand Up @@ -307,7 +312,8 @@ def generate_analyzer_output(self) -> Dict:
'endLine': func_data.get('end_line', 0),
'visibility': func_data.get('visibility', 'public'),
'isExported': True, # PHP doesn't have explicit exports
'namespace': func_data.get('namespace'),
'namespace': func_data.get('namespace_name'),
'isStatic': func_data.get('is_static', False),
'parameters': func_data.get('parameters', []),
'className': func_data.get('class_name'),
}
Expand Down
Loading
Loading