From ab8485a3f2ee7bcdf8d677b365280b7538cae4d7 Mon Sep 17 00:00:00 2001 From: Minidoracat Date: Sun, 12 Apr 2026 23:56:06 +0800 Subject: [PATCH 1/4] =?UTF-8?q?feat(parser):=20comprehensive=20PHP/Laravel?= =?UTF-8?q?=20support=20=E2=80=94=20fix=20PHP=20infrastructure=20+=20add?= =?UTF-8?q?=20Laravel=20semantic=20edges?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PHP's core parsing infrastructure (CALLS, INHERITS, IMPORTS edges) was completely non-functional because `_get_call_name()` could not match tree-sitter-php's `name` node type, `_get_bases()` had no PHP branch, and `_extract_import()` fell through to a raw-text fallback. This commit fixes the PHP foundation and adds Laravel-specific semantic analysis on top: **Phase 1 — PHP infrastructure fix:** - `_get_call_name()`: add PHP-specific branches for all 4 call expression types (function_call, member_call, scoped_call, object_creation) - `_get_bases()`: add PHP branch for `base_clause` (extends) and `class_interface_clause` (implements) - `_extract_import()`: add PHP branch handling simple, grouped, and aliased `use` statements with proper AST traversal - `_CLASS_TYPES["php"]`: add `trait_declaration`, `enum_declaration` - `_CALL_TYPES["php"]`: add `scoped_call_expression`, `object_creation_expression` **Phase 2 — Entry points + Blade detection:** - `_LANG_ENTRY_NAME_PATTERNS`: language-scoped entry-point patterns so PHP-specific names (handle, boot, register, up, down) don't pollute other languages - `detect_language()`: handle `.blade.php` compound extension before the generic suffix lookup **Phase 3 — Laravel semantic edges:** - `_extract_php_constructs()`: detect Route definitions (`Route::get('/path', [Controller::class, 'method'])`) and emit CALLS edges to controller methods - Detect Eloquent relationships (`hasMany`, `belongsTo`, etc.) and emit REFERENCES edges to target models - `_php_class_from_class_access()`: correctly extract class names from both short (`Post::class`) and FQCN (`\App\Models\Post::class`) forms **Phase 4 — Blade templates + PSR-4:** - `_parse_blade()`: regex-based extraction of `@extends`, `@include`, `@component`, `@livewire` directives as IMPORTS_FROM/REFERENCES edges - `_find_php_composer_psr4()`: resolve PHP namespaces to file paths via `composer.json` autoload PSR-4 mappings with caching **Tested on real Laravel 9 and 12 projects:** - CALLS edges: 0 → 9,369 (Laravel 12 project), 4,962 → 35,771 (Laravel 9) - INHERITS edges: 0 → 481 / 0 → 346 - REFERENCES edges: 2 → 74 / 9 → 54 - Total edges: +226% / +266% 26 new tests covering all phases. 761 total tests pass, 0 regressions. --- code_review_graph/flows.py | 29 +- code_review_graph/parser.py | 497 +++++++++++++++++++++++++++++- tests/fixtures/sample.blade.php | 19 ++ tests/fixtures/sample.php | 35 +++ tests/fixtures/sample_laravel.php | 84 +++++ tests/test_flows.py | 47 +++ tests/test_multilang.py | 181 ++++++++++- 7 files changed, 885 insertions(+), 7 deletions(-) create mode 100644 tests/fixtures/sample.blade.php create mode 100644 tests/fixtures/sample_laravel.php diff --git a/code_review_graph/flows.py b/code_review_graph/flows.py index 193171e4..f8486b21 100644 --- a/code_review_graph/flows.py +++ b/code_review_graph/flows.py @@ -107,8 +107,21 @@ r"^(componentDidMount|componentDidUpdate|componentWillUnmount" r"|shouldComponentUpdate|render)$" ), + # PHP magic invokable class entry point + re.compile(r"^__invoke$"), ] +# Language-specific entry-point name patterns. Checked only when the +# node's language matches the key. This avoids polluting all 19+ +# languages with framework-specific names like ``handle`` or ``boot``. +_LANG_ENTRY_NAME_PATTERNS: dict[str, list[re.Pattern[str]]] = { + "php": [ + re.compile(r"^(register|boot)$"), # Service Provider + re.compile(r"^handle$"), # Command / Middleware / Job + re.compile(r"^(up|down)$"), # Migration + ], +} + # --------------------------------------------------------------------------- # Entry-point detection @@ -129,11 +142,21 @@ def _has_framework_decorator(node: GraphNode) -> bool: return False -def _matches_entry_name(node: GraphNode) -> bool: - """Return True if *node*'s name matches a conventional entry-point pattern.""" +def _matches_entry_name( + node: GraphNode, language: Optional[str] = None, +) -> bool: + """Return True if *node*'s name matches a conventional entry-point pattern. + + When *language* is provided, language-specific patterns from + ``_LANG_ENTRY_NAME_PATTERNS`` are also checked. + """ for pat in _ENTRY_NAME_PATTERNS: if pat.search(node.name): return True + if language: + for pat in _LANG_ENTRY_NAME_PATTERNS.get(language, []): + if pat.search(node.name): + return True return False @@ -185,7 +208,7 @@ def detect_entry_points( is_entry = True # Conventional name match. - if _matches_entry_name(node): + if _matches_entry_name(node, language=node.language): is_entry = True if is_entry and node.qualified_name not in seen_qn: diff --git a/code_review_graph/parser.py b/code_review_graph/parser.py index 31af17f7..d39f533a 100644 --- a/code_review_graph/parser.py +++ b/code_review_graph/parser.py @@ -140,7 +140,10 @@ class EdgeInfo: "perl": ["package_statement", "class_statement", "role_statement"], "kotlin": ["class_declaration", "object_declaration"], "swift": ["class_declaration", "struct_declaration", "protocol_declaration"], - "php": ["class_declaration", "interface_declaration"], + "php": [ + "class_declaration", "interface_declaration", + "trait_declaration", "enum_declaration", + ], "scala": [ "class_definition", "trait_definition", "object_definition", "enum_definition", ], @@ -275,7 +278,10 @@ class EdgeInfo: ], "kotlin": ["call_expression"], "swift": ["call_expression"], - "php": ["function_call_expression", "member_call_expression"], + "php": [ + "function_call_expression", "member_call_expression", + "scoped_call_expression", "object_creation_expression", + ], "scala": ["call_expression", "instance_expression", "generic_function"], "solidity": ["call_expression"], "lua": ["function_call"], @@ -371,6 +377,8 @@ def __init__(self) -> None: self._tsconfig_resolver = TsconfigResolver() # Per-parse cache of Dart pubspec root lookups; see #87 self._dart_pubspec_cache: dict[tuple[str, str], Optional[Path]] = {} + # Per-parse cache of PHP composer.json PSR-4 mappings + self._php_composer_cache: dict[str, Optional[dict[str, str]]] = {} def _get_parser(self, language: str): # type: ignore[arg-type] if language not in self._parsers: @@ -383,6 +391,10 @@ def _get_parser(self, language: str): # type: ignore[arg-type] return self._parsers[language] def detect_language(self, path: Path) -> Optional[str]: + # Blade templates use compound extension (.blade.php); Path.suffix + # only returns the last part (.php), so check the full name first. + if path.name.endswith(".blade.php"): + return "blade" return EXTENSION_TO_LANGUAGE.get(path.suffix.lower()) def parse_file(self, path: Path) -> tuple[list[NodeInfo], list[EdgeInfo]]: @@ -403,6 +415,10 @@ def parse_bytes(self, path: Path, source: bytes) -> tuple[list[NodeInfo], list[E if not language: return [], [] + # Blade templates: regex-based extraction (no tree-sitter grammar) + if language == "blade": + return self._parse_blade(path, source) + # Vue SFCs: parse with vue parser, then delegate script blocks to JS/TS if language == "vue": return self._parse_vue(path, source) @@ -476,6 +492,51 @@ def parse_bytes(self, path: Path, source: bytes) -> tuple[list[NodeInfo], list[E return nodes, edges + # Blade directive patterns for extracting template references. + _BLADE_DIRECTIVE_RE = re.compile( + r"""@(extends|include|component|livewire)\s*\(\s*['"]([^'"]+)['"]\s*\)""", + ) + + def _parse_blade( + self, path: Path, source: bytes, + ) -> tuple[list[NodeInfo], list[EdgeInfo]]: + """Parse a Blade template using regex (no tree-sitter grammar). + + Extracts ``@extends``, ``@include``, ``@component``, and + ``@livewire`` directives as IMPORTS_FROM / REFERENCES edges. + """ + file_path = str(path) + text = source.decode("utf-8", errors="replace") + + nodes: list[NodeInfo] = [ + NodeInfo( + kind="File", + name=path.name, + file_path=file_path, + line_start=1, + line_end=text.count("\n") + 1, + language="blade", + ), + ] + edges: list[EdgeInfo] = [] + + for match in self._BLADE_DIRECTIVE_RE.finditer(text): + directive = match.group(1) + target_dotpath = match.group(2) + line = text[:match.start()].count("\n") + 1 + + # @livewire produces REFERENCES; others produce IMPORTS_FROM + kind = "REFERENCES" if directive == "livewire" else "IMPORTS_FROM" + edges.append(EdgeInfo( + kind=kind, + source=file_path, + target=target_dotpath, + file_path=file_path, + line=line, + )) + + return nodes, edges + def _parse_vue( self, path: Path, source: bytes, ) -> tuple[list[NodeInfo], list[EdgeInfo]]: @@ -1211,6 +1272,20 @@ def _extract_from_tree( ) continue + # --- PHP Laravel-specific constructs --- + # Route definitions and Eloquent relationships need semantic + # edges beyond the generic CALLS edge. When matched, produces + # both the standard CALLS edge and extra semantic edges, then + # returns True so the generic path is skipped. + if language == "php" and node_type in ( + "scoped_call_expression", "member_call_expression", + ): + if self._extract_php_constructs( + child, source, file_path, edges, + enclosing_class, enclosing_func, + ): + continue + # --- Calls --- if node_type in call_types: if self._extract_calls( @@ -2686,6 +2761,237 @@ def _ref_from_arguments( line=ch.start_point[0] + 1, ) + # ------------------------------------------------------------------ + # PHP / Laravel semantic constructs + # ------------------------------------------------------------------ + + _ELOQUENT_RELATIONS = frozenset({ + "hasMany", "hasOne", "belongsTo", "belongsToMany", + "morphTo", "morphMany", "morphOne", "morphToMany", + "morphedByMany", "hasManyThrough", "hasOneThrough", + }) + + _ROUTE_VERBS = frozenset({ + "get", "post", "put", "patch", "delete", "options", + "any", "match", "resource", "apiResource", + }) + + @staticmethod + def _php_class_from_class_access(node) -> Optional[str]: + """Extract the class name from a ``class_constant_access_expression``. + + Handles both short names (``Post::class`` → ``Post``) and fully + qualified names (``\\App\\Models\\Post::class`` → ``Post``). + The literal ``class`` keyword child is skipped. + """ + for child in node.children: + if child.type == "qualified_name": + # FQCN: extract last segment + text = child.text.decode("utf-8", errors="replace") + return text.rsplit("\\", 1)[-1] + if child.type == "name": + text = child.text.decode("utf-8", errors="replace") + if text != "class": + return text + return None + + def _extract_php_constructs( + self, + node, + source: bytes, + file_path: str, + edges: list[EdgeInfo], + enclosing_class: Optional[str], + enclosing_func: Optional[str], + ) -> bool: + """Handle Laravel-specific PHP patterns. + + Returns True if the node was fully handled (caller should + ``continue``); False to let the generic CALLS path proceed. + + Patterns handled: + - Route::get('/path', [Controller::class, 'method']) — produces + CALLS edge to Controller.method + - $this->hasMany(Post::class) — produces REFERENCES edge to Post + """ + # --- Route definitions --- + # scoped_call_expression: Route::get('/path', [...]) + if node.type == "scoped_call_expression": + names = [c for c in node.children if c.type == "name"] + if len(names) >= 2: + scope = names[0].text.decode("utf-8", errors="replace") + method = names[1].text.decode("utf-8", errors="replace") + if scope == "Route" and method in self._ROUTE_VERBS: + self._extract_laravel_route( + node, source, file_path, edges, + enclosing_class, enclosing_func, + scope, method, + ) + return True + return False + + # --- Eloquent relationships --- + # member_call_expression: $this->hasMany(Post::class) + if node.type == "member_call_expression": + for child in reversed(node.children): + if child.type == "name": + method_name = child.text.decode( + "utf-8", errors="replace" + ) + if method_name in self._ELOQUENT_RELATIONS: + self._extract_eloquent_relation( + node, source, file_path, edges, + enclosing_class, enclosing_func, + method_name, + ) + return True + break + return False + + return False + + def _extract_laravel_route( + self, + node, + source: bytes, + file_path: str, + edges: list[EdgeInfo], + enclosing_class: Optional[str], + enclosing_func: Optional[str], + scope: str, + method: str, + ) -> None: + """Extract CALLS edge from Route::verb to controller method.""" + caller = self._qualify( + enclosing_func or enclosing_class, file_path, + enclosing_class if enclosing_func else None, + ) if (enclosing_func or enclosing_class) else file_path + + # Emit generic CALLS edge to Route::verb + edges.append(EdgeInfo( + kind="CALLS", + source=caller, + target=f"{scope}.{method}", + file_path=file_path, + line=node.start_point[0] + 1, + )) + + # Try to extract [Controller::class, 'method'] from arguments + for child in node.children: + if child.type == "arguments": + self._extract_route_controller_target( + child, file_path, edges, caller, + node.start_point[0] + 1, + ) + break + + def _extract_route_controller_target( + self, + args_node, + file_path: str, + edges: list[EdgeInfo], + caller: str, + line: int, + ) -> None: + """Parse [Controller::class, 'method'] array in route arguments.""" + for child in args_node.children: + if child.type == "argument": + for sub in child.children: + if sub.type == "array_creation_expression": + self._parse_route_array( + sub, file_path, edges, caller, line, + ) + + def _parse_route_array( + self, array_node, file_path, edges, caller, line, + ) -> None: + """Extract controller::class + 'method' from array literal.""" + class_name = None + method_name = None + for child in array_node.children: + if child.type == "array_element_initializer": + for sub in child.children: + if sub.type == "class_constant_access_expression": + class_name = ( + self._php_class_from_class_access(sub) + or class_name + ) + if sub.type in ("string", "encapsed_string"): + txt = sub.text.decode( + "utf-8", errors="replace" + ).strip("'\"") + if txt: + method_name = txt + # Also handle direct children (no array_element_initializer) + if child.type == "class_constant_access_expression": + class_name = ( + self._php_class_from_class_access(child) + or class_name + ) + if child.type in ("string", "encapsed_string"): + txt = child.text.decode( + "utf-8", errors="replace" + ).strip("'\"") + if txt: + method_name = txt + + if class_name and method_name: + target = f"{class_name}.{method_name}" + edges.append(EdgeInfo( + kind="CALLS", + source=caller, + target=target, + file_path=file_path, + line=line, + )) + + def _extract_eloquent_relation( + self, + node, + source: bytes, + file_path: str, + edges: list[EdgeInfo], + enclosing_class: Optional[str], + enclosing_func: Optional[str], + method_name: str, + ) -> None: + """Extract REFERENCES edge from Eloquent relationship call.""" + caller = self._qualify( + enclosing_func or enclosing_class, file_path, + enclosing_class if enclosing_func else None, + ) if (enclosing_func or enclosing_class) else file_path + + # Emit generic CALLS edge for the relationship method + edges.append(EdgeInfo( + kind="CALLS", + source=caller, + target=method_name, + file_path=file_path, + line=node.start_point[0] + 1, + )) + + # Extract the target model from ::class argument + for child in node.children: + if child.type == "arguments": + for arg in child.children: + if arg.type == "argument": + for sub in arg.children: + if sub.type == ( + "class_constant_access_expression" + ): + model = self._php_class_from_class_access( + sub, + ) + if model: + edges.append(EdgeInfo( + kind="REFERENCES", + source=caller, + target=model, + file_path=file_path, + line=node.start_point[0] + 1, + )) + return + def _extract_solidity_constructs( self, child, @@ -3108,6 +3414,28 @@ def _do_resolve_module( # ``dart:core`` / ``dart:async`` etc. are SDK libraries we do # not track; fall through to return None. + elif language == "php": + # PSR-4: resolve namespace to file via composer.json autoload. + # e.g. ``App\Models\User`` -> ``app/Models/User.php`` + psr4 = self._find_php_composer_psr4(caller_dir) + if psr4: + for prefix, base_dir in psr4.items(): + ns_prefix = prefix.rstrip("\\") + if module == ns_prefix or module.startswith( + ns_prefix + "\\" + ): + relative = module[len(ns_prefix):].lstrip("\\") + rel_path = relative.replace("\\", "/") + ".php" + target = Path(base_dir) / rel_path + try: + if target.is_file(): + return str(target.resolve()) + except (OSError, ValueError) as exc: + logger.debug( + "PSR-4 resolve failed for %s -> %s: %s", + module, target, exc, + ) + return None def _find_dart_pubspec_root( @@ -3141,6 +3469,56 @@ def _find_dart_pubspec_root( self._dart_pubspec_cache[cache_key] = None return None + def _find_php_composer_psr4( + self, start: Path, + ) -> Optional[dict[str, str]]: + """Walk up from *start* to find ``composer.json`` and parse its + ``autoload.psr-4`` (and ``autoload-dev.psr-4``) mappings. + + Returns a dict mapping namespace prefix to absolute directory path, + or None if no composer.json is found. Results are cached per + directory so repeated lookups are cheap. + """ + cache_key = str(start) + if cache_key in self._php_composer_cache: + return self._php_composer_cache[cache_key] + + current = start + for _ in range(20): + composer = current / "composer.json" + if composer.is_file(): + try: + data = json.loads( + composer.read_text(encoding="utf-8", errors="replace") + ) + except (OSError, json.JSONDecodeError) as exc: + logger.warning( + "Failed to parse %s: %s", composer, exc, + ) + self._php_composer_cache[cache_key] = None + return None + + mappings: dict[str, str] = {} + for section in ("autoload", "autoload-dev"): + psr4 = data.get(section, {}).get("psr-4", {}) + for prefix, rel_dir in psr4.items(): + if isinstance(rel_dir, list): + rel_dir = rel_dir[0] if rel_dir else "" + if not isinstance(rel_dir, str): + continue + abs_dir = str((current / rel_dir).resolve()) + mappings[prefix] = abs_dir + + self._php_composer_cache[cache_key] = mappings or None + return mappings or None + + if current.parent == current: + break + current = current.parent + + self._php_composer_cache[cache_key] = None + return None + def _resolve_call_target( self, call_name: str, @@ -3537,6 +3915,32 @@ def _get_bases(self, node, language: str, source: bytes) -> list[str]: ident.text.decode("utf-8", errors="replace") ) break + elif language == "php": + # class Foo extends Bar implements Baz, Qux { ... } + # AST: base_clause contains [extends, name], class_interface_clause + # contains [implements, name, ...]. tree-sitter-php uses `name` + # (not `type_identifier`) for class/interface references. + for child in node.children: + if child.type == "base_clause": + for sub in child.children: + if sub.type == "name": + bases.append( + sub.text.decode("utf-8", errors="replace") + ) + elif sub.type == "qualified_name": + bases.append( + sub.text.decode("utf-8", errors="replace") + ) + elif child.type == "class_interface_clause": + for sub in child.children: + if sub.type == "name": + bases.append( + sub.text.decode("utf-8", errors="replace") + ) + elif sub.type == "qualified_name": + bases.append( + sub.text.decode("utf-8", errors="replace") + ) return bases def _extract_import(self, node, language: str, source: bytes) -> list[str]: @@ -3650,6 +4054,58 @@ def _find_string_literal(n) -> Optional[str]: val = _find_string_literal(node) if val: imports.append(val) + elif language == "php": + # PHP namespace use declarations have three forms: + # 1. Simple: use App\Models\User; + # AST: namespace_use_declaration > namespace_use_clause > + # qualified_name + # 2. Grouped: use App\Models\{User, Post}; + # AST: namespace_use_declaration > namespace_name + + # namespace_use_group > { namespace_use_clause* } + # 3. Alias: use App\Models\User as BaseUser; + # AST: same as simple but with alias clause + prefix = "" + group_found = False + for child in node.children: + if child.type == "namespace_name": + prefix = child.text.decode( + "utf-8", errors="replace" + ).rstrip("\\") + elif child.type == "namespace_use_group": + group_found = True + for sub in child.children: + if sub.type == "namespace_use_clause": + name = sub.children[0] if sub.children else None + if name is not None and name.type in ( + "qualified_name", "name", + ): + val = name.text.decode( + "utf-8", errors="replace" + ) + if prefix: + imports.append(f"{prefix}\\{val}") + else: + imports.append(val) + elif child.type == "namespace_use_clause": + qn = None + for sub in child.children: + if sub.type == "qualified_name": + qn = sub.text.decode( + "utf-8", errors="replace" + ) + break + if sub.type == "name": + qn = sub.text.decode( + "utf-8", errors="replace" + ) + break + if qn: + imports.append(qn) + if not imports and not group_found: + # Last-resort fallback: strip `use` keyword and semicolons + cleaned = text.removeprefix("use").strip().rstrip(";").strip() + if cleaned: + imports.append(cleaned) else: # Fallback: just record the text imports.append(text) @@ -3708,6 +4164,43 @@ def _get_call_name(self, node, language: str, source: bytes) -> Optional[str]: return child.text.decode("utf-8", errors="replace") return None # method child not found + # PHP-specific call handling: tree-sitter-php uses `name` as the + # node type for identifiers (not `identifier`), and each call + # expression type has a distinct child layout. + if language == "php": + if node.type == "scoped_call_expression": + # Class::method(args) — children: [name, ::, name, arguments] + # Use dot notation (Class.method) to match the graph's + # qualified name format (file.php::Class.method). + names = [c for c in node.children if c.type == "name"] + if len(names) >= 2: + cls = names[0].text.decode("utf-8", errors="replace") + method = names[1].text.decode("utf-8", errors="replace") + return f"{cls}.{method}" + if names: + return names[0].text.decode("utf-8", errors="replace") + return None + if node.type == "object_creation_expression": + # new ClassName(args) — children: [new, name, arguments] + for child in node.children: + if child.type == "name": + return child.text.decode("utf-8", errors="replace") + if child.type == "qualified_name": + return child.text.decode("utf-8", errors="replace") + return None + if node.type == "member_call_expression": + # $obj->method(args) — children: [variable_name, ->, name, arguments] + for child in reversed(node.children): + if child.type == "name": + return child.text.decode("utf-8", errors="replace") + return None + # function_call_expression: func(args) — children: [name, arguments] + if first.type == "name": + return first.text.decode("utf-8", errors="replace") + if first.type == "qualified_name": + return first.text.decode("utf-8", errors="replace") + return None + # Simple call: func_name(args) # Kotlin uses "simple_identifier" instead of "identifier". if first.type in ("identifier", "simple_identifier"): diff --git a/tests/fixtures/sample.blade.php b/tests/fixtures/sample.blade.php new file mode 100644 index 00000000..ba701722 --- /dev/null +++ b/tests/fixtures/sample.blade.php @@ -0,0 +1,19 @@ +@extends('layouts.app') + +@section('title', 'Home') + +@section('content') +
+

Welcome

+ + @include('partials.header') + + @component('components.alert') + Attention! Something important. + @endcomponent + + @include('partials.footer') + + @livewire('components.counter') +
+@endsection diff --git a/tests/fixtures/sample.php b/tests/fixtures/sample.php index 724b6faf..645da367 100644 --- a/tests/fixtures/sample.php +++ b/tests/fixtures/sample.php @@ -3,6 +3,26 @@ namespace App\Models; use Exception; +use App\Contracts\{Loggable, Cacheable}; +use App\Services\UserService as Service; + +trait Timestampable { + public function getCreatedAt(): string { + return $this->created_at; + } +} + +enum Status: string { + case Active = 'active'; + case Inactive = 'inactive'; + + public function label(): string { + return match($this) { + self::Active => 'Active', + self::Inactive => 'Inactive', + }; + } +} interface Repository { public function findById(int $id): ?User; @@ -10,6 +30,8 @@ public function save(User $user): void; } class User { + use Timestampable; + public int $id; public string $name; @@ -21,6 +43,10 @@ public function __construct(int $id, string $name) { public function toString(): string { return "User({$this->id}, {$this->name})"; } + + public static function find(int $id): ?self { + return null; + } } class InMemoryRepo implements Repository { @@ -36,6 +62,15 @@ public function save(User $user): void { } } +class UserController extends Controller implements Loggable { + public function index(): array { + $users = User::find(1); + $repo = new InMemoryRepo(); + $repo->save(new User(1, 'Alice')); + return []; + } +} + function createUser(Repository $repo, string $name): User { $user = new User(count($repo->users ?? []) + 1, $name); $repo->save($user); diff --git a/tests/fixtures/sample_laravel.php b/tests/fixtures/sample_laravel.php new file mode 100644 index 00000000..691e39a7 --- /dev/null +++ b/tests/fixtures/sample_laravel.php @@ -0,0 +1,84 @@ +toArray(); + } + + public function show(int $id): ?User + { + return User::find($id); + } +} + +// --- Route definitions --- + +Route::get('/users', [UserController::class, 'index']); +Route::post('/users', [UserController::class, 'store']); +Route::get('/users/{id}', [UserController::class, 'show']); + +// --- Eloquent Model with relationships --- + +namespace App\Models; + +use Illuminate\Database\Eloquent\Model; +use Illuminate\Database\Eloquent\Relations\HasMany; +use Illuminate\Database\Eloquent\Relations\BelongsTo; + +class User extends Model +{ + public function posts(): HasMany + { + return $this->hasMany(Post::class); + } + + public function comments() + { + return $this->hasMany(Comment::class); + } +} + +class Post extends Model +{ + public function user(): BelongsTo + { + return $this->belongsTo(User::class); + } + + public function tags() + { + return $this->belongsToMany(Tag::class); + } +} + +// --- Service Provider --- + +class AppServiceProvider extends ServiceProvider +{ + public function register(): void + { + // service bindings + } + + public function boot(): void + { + // bootstrapping + } +} + +// --- Artisan Command --- + +class SendEmails extends Command +{ + public function handle(): void + { + // command logic + } +} diff --git a/tests/test_flows.py b/tests/test_flows.py index 34cfd05d..f8f9b964 100644 --- a/tests/test_flows.py +++ b/tests/test_flows.py @@ -109,6 +109,53 @@ def test_detect_entry_points_name_pattern(self): assert "handle_request" in ep_names assert "regular_func" not in ep_names + def test_php_entry_point_patterns(self): + """PHP-specific names (handle, boot, register, up, down) are entry + points only when the node's language is 'php'.""" + # PHP nodes — should be entry points + for name in ("handle", "boot", "register", "up", "down"): + node = NodeInfo( + kind="Function", name=name, file_path="app.php", + line_start=1, line_end=5, language="php", + extra={}, + ) + self.store.upsert_node(node, file_hash="ph") + # Same names but Python — should NOT be entry points via lang patterns + for name in ("handle", "boot", "register", "up"): + node = NodeInfo( + kind="Function", name=name, file_path="app.py", + line_start=1, line_end=5, language="python", + extra={}, + ) + self.store.upsert_node(node, file_hash="py") + self.store.commit() + + # Make all nodes called so they aren't roots by default + for name in ("handle", "boot", "register", "up", "down"): + self._add_call("app.php::caller", f"app.php::{name}", "app.php") + for name in ("handle", "boot", "register", "up"): + self._add_call("app.py::caller", f"app.py::{name}") + + eps = detect_entry_points(self.store) + ep_names_php = { + ep.name for ep in eps if ep.language == "php" + } + ep_names_py = { + ep.name for ep in eps + if ep.language == "python" and ep.name in ( + "handle", "boot", "register", "up", + ) + } + # PHP names match via _LANG_ENTRY_NAME_PATTERNS + assert "handle" in ep_names_php + assert "boot" in ep_names_php + assert "register" in ep_names_php + assert "up" in ep_names_php + # Python versions should NOT match (they are called, so not roots, + # and "handle"/"boot" etc. don't match the universal patterns). + assert "handle" not in ep_names_py + assert "boot" not in ep_names_py + # --------------------------------------------------------------- # detect_entry_points -- expanded decorator patterns # --------------------------------------------------------------- diff --git a/tests/test_multilang.py b/tests/test_multilang.py index 1264dc9c..00bc98d8 100644 --- a/tests/test_multilang.py +++ b/tests/test_multilang.py @@ -261,12 +261,189 @@ def test_detects_language(self): def test_finds_classes(self): classes = [n for n in self.nodes if n.kind == "Class"] names = {c.name for c in classes} - assert "User" in names or "InMemoryRepo" in names + assert "User" in names + assert "InMemoryRepo" in names + assert "UserController" in names def test_finds_functions(self): funcs = [n for n in self.nodes if n.kind == "Function"] names = {f.name for f in funcs} - assert len(names) > 0 + assert "createUser" in names + + def test_finds_traits(self): + classes = [n for n in self.nodes if n.kind == "Class"] + names = {c.name for c in classes} + assert "Timestampable" in names + + def test_finds_enums(self): + classes = [n for n in self.nodes if n.kind == "Class"] + names = {c.name for c in classes} + assert "Status" in names + + def test_finds_calls(self): + calls = [e for e in self.edges if e.kind == "CALLS"] + targets = {e.target for e in calls} + # member_call_expression: $repo->save(...) + assert any("save" in t for t in targets), f"No 'save' call in {targets}" + + def test_finds_static_calls(self): + calls = [e for e in self.edges if e.kind == "CALLS"] + targets = {e.target for e in calls} + # scoped_call_expression uses dot notation: User.find + assert any("User.find" in t for t in targets), ( + f"No static call to 'User.find' in {targets}" + ) + + def test_finds_new_expression(self): + calls = [e for e in self.edges if e.kind == "CALLS"] + targets = {e.target for e in calls} + assert any("InMemoryRepo" in t for t in targets), ( + f"No 'new InMemoryRepo()' call in {targets}" + ) + + def test_finds_inheritance(self): + inherits = [e for e in self.edges if e.kind in ("INHERITS", "IMPLEMENTS")] + targets = {e.target for e in inherits} + sources = {e.source for e in inherits} + # InMemoryRepo implements Repository + assert any("Repository" in t for t in targets), ( + f"No INHERITS/IMPLEMENTS to Repository in {inherits}" + ) + # UserController extends Controller + assert any("Controller" in t for t in targets), ( + f"No INHERITS to Controller in {inherits}" + ) + + def test_finds_imports(self): + imp_edges = [e for e in self.edges if e.kind == "IMPORTS_FROM"] + targets = {e.target for e in imp_edges} + assert any("Exception" in t for t in targets), ( + f"No import of Exception in {targets}" + ) + # Should not be raw "use Exception;" text + assert not any(t.startswith("use ") for t in targets), ( + f"Import target is raw text: {targets}" + ) + + def test_finds_grouped_imports(self): + imp_edges = [e for e in self.edges if e.kind == "IMPORTS_FROM"] + targets = {e.target for e in imp_edges} + assert any("Loggable" in t for t in targets), ( + f"No grouped import of Loggable in {targets}" + ) + assert any("Cacheable" in t for t in targets), ( + f"No grouped import of Cacheable in {targets}" + ) + + def test_finds_alias_import(self): + imp_edges = [e for e in self.edges if e.kind == "IMPORTS_FROM"] + targets = {e.target for e in imp_edges} + assert any("UserService" in t for t in targets), ( + f"No alias import of UserService in {targets}" + ) + + def test_blade_detected_as_blade(self): # Phase 2 + assert self.parser.detect_language(Path("home.blade.php")) == "blade" + + def test_regular_php_not_affected_by_blade(self): + assert self.parser.detect_language(Path("index.php")) == "php" + + +class TestLaravelParsing: + """Phase 3: Laravel-specific semantic tests.""" + + def setup_method(self): + self.parser = CodeParser() + self.nodes, self.edges = self.parser.parse_file( + FIXTURES / "sample_laravel.php" + ) + + def test_route_creates_calls_to_controller(self): + calls = [e for e in self.edges if e.kind == "CALLS"] + targets = {e.target for e in calls} + assert any( + "UserController.index" in t for t in targets + ), f"No Route->Controller CALLS edge in {targets}" + + def test_eloquent_relation_creates_references(self): + refs = [e for e in self.edges if e.kind == "REFERENCES"] + targets = {e.target for e in refs} + assert any( + "Post" in t for t in targets + ), f"No Eloquent REFERENCES to Post in {targets}" + + def test_belongs_to_creates_references(self): + refs = [e for e in self.edges if e.kind == "REFERENCES"] + targets = {e.target for e in refs} + assert any( + "User" in t for t in targets + ), f"No belongsTo REFERENCES to User in {targets}" + + def test_belongs_to_many_creates_references(self): + refs = [e for e in self.edges if e.kind == "REFERENCES"] + targets = {e.target for e in refs} + assert any( + "Tag" in t for t in targets + ), f"No belongsToMany REFERENCES to Tag in {targets}" + + def test_route_also_has_generic_calls(self): + calls = [e for e in self.edges if e.kind == "CALLS"] + targets = {e.target for e in calls} + assert any( + "Route.get" in t for t in targets + ), f"No generic Route.get CALLS edge in {targets}" + + +class TestBladeParsing: + """Phase 4: Blade template tests.""" + + def setup_method(self): + self.parser = CodeParser() + self.nodes, self.edges = self.parser.parse_file( + FIXTURES / "sample.blade.php" + ) + + def test_blade_creates_file_node(self): + file_nodes = [n for n in self.nodes if n.kind == "File"] + assert len(file_nodes) == 1 + assert file_nodes[0].language == "blade" + + def test_blade_extends_creates_import(self): + imp_edges = [e for e in self.edges if e.kind == "IMPORTS_FROM"] + targets = {e.target for e in imp_edges} + assert "layouts.app" in targets, ( + f"No @extends import in {targets}" + ) + + def test_blade_include_creates_import(self): + imp_edges = [e for e in self.edges if e.kind == "IMPORTS_FROM"] + targets = {e.target for e in imp_edges} + assert "partials.header" in targets, ( + f"No @include import in {targets}" + ) + assert "partials.footer" in targets, ( + f"No @include footer import in {targets}" + ) + + def test_blade_component_creates_import(self): + imp_edges = [e for e in self.edges if e.kind == "IMPORTS_FROM"] + targets = {e.target for e in imp_edges} + assert "components.alert" in targets, ( + f"No @component import in {targets}" + ) + + def test_blade_livewire_creates_references(self): + refs = [e for e in self.edges if e.kind == "REFERENCES"] + targets = {e.target for e in refs} + assert "components.counter" in targets, ( + f"No @livewire REFERENCES in {targets}" + ) + + def test_regular_php_not_affected(self): + parser = CodeParser() + nodes, edges = parser.parse_file(FIXTURES / "sample.php") + classes = [n for n in nodes if n.kind == "Class"] + assert any(c.name == "User" for c in classes) class TestKotlinParsing: From 6987c02b631d167c1bb50170cace514534e8ab99 Mon Sep 17 00:00:00 2001 From: Minidoracat Date: Mon, 13 Apr 2026 00:08:21 +0800 Subject: [PATCH 2/4] docs: update README with PHP/Laravel framework-aware parsing Update limitations section to reflect PHP/Laravel entry-point detection and add framework-aware parsing row to the features table. --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index aa389d12..1735b044 100644 --- a/README.md +++ b/README.md @@ -181,7 +181,7 @@ The blast-radius analysis never misses an actually impacted file (perfect recall - **Small single-file changes:** Graph context can exceed naive file reads for trivial edits (see express results above). The overhead is the structural metadata that enables multi-file analysis. - **Search quality (MRR 0.35):** Keyword search finds the right result in the top-4 for most queries, but ranking needs improvement. Express queries return 0 hits due to module-pattern naming. -- **Flow detection (33% recall):** Only reliably detects entry points in Python repos (fastapi, httpx) where framework patterns are recognized. JavaScript and Go flow detection needs work. +- **Flow detection (33% recall):** Reliably detects entry points in Python repos (fastapi, httpx) and PHP/Laravel (controllers, commands, middleware, migrations). JavaScript and Go flow detection needs work. - **Precision vs recall trade-off:** Impact analysis is deliberately conservative. It flags files that *might* be affected, which means some false positives in large dependency graphs. @@ -209,6 +209,7 @@ The blast-radius analysis never misses an actually impacted file (perfect recall | **Token benchmarking** | Measure naive full-corpus tokens vs graph query tokens with per-question ratios | | **Memory loop** | Persist Q&A results as markdown for re-ingestion, so the graph grows from queries | | **Community auto-split** | Oversized communities (>25% of graph) are recursively split via Leiden | +| **Framework-aware parsing** | Laravel Route→Controller mapping, Eloquent relationship edges, Blade template references, PSR-4 namespace resolution. Python decorator detection for Flask/FastAPI/Django/Celery. | | **Execution flows** | Trace call chains from entry points, sorted by weighted criticality | | **Community detection** | Cluster related code via Leiden algorithm with resolution scaling for large graphs | | **Architecture overview** | Auto-generated architecture map with coupling warnings | From a3d6d5f39cc3bef4b8ff75c287d71eda774a61b9 Mon Sep 17 00:00:00 2001 From: Minidoracat Date: Tue, 14 Apr 2026 23:04:00 +0800 Subject: [PATCH 3/4] docs: add Framework-aware parsing row to all language READMEs Sync zh-CN, ja-JP, ko-KR, hi-IN with the Framework-aware parsing feature row added to the English README in the previous commit. --- README.hi-IN.md | 1 + README.ja-JP.md | 1 + README.ko-KR.md | 1 + README.zh-CN.md | 1 + 4 files changed, 4 insertions(+) diff --git a/README.hi-IN.md b/README.hi-IN.md index 3be93e6b..a3d18e56 100644 --- a/README.hi-IN.md +++ b/README.hi-IN.md @@ -146,6 +146,7 @@ Build the code review graph for this project | **टोकन बेंचमार्किंग** | प्रति-प्रश्न अनुपात के साथ नैव फ़ुल-कॉर्पस टोकन बनाम ग्राफ क्वेरी टोकन मापें | | **मेमोरी लूप** | री-इन्जेशन के लिए Q&A परिणामों को मार्कडाउन के रूप में सहेजें, ताकि ग्राफ क्वेरीज़ से बढ़े | | **कम्युनिटी ऑटो-स्प्लिट** | बड़ी कम्युनिटीज़ (ग्राफ का >25%) को Leiden के ज़रिए पुनरावर्ती रूप से विभाजित किया जाता है | +| **फ्रेमवर्क-अवेयर पार्सिंग** | Laravel Route→Controller मैपिंग, Eloquent रिलेशनशिप एज, Blade टेम्प्लेट संदर्भ, PSR-4 नेमस्पेस रिज़ॉल्यूशन। Flask/FastAPI/Django/Celery के लिए Python डेकोरेटर डिटेक्शन। | | **एक्ज़ीक्यूशन फ़्लोज़** | भारित क्रिटिकैलिटी के अनुसार क्रमबद्ध, एंट्री पॉइंट्स से कॉल चेन ट्रेस करें | | **कम्युनिटी डिटेक्शन** | बड़े ग्राफ़ के लिए रेज़ोल्यूशन स्केलिंग के साथ Leiden एल्गोरिदम से संबंधित कोड क्लस्टर करें | | **आर्किटेक्चर ओवरव्यू** | कपलिंग चेतावनियों के साथ स्वतः-जनित आर्किटेक्चर मैप | diff --git a/README.ja-JP.md b/README.ja-JP.md index c7946176..901daeb1 100644 --- a/README.ja-JP.md +++ b/README.ja-JP.md @@ -148,6 +148,7 @@ gitコミットやファイル保存のたびにフックが起動します。 | **トークンベンチマーク** | ナイーブな全ファイル読み込みとグラフクエリのトークン数を質問ごとに比較 | | **メモリループ** | Q&A結果をMarkdownとして保存し再取り込み。クエリからグラフが成長 | | **コミュニティ自動分割** | グラフの25%を超えるコミュニティはLeidenアルゴリズムで再帰的に分割 | +| **フレームワーク対応パース** | Laravel Route→Controller マッピング、Eloquent リレーションエッジ、Blade テンプレート参照、PSR-4 名前空間解決。Flask/FastAPI/Django/Celery 向け Python デコレータ検出。 | | **実行フロー** | エントリーポイントからの呼び出しチェーンを重み付き重要度でソートしてトレース | | **コミュニティ検出** | Leidenアルゴリズムで関連コードをクラスタリング。大規模グラフ向け解像度スケーリング対応 | | **アーキテクチャ概要** | コミュニティ構造から自動生成されるアーキテクチャマップ(結合度警告付き) | diff --git a/README.ko-KR.md b/README.ko-KR.md index 90b3917d..2623a2aa 100644 --- a/README.ko-KR.md +++ b/README.ko-KR.md @@ -148,6 +148,7 @@ git 커밋이나 파일 저장마다 훅이 실행됩니다. 그래프는 변경 | **토큰 벤치마킹** | 전체 코퍼스 토큰 대비 그래프 쿼리 토큰을 질문별 비율로 측정 | | **메모리 루프** | Q&A 결과를 마크다운으로 저장하여 재수집, 쿼리로 그래프가 성장 | | **커뮤니티 자동 분할** | 과대 커뮤니티(그래프의 25% 초과)를 Leiden 알고리즘으로 재귀적 분할 | +| **프레임워크 인식 파싱** | Laravel Route→Controller 매핑, Eloquent 관계 엣지, Blade 템플릿 참조, PSR-4 네임스페이스 해석. Flask/FastAPI/Django/Celery용 Python 데코레이터 감지. | | **실행 흐름** | 가중 중요도 순으로 정렬된 진입점에서의 호출 체인 추적 | | **커뮤니티 감지** | 대규모 그래프를 위한 해상도 스케일링이 포함된 Leiden 알고리즘으로 관련 코드 클러스터링 | | **아키텍처 개요** | 결합 경고가 포함된 자동 생성 아키텍처 맵 | diff --git a/README.zh-CN.md b/README.zh-CN.md index 65ecebac..9d572dde 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -146,6 +146,7 @@ Build the code review graph for this project | **Token 基准测试** | 测量朴素全量 token 与图查询 token,附带逐题比率 | | **记忆循环** | 将问答结果持久化为 Markdown 以供重新摄入,使图从查询中不断成长 | | **社区自动分割** | 过大的社区(>图的 25%)通过 Leiden 算法递归分割 | +| **框架感知解析** | Laravel Route→Controller 映射、Eloquent 关联边、Blade 模板引用、PSR-4 命名空间解析。支持 Flask/FastAPI/Django/Celery 的 Python 装饰器检测。 | | **执行流** | 从入口点追踪调用链,按加权关键度排序 | | **社区检测** | 通过 Leiden 算法聚类相关代码,大型图自动调节分辨率 | | **架构概览** | 自动生成架构图,附带耦合警告 | From 66a48c7a8e2b330512e61492b86fa41079a1d045 Mon Sep 17 00:00:00 2001 From: Minidoracat Date: Tue, 14 Apr 2026 23:09:02 +0800 Subject: [PATCH 4/4] fix(tests): update PHP entry-point test for upstream handle pattern MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Upstream added ^handle$ to the universal _ENTRY_NAME_PATTERNS, so 'handle' now matches all languages — not just PHP. Narrow the negative assertion to boot/register/up which remain PHP-specific. --- tests/test_flows.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/test_flows.py b/tests/test_flows.py index f8f9b964..4616320d 100644 --- a/tests/test_flows.py +++ b/tests/test_flows.py @@ -110,8 +110,9 @@ def test_detect_entry_points_name_pattern(self): assert "regular_func" not in ep_names def test_php_entry_point_patterns(self): - """PHP-specific names (handle, boot, register, up, down) are entry - points only when the node's language is 'php'.""" + """PHP-specific names (boot, register, up, down) are entry points only + when the node's language is 'php'. Note: 'handle' is now a universal + entry-point pattern (added upstream) so it matches all languages.""" # PHP nodes — should be entry points for name in ("handle", "boot", "register", "up", "down"): node = NodeInfo( @@ -120,8 +121,8 @@ def test_php_entry_point_patterns(self): extra={}, ) self.store.upsert_node(node, file_hash="ph") - # Same names but Python — should NOT be entry points via lang patterns - for name in ("handle", "boot", "register", "up"): + # Same names but Python — boot/register/up should NOT be entry points + for name in ("boot", "register", "up"): node = NodeInfo( kind="Function", name=name, file_path="app.py", line_start=1, line_end=5, language="python", @@ -133,7 +134,7 @@ def test_php_entry_point_patterns(self): # Make all nodes called so they aren't roots by default for name in ("handle", "boot", "register", "up", "down"): self._add_call("app.php::caller", f"app.php::{name}", "app.php") - for name in ("handle", "boot", "register", "up"): + for name in ("boot", "register", "up"): self._add_call("app.py::caller", f"app.py::{name}") eps = detect_entry_points(self.store) @@ -142,19 +143,18 @@ def test_php_entry_point_patterns(self): } ep_names_py = { ep.name for ep in eps - if ep.language == "python" and ep.name in ( - "handle", "boot", "register", "up", - ) + if ep.language == "python" and ep.name in ("boot", "register", "up") } - # PHP names match via _LANG_ENTRY_NAME_PATTERNS + # PHP names match via _LANG_ENTRY_NAME_PATTERNS (or universal patterns) assert "handle" in ep_names_php assert "boot" in ep_names_php assert "register" in ep_names_php assert "up" in ep_names_php - # Python versions should NOT match (they are called, so not roots, - # and "handle"/"boot" etc. don't match the universal patterns). - assert "handle" not in ep_names_py + # Python versions should NOT match: boot/register/up are PHP-specific + # and these nodes are called (not roots), so they won't be entry points. assert "boot" not in ep_names_py + assert "register" not in ep_names_py + assert "up" not in ep_names_py # --------------------------------------------------------------- # detect_entry_points -- expanded decorator patterns