Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions code_review_graph/lang/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""Per-language parsing handlers."""

from ._base import BaseLanguageHandler
from ._c_cpp import CHandler, CppHandler
from ._csharp import CSharpHandler
from ._dart import DartHandler
from ._go import GoHandler
from ._java import JavaHandler
from ._javascript import JavaScriptHandler, TsxHandler, TypeScriptHandler
from ._kotlin import KotlinHandler
from ._lua import LuaHandler, LuauHandler
from ._perl import PerlHandler
from ._php import PhpHandler
from ._python import PythonHandler
from ._r import RHandler
from ._ruby import RubyHandler
from ._rust import RustHandler
from ._scala import ScalaHandler
from ._solidity import SolidityHandler
from ._swift import SwiftHandler

# One shared instance of every handler, created when the package is imported.
# The order of the entries is kept exactly as it was originally declared.
ALL_HANDLERS: list[BaseLanguageHandler] = [
    GoHandler(),
    PythonHandler(),
    JavaScriptHandler(),
    TypeScriptHandler(),
    TsxHandler(),
    RustHandler(),
    CHandler(),
    CppHandler(),
    JavaHandler(),
    CSharpHandler(),
    KotlinHandler(),
    ScalaHandler(),
    SolidityHandler(),
    RubyHandler(),
    DartHandler(),
    SwiftHandler(),
    PhpHandler(),
    PerlHandler(),
    RHandler(),
    LuaHandler(),
    LuauHandler(),
]

# Public names of the package: the base class, the handler registry, and
# every concrete handler class imported above.
__all__ = [
    "BaseLanguageHandler",
    "ALL_HANDLERS",
    "GoHandler",
    "PythonHandler",
    "JavaScriptHandler",
    "TypeScriptHandler",
    "TsxHandler",
    "RustHandler",
    "CHandler",
    "CppHandler",
    "JavaHandler",
    "CSharpHandler",
    "KotlinHandler",
    "ScalaHandler",
    "SolidityHandler",
    "RubyHandler",
    "DartHandler",
    "SwiftHandler",
    "PhpHandler",
    "PerlHandler",
    "RHandler",
    "LuaHandler",
    "LuauHandler",
]
62 changes: 62 additions & 0 deletions code_review_graph/lang/_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
"""Base class for language-specific parsing handlers."""

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
from ..parser import CodeParser, EdgeInfo, NodeInfo


class BaseLanguageHandler:
    """Hooks a language overrides where it departs from default CodeParser logic.

    A hook that returns ``NotImplemented`` means 'use the default code path',
    so a subclass only has to override the hooks it actually customises.
    """

    # Language identifier; left empty on the base class.
    language: str = ""
    # Node-type names per construct category; all empty on the base class.
    class_types: list[str] = []
    function_types: list[str] = []
    import_types: list[str] = []
    call_types: list[str] = []
    # Names treated as language builtins; empty on the base class.
    builtin_names: frozenset[str] = frozenset()

    def get_name(self, node, kind: str) -> str | None:
        """Name hook; the base class returns ``NotImplemented`` (use the default)."""
        return NotImplemented

    def get_bases(self, node, source: bytes) -> list[str]:
        """Base-type hook; the base class returns ``NotImplemented`` (use the default)."""
        return NotImplemented

    def extract_import_targets(self, node, source: bytes) -> list[str]:
        """Import-target hook; the base class returns ``NotImplemented`` (use the default)."""
        return NotImplemented

    def collect_import_names(
        self, node, file_path: str, import_map: dict[str, str],
    ) -> bool:
        """Fill ``import_map`` from an import node and report whether it was handled.

        The base class handles nothing and therefore returns ``False``.
        """
        return False

    def resolve_module(self, module: str, caller_file: str) -> str | None:
        """Map a module path to a file path; ``NotImplemented`` requests the fallback."""
        return NotImplemented

    def extract_constructs(
        self, child, node_type: str, parser: CodeParser, source: bytes,
        file_path: str, nodes: list[NodeInfo], edges: list[EdgeInfo],
        enclosing_class: str | None, enclosing_func: str | None,
        import_map: dict[str, str] | None, defined_names: set[str] | None,
        depth: int,
    ) -> bool:
        """Give the language a chance to process an AST construct itself.

        Returns ``True`` when ``child`` was fully handled, in which case the
        generic dispatch is skipped.  The base class has no language-specific
        handling and always returns ``False``.
        """
        return False
41 changes: 41 additions & 0 deletions code_review_graph/lang/_c_cpp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""C / C++ language handlers."""

from __future__ import annotations

from ._base import BaseLanguageHandler


class _CBase(BaseLanguageHandler):
    """Behaviour common to the C and C++ handlers."""

    import_types = ["preproc_include"]
    call_types = ["call_expression"]

    def extract_import_targets(self, node, source: bytes) -> list[str]:
        """Return the include paths found among ``node``'s direct children.

        Each ``system_lib_string`` or ``string_literal`` child contributes its
        text with the surrounding ``<``, ``>`` and ``"`` characters stripped.
        """
        path_kinds = ("system_lib_string", "string_literal")
        return [
            operand.text.decode("utf-8", errors="replace").strip("<>\"")
            for operand in node.children
            if operand.type in path_kinds
        ]


class CHandler(_CBase):
    """C handler: structs and typedefs count as classes."""

    language = "c"
    class_types = ["struct_specifier", "type_definition"]
    function_types = ["function_definition"]


class CppHandler(_CBase):
    """C++ handler: classes and structs, plus base-class extraction."""

    language = "cpp"
    class_types = ["class_specifier", "struct_specifier"]
    function_types = ["function_definition"]

    def get_bases(self, node, source: bytes) -> list[str]:
        """Return the ``type_identifier`` names listed in ``base_class_clause`` children.

        Only direct ``type_identifier`` children of the clause are collected;
        other node kinds inside the clause are ignored.
        """
        return [
            entry.text.decode("utf-8", errors="replace")
            for clause in node.children
            if clause.type == "base_class_clause"
            for entry in clause.children
            if entry.type == "type_identifier"
        ]
33 changes: 33 additions & 0 deletions code_review_graph/lang/_csharp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""C# language handler."""

from __future__ import annotations

from ._base import BaseLanguageHandler


class CSharpHandler(BaseLanguageHandler):
    """C# language handler."""

    language = "csharp"
    class_types = [
        "class_declaration", "interface_declaration",
        "enum_declaration", "struct_declaration",
    ]
    function_types = ["method_declaration", "constructor_declaration"]
    import_types = ["using_directive"]
    call_types = ["invocation_expression", "object_creation_expression"]

    # Child node types whose full text was already recorded as a base before
    # ``base_list`` support was added; kept so existing results do not change.
    _LEGACY_BASE_TYPES = (
        "superclass", "super_interfaces", "extends_type",
        "implements_type", "type_identifier", "supertype",
    )
    # Children of a ``base_list`` that are not base types themselves.
    _BASE_LIST_NOISE = (":", ",", "comment")

    def extract_import_targets(self, node, source: bytes) -> list[str]:
        """Return the target of a using directive as a one-element list.

        The target is the last whitespace-separated token of the directive's
        text with any trailing ``;`` removed.  Text with fewer than two
        tokens yields an empty list.
        """
        text = node.text.decode("utf-8", errors="replace").strip()
        parts = text.split()
        if len(parts) >= 2:
            return [parts[-1].rstrip(";")]
        return []

    def get_bases(self, node, source: bytes) -> list[str]:
        """Return the text of every base type declared on ``node``.

        Entries of a ``base_list`` child are returned one per base, using the
        entry's full text (so a generic base keeps its type arguments).  The
        node types matched originally are still honoured.
        """
        bases = []
        for child in node.children:
            if child.type == "base_list":
                # NOTE(review): per the tree-sitter-c-sharp grammar the
                # ``: Base, IFace`` clause is a ``base_list`` node -- confirm
                # against the grammar version pinned by this project.
                bases.extend(
                    entry.text.decode("utf-8", errors="replace")
                    for entry in child.children
                    if entry.type not in self._BASE_LIST_NOISE
                )
            elif child.type in self._LEGACY_BASE_TYPES:
                bases.append(child.text.decode("utf-8", errors="replace"))
        return bases
65 changes: 65 additions & 0 deletions code_review_graph/lang/_dart.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""Dart language handler."""

from __future__ import annotations

from typing import Optional

from ._base import BaseLanguageHandler


class DartHandler(BaseLanguageHandler):
    """Dart language handler."""

    language = "dart"
    class_types = ["class_definition", "mixin_declaration", "enum_declaration"]
    # A single node type serves top-level functions and class methods alike:
    # methods show up as method_signature > function_signature, the parser
    # descends into method_signature generically and then matches the inner
    # function_signature.
    function_types = ["function_signature"]
    # import_or_export > library_import > import_specification > configurable_uri
    import_types = ["import_or_export"]
    call_types: list[str] = []  # Dart relies on the fallback call_expression

    def get_name(self, node, kind: str) -> str | None:
        """Return the identifier of a ``function_signature`` node.

        Other node types return ``NotImplemented`` so the default lookup runs.
        A ``function_signature`` without an ``identifier`` child yields ``None``.
        """
        if node.type != "function_signature":
            return NotImplemented
        # The return-type node comes before the identifier, so match on
        # 'identifier' only; otherwise the return type would be reported.
        for part in node.children:
            if part.type == "identifier":
                return part.text.decode("utf-8", errors="replace")
        return None

    def extract_import_targets(self, node, source: bytes) -> list[str]:
        """Return the first string literal under ``node`` as a one-element list.

        An absent or empty literal produces an empty list.
        """
        uri = self._find_string_literal(node)
        return [uri] if uri else []

    @staticmethod
    def _find_string_literal(node) -> Optional[str]:
        """Return the first ``string_literal`` in pre-order, quotes stripped.

        Returns ``None`` when the subtree rooted at ``node`` has no such node.
        """
        pending = [node]
        while pending:
            current = pending.pop()
            if current.type == "string_literal":
                return current.text.decode("utf-8", errors="replace").strip("'\"")
            # Push children reversed so the leftmost child is visited next.
            pending.extend(reversed(current.children))
        return None

    def get_bases(self, node, source: bytes) -> list[str]:
        """Return superclass, mixin and interface type names declared on ``node``.

        Names are gathered from ``type_identifier`` nodes inside ``superclass``
        children (including a nested ``mixins`` node) and ``interfaces``
        children, in document order.
        """

        def type_names(parent) -> list[str]:
            # Decoded text of the direct type_identifier children of ``parent``.
            return [
                item.text.decode("utf-8", errors="replace")
                for item in parent.children
                if item.type == "type_identifier"
            ]

        collected = []
        for clause in node.children:
            if clause.type == "interfaces":
                collected.extend(type_names(clause))
            elif clause.type == "superclass":
                for part in clause.children:
                    if part.type == "type_identifier":
                        collected.append(part.text.decode("utf-8", errors="replace"))
                    elif part.type == "mixins":
                        collected.extend(type_names(part))
        return collected
73 changes: 73 additions & 0 deletions code_review_graph/lang/_go.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
"""Go language handler."""

from __future__ import annotations

from ._base import BaseLanguageHandler


class GoHandler(BaseLanguageHandler):
    """Go language handler."""

    language = "go"
    class_types = ["type_declaration"]
    function_types = ["function_declaration", "method_declaration"]
    import_types = ["import_declaration"]
    call_types = ["call_expression"]
    builtin_names = frozenset({
        "len", "cap", "make", "new", "delete", "append", "copy",
        "close", "panic", "recover", "print", "println",
    })

    # Import-path literal node type -> the quote character to strip from it.
    # Go allows both "quoted" and `raw` string literals as import paths.
    _IMPORT_PATH_QUOTES = {
        "interpreted_string_literal": '"',
        "raw_string_literal": "`",
    }

    def get_name(self, node, kind: str) -> str | None:
        """Return the declared name of a ``type_declaration`` node.

        The identifier lives on the nested ``type_spec``.  Returns ``None``
        when no ``type_spec`` child carries one, and ``NotImplemented`` for
        every other node type so the default lookup is used
        (function_declaration etc.).
        """
        if node.type != "type_declaration":
            return NotImplemented
        for spec in node.children:
            if spec.type != "type_spec":
                continue
            for part in spec.children:
                if part.type in ("identifier", "name", "type_identifier"):
                    return part.text.decode("utf-8", errors="replace")
        return None

    def get_bases(self, node, source: bytes) -> list[str]:
        """Return the names of types embedded in the declared type.

        An embedded field is a ``field_declaration`` holding only a
        ``type_identifier`` (no field name), e.g.
        ``type Child struct { Parent }``.  Field lists are looked up under
        both ``struct_type`` and ``interface_type`` bodies.
        """
        bases: list[str] = []
        for spec in node.children:
            if spec.type != "type_spec":
                continue
            for body in spec.children:
                if body.type not in ("struct_type", "interface_type"):
                    continue
                for field_list in body.children:
                    if field_list.type == "field_declaration_list":
                        bases.extend(self._embedded_type_names(field_list))
        return bases

    @staticmethod
    def _embedded_type_names(field_list) -> list[str]:
        """Return the type names of the embedded fields in ``field_list``."""
        names = []
        for field in field_list.children:
            if field.type != "field_declaration":
                continue
            # Comments attached to the field must not hide an embedding.
            parts = [c for c in field.children if c.type != "comment"]
            if len(parts) == 1 and parts[0].type == "type_identifier":
                names.append(parts[0].text.decode("utf-8", errors="replace"))
        return names

    def extract_import_targets(self, node, source: bytes) -> list[str]:
        """Return every import path declared by an ``import_declaration``.

        Handles the single-spec form (``import "fmt"``) and the grouped form
        (``import ( ... )``).  Paths are returned without their quotes.
        """
        imports: list[str] = []
        for child in node.children:
            if child.type == "import_spec":
                imports.extend(self._spec_paths(child))
            elif child.type == "import_spec_list":
                for spec in child.children:
                    if spec.type == "import_spec":
                        imports.extend(self._spec_paths(spec))
        return imports

    @classmethod
    def _spec_paths(cls, spec) -> list[str]:
        """Return the unquoted path literal(s) found in one ``import_spec``."""
        paths = []
        for part in spec.children:
            quote = cls._IMPORT_PATH_QUOTES.get(part.type)
            if quote is not None:
                paths.append(part.text.decode("utf-8", errors="replace").strip(quote))
        return paths
30 changes: 30 additions & 0 deletions code_review_graph/lang/_java.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""Java language handler."""

from __future__ import annotations

from ._base import BaseLanguageHandler


class JavaHandler(BaseLanguageHandler):
    """Java language handler."""

    language = "java"
    class_types = ["class_declaration", "interface_declaration", "enum_declaration"]
    function_types = ["method_declaration", "constructor_declaration"]
    import_types = ["import_declaration"]
    call_types = ["method_invocation", "object_creation_expression"]

    def extract_import_targets(self, node, source: bytes) -> list[str]:
        """Return the imported name of an import declaration as a one-element list.

        The name is the last whitespace-separated token of the declaration's
        text with any trailing ``;`` removed; text with fewer than two tokens
        yields an empty list.
        """
        tokens = node.text.decode("utf-8", errors="replace").strip().split()
        if len(tokens) < 2:
            return []
        return [tokens[-1].rstrip(";")]

    def get_bases(self, node, source: bytes) -> list[str]:
        """Return the full text of each inheritance-related child of ``node``.

        NOTE(review): the whole child text is recorded, so for ``superclass``
        and ``super_interfaces`` nodes it presumably still contains the
        ``extends`` / ``implements`` keyword -- confirm what callers expect.
        """
        inheritance_kinds = (
            "superclass", "super_interfaces", "extends_type",
            "implements_type", "type_identifier", "supertype",
        )
        return [
            part.text.decode("utf-8", errors="replace")
            for part in node.children
            if part.type in inheritance_kinds
        ]
Loading
Loading