|
| 1 | +################################################################################ |
| 2 | +# Copyright IBM Corporation 2026 |
| 3 | +# |
| 4 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | +# you may not use this file except in compliance with the License. |
| 6 | +# You may obtain a copy of the License at |
| 7 | +# |
| 8 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | +# |
| 10 | +# Unless required by applicable law or agreed to in writing, software |
| 11 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | +# See the License for the specific language governing permissions and |
| 14 | +# limitations under the License. |
| 15 | +################################################################################ |
| 16 | + |
| 17 | +"""Analysis-pass superset abstraction. |
| 18 | +
|
| 19 | +A whole-application pass that runs after the symbol table and the base |
| 20 | +(Jedi/CodeQL) call graph are built. A pass may contribute *entrypoints* |
| 21 | +(framework-dispatched roots) and/or *synthetic call edges* (dispatch the |
| 22 | +static call graph cannot see — e.g. Odoo ORM ``write()`` -> a |
| 23 | +``@api.depends`` compute method). |
| 24 | +
|
| 25 | +Entrypoint-finding is one kind of pass: ``AbstractEntrypointFinder`` |
| 26 | +(in ``codeanalyzer.frameworks._base``) is a thin ``AnalysisPass`` |
| 27 | +subclass. Out-of-tree packages register their own passes via the |
| 28 | +``codeanalyzer.analysis_passes`` entry-point group; the registry orders |
| 29 | +all passes by declared ``requires``/``provides`` capabilities. |
| 30 | +
|
| 31 | +Core never interprets pass-defined vocabulary. ``PyEntrypoint`` and |
| 32 | +``PyCallEdge`` carry it in their open ``detection_source``/``provenance`` |
| 33 | +fields and free-form ``tags`` dicts so a persisted ``analysis.json`` |
| 34 | +round-trips regardless of which passes were installed. |
| 35 | +""" |
| 36 | + |
| 37 | +from __future__ import annotations |
| 38 | + |
| 39 | +from abc import ABC, abstractmethod |
| 40 | +from dataclasses import dataclass, field |
| 41 | +from typing import Any, Callable, ClassVar, Dict, FrozenSet, List, Optional |
| 42 | + |
| 43 | +from typing_extensions import Literal |
| 44 | + |
| 45 | +from codeanalyzer.schema.py_schema import ( |
| 46 | + PyApplication, |
| 47 | + PyCallEdge, |
| 48 | + PyEntrypoint, |
| 49 | +) |
| 50 | + |
#: Name of the packaging entry-point group through which out-of-tree
#: distributions register their analysis passes. A distribution declares it
#: in ``pyproject.toml`` under
#: ``[project.entry-points."codeanalyzer.analysis_passes"]``; every entry
#: point in the group must resolve to an ``AnalysisPass`` subclass.
ANALYSIS_PASS_ENTRYPOINT_GROUP = "codeanalyzer.analysis_passes"
| 55 | + |
| 56 | + |
#: Closed vocabulary for ``BindingFact.binding_kind``: the mechanism by
#: which a framework attaches an externally reachable route or command to
#: a callable.
BindingKind = Literal[
    # --- web routing ---
    "url_resolver",  # Django path() / re_path() / url() / include()
    "router_mount",  # FastAPI app.include_router / app.mount
    "blueprint",  # Flask register_blueprint
    # --- non-web bindings (serverless templates, command-line dispatch);
    #     conceptually similar to the web-framework bindings above ---
    "lambda_template",  # AWS SAM / serverless.yml
    "typer_subapp",  # Typer app.add_typer
    "click_add_command",  # Click cli.add_command(my_func)
    "argparse_dispatch",  # argparse parser.set_defaults(func=my_handler)
]
| 67 | + |
| 68 | + |
@dataclass(frozen=True)
class BindingFact:
    """A single external->internal binding found by a routing pre-pass.

    Facts live in ``AnalysisContext.external_bindings``, grouped under the
    ``PyCallable.signature`` of the callable they target. The same
    signature may own several facts, e.g. when one function is bound
    under more than one route.

    The dataclass is frozen, so fields cannot be rebound after
    construction. ``http_methods`` and ``extra`` are nevertheless ordinary
    mutable containers; because they take part in the generated
    ``__hash__``, calling ``hash()`` on an instance raises ``TypeError``.
    """

    # Required fields (positional order is part of the constructor API).
    framework: str
    binding_kind: BindingKind  # one of the ``BindingKind`` literals
    source_file: str

    # Optional fields; each instance gets its own fresh list / dict.
    route_path: Optional[str] = None
    http_methods: List[str] = field(default_factory=list)
    extra: Dict[str, str] = field(default_factory=dict)
| 84 | + |
| 85 | + |
@dataclass(frozen=True)
class AnalysisContext:
    """Project-wide helpers shared by every analysis pass.

    The registry builds one context once the symbol table and the base
    call graph are ready. The dataclass is frozen, so passes cannot rebind
    its fields and thereby alter global state mid-pipeline. The current
    ``PyApplication`` is *not* stored here: it is handed to
    ``AnalysisPass.run`` separately because it accumulates the results of
    upstream passes. The context therefore only carries derived helpers.

    Fields:

    * ``external_bindings`` — what a routing pre-pass produced, keyed by
      the target callable's ``PyCallable.signature``. Empty for non-web /
      non-CLI projects (core does not wire in a routing pre-pass yet).
    * ``resolve_base_chain`` — maps a class's fully-qualified name to its
      transitive FQCN inheritance chain, beginning with the class itself.
      Inheritance-based finders (Tornado, Django CBV, gRPC ``Servicer``)
      rely on it; decorator/convention finders ignore it.
    * ``shared`` — scratch space for handing facts from one pass to
      another, keyed by capability token. It is what gives
      ``provides``/``requires`` their meaning: a pass declaring
      ``provides={"odoo.model_identity"}`` stores its derived facts in
      ``shared["odoo.model_identity"]`` and a pass declaring
      ``requires={"odoo.model_identity"}`` reads them back. Freezing the
      dataclass only prevents rebinding the field; the dict itself is
      deliberately mutable. Never serialized; never interpreted by core.
    """

    external_bindings: Dict[str, List[BindingFact]]
    resolve_base_chain: Callable[[str], List[str]]
    # Defaults to a fresh, empty dict per context instance.
    shared: Dict[str, Any] = field(default_factory=dict)
| 116 | + |
| 117 | + |
@dataclass
class AnalysisResult:
    """The contribution made by one pass.

    Deliberately mutable and cheap to create. The registry folds every
    result into the running ``PyApplication`` before it starts the next
    pass, so downstream passes can see the entrypoints and synthetic edges
    produced upstream.
    """

    # Each instance starts with its own pair of empty lists.
    entrypoints: List[PyEntrypoint] = field(default_factory=list)
    call_edges: List[PyCallEdge] = field(default_factory=list)

    def extend(self, other: "AnalysisResult") -> None:
        """Append ``other``'s entrypoints and call edges to this result.

        This result is modified in place; ``other`` is left untouched.
        Items are appended in order and no de-duplication is performed.
        """
        self.entrypoints.extend(other.entrypoints)
        self.call_edges.extend(other.call_edges)
| 133 | + |
| 134 | + |
class AnalysisPass(ABC):
    """Base class for a whole-application analysis pass.

    A concrete pass has to set ``name`` and implement ``run``.

    ``provides`` and ``requires`` hold capability tokens (free-form
    strings) on which the registry performs a topological sort: a pass
    with ``requires={"odoo.model_identity"}`` is placed after whichever
    pass has ``provides={"odoo.model_identity"}``. A requirement nobody
    provides, or a dependency cycle, is a hard error.

    Passes should be cheap to instantiate and hold no per-project state;
    every project fact reaches them through the arguments of ``run``.
    """

    #: Stable identifier, used in ordering errors and logs.
    name: ClassVar[str] = ""
    #: Capability tokens this pass makes available to later passes.
    provides: ClassVar[FrozenSet[str]] = frozenset()
    #: Capability tokens this pass needs satisfied before it runs.
    requires: ClassVar[FrozenSet[str]] = frozenset()

    @abstractmethod
    def run(self, app: PyApplication, ctx: AnalysisContext) -> AnalysisResult:
        """Analyze ``app`` and return the entrypoints / edges to contribute.

        By the time this is called, ``app`` holds the symbol table, the
        base call graph and the results of every pass ordered ahead of
        this one. Treat it as read-only: report contributions through the
        returned ``AnalysisResult`` and leave merging to the registry.
        """
0 commit comments