From a8f589ef91715e58ff901615e89574ff34907f0b Mon Sep 17 00:00:00 2001 From: RaghavChamadiya Date: Tue, 7 Apr 2026 15:33:17 +0530 Subject: [PATCH 1/3] =?UTF-8?q?feat:=20pipeline=20overhaul=20=E2=80=94=20R?= =?UTF-8?q?AG=20generation,=20parallel=20ingest,=20atomic=20stores,=20cost?= =?UTF-8?q?=20tracking,=20PR=20blast=20radius?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds 11 capabilities across the indexing pipeline, persistence layer, MCP tools, and CLI. MCP tool count is unchanged; new functionality is folded into existing tools (get_risk, get_overview, get_dead_code). Pipeline & generation - ProcessPool-based parsing with sequential fallback; ingestion and git stages now run concurrently via asyncio.gather - RAG-aware doc generation: dependency summaries are pre-fetched from the vector store and injected into the file_page prompt; pages generated in topological order so leaves are summarized before their dependents - Dynamic import hint extractors (Django INSTALLED_APPS/ROOT_URLCONF/ MIDDLEWARE/url include, pytest conftest fixtures, Node package.json exports + tsconfig path aliases) wired into GraphBuilder.add_dynamic_edges Persistence - AtomicStorageCoordinator with async transaction() context manager and health_check() spanning SQL, in-memory graph, and vector store - recompute_git_percentiles now uses a single SQL PERCENT_RANK() window function instead of in-memory Python ranking - New temporal_hotspot_score column on git_metadata, computed via exp decay (180-day half-life) and used as the primary percentile sort key - New llm_costs and security_findings tables; matching ORM models - vector_store.get_page_summary_by_path() on all three backends Cost tracking - CostTracker with per-call recording, persisted to llm_costs; pricing table covers Claude 4.6 family, GPT-4o, and Gemini 1.5/2.5/3.x variants - Wired into Anthropic, Gemini, OpenAI, and LiteLLM providers - Live USD column on the indexing progress bar - 
New `repowise costs` CLI grouping by operation/model/day Analysis - PRBlastRadiusAnalyzer: transitive ancestor BFS over graph_edges, co-change warnings, recommended reviewers by temporal ownership, test gaps, 0–10 overall risk score - SecurityScanner: pattern-based scan for eval/exec/pickle/raw SQL/ hardcoded secrets/weak hashes; persisted at index time MCP tool extensions - get_risk(changed_files=[...]) returns blast radius; per-file payload now includes test_gap and security_signals - get_overview returns knowledge_map with top owners, knowledge silos (>80% ownership), and onboarding targets - get_dead_code accepts min_confidence, include_internals, include_zombie_packages, no_unreachable, no_unused_exports CLI - `repowise dead-code` exposes the same sensitivity flags - `repowise doctor` adds a coordinator drift health check (Check #10) - `repowise costs` command registered Tests - test_models.py: expected table set updated to include llm_costs and security_findings; full suite green (757 passed, 9 skipped) - End-to-end validated against test-repos/microdot: 164 files ingested, 83 pages generated, 132 git_metadata rows with temporal hotspot score, 83 cost rows totaling $0.0258, 2 security findings, drift = 0 --- README.md | 64 +++- .../src/repowise/cli/commands/costs_cmd.py | 157 ++++++++++ .../repowise/cli/commands/dead_code_cmd.py | 38 ++- .../src/repowise/cli/commands/doctor_cmd.py | 71 +++++ .../cli/src/repowise/cli/commands/init_cmd.py | 43 +++ packages/cli/src/repowise/cli/main.py | 2 + packages/cli/src/repowise/cli/ui.py | 12 +- .../core/alembic/versions/0009_llm_costs.py | 54 ++++ .../versions/0010_temporal_hotspot_score.py | 38 +++ .../versions/0011_security_findings.py | 51 ++++ .../src/repowise/core/analysis/pr_blast.py | 282 ++++++++++++++++++ .../repowise/core/analysis/security_scan.py | 128 ++++++++ .../repowise/core/generation/cost_tracker.py | 264 ++++++++++++++++ .../core/generation/page_generator.py | 62 ++++ 
.../core/generation/templates/file_page.j2 | 9 + .../core/ingestion/dynamic_hints/__init__.py | 6 + .../core/ingestion/dynamic_hints/base.py | 21 ++ .../core/ingestion/dynamic_hints/django.py | 152 ++++++++++ .../core/ingestion/dynamic_hints/node.py | 136 +++++++++ .../ingestion/dynamic_hints/pytest_hints.py | 103 +++++++ .../core/ingestion/dynamic_hints/registry.py | 32 ++ .../repowise/core/ingestion/git_indexer.py | 30 +- .../core/src/repowise/core/ingestion/graph.py | 20 ++ .../repowise/core/persistence/coordinator.py | 212 +++++++++++++ .../src/repowise/core/persistence/crud.py | 42 ++- .../src/repowise/core/persistence/models.py | 42 +++ .../repowise/core/persistence/vector_store.py | 98 ++++++ .../repowise/core/pipeline/orchestrator.py | 169 +++++++++-- .../src/repowise/core/pipeline/persist.py | 19 ++ .../repowise/core/providers/llm/anthropic.py | 24 +- .../src/repowise/core/providers/llm/gemini.py | 23 +- .../repowise/core/providers/llm/litellm.py | 24 +- .../src/repowise/core/providers/llm/openai.py | 24 +- .../server/mcp_server/tool_dead_code.py | 42 ++- .../server/mcp_server/tool_overview.py | 75 ++++- .../repowise/server/mcp_server/tool_risk.py | 87 +++++- tests/unit/persistence/test_models.py | 2 + 37 files changed, 2593 insertions(+), 65 deletions(-) create mode 100644 packages/cli/src/repowise/cli/commands/costs_cmd.py create mode 100644 packages/core/alembic/versions/0009_llm_costs.py create mode 100644 packages/core/alembic/versions/0010_temporal_hotspot_score.py create mode 100644 packages/core/alembic/versions/0011_security_findings.py create mode 100644 packages/core/src/repowise/core/analysis/pr_blast.py create mode 100644 packages/core/src/repowise/core/analysis/security_scan.py create mode 100644 packages/core/src/repowise/core/generation/cost_tracker.py create mode 100644 packages/core/src/repowise/core/ingestion/dynamic_hints/__init__.py create mode 100644 packages/core/src/repowise/core/ingestion/dynamic_hints/base.py create mode 100644 
packages/core/src/repowise/core/ingestion/dynamic_hints/django.py create mode 100644 packages/core/src/repowise/core/ingestion/dynamic_hints/node.py create mode 100644 packages/core/src/repowise/core/ingestion/dynamic_hints/pytest_hints.py create mode 100644 packages/core/src/repowise/core/ingestion/dynamic_hints/registry.py create mode 100644 packages/core/src/repowise/core/persistence/coordinator.py diff --git a/README.md b/README.md index b880388..a887b3b 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,48 @@ The result: Claude Code answers *"why does auth work this way?"* instead of *"he --- +## What's new + +### Faster indexing +Indexing is now fully parallel. A `ProcessPoolExecutor` distributes AST parsing across all CPU cores. Graph construction and git history indexing run concurrently via `asyncio.gather`. Per-file git history is fetched through a thread executor with a semaphore to cap concurrency — full parallelism without overwhelming the system. Large repos index noticeably faster. + +### RAG-aware documentation generation +Every wiki page is generated with richer context: before calling the LLM, repowise fetches the already-generated summaries of each file's direct dependencies from the vector store and injects them into the prompt. Generation is topologically sorted so leaf files are always written first. The LLM sees what its dependencies actually do, not just their names — producing more accurate, cross-referenced documentation. + +### Atomic three-store transactions +`AtomicStorageCoordinator` buffers writes across the SQL database, the in-memory dependency graph, and the vector store, then flushes them in a single coordinated operation. If any store fails, all three are rolled back — no partial writes, no silent drift. Run `repowise doctor` to inspect drift across all three stores and repair mismatches. 
+ +### Dynamic import hints +The dependency graph now captures edges that pure AST parsing misses: +- Django `INSTALLED_APPS`, `ROOT_URLCONF`, and `MIDDLEWARE` settings +- pytest fixture wiring through `conftest.py` +- Node/TypeScript path aliases from `tsconfig.json` `paths` and `package.json` `exports` + +These edges appear in `get_context`, `get_risk`, and `get_dependency_path` like any other dependency. + +### Temporal hotspot decay +Hotspot scoring now uses an exponentially time-decayed score with a 180-day half-life layered on top of the raw 90-day churn count. A commit from a year ago contributes roughly 25% as much as a commit from today. The score reflects recent activity, not just total volume. Surfaced in `get_overview` and `get_risk`. + +### Percentile ranks via SQL window function +Incremental updates now recompute global percentile ranks for every file using a single `PERCENT_RANK()` SQL window function. Previously this required loading all rows into Python. The new approach is both faster and correct on large repos — no sampling, no approximation. + +### PR blast radius +`get_risk(changed_files=[...])` now returns a full blast-radius report: transitive affected files, co-change warnings for historical co-change partners not included in the PR, recommended reviewers ranked by temporal ownership, test gap detection, and an overall 0–10 risk score. Same eight tools — substantially more signal per call. + +### Knowledge map in `get_overview` +`get_overview` now surfaces: top owners across the codebase, "bus factor 1" knowledge silos (files where one person owns >80% of commits), and onboarding targets — high-centrality files with the weakest documentation coverage. Useful for team planning and risk review. 
+ +### Test gaps and security signals in `get_risk` +`get_risk` now includes a `test_gap` flag per file (no test file co-changes detected) and `security_signals` — static pattern detection for common risk categories: authentication bypass patterns, `eval`-family calls, raw SQL string construction, and weak cryptography. Signals appear alongside the existing hotspot and ownership data. + +### LLM cost tracking +Every LLM call is logged to a new `llm_costs` table with operation type, model, token counts, and estimated cost. A new `repowise costs` CLI command lets you group spending by operation, model, or day. The indexing progress bar now shows a live `Cost: $X.XXX` counter next to the spinner. + +### Configurable dead-code sensitivity +The `repowise dead-code` command and the `get_dead_code` MCP tool now expose sensitivity controls: `--min-confidence` (default 0.70), `--include-internals` (include private/underscore-prefixed symbols), and `--include-zombie-packages` (packages present in `package.json` / `pyproject.toml` but unused in the graph). Tune the output to your cleanup goals. + +--- + ## What repowise builds repowise runs once, builds everything, then keeps it in sync on every commit. @@ -94,11 +136,11 @@ Most tools are designed around data entities — one module, one file, one symbo |---|---|---| | `get_overview()` | Architecture summary, module map, entry points | First call on any unfamiliar codebase | | `get_context(targets, include?)` | Docs, ownership, decisions, freshness for any targets — files, modules, or symbols | Before reading or modifying code. Pass all relevant targets in one call. 
| -| `get_risk(targets)` | Hotspot scores, dependents, co-change partners, plain-English risk summary | Before modifying files — understand what could break | +| `get_risk(targets?, changed_files?)` | Hotspot scores, dependents, co-change partners, blast radius, recommended reviewers, test gaps, security signals, 0–10 risk score | Before modifying files — understand what could break | | `get_why(query?)` | Three modes: NL search over decisions · path-based decisions for a file · no-arg health dashboard | Before architectural changes — understand existing intent | | `search_codebase(query)` | Semantic search over the full wiki. Natural language. | When you don't know where something lives | | `get_dependency_path(from, to)` | Connection path between two files, modules, or symbols | When tracing how two things are connected | -| `get_dead_code()` | Unreachable code sorted by confidence and cleanup impact | Cleanup tasks | +| `get_dead_code(min_confidence?, include_internals?, include_zombie_packages?)` | Unreachable code sorted by confidence and cleanup impact | Cleanup tasks | | `get_architecture_diagram(module?)` | Mermaid diagram for the repo or a specific module | Documentation and presentation | ### Tool call comparison — a real task @@ -333,9 +375,18 @@ repowise search "" # semantic search over the wiki repowise status # coverage, freshness, dead code summary # Dead code -repowise dead-code # full report -repowise dead-code --safe-only # only safe-to-delete findings -repowise dead-code resolve # mark resolved / false positive +repowise dead-code # full report +repowise dead-code --safe-only # only safe-to-delete findings +repowise dead-code --min-confidence 0.8 # raise the confidence threshold +repowise dead-code --include-internals # include private/underscore symbols +repowise dead-code --include-zombie-packages # include unused declared packages +repowise dead-code resolve # mark resolved / false positive + +# Cost tracking +repowise costs # total LLM spend 
to date +repowise costs --by operation # grouped by operation type +repowise costs --by model # grouped by model +repowise costs --by day # grouped by day # Decisions repowise decision add # record a decision (interactive) @@ -348,7 +399,8 @@ repowise generate-claude-md # regenerate CLAUDE.md # Utilities repowise export [PATH] # export wiki as markdown files -repowise doctor # check setup, API keys, connectivity +repowise doctor # check setup, API keys, store drift +repowise doctor --repair # check and fix detected store mismatches repowise reindex # rebuild vector store (no LLM calls) ``` diff --git a/packages/cli/src/repowise/cli/commands/costs_cmd.py b/packages/cli/src/repowise/cli/commands/costs_cmd.py new file mode 100644 index 0000000..000eb84 --- /dev/null +++ b/packages/cli/src/repowise/cli/commands/costs_cmd.py @@ -0,0 +1,157 @@ +"""``repowise costs`` — display LLM cost history from the cost ledger.""" + +from __future__ import annotations + +from datetime import datetime +from pathlib import Path +from typing import Any + +import click +from rich.table import Table + +from repowise.cli.helpers import ( + console, + get_db_url_for_repo, + resolve_repo_path, + run_async, +) + + +def _parse_date(value: str | None) -> datetime | None: + """Parse an ISO date string into a datetime, or return None.""" + if value is None: + return None + try: + return datetime.fromisoformat(value) + except ValueError: + try: + from dateutil.parser import parse as _parse # type: ignore[import-untyped] + + return _parse(value) + except Exception as exc: + raise click.BadParameter(f"Cannot parse date '{value}': {exc}") from exc + + +@click.command("costs") +@click.argument("path", required=False, default=None) +@click.option( + "--since", + default=None, + metavar="DATE", + help="Only show costs since this date (ISO format, e.g. 
2026-01-01).", +) +@click.option( + "--by", + "group_by", + type=click.Choice(["operation", "model", "day"]), + default="operation", + show_default=True, + help="Group costs by operation, model, or day.", +) +@click.option( + "--repo-path", + "repo_path_flag", + default=None, + metavar="PATH", + help="Repository path (defaults to current directory).", +) +def costs_command( + path: str | None, + since: str | None, + group_by: str, + repo_path_flag: str | None, +) -> None: + """Show LLM cost history for a repository. + + PATH (or --repo-path) defaults to the current directory. + """ + # Support both positional PATH and --repo-path flag + raw_path = path or repo_path_flag + repo_path = resolve_repo_path(raw_path) + + repowise_dir = repo_path / ".repowise" + if not repowise_dir.exists(): + console.print("[yellow]No .repowise/ directory found. Run 'repowise init' first.[/yellow]") + return + + since_dt = _parse_date(since) + + rows = run_async(_query_costs(repo_path, since=since_dt, group_by=group_by)) + + if not rows: + msg = "No cost records found" + if since_dt: + msg += f" since {since_dt.date()}" + msg += ". Run 'repowise init' with an LLM provider to generate costs." 
+ console.print(f"[yellow]{msg}[/yellow]") + return + + # Build table + group_label = group_by.capitalize() + table = Table( + title=f"LLM Costs — grouped by {group_by}", + border_style="dim", + show_footer=True, + ) + table.add_column(group_label, style="cyan", footer="[bold]TOTAL[/bold]") + table.add_column("Calls", justify="right", footer=str(sum(r["calls"] for r in rows))) + table.add_column( + "Input Tokens", + justify="right", + footer=f"{sum(r['input_tokens'] for r in rows):,}", + ) + table.add_column( + "Output Tokens", + justify="right", + footer=f"{sum(r['output_tokens'] for r in rows):,}", + ) + table.add_column( + "Cost USD", + justify="right", + footer=f"[bold green]${sum(r['cost_usd'] for r in rows):.4f}[/bold green]", + ) + + for row in rows: + table.add_row( + str(row["group"] or "—"), + str(row["calls"]), + f"{row['input_tokens']:,}", + f"{row['output_tokens']:,}", + f"[green]${row['cost_usd']:.4f}[/green]", + ) + + console.print() + console.print(table) + console.print() + + +async def _query_costs( + repo_path: Path, + since: datetime | None, + group_by: str, +) -> list[dict[str, Any]]: + """Open the DB, look up the repo, and return aggregated cost rows.""" + from repowise.core.generation.cost_tracker import CostTracker + from repowise.core.persistence import ( + create_engine, + create_session_factory, + get_session, + init_db, + ) + from repowise.core.persistence.crud import get_repository_by_path + + url = get_db_url_for_repo(repo_path) + engine = create_engine(url) + await init_db(engine) + sf = create_session_factory(engine) + + try: + async with get_session(sf) as session: + repo = await get_repository_by_path(session, str(repo_path)) + if repo is None: + return [] + + tracker = CostTracker(session_factory=sf, repo_id=repo.id) + return await tracker.totals(since=since, group_by=group_by) + finally: + await engine.dispose() diff --git a/packages/cli/src/repowise/cli/commands/dead_code_cmd.py 
b/packages/cli/src/repowise/cli/commands/dead_code_cmd.py index ce4d7fc..e196cfd 100644 --- a/packages/cli/src/repowise/cli/commands/dead_code_cmd.py +++ b/packages/cli/src/repowise/cli/commands/dead_code_cmd.py @@ -30,12 +30,40 @@ type=click.Choice(["table", "json", "md"]), help="Output format.", ) +@click.option( + "--include-internals/--no-include-internals", + default=False, + help="Detect unused private/internal symbols (higher false-positive rate, off by default).", +) +@click.option( + "--include-zombie-packages/--no-include-zombie-packages", + default=True, + help="Detect monorepo packages with no external importers (on by default).", +) +@click.option( + "--no-unreachable", + "no_unreachable", + is_flag=True, + default=False, + help="Skip detection of unreachable files (in_degree=0).", +) +@click.option( + "--no-unused-exports", + "no_unused_exports", + is_flag=True, + default=False, + help="Skip detection of unused public exports.", +) def dead_code_command( path: str | None, min_confidence: float, safe_only: bool, kind: str | None, fmt: str, + include_internals: bool, + include_zombie_packages: bool, + no_unreachable: bool, + no_unused_exports: bool, ) -> None: """Detect dead and unused code.""" from pathlib import Path as PathlibPath @@ -74,9 +102,15 @@ def dead_code_command( pass # Analyze - config = {"min_confidence": min_confidence} + config: dict = { + "min_confidence": min_confidence, + "detect_unused_internals": include_internals, + "detect_zombie_packages": include_zombie_packages, + "detect_unreachable_files": not no_unreachable, + "detect_unused_exports": not no_unused_exports, + } if kind: - # Enable only the requested kind + # --kind overrides the individual detection flags to focus on one type config["detect_unreachable_files"] = kind == "unreachable_file" config["detect_unused_exports"] = kind == "unused_export" config["detect_unused_internals"] = kind == "unused_internal" diff --git a/packages/cli/src/repowise/cli/commands/doctor_cmd.py 
b/packages/cli/src/repowise/cli/commands/doctor_cmd.py index 0416eb6..bf25b01 100644 --- a/packages/cli/src/repowise/cli/commands/doctor_cmd.py +++ b/packages/cli/src/repowise/cli/commands/doctor_cmd.py @@ -226,6 +226,77 @@ async def _check_stores(): except Exception: checks.append(_check("Store consistency", True, "Could not check")) + # 10. AtomicStorageCoordinator drift check + coord_drift: float | None = None + coord_sql_pages: int | None = None + coord_vector_count: int | None = None + coord_graph_nodes: int | None = None + if db_ok: + try: + + async def _check_coordinator(): + from repowise.core.persistence import ( + create_engine, + create_session_factory, + get_session, + ) + from repowise.core.persistence.coordinator import AtomicStorageCoordinator + from repowise.core.persistence.vector_store import LanceDBVectorStore + from repowise.core.providers.embedding.base import MockEmbedder + + url = get_db_url_for_repo(repo_path) + engine = create_engine(url) + sf = create_session_factory(engine) + + vector_store = None + lance_dir = repowise_dir / "lancedb" + if lance_dir.exists(): + try: + embedder = MockEmbedder() + vector_store = LanceDBVectorStore(str(lance_dir), embedder=embedder) + except Exception: + pass + + async with get_session(sf) as session: + coord = AtomicStorageCoordinator( + session, graph_builder=None, vector_store=vector_store + ) + result = await coord.health_check() + + if vector_store is not None: + try: + await vector_store.close() + except Exception: + pass + await engine.dispose() + return result + + coord_result = run_async(_check_coordinator()) + coord_sql_pages = coord_result.get("sql_pages") + coord_vector_count = coord_result.get("vector_count") + coord_graph_nodes = coord_result.get("graph_nodes") + coord_drift = coord_result.get("drift") + + drift_pct = f"{coord_drift * 100:.1f}%" if coord_drift is not None else "N/A" + if coord_drift is None: + drift_color = "white" + elif coord_drift < 0.05: + drift_color = "green" + elif 
coord_drift < 0.15: + drift_color = "yellow" + else: + drift_color = "red" + + vec_display = str(coord_vector_count) if coord_vector_count != -1 and coord_vector_count is not None else "unknown" + drift_detail = ( + f"SQL={coord_sql_pages}, Vector={vec_display}, " + f"Drift=[{drift_color}]{drift_pct}[/{drift_color}]" + ) + coord_ok = coord_drift is None or coord_drift < 0.05 + checks.append(_check("Coordinator drift", coord_ok, drift_detail)) + except Exception as exc: + checks.append(_check("Coordinator drift", True, f"Could not check: {exc}")) + # Display table = Table(title="repowise Doctor") table.add_column("Check", style="cyan") diff --git a/packages/cli/src/repowise/cli/commands/init_cmd.py b/packages/cli/src/repowise/cli/commands/init_cmd.py index ba706e8..1ba9bec 100644 --- a/packages/cli/src/repowise/cli/commands/init_cmd.py +++ b/packages/cli/src/repowise/cli/commands/init_cmd.py @@ -411,6 +411,7 @@ def init_command( BarColumn(), TextColumn("{task.completed}/{task.total}"), TimeElapsedColumn(), + TextColumn("[green]${task.fields[cost]:.3f}[/green]"), console=console, ) as progress_bar: callback = RichProgressCallback(progress_bar, console) @@ -520,10 +521,51 @@ def init_command( BarColumn(), TextColumn("{task.completed}/{task.total}"), TimeElapsedColumn(), + TextColumn("[green]${task.fields[cost]:.3f}[/green]"), console=console, ) as gen_progress: gen_callback = RichProgressCallback(gen_progress, console) + # Construct a CostTracker backed by the real DB so every LLM call + # is persisted to the llm_costs table. We need the repo_id from the + # database row that was created/upserted during _persist_result + # (which has not run yet), so we look it up or fall back to in-memory. 
+ from repowise.core.generation.cost_tracker import CostTracker + from repowise.cli.helpers import get_db_url_for_repo + from repowise.core.persistence import ( + create_engine as _create_engine, + create_session_factory as _create_sf, + get_session as _get_session, + init_db as _init_db, + upsert_repository as _upsert_repo, + ) + + async def _make_cost_tracker() -> CostTracker: + url = get_db_url_for_repo(repo_path) + engine = _create_engine(url) + await _init_db(engine) + sf = _create_sf(engine) + async with _get_session(sf) as _sess: + _repo = await _upsert_repo( + _sess, + name=result.repo_name, + local_path=str(repo_path), + ) + _repo_id = _repo.id + # Keep engine alive for the duration of generation — it will be + # disposed by _persist_result's own engine later. + return CostTracker(session_factory=sf, repo_id=_repo_id) + + try: + cost_tracker = run_async(_make_cost_tracker()) + except Exception: + # Fallback to in-memory tracker if DB setup fails + cost_tracker = CostTracker() + + # Attach tracker to provider unconditionally (all providers now + # accept _cost_tracker as an attribute) + provider._cost_tracker = cost_tracker + generated_pages = run_async( run_generation( repo_path=repo_path, @@ -538,6 +580,7 @@ def init_command( concurrency=concurrency, progress=gen_callback, resume=resume, + cost_tracker=cost_tracker, ) ) diff --git a/packages/cli/src/repowise/cli/main.py b/packages/cli/src/repowise/cli/main.py index 02dd069..7fa3650 100644 --- a/packages/cli/src/repowise/cli/main.py +++ b/packages/cli/src/repowise/cli/main.py @@ -6,6 +6,7 @@ from repowise.cli import __version__ from repowise.cli.commands.claude_md_cmd import claude_md_command +from repowise.cli.commands.costs_cmd import costs_command from repowise.cli.commands.dead_code_cmd import dead_code_command from repowise.cli.commands.decision_cmd import decision_group from repowise.cli.commands.doctor_cmd import doctor_command @@ -28,6 +29,7 @@ def cli() -> None: cli.add_command(init_command) 
cli.add_command(claude_md_command) +cli.add_command(costs_command) cli.add_command(update_command) cli.add_command(dead_code_command) cli.add_command(decision_group) diff --git a/packages/cli/src/repowise/cli/ui.py b/packages/cli/src/repowise/cli/ui.py index 3c500d1..633c052 100644 --- a/packages/cli/src/repowise/cli/ui.py +++ b/packages/cli/src/repowise/cli/ui.py @@ -569,7 +569,9 @@ def on_phase_start(self, phase: str, total: int | None) -> None: if phase in self._tasks: self._progress.update(self._tasks[phase], total=total, visible=True) else: - self._tasks[phase] = self._progress.add_task(label, total=total, visible=True) + self._tasks[phase] = self._progress.add_task( + label, total=total, visible=True, cost=0.0 + ) def on_item_done(self, phase: str) -> None: if phase in self._tasks: @@ -582,3 +584,11 @@ def on_message(self, level: str, text: str) -> None: self._progress.console.print(f" [{style}]{text}[/{style}]") else: self._progress.console.print(f" {text}") + + def set_cost(self, total_cost: float) -> None: + """Update the live cost display on all active progress tasks.""" + for task_id in self._tasks.values(): + try: + self._progress.update(task_id, cost=total_cost) + except Exception: + pass diff --git a/packages/core/alembic/versions/0009_llm_costs.py b/packages/core/alembic/versions/0009_llm_costs.py new file mode 100644 index 0000000..328ec5d --- /dev/null +++ b/packages/core/alembic/versions/0009_llm_costs.py @@ -0,0 +1,54 @@ +"""Add llm_costs table for runtime LLM cost tracking. 
+ +Revision ID: 0009 +Revises: 0008 +Create Date: 2026-04-07 +""" + +from __future__ import annotations + +from collections.abc import Sequence + +import sqlalchemy as sa +from alembic import op + +# revision identifiers +revision: str = "0009" +down_revision: str | None = "0008" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + op.create_table( + "llm_costs", + sa.Column("id", sa.Integer, primary_key=True, autoincrement=True), + sa.Column( + "repository_id", + sa.String(32), + sa.ForeignKey("repositories.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column( + "ts", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.text("CURRENT_TIMESTAMP"), + ), + sa.Column("model", sa.String(100), nullable=False), + sa.Column("operation", sa.String(50), nullable=False), + sa.Column("input_tokens", sa.Integer, nullable=False), + sa.Column("output_tokens", sa.Integer, nullable=False), + sa.Column("cost_usd", sa.Float, nullable=False), + sa.Column("file_path", sa.String(1024), nullable=True), + ) + op.create_index( + "ix_llm_costs_repository_ts", + "llm_costs", + ["repository_id", "ts"], + ) + + +def downgrade() -> None: + op.drop_index("ix_llm_costs_repository_ts", table_name="llm_costs") + op.drop_table("llm_costs") diff --git a/packages/core/alembic/versions/0010_temporal_hotspot_score.py b/packages/core/alembic/versions/0010_temporal_hotspot_score.py new file mode 100644 index 0000000..b2ce829 --- /dev/null +++ b/packages/core/alembic/versions/0010_temporal_hotspot_score.py @@ -0,0 +1,38 @@ +"""Add temporal_hotspot_score column to git_metadata. + +Stores an exponentially time-decayed churn score used as the primary +signal for hotspot percentile ranking (PERCENT_RANK window function). 
+ +Revision ID: 0010 +Revises: 0009 +Create Date: 2026-04-07 +""" + +from __future__ import annotations + +from collections.abc import Sequence + +import sqlalchemy as sa +from alembic import op + +# revision identifiers +revision: str = "0010" +down_revision: str | None = "0009" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + op.add_column( + "git_metadata", + sa.Column( + "temporal_hotspot_score", + sa.Float, + nullable=True, + server_default="0.0", + ), + ) + + +def downgrade() -> None: + op.drop_column("git_metadata", "temporal_hotspot_score") diff --git a/packages/core/alembic/versions/0011_security_findings.py b/packages/core/alembic/versions/0011_security_findings.py new file mode 100644 index 0000000..86e01f6 --- /dev/null +++ b/packages/core/alembic/versions/0011_security_findings.py @@ -0,0 +1,51 @@ +"""Add security_findings table. + +Stores lightweight security signals detected during file ingestion, +including eval/exec calls, hardcoded secrets, raw SQL, weak hashes, etc. 
+ +Revision ID: 0011 +Revises: 0010 +Create Date: 2026-04-07 +""" + +from __future__ import annotations + +from collections.abc import Sequence + +import sqlalchemy as sa +from alembic import op + +# revision identifiers +revision: str = "0011" +down_revision: str | None = "0010" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + op.create_table( + "security_findings", + sa.Column("id", sa.Integer, primary_key=True, autoincrement=True), + sa.Column("repository_id", sa.String(32), nullable=False), + sa.Column("file_path", sa.String(1024), nullable=False), + sa.Column("kind", sa.String(100), nullable=False), + sa.Column("severity", sa.String(20), nullable=False), + sa.Column("snippet", sa.Text, nullable=True), + sa.Column("line_number", sa.Integer, nullable=True), + sa.Column( + "detected_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.func.now(), + ), + ) + op.create_index( + "ix_security_findings_repo_file", + "security_findings", + ["repository_id", "file_path"], + ) + + +def downgrade() -> None: + op.drop_index("ix_security_findings_repo_file", table_name="security_findings") + op.drop_table("security_findings") diff --git a/packages/core/src/repowise/core/analysis/pr_blast.py b/packages/core/src/repowise/core/analysis/pr_blast.py new file mode 100644 index 0000000..3404714 --- /dev/null +++ b/packages/core/src/repowise/core/analysis/pr_blast.py @@ -0,0 +1,282 @@ +"""PR blast radius analyzer. 
+ +Given a set of changed files, computes: + - Direct risk per file (hotspot * centrality) + - Transitive affected files (graph ancestors up to max_depth) + - Co-change warnings (historical co-change partners NOT in the PR) + - Recommended reviewers (top owners of affected files) + - Test gaps (affected files without a corresponding test file) + - Overall risk score (0-10) + +Reuses existing data: graph_nodes/graph_edges (SQL), git_metadata, and the +co_change_partners_json field stored in git_metadata rows. +""" + +from __future__ import annotations + +import json +import os +from collections import defaultdict +from typing import Any + +from sqlalchemy import select, text +from sqlalchemy.ext.asyncio import AsyncSession + +from repowise.core.persistence.models import GitMetadata, GraphNode + + +class PRBlastRadiusAnalyzer: + """Compute blast radius for a proposed PR given its changed files.""" + + def __init__(self, session: AsyncSession, repo_id: str) -> None: + self._session = session + self._repo_id = repo_id + + async def analyze_files( + self, + changed_files: list[str], + max_depth: int = 3, + ) -> dict: + """Return full blast-radius analysis for the given changed files. + + Parameters + ---------- + changed_files: + Relative file paths that are modified in the PR. + max_depth: + Maximum BFS depth for transitive ancestor lookup. + """ + changed_set = set(changed_files) + + # 1. Per-file direct risk + direct_risks = await self._score_files(changed_files) + + # 2. Transitive affected files + transitive_affected = await self._transitive_affected(changed_files, max_depth) + all_affected_paths = list(changed_set | {e["path"] for e in transitive_affected}) + + # 3. Co-change warnings + cochange_warnings = await self._cochange_warnings(changed_files, changed_set) + + # 4. Recommended reviewers (over all affected files) + recommended_reviewers = await self._recommend_reviewers(all_affected_paths) + + # 5. 
Test gaps + test_gaps = await self._find_test_gaps(all_affected_paths) + + # 6. Overall risk score (0-10) + overall_risk_score = self._compute_overall_risk(direct_risks, transitive_affected) + + return { + "direct_risks": direct_risks, + "transitive_affected": transitive_affected, + "cochange_warnings": cochange_warnings, + "recommended_reviewers": recommended_reviewers, + "test_gaps": test_gaps, + "overall_risk_score": overall_risk_score, + } + + # ------------------------------------------------------------------ + # Private helpers + # ------------------------------------------------------------------ + + async def _score_files(self, paths: list[str]) -> list[dict]: + """Return direct risk records for each changed file.""" + if not paths: + return [] + + # Fetch git_metadata for all paths in one query + res = await self._session.execute( + select(GitMetadata).where( + GitMetadata.repository_id == self._repo_id, + GitMetadata.file_path.in_(paths), + ) + ) + meta_by_path: dict[str, Any] = {m.file_path: m for m in res.scalars().all()} + + # Fetch graph node pagerank (used as centrality proxy) + node_res = await self._session.execute( + select(GraphNode).where( + GraphNode.repository_id == self._repo_id, + GraphNode.node_id.in_(paths), + ) + ) + node_by_path: dict[str, Any] = {n.node_id: n for n in node_res.scalars().all()} + + results = [] + for path in paths: + meta = meta_by_path.get(path) + node = node_by_path.get(path) + temporal = float(getattr(meta, "temporal_hotspot_score", 0.0) or 0.0) + centrality = float(getattr(node, "pagerank", 0.0) or 0.0) + risk_score = self._score_file(temporal, centrality) + results.append( + { + "path": path, + "risk_score": round(risk_score, 4), + "temporal_hotspot": round(temporal, 4), + "centrality": round(centrality, 6), + } + ) + + results.sort(key=lambda x: -x["risk_score"]) + return results + + @staticmethod + def _score_file(temporal_hotspot_score: float, centrality: float) -> float: + """Compute file-level risk: centrality 
* (1 + temporal_hotspot_score).""" + return centrality * (1.0 + temporal_hotspot_score) + + async def _transitive_affected( + self, changed_files: list[str], max_depth: int + ) -> list[dict]: + """BFS over reverse graph edges (source_node_id -> target_node_id direction). + + We want files that *import* the changed files (i.e. are affected when a + changed file changes). In graph_edges, an edge means + ``source imports target``, so we look for rows where + ``target_node_id IN (frontier)`` and collect the ``source_node_id`` + values — those are the files that depend on our changed set. + """ + visited: dict[str, int] = {} # path -> depth at which it was first reached + frontier = list(set(changed_files)) + + for depth in range(1, max_depth + 1): + if not frontier: + break + # SQLite / SQLAlchemy compatible IN query via text() + placeholders = ",".join(f":p{i}" for i in range(len(frontier))) + params: dict[str, Any] = {"repo_id": self._repo_id} + params.update({f"p{i}": v for i, v in enumerate(frontier)}) + rows = await self._session.execute( + text( + f"SELECT DISTINCT source_node_id FROM graph_edges " + f"WHERE repository_id = :repo_id " + f"AND target_node_id IN ({placeholders})" + ), + params, + ) + next_frontier = [] + for (src,) in rows: + if src not in visited and src not in set(changed_files): + visited[src] = depth + next_frontier.append(src) + frontier = next_frontier + + return [{"path": p, "depth": d} for p, d in sorted(visited.items(), key=lambda x: x[1])] + + async def _cochange_warnings( + self, changed_files: list[str], changed_set: set[str] + ) -> list[dict]: + """Return co-change partners of changed files that are NOT in the PR.""" + if not changed_files: + return [] + + res = await self._session.execute( + select(GitMetadata).where( + GitMetadata.repository_id == self._repo_id, + GitMetadata.file_path.in_(changed_files), + ) + ) + + warnings = [] + for meta in res.scalars().all(): + partners = json.loads(meta.co_change_partners_json or "[]") + for 
partner in partners: + partner_path = partner.get("file_path") or partner.get("path") or "" + score = float(partner.get("co_change_count") or partner.get("count") or 0) + if partner_path and partner_path not in changed_set: + warnings.append( + { + "changed": meta.file_path, + "missing_partner": partner_path, + "score": score, + } + ) + + warnings.sort(key=lambda x: -x["score"]) + return warnings + + async def _recommend_reviewers(self, affected_files: list[str]) -> list[dict]: + """Aggregate top owners of affected files; return top 5.""" + if not affected_files: + return [] + + res = await self._session.execute( + select(GitMetadata).where( + GitMetadata.repository_id == self._repo_id, + GitMetadata.file_path.in_(affected_files), + ) + ) + + owner_files: dict[str, list[float]] = defaultdict(list) + for meta in res.scalars().all(): + email = meta.primary_owner_email or "" + pct = float(meta.primary_owner_commit_pct or 0.0) + if email: + owner_files[email].append(pct) + + reviewers = [ + { + "email": email, + "files": len(pcts), + "ownership_pct": round(sum(pcts) / len(pcts), 3) if pcts else 0.0, + } + for email, pcts in owner_files.items() + ] + reviewers.sort(key=lambda x: (-x["files"], -x["ownership_pct"])) + return reviewers[:5] + + async def _find_test_gaps(self, affected_files: list[str]) -> list[str]: + """Return files that lack a corresponding test file. + + Checks graph_nodes for paths matching test_, _test, or + .spec.* patterns. + """ + if not affected_files: + return [] + + node_res = await self._session.execute( + select(GraphNode.node_id).where( + GraphNode.repository_id == self._repo_id, + GraphNode.is_test == True, # noqa: E712 + ) + ) + test_paths = {row[0] for row in node_res.all()} + + gaps = [] + for path in affected_files: + base = os.path.splitext(os.path.basename(path))[0] + ext = os.path.splitext(path)[1].lstrip(".") + has_test = any( + ( + f"test_{base}" in tp + or f"{base}_test" in tp + or f"{base}.spec.{ext}" in tp + or f"{base}.spec." 
in tp + ) + for tp in test_paths + ) + if not has_test: + gaps.append(path) + + return gaps + + @staticmethod + def _compute_overall_risk( + direct_risks: list[dict], + transitive_affected: list[dict], + ) -> float: + """Compute overall risk score on 0-10 scale.""" + if not direct_risks: + return 0.0 + + avg_direct = sum(r["risk_score"] for r in direct_risks) / len(direct_risks) + max_direct = max(r["risk_score"] for r in direct_risks) + breadth_bonus = min(len(transitive_affected) / 20.0, 1.0) # 0-1 + + # Weighted: 40% avg, 40% max, 20% breadth — scaled to 10 + raw = (0.4 * avg_direct + 0.4 * max_direct + 0.2 * breadth_bonus) + # Normalise pagerank-based scores (typically << 1) to 0-10 + score = min(raw * 100.0, 10.0) + return round(score, 2) diff --git a/packages/core/src/repowise/core/analysis/security_scan.py b/packages/core/src/repowise/core/analysis/security_scan.py new file mode 100644 index 0000000..d9abc66 --- /dev/null +++ b/packages/core/src/repowise/core/analysis/security_scan.py @@ -0,0 +1,128 @@ +"""Lightweight security signal extractor. + +Scans indexed symbols and source for keyword/regex patterns that indicate +authentication, secret handling, raw SQL, dangerous deserialization, etc. + +Stores findings in the security_findings table (see migration 0011). 
+""" + +from __future__ import annotations + +import re +from datetime import UTC, datetime +from typing import Any + +from sqlalchemy.ext.asyncio import AsyncSession + +# --------------------------------------------------------------------------- +# Pattern registry: (compiled_pattern, kind_label, severity) +# --------------------------------------------------------------------------- +_PATTERNS: list[tuple[re.Pattern, str, str]] = [ + (re.compile(r"eval\s*\("), "eval_call", "high"), + (re.compile(r"exec\s*\("), "exec_call", "high"), + (re.compile(r"pickle\.loads"), "pickle_loads", "high"), + (re.compile(r"subprocess\..*shell\s*=\s*True"), "subprocess_shell_true", "high"), + (re.compile(r"os\.system"), "os_system", "high"), + (re.compile(r"password\s*=\s*['\"]"), "hardcoded_password", "high"), + (re.compile(r"(?:api_?key|secret)\s*=\s*['\"]"), "hardcoded_secret", "high"), + (re.compile(r'f[\'"].*SELECT.*\{.*\}'), "fstring_sql", "med"), + (re.compile(r'\.execute\(\s*[\'\"]\s*SELECT.*\+'), "concat_sql", "med"), + (re.compile(r"verify\s*=\s*False"), "tls_verify_false", "med"), + (re.compile(r"\bmd5\b|\bsha1\b"), "weak_hash", "low"), +] + +# Symbol names that are informational security hotspots +_SYMBOL_KEYWORDS = re.compile( + r"\b(auth|token|password|jwt|session|crypto)\b", re.IGNORECASE +) + + +class SecurityScanner: + """Scan a single file for security signals and persist to the database.""" + + def __init__(self, session: AsyncSession, repo_id: str) -> None: + self._session = session + self._repo_id = repo_id + + async def scan_file( + self, + file_path: str, + source: str, + symbols: list[Any], + ) -> list[dict]: + """Scan *source* text and symbol names; return list of finding dicts. + + Parameters + ---------- + file_path: + Relative path of the file (for reference only; not used in scan). + source: + Full text content of the file. + symbols: + List of symbol objects that have a ``name`` attribute (or similar). 
+ """ + findings: list[dict] = [] + lines = source.splitlines() + + # Line-by-line pattern scan + for lineno, line in enumerate(lines, start=1): + for pattern, kind, severity in _PATTERNS: + if pattern.search(line): + # Trim snippet to keep it concise + snippet = line.strip()[:120] + findings.append( + { + "kind": kind, + "severity": severity, + "snippet": snippet, + "line": lineno, + } + ) + + # Symbol-name scan (informational / low) + for sym in symbols: + name = getattr(sym, "name", "") or getattr(sym, "qualified_name", "") or "" + if name and _SYMBOL_KEYWORDS.search(name): + findings.append( + { + "kind": "security_sensitive_symbol", + "severity": "low", + "snippet": name, + "line": getattr(sym, "start_line", 0) or 0, + } + ) + + return findings + + async def persist(self, file_path: str, findings: list[dict]) -> None: + """Insert security findings into the security_findings table. + + Uses raw INSERT to stay independent of any ORM session state. + Silently skips if the table doesn't exist yet (pre-migration). 
+ """ + from sqlalchemy import text + + if not findings: + return + + now = datetime.now(UTC) + for finding in findings: + try: + await self._session.execute( + text( + "INSERT INTO security_findings " + "(repository_id, file_path, kind, severity, snippet, line_number, detected_at) " + "VALUES (:repo_id, :file_path, :kind, :severity, :snippet, :line, :detected_at)" + ), + { + "repo_id": self._repo_id, + "file_path": file_path, + "kind": finding["kind"], + "severity": finding["severity"], + "snippet": finding.get("snippet", ""), + "line": finding.get("line", 0), + "detected_at": now, + }, + ) + except Exception: # noqa: BLE001 — table may not exist pre-migration + break diff --git a/packages/core/src/repowise/core/generation/cost_tracker.py b/packages/core/src/repowise/core/generation/cost_tracker.py new file mode 100644 index 0000000..542c131 --- /dev/null +++ b/packages/core/src/repowise/core/generation/cost_tracker.py @@ -0,0 +1,264 @@ +"""Runtime LLM cost tracking for repowise. + +Tracks token usage and cost per session, and optionally persists rows to +the ``llm_costs`` table for historical reporting via ``repowise costs``. 
+""" + +from __future__ import annotations + +from datetime import datetime +from typing import Any + +import structlog + +log = structlog.get_logger(__name__) + +# --------------------------------------------------------------------------- +# Pricing table — USD per 1 million tokens +# --------------------------------------------------------------------------- + +_PRICING: dict[str, dict[str, float]] = { + # Anthropic + "claude-opus-4-6": {"input": 15.0, "output": 75.0}, + "claude-sonnet-4-6": {"input": 3.0, "output": 15.0}, + "claude-haiku-4-5": {"input": 0.8, "output": 4.0}, + "claude-3-5-sonnet-20241022": {"input": 3.0, "output": 15.0}, + # OpenAI + "gpt-4o": {"input": 2.5, "output": 10.0}, + "gpt-4o-mini": {"input": 0.15, "output": 0.6}, + # Google Gemini + "gemini-2.0-flash": {"input": 0.075, "output": 0.3}, + "gemini-2.5-flash": {"input": 0.30, "output": 2.50}, + "gemini-2.5-pro": {"input": 1.25, "output": 10.0}, + "gemini-1.5-flash": {"input": 0.075, "output": 0.30}, + "gemini-1.5-flash-lite": {"input": 0.075, "output": 0.30}, + "gemini-1.5-pro": {"input": 1.25, "output": 5.0}, + # Gemini preview / experimental models + "gemini-3.1-flash-lite-preview": {"input": 0.075, "output": 0.30}, + "gemini-3-flash-preview": {"input": 0.075, "output": 0.30}, +} + +_FALLBACK_PRICING: dict[str, float] = {"input": 3.0, "output": 15.0} + +# Track which unknown models we've already warned about (per-process) +_warned_models: set[str] = set() + + +def _get_pricing(model: str) -> dict[str, float]: + """Return pricing for *model*, falling back and warning if unknown.""" + if model in _PRICING: + return _PRICING[model] + if model not in _warned_models: + log.warning("cost_tracker.unknown_model", model=model, fallback=_FALLBACK_PRICING) + _warned_models.add(model) + return _FALLBACK_PRICING + + +# --------------------------------------------------------------------------- +# CostTracker +# --------------------------------------------------------------------------- + + +class 
CostTracker: + """Tracks LLM token usage and cost for a session. + + Optionally persists each call to the ``llm_costs`` table when a + *session_factory* (async SQLAlchemy sessionmaker) is supplied. + + Parameters + ---------- + session_factory: + Async SQLAlchemy sessionmaker. When ``None``, only in-memory + tracking is performed. + repo_id: + Repository primary key to associate with persisted rows. + """ + + def __init__( + self, + session_factory: Any | None = None, + repo_id: str | None = None, + ) -> None: + self._session_factory = session_factory + self._repo_id = repo_id + self._session_cost: float = 0.0 + self._session_tokens: int = 0 + + # ------------------------------------------------------------------ + # Properties + # ------------------------------------------------------------------ + + @property + def session_cost(self) -> float: + """Cumulative USD cost for this tracker instance.""" + return self._session_cost + + @property + def session_tokens(self) -> int: + """Cumulative tokens (input + output) for this tracker instance.""" + return self._session_tokens + + # ------------------------------------------------------------------ + # Recording + # ------------------------------------------------------------------ + + async def record( + self, + model: str, + input_tokens: int, + output_tokens: int, + operation: str, + file_path: str | None = None, + ) -> float: + """Record a single LLM call and return its cost in USD. + + Parameters + ---------- + model: + Model identifier, e.g. ``"claude-sonnet-4-6"``. + input_tokens: + Number of input/prompt tokens consumed. + output_tokens: + Number of output/completion tokens consumed. + operation: + Logical operation label, e.g. ``"doc_generation"`` or + ``"embedding"``. + file_path: + Source file being processed, if available. + + Returns + ------- + float + Cost in USD for this call. 
+ """ + pricing = _get_pricing(model) + cost = (input_tokens * pricing["input"] + output_tokens * pricing["output"]) / 1_000_000 + + self._session_cost += cost + self._session_tokens += input_tokens + output_tokens + + log.debug( + "cost_tracker.record", + model=model, + input_tokens=input_tokens, + output_tokens=output_tokens, + cost_usd=cost, + operation=operation, + file_path=file_path, + ) + + if self._session_factory is not None and self._repo_id is not None: + await self._persist( + model=model, + input_tokens=input_tokens, + output_tokens=output_tokens, + cost_usd=cost, + operation=operation, + file_path=file_path, + ) + + return cost + + async def _persist( + self, + *, + model: str, + input_tokens: int, + output_tokens: int, + cost_usd: float, + operation: str, + file_path: str | None, + ) -> None: + """Write a row to the ``llm_costs`` table.""" + try: + from repowise.core.persistence.models import LlmCost + from repowise.core.persistence import get_session + + async with get_session(self._session_factory) as session: + row = LlmCost( + repository_id=self._repo_id, + model=model, + operation=operation, + input_tokens=input_tokens, + output_tokens=output_tokens, + cost_usd=cost_usd, + file_path=file_path, + ) + session.add(row) + await session.commit() + except Exception as exc: + log.warning("cost_tracker.persist_failed", error=str(exc)) + + # ------------------------------------------------------------------ + # Querying + # ------------------------------------------------------------------ + + async def totals( + self, + since: datetime | None = None, + group_by: str = "operation", + ) -> list[dict]: + """Query aggregate cost totals from the database. + + Parameters + ---------- + since: + Only include rows whose ``ts`` is on or after this datetime. + group_by: + Grouping dimension: ``"operation"``, ``"model"``, or ``"day"``. 
+ + Returns + ------- + list[dict] + Each dict has keys: ``group``, ``calls``, ``input_tokens``, + ``output_tokens``, ``cost_usd``. + """ + if self._session_factory is None or self._repo_id is None: + return [] + + try: + import sqlalchemy as sa + from repowise.core.persistence.models import LlmCost + from repowise.core.persistence import get_session + + async with get_session(self._session_factory) as session: + if group_by == "model": + group_col = LlmCost.model + elif group_by == "day": + # SQLite strftime; works for Postgres too with cast + group_col = sa.func.strftime("%Y-%m-%d", LlmCost.ts) + else: + group_col = LlmCost.operation + + stmt = ( + sa.select( + group_col.label("group"), + sa.func.count().label("calls"), + sa.func.sum(LlmCost.input_tokens).label("input_tokens"), + sa.func.sum(LlmCost.output_tokens).label("output_tokens"), + sa.func.sum(LlmCost.cost_usd).label("cost_usd"), + ) + .where(LlmCost.repository_id == self._repo_id) + .group_by(group_col) + .order_by(sa.func.sum(LlmCost.cost_usd).desc()) + ) + + if since is not None: + stmt = stmt.where(LlmCost.ts >= since) + + result = await session.execute(stmt) + rows = result.fetchall() + + return [ + { + "group": row.group, + "calls": row.calls, + "input_tokens": row.input_tokens or 0, + "output_tokens": row.output_tokens or 0, + "cost_usd": row.cost_usd or 0.0, + } + for row in rows + ] + except Exception as exc: + log.warning("cost_tracker.totals_failed", error=str(exc)) + return [] diff --git a/packages/core/src/repowise/core/generation/page_generator.py b/packages/core/src/repowise/core/generation/page_generator.py index 84843cb..4f0c9d3 100644 --- a/packages/core/src/repowise/core/generation/page_generator.py +++ b/packages/core/src/repowise/core/generation/page_generator.py @@ -482,6 +482,7 @@ async def guarded_named(page_id: str, coro: Any) -> Any: # Embed page for RAG (B1) if self._vector_store is not None and isinstance(result, GeneratedPage): try: + page_summary = 
_extract_summary(result.content) await self._vector_store.embed_and_upsert( result.page_id, result.content, @@ -489,6 +490,7 @@ async def guarded_named(page_id: str, coro: Any) -> Any: "page_type": result.page_type, "target_path": result.target_path, "content": result.content[:600], + "summary": page_summary, }, ) except Exception as e: @@ -668,10 +670,70 @@ async def guarded_named(page_id: str, coro: Any) -> Any: # Context is assembled for ALL code files (module pages need it). # Pages are generated only for files that cross the significance bar. # page_summaries from level 0+1 are available here (B2). + # + # Topo-sort: process leaves (no internal out-edges) before roots so that + # dependency summaries are available when assembling dependents' contexts. + # Falls back to existing priority order if networkx is unavailable or graph + # has cycles. + code_file_paths = [p.file_info.path for p in code_files] + try: + import networkx as nx # type: ignore[import] + + # Build a subgraph of just the code files we are about to generate + code_file_set = set(code_file_paths) + dag = nx.DiGraph() + dag.add_nodes_from(code_file_paths) + for path_ in code_file_paths: + if path_ in graph: + for succ in graph.successors(path_): + if succ in code_file_set: + dag.add_edge(path_, succ) # path_ depends on succ + + if nx.is_directed_acyclic_graph(dag): + # topological_sort yields nodes in an order where for each edge u→v, + # u comes before v — i.e. dependents before dependencies. + # We want leaves (dependencies) first, so reverse the order. + topo_order = list(reversed(list(nx.topological_sort(dag)))) + else: + # Cycle present: condense SCCs, topo-sort condensation, then expand. 
+ condensation = nx.condensation(dag) + topo_order_scc = list(reversed(list(nx.topological_sort(condensation)))) + scc_members: dict[int, list[str]] = { + n: list(condensation.nodes[n]["members"]) for n in condensation.nodes + } + topo_order = [ + node for scc_id in topo_order_scc for node in scc_members[scc_id] + ] + + # Preserve priority ordering within the topo-sort by mapping paths to + # their original priority index. + priority_index = {p: i for i, p in enumerate(code_file_paths)} + topo_order = [p for p in topo_order if p in priority_index] + # Re-sort code_files to match topo_order + path_to_parsed = {p.file_info.path: p for p in code_files} + code_files = [path_to_parsed[p] for p in topo_order if p in path_to_parsed] + except Exception: + pass # Keep existing priority order on any failure + file_page_contexts: dict[str, FilePageContext] = {} level2_coros: list[tuple[str, Any]] = [] for p in code_files: + # Pre-fetch dependency summaries from vector store for deps not yet + # in the completed_page_summaries accumulator (e.g. from prior runs). 
+ if self._vector_store is not None: + path_ = p.file_info.path + out_edges = list(graph.successors(path_)) if path_ in graph else [] + internal_deps = [e for e in out_edges if not e.startswith("external:")] + for dep in internal_deps: + if dep not in completed_page_summaries: + try: + result = await self._vector_store.get_page_summary_by_path(dep) + if result and result.get("summary"): + completed_page_summaries[dep] = result["summary"] + except Exception: + pass # Non-fatal — dep context is optional + ctx = self._assembler.assemble_file_page( p, graph, diff --git a/packages/core/src/repowise/core/generation/templates/file_page.j2 b/packages/core/src/repowise/core/generation/templates/file_page.j2 index 2a90e43..8206d8f 100644 --- a/packages/core/src/repowise/core/generation/templates/file_page.j2 +++ b/packages/core/src/repowise/core/generation/templates/file_page.j2 @@ -51,6 +51,15 @@ Module docstring: {{ ctx.docstring }} {% endfor %} {% endif %} +{% if ctx.dependency_summaries %} +## Dependency Context +The following are summaries of files that this file imports. 
Use these to understand how the imports are intended to be used: +{% for dep_path, dep_summary in ctx.dependency_summaries.items() %} +### `{{ dep_path }}` +{{ dep_summary }} +{% endfor %} +{% endif %} + {% if ctx.file_source_snippet %} ## Source Snippet ```{{ ctx.language }} diff --git a/packages/core/src/repowise/core/ingestion/dynamic_hints/__init__.py b/packages/core/src/repowise/core/ingestion/dynamic_hints/__init__.py new file mode 100644 index 0000000..95af349 --- /dev/null +++ b/packages/core/src/repowise/core/ingestion/dynamic_hints/__init__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .base import DynamicEdge, DynamicHintExtractor +from .registry import HintRegistry + +__all__ = ["HintRegistry", "DynamicEdge", "DynamicHintExtractor"] diff --git a/packages/core/src/repowise/core/ingestion/dynamic_hints/base.py b/packages/core/src/repowise/core/ingestion/dynamic_hints/base.py new file mode 100644 index 0000000..25f2541 --- /dev/null +++ b/packages/core/src/repowise/core/ingestion/dynamic_hints/base.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from pathlib import Path + + +@dataclass +class DynamicEdge: + source: str # repo-relative path + target: str # repo-relative path + edge_type: str # "dynamic_uses" | "dynamic_imports" | "url_route" + hint_source: str # extractor name + weight: float = 1.0 + + +class DynamicHintExtractor(ABC): + name: str + + @abstractmethod + def extract(self, repo_root: Path) -> list[DynamicEdge]: ... 
diff --git a/packages/core/src/repowise/core/ingestion/dynamic_hints/django.py b/packages/core/src/repowise/core/ingestion/dynamic_hints/django.py new file mode 100644 index 0000000..6d1e4a4 --- /dev/null +++ b/packages/core/src/repowise/core/ingestion/dynamic_hints/django.py @@ -0,0 +1,152 @@ +from __future__ import annotations + +import ast +import re +from pathlib import Path + +from .base import DynamicEdge, DynamicHintExtractor + + +def _app_to_path(app: str, repo_root: Path) -> str | None: + """Attempt to resolve a dotted app name to an __init__.py under repo_root.""" + # Try direct directory: myapp/__init__.py + direct = repo_root / app / "__init__.py" + if direct.exists(): + return str(direct.relative_to(repo_root).as_posix()) + # Try dotted path: myapp.sub → myapp/sub/__init__.py + dotted = app.replace(".", "/") + "/__init__.py" + dotted_path = repo_root / dotted + if dotted_path.exists(): + return str(dotted_path.relative_to(repo_root).as_posix()) + return None + + +def _module_to_path(module: str, repo_root: Path) -> str | None: + """Attempt to resolve a dotted module string to a .py file under repo_root.""" + as_path = module.replace(".", "/") + # Try as a .py file directly + candidate = repo_root / (as_path + ".py") + if candidate.exists(): + return str(candidate.relative_to(repo_root).as_posix()) + # Try as __init__.py inside a package + candidate = repo_root / as_path / "__init__.py" + if candidate.exists(): + return str(candidate.relative_to(repo_root).as_posix()) + return None + + +def _extract_string_list(node: ast.expr) -> list[str]: + """Extract string literals from an ast.List node.""" + results: list[str] = [] + if not isinstance(node, ast.List): + return results + for elt in node.elts: + if isinstance(elt, ast.Constant) and isinstance(elt.value, str): + results.append(elt.value) + return results + + +def _extract_string_value(node: ast.expr) -> str | None: + """Extract a string literal value from a node.""" + if isinstance(node, ast.Constant) 
and isinstance(node.value, str): + return node.value + return None + + +class DjangoDynamicHints(DynamicHintExtractor): + name = "django_settings" + + def extract(self, repo_root: Path) -> list[DynamicEdge]: + edges: list[DynamicEdge] = [] + edges.extend(self._scan_settings(repo_root)) + edges.extend(self._scan_urls(repo_root)) + return edges + + def _scan_settings(self, repo_root: Path) -> list[DynamicEdge]: + edges: list[DynamicEdge] = [] + + # Collect all settings files + settings_files: list[Path] = list(repo_root.rglob("settings.py")) + for settings_dir in repo_root.rglob("settings"): + if settings_dir.is_dir(): + settings_files.extend(settings_dir.glob("*.py")) + + for settings_file in settings_files: + try: + source = settings_file.read_text(encoding="utf-8", errors="ignore") + tree = ast.parse(source, filename=str(settings_file)) + except Exception: + continue + + try: + rel_settings = settings_file.relative_to(repo_root).as_posix() + except ValueError: + continue + + for node in ast.walk(tree): + if not isinstance(node, ast.Assign): + continue + for target in node.targets: + if not (isinstance(target, ast.Name)): + continue + name = target.id + + if name == "INSTALLED_APPS": + for app in _extract_string_list(node.value): + resolved = _app_to_path(app, repo_root) + if resolved: + edges.append(DynamicEdge( + source=rel_settings, + target=resolved, + edge_type="dynamic_imports", + hint_source=self.name, + )) + + elif name == "ROOT_URLCONF": + module = _extract_string_value(node.value) + if module: + resolved = _module_to_path(module, repo_root) + if resolved: + edges.append(DynamicEdge( + source=rel_settings, + target=resolved, + edge_type="dynamic_imports", + hint_source=self.name, + )) + + elif name == "MIDDLEWARE": + for middleware in _extract_string_list(node.value): + resolved = _module_to_path(middleware, repo_root) + if resolved: + edges.append(DynamicEdge( + source=rel_settings, + target=resolved, + edge_type="dynamic_imports", + 
hint_source=self.name, + )) + + return edges + + def _scan_urls(self, repo_root: Path) -> list[DynamicEdge]: + edges: list[DynamicEdge] = [] + include_re = re.compile(r"""include\(\s*['\"]([\w\.]+)['\"]""") + + for urls_file in repo_root.rglob("urls.py"): + try: + source = urls_file.read_text(encoding="utf-8", errors="ignore") + rel_urls = urls_file.relative_to(repo_root).as_posix() + except Exception: + continue + + for match in include_re.finditer(source): + module = match.group(1) + resolved = _module_to_path(module, repo_root) + if resolved: + edges.append(DynamicEdge( + source=rel_urls, + target=resolved, + edge_type="url_route", + hint_source=self.name, + )) + + return edges diff --git a/packages/core/src/repowise/core/ingestion/dynamic_hints/node.py b/packages/core/src/repowise/core/ingestion/dynamic_hints/node.py new file mode 100644 index 0000000..62e6e11 --- /dev/null +++ b/packages/core/src/repowise/core/ingestion/dynamic_hints/node.py @@ -0,0 +1,136 @@ +from __future__ import annotations + +import json +import re +from pathlib import Path +from typing import Any + +from .base import DynamicEdge, DynamicHintExtractor + + +def _json_loads_lenient(text: str) -> Any: + """Try json.loads; on failure, strip trailing commas and retry.""" + try: + return json.loads(text) + except json.JSONDecodeError: + cleaned = re.sub(r",\s*([}\]])", r"\1", text) + return json.loads(cleaned) + + +def _collect_export_strings(obj: Any) -> list[str]: + """Recursively collect string values from an exports object.""" + results: list[str] = [] + if isinstance(obj, str): + results.append(obj) + elif isinstance(obj, dict): + for v in obj.values(): + results.extend(_collect_export_strings(v)) + elif isinstance(obj, list): + for item in obj: + results.extend(_collect_export_strings(item)) + return results + + +class NodeDynamicHints(DynamicHintExtractor): + name = "node_package" + + def extract(self, repo_root: Path) -> list[DynamicEdge]: + edges: list[DynamicEdge] = [] + 
# Reconstructed methods of NodeDynamicHints.  Only ``self.name`` is read from
# the instance; DynamicEdge, _collect_export_strings and _json_loads_lenient
# come from the enclosing module.

def _scan_package_json(self, repo_root: Path) -> list[DynamicEdge]:
    """Emit dynamic_imports edges from package.json entry points.

    Scans every ``package.json`` outside ``node_modules``, collects the
    "main"/"module"/"browser" fields plus all string leaves of "exports",
    resolves relative candidates against the package directory, and emits an
    edge ``package.json -> resolved file`` for each one that exists inside
    the repository.
    """
    edges: list[DynamicEdge] = []

    for pkg_file in repo_root.rglob("package.json"):
        # Vendored dependencies are not part of the repo graph.
        if "node_modules" in pkg_file.parts:
            continue
        try:
            data = json.loads(pkg_file.read_text(encoding="utf-8", errors="ignore"))
            rel_pkg = pkg_file.relative_to(repo_root).as_posix()
            pkg_dir = pkg_file.parent
        except Exception:
            continue  # unreadable or invalid JSON: best-effort, skip

        # Entry-point fields plus every string leaf of "exports".
        candidates: list[str] = [
            data[field]
            for field in ("main", "module", "browser")
            if isinstance(data.get(field), str)
        ]
        exports = data.get("exports")
        if exports is not None:
            candidates.extend(_collect_export_strings(exports))

        for candidate in candidates:
            # Only relative paths can be resolved to files in this repo.
            if not candidate.startswith("."):
                continue
            resolved = (pkg_dir / candidate).resolve()
            try:
                rel_resolved = resolved.relative_to(repo_root.resolve()).as_posix()
            except ValueError:
                continue  # escapes the repo root (e.g. "../..")
            if resolved.exists():
                edges.append(DynamicEdge(
                    source=rel_pkg,
                    target=rel_resolved,
                    edge_type="dynamic_imports",
                    hint_source=self.name,
                ))

    return edges


def _scan_tsconfig(self, repo_root: Path) -> list[DynamicEdge]:
    """Emit dynamic_imports edges from tsconfig path aliases.

    Reads ``compilerOptions.paths`` from every ``tsconfig*.json`` outside
    ``node_modules`` (parsed leniently — tsconfig allows comments/trailing
    commas), resolves each alias target against ``baseUrl``, and emits an
    edge ``tsconfig -> resolved target`` for targets that exist in the repo.
    """
    edges: list[DynamicEdge] = []

    for tsconfig in repo_root.rglob("tsconfig*.json"):
        if "node_modules" in tsconfig.parts:
            continue
        try:
            data = _json_loads_lenient(
                tsconfig.read_text(encoding="utf-8", errors="ignore")
            )
            rel_tsconfig = tsconfig.relative_to(repo_root).as_posix()
            tsconfig_dir = tsconfig.parent
        except Exception:
            continue

        compiler_options = data.get("compilerOptions", {})
        if not isinstance(compiler_options, dict):
            continue
        paths = compiler_options.get("paths")
        if not isinstance(paths, dict):
            continue

        base_dir = (tsconfig_dir / compiler_options.get("baseUrl", ".")).resolve()

        for _alias, targets in paths.items():
            if not isinstance(targets, list):
                continue
            for pattern in targets:
                if not isinstance(pattern, str):
                    continue
                # Strip a single trailing "/*" glob suffix.  The previous
                # ``pattern.rstrip("/*")`` removed *every* trailing '/' or
                # '*' character, which collapsed the very common catch-all
                # target "./*" to "" and silently dropped it.  An empty
                # remainder now maps to the baseUrl directory itself.
                clean = pattern[:-2] if pattern.endswith("/*") else pattern
                clean = clean.rstrip("/")
                resolved = (base_dir / clean).resolve() if clean else base_dir
                try:
                    rel_resolved = resolved.relative_to(repo_root.resolve()).as_posix()
                except ValueError:
                    continue
                if resolved.exists():
                    edges.append(DynamicEdge(
                        source=rel_tsconfig,
                        target=rel_resolved,
                        edge_type="dynamic_imports",
                        hint_source=self.name,
                    ))

    return edges
+ if isinstance(decorator, ast.Call): + return _is_fixture_decorator(decorator.func) + return False + + +def _get_test_function_params(tree: ast.AST) -> set[str]: + """Return all parameter names of test_* functions in the AST.""" + params: set[str] = set() + for node in ast.walk(tree): + if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): + continue + if not node.name.startswith("test_"): + continue + for arg in node.args.args + node.args.posonlyargs + node.args.kwonlyargs: + params.add(arg.arg) + if node.args.vararg: + params.add(node.args.vararg.arg) + if node.args.kwarg: + params.add(node.args.kwarg.arg) + return params + + +class PytestDynamicHints(DynamicHintExtractor): + name = "pytest_conftest" + + def extract(self, repo_root: Path) -> list[DynamicEdge]: + edges: list[DynamicEdge] = [] + + for conftest in repo_root.rglob("conftest.py"): + try: + source = conftest.read_text(encoding="utf-8", errors="ignore") + tree = ast.parse(source, filename=str(conftest)) + rel_conftest = conftest.relative_to(repo_root).as_posix() + except Exception: + continue + + fixture_names = _get_fixture_names(tree) + if not fixture_names: + continue + + conftest_dir = conftest.parent + seen_targets: set[str] = set() + + # Find all test files under the conftest's parent directory + for pattern in ("test_*.py", "*_test.py"): + for test_file in conftest_dir.rglob(pattern): + if test_file == conftest: + continue + try: + rel_test = test_file.relative_to(repo_root).as_posix() + except ValueError: + continue + + if rel_test in seen_targets: + continue + + try: + test_source = test_file.read_text(encoding="utf-8", errors="ignore") + test_tree = ast.parse(test_source, filename=str(test_file)) + except Exception: + continue + + # Check if any test function uses a fixture from this conftest + test_params = _get_test_function_params(test_tree) + if test_params & fixture_names: + seen_targets.add(rel_test) + edges.append(DynamicEdge( + source=rel_conftest, + target=rel_test, + 
edge_type="dynamic_uses", + hint_source=self.name, + )) + + return edges diff --git a/packages/core/src/repowise/core/ingestion/dynamic_hints/registry.py b/packages/core/src/repowise/core/ingestion/dynamic_hints/registry.py new file mode 100644 index 0000000..16b3750 --- /dev/null +++ b/packages/core/src/repowise/core/ingestion/dynamic_hints/registry.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +from pathlib import Path + +import structlog + +from .base import DynamicEdge, DynamicHintExtractor +from .django import DjangoDynamicHints +from .pytest_hints import PytestDynamicHints +from .node import NodeDynamicHints + +log = structlog.get_logger(__name__) + + +class HintRegistry: + def __init__(self, extractors: list[DynamicHintExtractor] | None = None) -> None: + self._extractors = extractors or [ + DjangoDynamicHints(), + PytestDynamicHints(), + NodeDynamicHints(), + ] + + def extract_all(self, repo_root: Path) -> list[DynamicEdge]: + edges: list[DynamicEdge] = [] + for ex in self._extractors: + try: + got = ex.extract(repo_root) + edges.extend(got) + log.debug("dynamic_hints", extractor=ex.name, count=len(got)) + except Exception as e: + log.warning("dynamic_hints_failed", extractor=ex.name, error=str(e)) + return edges diff --git a/packages/core/src/repowise/core/ingestion/git_indexer.py b/packages/core/src/repowise/core/ingestion/git_indexer.py index db4a643..b1875cb 100644 --- a/packages/core/src/repowise/core/ingestion/git_indexer.py +++ b/packages/core/src/repowise/core/ingestion/git_indexer.py @@ -100,6 +100,9 @@ def _quiet_del(self: Any) -> None: # Co-change temporal decay: half-life ~125 days (lambda for exp(-t/tau)). _CO_CHANGE_DECAY_TAU: float = 180.0 +# Hotspot temporal decay: half-life for exponentially weighted churn score. +HOTSPOT_HALFLIFE_DAYS: float = 180.0 + # Regex to extract PR/MR numbers from commit messages. 
# Matches: "#123", "Merge pull request #456", "(#789)", "!42" (GitLab MR) _PR_NUMBER_RE = re.compile(r"(?:pull request |)\#(\d+)|\(#(\d+)\)|!(\d+)") @@ -555,6 +558,8 @@ def _index_file(self, file_path: str, repo: Any) -> dict: # Phase 3 fields "original_path": None, "merge_commit_count_90d": 0, + # Temporal hotspot score (exponentially decayed churn) + "temporal_hotspot_score": 0.0, } try: @@ -672,6 +677,18 @@ def _index_file(self, file_path: str, repo: Any) -> dict: total_churn = meta["lines_added_90d"] + meta["lines_deleted_90d"] meta["avg_commit_size"] = total_churn / c90 if c90 > 0 else 0.0 + # Temporal hotspot score: exponentially decayed per-commit churn. + # Each commit contributes weight * clamped_lines where weight decays + # with a half-life of HOTSPOT_HALFLIFE_DAYS days from now. + _ln2 = math.log(2) + temporal_score = 0.0 + for c in commits: + age_days = max((now.timestamp() - c.ts) / 86400.0, 0.0) + weight = math.exp(-_ln2 * age_days / HOTSPOT_HALFLIFE_DAYS) + lines = min((c.added + c.deleted) / 100.0, 3.0) + temporal_score += weight * lines + meta["temporal_hotspot_score"] = temporal_score + # Contributor count & bus factor meta["contributor_count"] = len(author_counts) total_commits = sum(author_counts.values()) @@ -964,14 +981,21 @@ def _flush_commit() -> None: @staticmethod def _compute_percentiles(metadata_list: list[dict]) -> None: - """Compute churn_percentile and is_hotspot. Mutates in place.""" + """Compute churn_percentile and is_hotspot. Mutates in place. + + Primary sort key is temporal_hotspot_score (exponentially decayed churn); + commit_count_90d is used as a tiebreak, matching the SQL PERCENT_RANK path. 
+ """ if not metadata_list: return - # Sort by commit_count_90d for percentile ranking + # Sort by temporal_hotspot_score (primary) then commit_count_90d (tiebreak) sorted_by_churn = sorted( range(len(metadata_list)), - key=lambda i: metadata_list[i].get("commit_count_90d", 0), + key=lambda i: ( + metadata_list[i].get("temporal_hotspot_score") or 0.0, + metadata_list[i].get("commit_count_90d", 0), + ), ) total = len(metadata_list) diff --git a/packages/core/src/repowise/core/ingestion/graph.py b/packages/core/src/repowise/core/ingestion/graph.py index a003d64..d9d21df 100644 --- a/packages/core/src/repowise/core/ingestion/graph.py +++ b/packages/core/src/repowise/core/ingestion/graph.py @@ -500,6 +500,26 @@ def update_co_change_edges(self, updated_meta: dict, min_count: int = 3) -> None # Re-add co_changes edges self.add_co_change_edges(updated_meta, min_count) + # ------------------------------------------------------------------ + # Dynamic-hint edges + # ------------------------------------------------------------------ + + def add_dynamic_edges(self, edges: list) -> None: + """Add dynamic-hint edges to the graph. Each edge is a DynamicEdge.""" + for e in edges: + if e.source not in self._graph: + continue + if e.target not in self._graph: + # add a stub node so dead-code analysis sees it as reachable + self._graph.add_node(e.target) + self._graph.add_edge( + e.source, + e.target, + edge_type="dynamic", + hint_source=e.hint_source, + weight=e.weight, + ) + # ------------------------------------------------------------------ # Framework-aware synthetic edges # ------------------------------------------------------------------ diff --git a/packages/core/src/repowise/core/persistence/coordinator.py b/packages/core/src/repowise/core/persistence/coordinator.py new file mode 100644 index 0000000..0116ecd --- /dev/null +++ b/packages/core/src/repowise/core/persistence/coordinator.py @@ -0,0 +1,212 @@ +"""Atomic three-store transaction coordinator. 
+ +Buffers writes across SQL (AsyncSession), in-memory graph (GraphBuilder / +NetworkX), and the vector store. Flushes them in order; rolls back on failure. + +Usage: + coord = AtomicStorageCoordinator(session, graph_builder, vector_store) + async with coord.transaction() as txn: + txn.add_sql(some_orm_obj) + txn.add_graph_node("path/file.py", attrs={...}) + txn.add_graph_edge("a.py", "b.py", attrs={...}) + txn.add_vector("page-id", {"path": ..., "summary": ..., "embedding": ...}) + # On normal exit: SQL commit, graph applied, vectors upserted. + # On exception anywhere: SQL rollback, graph nodes/edges removed, vector ids deleted. + +Vector store notes +------------------ +All three stores (InMemoryVectorStore, LanceDBVectorStore, PgVectorStore) share +the same async API: + - upsert: embed_and_upsert(page_id: str, text: str, metadata: dict) -> None + - delete: delete(page_id: str) -> None + - count: __len__() (InMemoryVectorStore only; others unsupported) + +The ``record`` dict passed to ``add_vector`` must contain a ``"text"`` key +(the raw text to embed). All other keys are forwarded as metadata. + +GraphBuilder notes +------------------ +The NetworkX DiGraph is stored as ``GraphBuilder._graph`` (private attribute). +This coordinator accesses it directly via ``getattr(builder, "_graph", None)`` +to avoid triggering a full ``build()`` call. 
+""" +from __future__ import annotations + +import asyncio +from contextlib import asynccontextmanager +from dataclasses import dataclass, field +from typing import Any, AsyncIterator +import structlog + +log = structlog.get_logger(__name__) + + +@dataclass +class _PendingTransaction: + pending_sql_objects: list[Any] = field(default_factory=list) + pending_graph_nodes: list[tuple[str, dict]] = field(default_factory=list) + pending_graph_edges: list[tuple[str, str, dict]] = field(default_factory=list) + pending_vectors: list[tuple[str, dict]] = field(default_factory=list) # (id, record) + + def add_sql(self, obj: Any) -> None: + self.pending_sql_objects.append(obj) + + def add_graph_node(self, node_id: str, attrs: dict | None = None) -> None: + self.pending_graph_nodes.append((node_id, attrs or {})) + + def add_graph_edge(self, src: str, dst: str, attrs: dict | None = None) -> None: + self.pending_graph_edges.append((src, dst, attrs or {})) + + def add_vector(self, vector_id: str, record: dict) -> None: + """Queue a vector upsert. + + ``record`` must contain a ``"text"`` key with the raw text to embed. + All remaining keys are passed as metadata to the vector store. + """ + self.pending_vectors.append((vector_id, record)) + + +class AtomicStorageCoordinator: + def __init__(self, session, graph_builder=None, vector_store=None) -> None: + self._session = session + self._graph_builder = graph_builder + self._vector_store = vector_store + self._lock = asyncio.Lock() + + @asynccontextmanager + async def transaction(self) -> AsyncIterator[_PendingTransaction]: + txn = _PendingTransaction() + applied_nodes: list[str] = [] + applied_edges: list[tuple[str, str]] = [] + applied_vector_ids: list[str] = [] + async with self._lock: + try: + yield txn + # === FLUSH === + # 1. SQL first (most likely to fail constraints) + for obj in txn.pending_sql_objects: + self._session.add(obj) + await self._session.flush() + + # 2. 
Graph (in-memory; track for rollback) + # Access _graph directly to avoid triggering a full build() call. + if self._graph_builder is not None: + g = getattr(self._graph_builder, "_graph", None) + if g is not None: + for node_id, attrs in txn.pending_graph_nodes: + existed = node_id in g + g.add_node(node_id, **attrs) + if not existed: + applied_nodes.append(node_id) + for src, dst, attrs in txn.pending_graph_edges: + if not g.has_edge(src, dst): + g.add_edge(src, dst, **attrs) + applied_edges.append((src, dst)) + + # 3. Vector store last + # All stores expose: embed_and_upsert(page_id, text, metadata) async + if self._vector_store is not None and txn.pending_vectors: + for vid, record in txn.pending_vectors: + await _vector_upsert(self._vector_store, vid, record) + applied_vector_ids.append(vid) + + await self._session.commit() + log.debug( + "atomic_txn_committed", + sql=len(txn.pending_sql_objects), + nodes=len(applied_nodes), + edges=len(applied_edges), + vectors=len(applied_vector_ids), + ) + except Exception as e: + log.warning("atomic_txn_failed_rollback", error=str(e)) + # SQL rollback + try: + await self._session.rollback() + except Exception as e2: + log.error("sql_rollback_failed", error=str(e2)) + # Graph rollback + if self._graph_builder is not None: + g = getattr(self._graph_builder, "_graph", None) + if g is not None: + for src, dst in applied_edges: + if g.has_edge(src, dst): + g.remove_edge(src, dst) + for node_id in applied_nodes: + if node_id in g: + g.remove_node(node_id) + # Vector rollback — all stores expose delete(page_id) async + if self._vector_store is not None: + for vid in applied_vector_ids: + try: + await _vector_delete(self._vector_store, vid) + except Exception as e2: + log.error("vector_rollback_failed", id=vid, error=str(e2)) + raise + + async def health_check(self) -> dict: + """Compare counts across stores. 
Returns drift report.""" + from sqlalchemy import text + report: dict = {"sql_pages": None, "vector_count": None, "graph_nodes": None, "drift": None} + try: + res = await self._session.execute(text("SELECT COUNT(*) FROM wiki_pages")) + report["sql_pages"] = int(res.scalar() or 0) + except Exception as e: + report["sql_pages_error"] = str(e) + if self._graph_builder is not None: + g = getattr(self._graph_builder, "_graph", None) + if g is not None: + report["graph_nodes"] = g.number_of_nodes() + if self._vector_store is not None: + try: + report["vector_count"] = await _vector_count(self._vector_store) + except Exception as e: + report["vector_count_error"] = str(e) + # Compute drift between SQL and vector if both available + if report["sql_pages"] is not None and report["vector_count"] is not None: + denom = max(report["sql_pages"], 1) + report["drift"] = abs(report["sql_pages"] - report["vector_count"]) / denom + return report + + +# --------------------------------------------------------------------------- +# Vector store adapter helpers +# +# All three stores (InMemoryVectorStore, LanceDBVectorStore, PgVectorStore) +# share the same method names: +# upsert: embed_and_upsert(page_id: str, text: str, metadata: dict) -> None (async) +# delete: delete(page_id: str) -> None (async) +# count: __len__() (sync, InMemoryVectorStore only; others return -1) +# --------------------------------------------------------------------------- + +async def _vector_upsert(store, vid: str, record: dict) -> None: + """Call embed_and_upsert on the store. + + ``record`` must contain a ``"text"`` key. All other keys are forwarded + as metadata. Raises ValueError if ``"text"`` is absent. + """ + text = record.get("text") + if text is None: + raise ValueError( + f"_vector_upsert: record for '{vid}' is missing required 'text' key. 
" + f"Keys present: {list(record.keys())}" + ) + metadata = {k: v for k, v in record.items() if k != "text"} + await store.embed_and_upsert(vid, text, metadata) + + +async def _vector_delete(store, vid: str) -> None: + """Call delete(page_id) on the store.""" + await store.delete(vid) + + +async def _vector_count(store) -> int: + """Return the number of vectors in the store. + + InMemoryVectorStore exposes __len__; LanceDB and PgVector do not have a + cheap count method, so we return -1 for those. + """ + fn = getattr(store, "__len__", None) + if fn is not None: + return int(fn()) + return -1 diff --git a/packages/core/src/repowise/core/persistence/crud.py b/packages/core/src/repowise/core/persistence/crud.py index 00dcf64..577abb8 100644 --- a/packages/core/src/repowise/core/persistence/crud.py +++ b/packages/core/src/repowise/core/persistence/crud.py @@ -18,7 +18,7 @@ from datetime import UTC, datetime from typing import Any -from sqlalchemy import select +from sqlalchemy import select, text from sqlalchemy.ext.asyncio import AsyncSession from .models import ( @@ -709,31 +709,41 @@ async def recompute_git_percentiles( session: AsyncSession, repository_id: str, ) -> int: - """Reload all git_metadata rows and recompute churn_percentile + is_hotspot. + """Recompute churn_percentile + is_hotspot using a SQL PERCENT_RANK window function. Called after incremental updates so that percentile rankings stay fresh without a full ``repowise init``. Returns the number of rows updated. + + Primary ranking signal is temporal_hotspot_score (exponentially decayed churn); + commit_count_90d is the tiebreak. Works on both SQLite (3.25+) and PostgreSQL. """ - result = await session.execute( + # First check how many rows exist so we can return the count without an + # extra query after the UPDATE. 
+ count_result = await session.execute( select(GitMetadata).where(GitMetadata.repository_id == repository_id) ) - rows = result.scalars().all() + rows = count_result.scalars().all() if not rows: return 0 - # Sort by 90-day commit count for percentile ranking - sorted_rows = sorted(rows, key=lambda r: r.commit_count_90d or 0) - total = len(sorted_rows) - - for rank, row in enumerate(sorted_rows): - pct = rank / total if total > 0 else 0.0 - row.churn_percentile = pct - c90 = row.commit_count_90d or 0 - row.is_hotspot = pct >= 0.75 and c90 > 0 - row.updated_at = _now_utc() - + sql = """ +WITH ranked AS ( + SELECT id, PERCENT_RANK() OVER ( + PARTITION BY repository_id + ORDER BY COALESCE(temporal_hotspot_score, 0.0), commit_count_90d + ) AS prank + FROM git_metadata + WHERE repository_id = :repo_id +) +UPDATE git_metadata +SET churn_percentile = (SELECT prank FROM ranked WHERE ranked.id = git_metadata.id), + is_hotspot = ((SELECT prank FROM ranked WHERE ranked.id = git_metadata.id) >= 0.75 + AND git_metadata.commit_count_90d > 0) +WHERE repository_id = :repo_id; +""" + await session.execute(text(sql), {"repo_id": repository_id}) await session.flush() - return total + return len(rows) # --------------------------------------------------------------------------- diff --git a/packages/core/src/repowise/core/persistence/models.py b/packages/core/src/repowise/core/persistence/models.py index 3b28500..1ab2229 100644 --- a/packages/core/src/repowise/core/persistence/models.py +++ b/packages/core/src/repowise/core/persistence/models.py @@ -299,6 +299,9 @@ class GitMetadata(Base): original_path: Mapped[str | None] = mapped_column(Text, nullable=True) merge_commit_count_90d: Mapped[int] = mapped_column(Integer, nullable=False, default=0) + # Temporal hotspot score: exponentially time-decayed churn signal + temporal_hotspot_score: Mapped[float | None] = mapped_column(Float, nullable=True, default=0.0) + created_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), 
class LlmCost(Base):
    """One LLM API call: model, operation, token usage and computed USD cost.

    Written per provider call by the cost-tracking layer; rows are removed
    with their repository via ON DELETE CASCADE.
    """

    __tablename__ = "llm_costs"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    # Owning repository; cascades away with it.
    repository_id: Mapped[str] = mapped_column(
        String(32), ForeignKey("repositories.id", ondelete="CASCADE"), nullable=False
    )
    # When the call was made (timezone-aware).
    ts: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default=_now_utc
    )
    model: Mapped[str] = mapped_column(String(100), nullable=False)
    # Pipeline operation that triggered the call (e.g. page generation).
    operation: Mapped[str] = mapped_column(String(50), nullable=False)
    input_tokens: Mapped[int] = mapped_column(Integer, nullable=False)
    output_tokens: Mapped[int] = mapped_column(Integer, nullable=False)
    cost_usd: Mapped[float] = mapped_column(Float, nullable=False)
    # File the call concerned, when applicable.
    file_path: Mapped[str | None] = mapped_column(String(1024), nullable=True)


class SecurityFinding(Base):
    """A pattern-based security signal detected during file ingestion."""

    __tablename__ = "security_findings"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    # Owning repository; cascades away with it.
    repository_id: Mapped[str] = mapped_column(
        String(32), ForeignKey("repositories.id", ondelete="CASCADE"), nullable=False
    )
    file_path: Mapped[str] = mapped_column(String(1024), nullable=False)
    # Finding category (e.g. eval/exec/pickle/raw SQL/hardcoded secret).
    kind: Mapped[str] = mapped_column(String(100), nullable=False)
    severity: Mapped[str] = mapped_column(String(20), nullable=False)
    # Short source excerpt around the match, if captured.
    snippet: Mapped[str | None] = mapped_column(Text, nullable=True)
    line_number: Mapped[int | None] = mapped_column(Integer, nullable=True)
    detected_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default=_now_utc
    )
# --- VectorStoreBase (default implementation) ------------------------------

async def get_page_summary_by_path(self, path: str) -> dict | None:
    """Return {'summary': str, 'key_exports': list[str]} for a previously-indexed page, or None.

    Used for RAG context injection during doc generation: when generating
    page B that imports A, we fetch A's previously-generated summary and
    feed it to the LLM.  Base class: no lookup capability; subclasses override.
    """
    return None


# --- InMemoryVectorStore ---------------------------------------------------

async def get_page_summary_by_path(self, path: str) -> dict | None:
    """Return {'summary': str, 'key_exports': list[str]} for a previously-indexed page, or None.

    Reads 'summary' from metadata if present (set by the generation
    pipeline), else falls back to the first 500 chars of 'content'.
    'key_exports' reads the 'exports' metadata field if present, else [].
    """
    match = next(
        (
            meta
            for _vec, meta in self._store.values()
            if meta.get("target_path") == path
        ),
        None,
    )
    if match is None:
        return None
    summary = match.get("summary") or str(match.get("content", ""))[:500]
    return {"summary": summary, "key_exports": list(match.get("exports") or [])}


# --- LanceDBVectorStore ----------------------------------------------------

async def get_page_summary_by_path(self, path: str) -> dict | None:
    """Return {'summary': str, 'key_exports': list[str]} for a previously-indexed page, or None.

    LanceDB stores up to 200 chars of content in 'content_snippet', which
    serves as the summary.  'key_exports' is not stored in the LanceDB
    schema, so it is always [] — callers only use the text summary for
    prompt injection.
    """
    await self._ensure_connected()
    if self._table is None:
        return None

    # Double single quotes for the LanceDB SQL-style filter string.
    safe_path = path.replace("'", "''")
    try:
        query = self._table.query().where(f"target_path = '{safe_path}'")  # type: ignore[union-attr]
        rows = await query.select(["content_snippet"]).limit(1).to_list()
    except Exception:
        return None

    if not rows:
        return None
    return {"summary": str(rows[0].get("content_snippet") or ""), "key_exports": []}
async def get_page_summary_by_path(self, path: str) -> dict | None:
    """Return {'summary': str, 'key_exports': list[str]} for a previously-indexed page, or None.

    Used for RAG context injection during doc generation.  Reads the first
    500 chars of ``wiki_pages.content`` matched by target_path; key_exports
    comes from the metadata column's "exports" list when present (the column
    may arrive as a dict or as a JSON string depending on the driver).
    """
    from sqlalchemy.sql import text as sa_text

    query = sa_text(
        "SELECT content, metadata FROM wiki_pages "
        "WHERE target_path = :path "
        "LIMIT 1"
    )
    async with self._session_factory() as session:
        result = await session.execute(query, {"path": path})
        row = result.fetchone()

    if row is None:
        return None

    content, meta_raw = row[0], row[1]
    key_exports: list[str] = []
    if meta_raw and isinstance(meta_raw, dict):
        key_exports = list(meta_raw.get("exports", []))
    elif meta_raw and isinstance(meta_raw, str):
        import json

        try:
            key_exports = list(json.loads(meta_raw).get("exports", []))
        except (json.JSONDecodeError, AttributeError):
            # Malformed metadata: fall back to no exports.
            pass

    return {"summary": str(content or "")[:500], "key_exports": key_exports}
# Process-local parser cache: one ASTParser per worker process, built lazily.
_WORKER_PARSER: Any = None


def _parse_one(path_and_fi_and_bytes: tuple) -> Any:
    """ProcessPoolExecutor worker: parse a single file.

    Input is a ``(FileInfo, source_bytes)`` pair; output is a ParsedFile on
    success, or an ``(abs_path_str, error_str)`` tuple on failure.  The
    ASTParser (which holds compiled tree-sitter Language/Query objects) is
    constructed lazily inside the worker and cached in a module global, so it
    is never pickled — only FileInfo (input) and ParsedFile (output) cross
    the process boundary, and both are plain picklable dataclasses.
    """
    global _WORKER_PARSER
    file_info, source_bytes = path_and_fi_and_bytes
    try:
        if _WORKER_PARSER is None:
            from repowise.core.ingestion import ASTParser

            _WORKER_PARSER = ASTParser()
        return _WORKER_PARSER.parse_file(file_info, source_bytes)
    except Exception as exc:
        return (file_info.abs_path, str(exc))
@@ -156,28 +187,47 @@ async def run_pipeline( commit_depth = max(1, min(commit_depth, 5000)) + # Attach cost tracker to provider if supplied + if cost_tracker is not None and llm_client is not None and hasattr(llm_client, "_cost_tracker"): + llm_client._cost_tracker = cost_tracker + # ---- Phase 1: Ingestion ------------------------------------------------ if progress: progress.on_message("info", "Phase 1: Ingestion") - parsed_files, file_infos, repo_structure, source_map, graph_builder = ( - await _run_ingestion( + # Launch git indexing as a background task immediately — it is independent + # of parsing and graph-build, so the two stages can run concurrently. + # _run_ingestion does: traverse → ProcessPool parse → graph build → dynamic hints. + # _run_git_indexing does: git log → co-change accumulation (I/O bound, own executor). + + async def _git_stage() -> tuple: + return await _run_git_indexing( + repo_path, + commit_depth=commit_depth, + follow_renames=follow_renames, + progress=progress, + ) + + async def _ingestion_stage() -> tuple: + return await _run_ingestion( repo_path, exclude_patterns=exclude_patterns, skip_tests=skip_tests, skip_infra=skip_infra, progress=progress, ) - ) - # Git indexing (runs concurrently with ingestion in the CLI, but here - # we start it after traversal since we're already async) - git_summary, git_metadata_list, git_meta_map = await _run_git_indexing( - repo_path, - commit_depth=commit_depth, - follow_renames=follow_renames, - progress=progress, - ) + ( + parsed_files, + file_infos, + repo_structure, + source_map, + graph_builder, + ), ( + git_summary, + git_metadata_list, + git_meta_map, + ) = await asyncio.gather(_ingestion_stage(), _git_stage()) # Add co-change edges to the graph if git_meta_map: @@ -335,31 +385,77 @@ async def _run_ingestion( if fi.language not in ("dockerfile", "makefile", "terraform", "shell") ] - # Parse (sequential — GraphBuilder is not thread-safe) + # ---- Parse phase: CPU-bound, run in 
ProcessPoolExecutor ---------------- if progress: progress.on_phase_start("parse", len(file_infos)) - parser = ASTParser() + # Read source bytes up front (I/O, sequential — fast enough; keeps worker + # args small: FileInfo + bytes, both picklable plain dataclasses/bytes). + fi_and_bytes: list[tuple] = [] + for fi in file_infos: + try: + source = Path(fi.abs_path).read_bytes() + fi_and_bytes.append((fi, source)) + except Exception: + if progress: + progress.on_item_done("parse") + parsed_files: list[Any] = [] source_map: dict[str, bytes] = {} graph_builder = GraphBuilder(repo_path=repo_path) - for i, fi in enumerate(file_infos): - try: - source = Path(fi.abs_path).read_bytes() - parsed = parser.parse_file(fi, source) - parsed_files.append(parsed) + loop = asyncio.get_running_loop() + workers = max(1, os.cpu_count() or 4) + + _use_process_pool = True + parse_results: list[Any] = [] + + try: + with ProcessPoolExecutor(max_workers=workers) as pool: + tasks = [ + loop.run_in_executor(pool, _parse_one, item) + for item in fi_and_bytes + ] + # Use as_completed via asyncio.as_completed to report per-file progress. + # We need to preserve (task → fi_and_bytes index) for source_map so we + # wrap tasks in a list and drain with gather instead. + parse_results = await asyncio.gather(*tasks, return_exceptions=True) + except Exception as pool_exc: + logger.warning( + "process_pool_parse_failed_falling_back", + error=str(pool_exc), + ) + _use_process_pool = False + # Fallback: in-process sequential parse + _fallback_parser = ASTParser() + for i, (fi, source) in enumerate(fi_and_bytes): + try: + result = _fallback_parser.parse_file(fi, source) + parse_results.append(result) + except Exception as exc: + parse_results.append((fi.abs_path, str(exc))) + if progress: + progress.on_item_done("parse") + if i % 50 == 49: + await asyncio.sleep(0) + + # Aggregate results into GraphBuilder on the main loop (not thread-safe). 
+ for idx, result in enumerate(parse_results): + fi, source = fi_and_bytes[idx] + if isinstance(result, tuple) and len(result) == 2 and isinstance(result[1], str): + # Error tuple: (abs_path_str, error_str) + logger.debug("parse_error_in_worker", path=result[0], error=result[1]) + elif isinstance(result, Exception): + logger.debug("parse_exception_in_worker", path=fi.abs_path, error=str(result)) + else: + parsed_files.append(result) source_map[fi.path] = source - graph_builder.add_file(parsed) - except Exception: - pass # skip unparseable files - if progress: + graph_builder.add_file(result) + # Report per-file progress if we used the process pool (fallback already reported above). + if _use_process_pool and progress: progress.on_item_done("parse") - # Yield control every 50 files so the event loop stays responsive - if i % 50 == 49: - await asyncio.sleep(0) - # Build graph (CPU-bound — run in thread to keep event loop free) + # ---- Graph build phase ------------------------------------------------- if progress: progress.on_phase_start("graph", 1) await asyncio.to_thread(graph_builder.build) @@ -373,6 +469,17 @@ async def _run_ingestion( except Exception: pass # framework edge detection is best-effort + # ---- Dynamic hints wiring (after static graph is fully built) ---------- + try: + from repowise.core.ingestion.dynamic_hints import HintRegistry + + registry = HintRegistry() + dynamic_edges = await loop.run_in_executor(None, registry.extract_all, repo_path) + graph_builder.add_dynamic_edges(dynamic_edges) + logger.info("dynamic_hints_added", count=len(dynamic_edges)) + except Exception as hints_exc: + logger.warning("dynamic_hints_failed", error=str(hints_exc)) + if progress: progress.on_item_done("graph") @@ -533,6 +640,7 @@ async def run_generation( concurrency: int, progress: ProgressCallback | None, resume: bool = False, + cost_tracker: Any | None = None, ) -> list[Any]: """Run LLM-powered page generation. 
@@ -547,6 +655,10 @@ async def run_generation( from repowise.core.persistence.vector_store import InMemoryVectorStore from repowise.core.providers.embedding.base import MockEmbedder + # Attach cost tracker to LLM client if available + if cost_tracker is not None and llm_client is not None and hasattr(llm_client, "_cost_tracker"): + llm_client._cost_tracker = cost_tracker + config = GenerationConfig(max_concurrency=concurrency) assembler = ContextAssembler(config) @@ -573,6 +685,9 @@ def on_page_done(page_type: str) -> None: _pages_done += 1 if progress: progress.on_item_done("generation") + # Push live cost update if the callback supports it + if cost_tracker is not None and hasattr(progress, "set_cost"): + progress.set_cost(cost_tracker.session_cost) if progress: progress.on_phase_start("generation", None) diff --git a/packages/core/src/repowise/core/pipeline/persist.py b/packages/core/src/repowise/core/pipeline/persist.py index 78af265..8c702ac 100644 --- a/packages/core/src/repowise/core/pipeline/persist.py +++ b/packages/core/src/repowise/core/pipeline/persist.py @@ -108,6 +108,25 @@ async def persist_pipeline_result( if all_symbols: await batch_upsert_symbols(session, repo_id, all_symbols) + # ---- Security scan ------------------------------------------------------- + # The scan lives in persist.py (rather than orchestrator.py) because this + # module already iterates parsed_files once per pipeline run, so findings + # can be collected and stored here without adding a second pass or + # modifying the orchestrator's parse stage. 
+ try: + from repowise.core.analysis.security_scan import SecurityScanner + + scanner = SecurityScanner(session, repo_id) + for pf in result.parsed_files: + source_text = getattr(pf.file_info, "content", "") or "" + findings = await scanner.scan_file( + pf.file_info.path, source_text, pf.symbols + ) + if findings: + await scanner.persist(pf.file_info.path, findings) + except Exception as _sec_err: # noqa: BLE001 — scanner must never break the pipeline + logger.warning("security_scan_skipped", error=str(_sec_err)) + # ---- Git metadata -------------------------------------------------------- if result.git_metadata_list: await upsert_git_metadata_bulk(session, repo_id, result.git_metadata_list) diff --git a/packages/core/src/repowise/core/providers/llm/anthropic.py b/packages/core/src/repowise/core/providers/llm/anthropic.py index 8d9b0b2..1322100 100644 --- a/packages/core/src/repowise/core/providers/llm/anthropic.py +++ b/packages/core/src/repowise/core/providers/llm/anthropic.py @@ -36,9 +36,12 @@ RateLimitError, ) -from typing import Any, AsyncIterator +from typing import TYPE_CHECKING, Any, AsyncIterator from repowise.core.rate_limiter import RateLimiter +if TYPE_CHECKING: + from repowise.core.generation.cost_tracker import CostTracker + log = structlog.get_logger(__name__) _MAX_RETRIES = 3 @@ -61,6 +64,7 @@ def __init__( api_key: str | None = None, model: str = "claude-sonnet-4-6", rate_limiter: RateLimiter | None = None, + cost_tracker: CostTracker | None = None, ) -> None: resolved_key = api_key or os.environ.get("ANTHROPIC_API_KEY") if not resolved_key: @@ -71,6 +75,7 @@ def __init__( self._client = AsyncAnthropic(api_key=resolved_key) self._model = model self._rate_limiter = rate_limiter + self._cost_tracker = cost_tracker @property def provider_name(self) -> str: @@ -164,6 +169,23 @@ async def _generate_with_retry( cached_tokens=result.cached_tokens, request_id=request_id, ) + + if self._cost_tracker is not None: + import asyncio + + try: + 
asyncio.get_event_loop().create_task( + self._cost_tracker.record( + model=self._model, + input_tokens=result.input_tokens, + output_tokens=result.output_tokens, + operation="doc_generation", + file_path=None, + ) + ) + except RuntimeError: + pass # No running event loop — skip async record + return result # --- ChatProvider protocol implementation --- diff --git a/packages/core/src/repowise/core/providers/llm/gemini.py b/packages/core/src/repowise/core/providers/llm/gemini.py index 7398c7a..46503dc 100644 --- a/packages/core/src/repowise/core/providers/llm/gemini.py +++ b/packages/core/src/repowise/core/providers/llm/gemini.py @@ -36,9 +36,12 @@ RateLimitError, ) -from typing import Any, AsyncIterator +from typing import TYPE_CHECKING, Any, AsyncIterator from repowise.core.rate_limiter import RateLimiter +if TYPE_CHECKING: + from repowise.core.generation.cost_tracker import CostTracker + log = structlog.get_logger(__name__) _MAX_RETRIES = 3 @@ -53,6 +56,7 @@ class GeminiProvider(BaseProvider): model: Gemini model name. Defaults to gemini-3.1-flash-lite-preview. api_key: Google API key. Falls back to GEMINI_API_KEY or GOOGLE_API_KEY env var. rate_limiter: Optional RateLimiter instance. + cost_tracker: Optional CostTracker for recording token usage and cost. """ def __init__( @@ -60,6 +64,7 @@ def __init__( model: str = "gemini-3.1-flash-lite-preview", api_key: str | None = None, rate_limiter: RateLimiter | None = None, + cost_tracker: "CostTracker | None" = None, ) -> None: self._model = model self._api_key = ( @@ -73,6 +78,7 @@ def __init__( "No API key found. 
Pass api_key= or set GEMINI_API_KEY / GOOGLE_API_KEY env var.", ) self._rate_limiter = rate_limiter + self._cost_tracker = cost_tracker self._client: object | None = None # cached; created once on first call @property @@ -185,6 +191,21 @@ def _call_sync() -> GeneratedResponse: output_tokens=result.output_tokens, request_id=request_id, ) + + if self._cost_tracker is not None: + try: + asyncio.get_event_loop().create_task( + self._cost_tracker.record( + model=self._model, + input_tokens=result.input_tokens, + output_tokens=result.output_tokens, + operation="doc_generation", + file_path=None, + ) + ) + except RuntimeError: + pass # No running event loop — skip async record + return result # --- ChatProvider protocol implementation --- diff --git a/packages/core/src/repowise/core/providers/llm/litellm.py b/packages/core/src/repowise/core/providers/llm/litellm.py index 0692cd9..1273e22 100644 --- a/packages/core/src/repowise/core/providers/llm/litellm.py +++ b/packages/core/src/repowise/core/providers/llm/litellm.py @@ -37,9 +37,12 @@ RateLimitError, ) -from typing import Any, AsyncIterator +from typing import TYPE_CHECKING, Any, AsyncIterator from repowise.core.rate_limiter import RateLimiter +if TYPE_CHECKING: + from repowise.core.generation.cost_tracker import CostTracker + log = structlog.get_logger(__name__) _MAX_RETRIES = 3 @@ -64,11 +67,13 @@ def __init__( api_key: str | None = None, api_base: str | None = None, rate_limiter: RateLimiter | None = None, + cost_tracker: "CostTracker | None" = None, ) -> None: self._model = model self._api_key = api_key self._api_base = api_base self._rate_limiter = rate_limiter + self._cost_tracker = cost_tracker @property def provider_name(self) -> str: @@ -162,6 +167,23 @@ async def _generate_with_retry( output_tokens=result.output_tokens, request_id=request_id, ) + + if self._cost_tracker is not None: + import asyncio + + try: + asyncio.get_event_loop().create_task( + self._cost_tracker.record( + model=self._model, + 
input_tokens=result.input_tokens, + output_tokens=result.output_tokens, + operation="doc_generation", + file_path=None, + ) + ) + except RuntimeError: + pass # No running event loop — skip async record + return result # --- ChatProvider protocol implementation --- diff --git a/packages/core/src/repowise/core/providers/llm/openai.py b/packages/core/src/repowise/core/providers/llm/openai.py index b13248b..1cc3e7b 100644 --- a/packages/core/src/repowise/core/providers/llm/openai.py +++ b/packages/core/src/repowise/core/providers/llm/openai.py @@ -35,9 +35,12 @@ RateLimitError, ) -from typing import Any, AsyncIterator +from typing import TYPE_CHECKING, Any, AsyncIterator from repowise.core.rate_limiter import RateLimiter +if TYPE_CHECKING: + from repowise.core.generation.cost_tracker import CostTracker + log = structlog.get_logger(__name__) _MAX_RETRIES = 3 @@ -61,6 +64,7 @@ def __init__( model: str = "gpt-5.4-nano", base_url: str | None = None, rate_limiter: RateLimiter | None = None, + cost_tracker: "CostTracker | None" = None, ) -> None: resolved_key = api_key or os.environ.get("OPENAI_API_KEY") if not resolved_key: @@ -71,6 +75,7 @@ def __init__( self._client = AsyncOpenAI(api_key=resolved_key, base_url=base_url) self._model = model self._rate_limiter = rate_limiter + self._cost_tracker = cost_tracker @property def provider_name(self) -> str: @@ -161,6 +166,23 @@ async def _generate_with_retry( output_tokens=result.output_tokens, request_id=request_id, ) + + if self._cost_tracker is not None: + import asyncio + + try: + asyncio.get_event_loop().create_task( + self._cost_tracker.record( + model=self._model, + input_tokens=result.input_tokens, + output_tokens=result.output_tokens, + operation="doc_generation", + file_path=None, + ) + ) + except RuntimeError: + pass # No running event loop — skip async record + return result # --- ChatProvider protocol implementation --- diff --git a/packages/server/src/repowise/server/mcp_server/tool_dead_code.py 
b/packages/server/src/repowise/server/mcp_server/tool_dead_code.py index 8596233..364b104 100644 --- a/packages/server/src/repowise/server/mcp_server/tool_dead_code.py +++ b/packages/server/src/repowise/server/mcp_server/tool_dead_code.py @@ -28,6 +28,10 @@ async def get_dead_code( directory: str | None = None, owner: str | None = None, group_by: str | None = None, + include_internals: bool = False, + include_zombie_packages: bool = True, + no_unreachable: bool = False, + no_unused_exports: bool = False, ) -> dict: """Get a tiered refactor plan for dead and unused code. @@ -39,16 +43,39 @@ async def get_dead_code( group_by="owner" to see who owns the most dead code. Use tier to focus on a single confidence band. + Scan scope flags (mirror the DeadCodeAnalyzer.analyze config): + - Use ``min_confidence=0.7`` for high-confidence cleanups — filters out + speculative findings and surfaces only code with zero references that + hasn't been touched in months. Ideal before a release or refactor sprint. + - Use ``include_internals=True`` for aggressive scans of private symbols + (functions/variables prefixed with _ or declared without exports). This + has a higher false-positive rate and is off by default; enable it when + doing a thorough dead-code purge of a stable, well-tested module. + - Use ``no_unreachable=True`` to skip file-level reachability checks and + focus only on symbol-level findings (unused exports/internals). + - Use ``no_unused_exports=True`` to skip public-export checks, e.g. when + you know the repo exposes a public API consumed externally. + - Use ``include_zombie_packages=False`` to suppress monorepo package + findings, useful in repos where cross-package imports are intentionally + absent during development. + Args: repo: Repository path, name, or ID. kind: Filter by kind (unreachable_file, unused_export, unused_internal, zombie_package). - min_confidence: Minimum confidence threshold (default 0.5). 
+ min_confidence: Minimum confidence threshold (default 0.5). Use 0.7 for high-confidence + cleanups only. safe_only: Only return findings marked safe_to_delete (default false). limit: Maximum findings per tier (default 20). tier: Focus on a single tier: "high" (>=0.8), "medium" (0.5-0.8), or "low" (<0.5). directory: Filter findings to a specific directory prefix. owner: Filter findings by primary owner name. group_by: "directory" for per-directory rollup, "owner" for ownership hotspots. + include_internals: Include unused private/internal symbol findings (default false). + Enable for aggressive scans of private symbols. + include_zombie_packages: Include zombie-package findings for monorepo packages with + no external importers (default true). + no_unreachable: Suppress unreachable-file findings (default false). + no_unused_exports: Suppress unused-export findings (default false). """ async with get_session(_state._session_factory) as session: repository = await _get_repo(session, repo) @@ -73,10 +100,23 @@ async def get_dead_code( ) git_meta_map = {g.file_path: g for g in git_res.scalars().all()} + # --- Build excluded kinds from scope flags --- + _excluded_kinds: set[str] = set() + if no_unreachable: + _excluded_kinds.add("unreachable_file") + if no_unused_exports: + _excluded_kinds.add("unused_export") + if not include_internals: + _excluded_kinds.add("unused_internal") + if not include_zombie_packages: + _excluded_kinds.add("zombie_package") + # --- Apply filters --- filtered = all_findings if kind: filtered = [f for f in filtered if f.kind == kind] + elif _excluded_kinds: + filtered = [f for f in filtered if f.kind not in _excluded_kinds] if safe_only: filtered = [f for f in filtered if f.safe_to_delete] if min_confidence > 0: diff --git a/packages/server/src/repowise/server/mcp_server/tool_overview.py b/packages/server/src/repowise/server/mcp_server/tool_overview.py index 75ad0d1..fb0063a 100644 --- 
a/packages/server/src/repowise/server/mcp_server/tool_overview.py +++ b/packages/server/src/repowise/server/mcp_server/tool_overview.py @@ -2,7 +2,7 @@ from __future__ import annotations -from collections import Counter +from collections import Counter, defaultdict from typing import Any from sqlalchemy import select @@ -117,6 +117,78 @@ async def get_overview(repo: str | None = None) -> dict: "top_churn_modules": top_modules, } + # B. Knowledge map ------------------------------------------------------- + knowledge_map: dict[str, Any] = {} + if all_git: + # top_owners: aggregate primary_owner_email across all files + owner_file_count: dict[str, int] = defaultdict(int) + owner_pct_sum: dict[str, float] = defaultdict(float) + for g in all_git: + email = g.primary_owner_email or "" + if email: + owner_file_count[email] += 1 + owner_pct_sum[email] += float(g.primary_owner_commit_pct or 0.0) + + total_files = len(all_git) or 1 + top_owners = sorted( + [ + { + "email": email, + "files_owned": count, + "percentage": round(count / total_files * 100.0, 1), + } + for email, count in owner_file_count.items() + ], + key=lambda x: -x["files_owned"], + )[:10] + + # knowledge_silos: files where primary owner has > 80% ownership + knowledge_silos = [ + g.file_path + for g in all_git + if (g.primary_owner_commit_pct or 0.0) > 0.8 + ] + + # onboarding_targets: high-centrality files with least docs + # pagerank from graph_nodes; doc length from wiki_pages + node_result = await session.execute( + select(GraphNode).where( + GraphNode.repository_id == repository.id, + GraphNode.is_test == False, # noqa: E712 + ) + ) + all_nodes = node_result.scalars().all() + + # Build word-count map from wiki_pages (file pages) + page_result = await session.execute( + select(Page).where( + Page.repository_id == repository.id, + Page.page_type == "file_page", + ) + ) + doc_words: dict[str, int] = { + p.target_path: len(p.content.split()) for p in page_result.scalars().all() + } + + 
onboarding_candidates = [ + { + "path": n.node_id, + "pagerank": n.pagerank, + "doc_words": doc_words.get(n.node_id, 0), + } + for n in all_nodes + if n.pagerank > 0.0 + ] + # Sort by fewest doc words first (least documented), then by highest pagerank + onboarding_candidates.sort(key=lambda x: (x["doc_words"], -x["pagerank"])) + onboarding_targets = [c["path"] for c in onboarding_candidates[:5]] + + knowledge_map = { + "top_owners": top_owners, + "knowledge_silos": knowledge_silos, + "onboarding_targets": onboarding_targets, + } + return { "title": overview_page.title if overview_page else repository.name, "content_md": overview_page.content if overview_page else "No overview generated yet.", @@ -135,4 +207,5 @@ async def get_overview(repo: str | None = None) -> dict: ], "entry_points": [n.node_id for n in entry_nodes], "git_health": git_health, + "knowledge_map": knowledge_map, } diff --git a/packages/server/src/repowise/server/mcp_server/tool_risk.py b/packages/server/src/repowise/server/mcp_server/tool_risk.py index f8cae89..7cad1a7 100644 --- a/packages/server/src/repowise/server/mcp_server/tool_risk.py +++ b/packages/server/src/repowise/server/mcp_server/tool_risk.py @@ -11,6 +11,8 @@ from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy import text + from repowise.core.persistence.database import get_session from repowise.core.persistence.models import ( GitMetadata, @@ -127,6 +129,48 @@ def _compute_impact_surface( return ranked[:3] +async def _check_test_gap(session: AsyncSession, repo_id: str, target: str) -> bool: + """Return True if no test file corresponding to *target* exists in graph_nodes.""" + import os + + base = os.path.splitext(os.path.basename(target))[0] + ext = os.path.splitext(target)[1].lstrip(".") + # Build a LIKE pattern broad enough to catch test_, _test, .spec.* + patterns = [f"%test_{base}%", f"%{base}_test%", f"%{base}.spec.{ext}%"] + for pat in patterns: + row = await session.execute( + 
select(GraphNode).where( + GraphNode.repository_id == repo_id, + GraphNode.is_test == True, # noqa: E712 + GraphNode.node_id.like(pat), + ).limit(1) + ) + if row.scalar_one_or_none() is not None: + return False + return True + + +async def _get_security_signals( + session: AsyncSession, repo_id: str, target: str +) -> list[dict]: + """Fetch stored security findings for *target* from security_findings table.""" + try: + rows = await session.execute( + text( + "SELECT kind, severity, snippet FROM security_findings " + "WHERE repository_id = :repo_id AND file_path = :fp " + "ORDER BY severity DESC, kind" + ), + {"repo_id": repo_id, "fp": target}, + ) + return [ + {"kind": r[0], "severity": r[1], "snippet": r[2]} + for r in rows.all() + ] + except Exception: # noqa: BLE001 — table may not exist pre-migration + return [] + + async def _assess_one_target( session: AsyncSession, repository: Repository, @@ -136,7 +180,12 @@ async def _assess_one_target( reverse_deps: dict[str, set[str]], node_meta: dict[str, Any], ) -> dict: - """Assess risk for a single target file.""" + """Assess risk for a single target file. + + Enriches each result with: + - test_gap: bool — True when no test file matching this file's basename exists. + - security_signals: list of {kind, severity, snippet} from security_findings. + """ repo_id = repository.id result_data: dict[str, Any] = {"target": target} @@ -164,6 +213,8 @@ async def _assess_one_target( reverse_deps, node_meta, ) + result_data["test_gap"] = await _check_test_gap(session, repo_id, target) + result_data["security_signals"] = await _get_security_signals(session, repo_id, target) result_data["risk_summary"] = f"{target} — no git metadata available" return result_data @@ -240,6 +291,10 @@ async def _assess_one_target( if merge_commit_count > 0: result_data["merge_commit_count_90d"] = merge_commit_count + # C. 
Test gaps + security signals + result_data["test_gap"] = await _check_test_gap(session, repo_id, target) + result_data["security_signals"] = await _get_security_signals(session, repo_id, target) + capped = getattr(meta, "commit_count_capped", False) capped_note = " (history truncated — actual count may be higher)" if capped else "" result_data["commit_count_capped"] = capped @@ -262,6 +317,7 @@ async def _assess_one_target( async def get_risk( targets: list[str], repo: str | None = None, + changed_files: list[str] | None = None, ) -> dict: """Assess modification risk for one or more files before making changes. @@ -270,14 +326,26 @@ async def get_risk( - risk_type ("churn-heavy"/"bug-prone"/"high-coupling"/"stable") - impact_surface: top 3 critical modules that would break - dependents, co-change partners, ownership + - test_gap: bool — True if no test file exists for this file + - security_signals: list of {kind, severity, snippet} from static analysis Plus the top 5 global hotspots for ambient awareness. - Example: get_risk(["src/auth/service.py", "src/auth/middleware.py"]) + Pass ``changed_files`` for PR review / blast radius analysis. When provided, + the response includes an additional ``pr_blast_radius`` key containing: + - direct_risks: per-file risk score (centrality × temporal hotspot) + - transitive_affected: files that import any changed file (up to depth 3) + - cochange_warnings: historical co-change partners missing from the PR + - recommended_reviewers: top 5 owners of affected files + - test_gaps: changed/affected files lacking a corresponding test + - overall_risk_score: 0-10 composite score + + Example: get_risk(["src/auth/service.py"], changed_files=["src/auth/service.py"]) Args: - targets: List of file paths to assess. + targets: List of file paths to assess (standard per-file risk). repo: Repository path, name, or ID. + changed_files: Optional list of files changed in a PR for blast-radius analysis. 
""" async with get_session(_state._session_factory) as session: repository = await _get_repo(session, repo) @@ -343,7 +411,18 @@ async def get_risk( if h.file_path not in target_set ][:5] - return { + # A. PR blast radius (only when caller passes changed_files) + pr_blast_radius: dict | None = None + if changed_files: + from repowise.core.analysis.pr_blast import PRBlastRadiusAnalyzer + + analyzer = PRBlastRadiusAnalyzer(session, repo_id) + pr_blast_radius = await analyzer.analyze_files(changed_files) + + response: dict = { "targets": {r["target"]: r for r in results}, "global_hotspots": global_hotspots, } + if pr_blast_radius is not None: + response["pr_blast_radius"] = pr_blast_radius + return response diff --git a/tests/unit/persistence/test_models.py b/tests/unit/persistence/test_models.py index b9333c0..d4f9f81 100644 --- a/tests/unit/persistence/test_models.py +++ b/tests/unit/persistence/test_models.py @@ -304,5 +304,7 @@ def test_base_includes_all_models(): "decision_records", "conversations", "chat_messages", + "llm_costs", + "security_findings", } assert expected == table_names From ef8c12eaa2888b9eaa2734fad263c27254afa7dc Mon Sep 17 00:00:00 2001 From: Swati Ahuja Date: Thu, 9 Apr 2026 18:42:34 +0530 Subject: [PATCH 2/3] feat: get_answer + get_symbol MCP tools, RAG-style answer cache, compact context default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds two new MCP tools, two supporting alembic migrations, and a set of ingestion / generation improvements that make the wiki layer usable for single-call agent workflows. All existing tools continue to work unchanged. Bumps the public tool count from 8 to 10. New MCP tools ------------- - mcp_server/tool_answer.py (new): get_answer(question, scope?, repo?) is a one-call RAG endpoint over the wiki layer. 
It runs an FTS pass with a coverage re-ranker, splits relational questions on connectives and boosts pages at the intersection of both halves, gates synthesis on a top/second dominance ratio (>= 1.2x), and only invokes the LLM when retrieval is clearly dominant. High-confidence responses include a note explaining the consumer can cite directly without verification reads. Ambiguous retrievals return ranked excerpts so the agent grounds in source instead of anchoring on a wrong frame. Synthesised answers are persisted to AnswerCache by question hash so repeat questions return at zero LLM cost. Degrades cleanly to retrieval-only mode when no provider is configured. - mcp_server/tool_symbol.py (new): get_symbol(symbol_id) resolves a qualified id of the form "path/to/file.py::Class::method" (also accepts the dot separator) to its source body, signature, file location, line range, and docstring. Recovers the rich on-disk signature so base classes, decorators, and full type annotations reach the LLM (the stripped DB form would lose these). Handles duplicate-row resolution by canonical pick rather than raising MultipleResultsFound. - mcp_server/_meta.py (new): shared _meta envelope and per-tool hint builders used by tool_answer / tool_context / tool_symbol so all three return a consistent metadata block (timing, hint, page counts). - mcp_server/__init__.py: re-exports the new tools, updates the module docstring to "10 tools". Schema migrations ----------------- - alembic/versions/0012_page_summary.py (new): adds wiki_pages.summary TEXT NOT NULL DEFAULT "". Stores a 1–3 sentence purpose blurb per page so get_context can return narrative file-level descriptions without shipping content_md on every turn. Server default backfills existing rows on upgrade. Reversible downgrade defined. 
- alembic/versions/0013_answer_cache.py (new): creates the answer_cache table with (id, repository_id, question_hash, question, payload_json, provider_name, model_name, created_at), a unique constraint on (repository_id, question_hash), an index on repository_id, and a CASCADE foreign key to repositories so dropping a repo cleans up its cache automatically. Pure CREATE TABLE — no impact on existing data. Reversible downgrade defined. - core/persistence/models.py: adds the Page.summary column and the AnswerCache ORM model matching the migrations above. - core/persistence/crud.py: helpers for upserting page summaries and reading/writing AnswerCache rows. Existing MCP tools ------------------ - mcp_server/tool_context.py: get_context now defaults to compact=True. Compact mode drops the structure block, the imported_by list, and per-symbol docstring/end_line fields, keeping responses under ~10K characters on dense files. Pass compact=False to get the full payload on demand. Docstring trimmed to clean tool documentation. Internal Fallback labels relabeled in plain English. - mcp_server/tool_search.py: docstring expanded into clean tool documentation; behaviour unchanged. - mcp_server/tool_risk.py: cleanup pass; behaviour unchanged. - server/chat_tools.py and docstring counts: updated to 10 tools. Ingestion / generation ---------------------- - core/generation/page_generator.py: _is_significant_file() now treats any file tagged is_test=True (with at least one extracted symbol) as significant, regardless of PageRank. Test files have near-zero centrality because nothing imports them back, but they answer "what test exercises X" / "where is Y verified" questions and the doc layer is the right place to surface those. Filtering remains available via --skip-tests. - core/ingestion/traverser.py: removes the workaround that excluded tests/, test/, spec/, specs/, __tests__ from the traversal. 
The underlying pagerank-inflation bug it guarded against is fixed in graph.py via the deterministic stem-priority disambiguation (_stem_priority / _build_stem_map), so test files can now be indexed safely while still being tagged is_test=True for downstream filtering. - core/ingestion/graph.py: prose cleanup in the stem-priority docstring and _build_stem_map; explains the test-fixture-named-like-the-package failure mode in neutral terms. Framework-aware synthetic-edge code (_add_conftest_edges, _add_django_edges, _add_fastapi_edges, _add_flask_edges, dispatched by add_framework_edges(tech_stack)) is unchanged. - core/ingestion/parser.py, core/generation/models.py: small cleanups feeding the new wiki_pages.summary field through the generation pipeline. CLI --- - cli/main.py: minor wiring for the new tools and the compact default. Tests ----- - tests/unit/server/test_tool_symbol.py (new): unit tests for _resolve_symbol covering separator-style mismatches between Class.method and Class::method and MultipleResultsFound handling on duplicate lookup keys. - tests/unit/server/test_mcp.py: counter and fixture updates for the 10-tool surface. - tests/unit/ingestion/test_graph.py: fixture updates around the stem-priority cleanup. Docs ---- - README.md: bumps "Eight MCP tools" → "Ten MCP tools" in the headline, abstract, comparison table, and competitor matrix; adds get_answer, get_symbol, and compact-default rows to the tool table; documents the test-files-in-wiki and single-call-answer additions in the "What's new" section. - docs/ARCHITECTURE.md: schema table now lists the summary column on wiki_pages and the new answer_cache table; the page-generator section documents the test-file inclusion rule; references to "8 tools" updated to 10. - docs/CHANGELOG.md: Unreleased Added entries for get_answer, get_symbol, the two migrations, and test-file indexing; Changed entry for the get_context compact default. - docs/USER_GUIDE.md: tool table updated to 10 entries. 
- docs/architecture-guide.md, docs/CHAT.md: tool counts updated. - packages/server/README.md, plugins/claude-code/DEVELOPER.md, website/index.md, website/concepts.md, website/mcp-server.md, website/claude-md-generator.md: tool counts updated; mcp-server.md gains full sections (parameters, returns, examples) for get_answer and get_symbol and documents the new compact parameter on get_context. Verified -------- Ran `repowise init --index-only` end-to-end against pallets/flask: 125 files, 1,624 symbols, 125 nodes, 241 edges (191 imports + 28 framework + 22 dynamic), 8 languages, 14 hotspots, 13 dead-code findings. SQL audit confirmed both new migrations applied (answer_cache table present; wiki_pages.summary column present), test files contributed 920 symbols, and conftest framework edges fired. Live MCP-tool checks against the full-mode wiki: get_symbol resolved src/flask/app.py::Flask to its source body and signature across lines 109–508; get_context returned the LLM summary without the structure / imported_by blocks (compact default); get_answer ran retrieval, hit the dominance gate at 1.07× < 1.2×, and correctly returned ranked excerpts instead of synthesising a wrong frame. 
--- README.md | 27 +- docs/ARCHITECTURE.md | 21 +- docs/CHANGELOG.md | 6 + docs/CHAT.md | 4 +- docs/USER_GUIDE.md | 6 +- docs/architecture-guide.md | 4 +- packages/cli/src/repowise/cli/main.py | 6 + .../alembic/versions/0012_page_summary.py | 35 + .../alembic/versions/0013_answer_cache.py | 60 ++ .../src/repowise/core/generation/models.py | 4 + .../core/generation/page_generator.py | 50 +- .../core/src/repowise/core/ingestion/graph.py | 138 ++- .../src/repowise/core/ingestion/parser.py | 30 +- .../src/repowise/core/ingestion/traverser.py | 19 + .../src/repowise/core/persistence/crud.py | 4 + .../src/repowise/core/persistence/models.py | 43 + packages/server/README.md | 4 +- .../server/src/repowise/server/chat_tools.py | 2 +- .../repowise/server/mcp_server/__init__.py | 6 +- .../src/repowise/server/mcp_server/_meta.py | 137 +++ .../repowise/server/mcp_server/tool_answer.py | 809 ++++++++++++++++++ .../server/mcp_server/tool_context.py | 459 ++++++++-- .../repowise/server/mcp_server/tool_risk.py | 11 +- .../repowise/server/mcp_server/tool_search.py | 12 +- .../repowise/server/mcp_server/tool_symbol.py | 368 ++++++++ plugins/claude-code/DEVELOPER.md | 2 +- tests/unit/ingestion/test_graph.py | 108 +++ tests/unit/server/test_mcp.py | 89 ++ tests/unit/server/test_tool_symbol.py | 168 ++++ website/claude-md-generator.md | 2 +- website/concepts.md | 2 +- website/index.md | 2 +- website/mcp-server.md | 55 +- 33 files changed, 2565 insertions(+), 128 deletions(-) create mode 100644 packages/core/alembic/versions/0012_page_summary.py create mode 100644 packages/core/alembic/versions/0013_answer_cache.py create mode 100644 packages/server/src/repowise/server/mcp_server/_meta.py create mode 100644 packages/server/src/repowise/server/mcp_server/tool_answer.py create mode 100644 packages/server/src/repowise/server/mcp_server/tool_symbol.py create mode 100644 tests/unit/server/test_tool_symbol.py diff --git a/README.md b/README.md index a887b3b..8c579f5 100644 --- a/README.md +++ 
b/README.md @@ -3,7 +3,7 @@ repowise
**Codebase intelligence for AI-assisted engineering teams.** -Four intelligence layers. Eight MCP tools. One `pip install`. +Four intelligence layers. Ten MCP tools. One `pip install`. [![PyPI version](https://img.shields.io/pypi/v/repowise?color=F59520&labelColor=0A0A0A)](https://pypi.org/project/repowise/) [![License: AGPL v3](https://img.shields.io/badge/license-AGPL--v3-F59520?labelColor=0A0A0A)](https://www.gnu.org/licenses/agpl-3.0) @@ -23,7 +23,7 @@ Four intelligence layers. Eight MCP tools. One `pip install`. When Claude Code reads a 3,000-file codebase, it reads files. It does not know who owns them, which ones change together, which ones are dead, or why they were built the way they were. -repowise fixes that. It indexes your codebase into four intelligence layers — dependency graph, git history, auto-generated documentation, and architectural decisions — and exposes them to Claude Code (and any MCP-compatible AI agent) through eight precisely designed tools. +repowise fixes that. It indexes your codebase into four intelligence layers — dependency graph, git history, auto-generated documentation, and architectural decisions — and exposes them to Claude Code (and any MCP-compatible AI agent) through ten precisely designed tools. The result: Claude Code answers *"why does auth work this way?"* instead of *"here is what auth.ts contains."* @@ -48,6 +48,15 @@ The dependency graph now captures edges that pure AST parsing misses: These edges appear in `get_context`, `get_risk`, and `get_dependency_path` like any other dependency. +### Single-call answers via `get_answer` +A new `get_answer(question)` MCP tool collapses the typical "search → read → reason" loop into one call. It runs retrieval over the wiki, gates on confidence (top-hit dominance ratio), and synthesizes a 2–5 sentence answer with concrete file/symbol citations. High-confidence answers can be cited directly; ambiguous ones return ranked excerpts so the agent grounds in source. 
Responses are cached per repository by question hash, so repeated questions cost nothing. + +### Symbol lookup via `get_symbol` +A new `get_symbol(symbol_id)` MCP tool resolves a fully-qualified symbol identifier (e.g. `pkg/module.py::Class::method`) to its definition, returning the source body, signature, file location, and any cross-referenced docstring — without the agent having to grep then read. + +### Test files in the documentation layer +The page generator now treats test files as first-class wiki targets. They have near-zero PageRank (nothing imports them back) but answer real questions like "what test exercises X" or "where is Y verified", which the doc layer is the right place to surface. Filtering remains available via `--skip-tests` for users who prefer to exclude them. + +### Temporal hotspot decay +Hotspot scoring now uses an exponentially time-decayed score with a 180-day half-life layered on top of the raw 90-day churn count. A commit from a year ago contributes roughly 25% as much as a commit from today. The score reflects recent activity, not just total volume. Surfaced in `get_overview` and `get_risk`. @@ -55,7 +64,7 @@ Hotspot scoring now uses an exponentially time-decayed score with a 180-day half Incremental updates now recompute global percentile ranks for every file using a single `PERCENT_RANK()` SQL window function. Previously this required loading all rows into Python. The new approach is both faster and correct on large repos — no sampling, no approximation. ### PR blast radius -`get_risk(changed_files=[...])` now returns a full blast-radius report: transitive affected files, co-change warnings for historical co-change partners not included in the PR, recommended reviewers ranked by temporal ownership, test gap detection, and an overall 0–10 risk score. Same eight tools — substantially more signal per call.
+`get_risk(changed_files=[...])` now returns a full blast-radius report: transitive affected files, co-change warnings for historical co-change partners not included in the PR, recommended reviewers ranked by temporal ownership, test gap detection, and an overall 0–10 risk score. Same flat tool surface — substantially more signal per call. ### Knowledge map in `get_overview` `get_overview` now surfaces: top owners across the codebase, "bus factor 1" knowledge silos (files where one person owns >80% of commits), and onboarding targets — high-centrality files with the weakest documentation coverage. Useful for team planning and risk review. @@ -128,17 +137,19 @@ Add to your Claude Code config (`~/.claude/claude_desktop_config.json`): --- -## Eight MCP tools +## Ten MCP tools Most tools are designed around data entities — one module, one file, one symbol — which forces AI agents into long chains of sequential calls. repowise tools are designed around **tasks**. Pass multiple targets in one call. Get complete context back. | Tool | What it answers | When Claude Code calls it | |---|---|---| +| `get_answer(question)` | One-call RAG: retrieves over the wiki, gates on confidence, and synthesizes a cited 2–5 sentence answer. High-confidence answers cite directly; ambiguous queries return ranked excerpts. Responses are cached per repository by question hash. | First call on any code question — collapses search → read → reason into one round-trip | +| `get_symbol(symbol_id)` | Resolves a qualified symbol id (`path::Class::method`) to its source body, signature, and docstring | When the question names a specific class, function, or method | | `get_overview()` | Architecture summary, module map, entry points | First call on any unfamiliar codebase | -| `get_context(targets, include?)` | Docs, ownership, decisions, freshness for any targets — files, modules, or symbols | Before reading or modifying code. Pass all relevant targets in one call. 
| +| `get_context(targets, include?, compact?)` | Docs, ownership, decisions, freshness for any targets — files, modules, or symbols. `compact=True` is the default and bounds the response to ~10K characters; pass `compact=False` for the full structure block, importer list, and per-symbol docstrings | Before reading or modifying code. Pass all relevant targets in one call. | | `get_risk(targets?, changed_files?)` | Hotspot scores, dependents, co-change partners, blast radius, recommended reviewers, test gaps, security signals, 0–10 risk score | Before modifying files — understand what could break | | `get_why(query?)` | Three modes: NL search over decisions · path-based decisions for a file · no-arg health dashboard | Before architectural changes — understand existing intent | -| `search_codebase(query)` | Semantic search over the full wiki. Natural language. | When you don't know where something lives | +| `search_codebase(query)` | Semantic search over the full wiki. Natural language. | When `get_answer` returned low confidence and you need to discover candidate pages by topic | | `get_dependency_path(from, to)` | Connection path between two files, modules, or symbols | When tracing how two things are connected | | `get_dead_code(min_confidence?, include_internals?, include_zombie_packages?)` | Unreachable code sorted by confidence and cleanup impact | Cleanup tasks | | `get_architecture_diagram(module?)` | Mermaid diagram for the repo or a specific module | Documentation and presentation | @@ -151,7 +162,7 @@ Most tools are designed around data entities — one module, one file, one symbo |---|---|---|---| | Claude Code alone (no MCP) | grep + read ~30 files | ~8 min | Ownership, prior decisions, hidden coupling | | repowise (old 16-tool design) | 16 sequential calls | ~15 min | Nothing — but slow | -| **repowise (8 tools)** | **5 calls** | **~2 min** | **Nothing** | +| **repowise (10 tools)** | **5 calls** | **~2 min** | **Nothing** | The 5 calls for that task: @@ 
-325,7 +336,7 @@ When a senior engineer leaves, the "why" usually leaves with them. Decision inte | Git intelligence (hotspots, ownership, co-changes) | ✅ | ❌ | ❌ | ❌ | ✅ | | Bus factor analysis | ✅ | ❌ | ❌ | ❌ | ✅ | | Architectural decision records | ✅ | ❌ | ❌ | ❌ | ❌ | -| MCP server for AI agents | ✅ 8 tools | ❌ | ✅ 3 tools | ✅ | ✅ | +| MCP server for AI agents | ✅ 10 tools | ❌ | ✅ 3 tools | ✅ | ✅ | | Auto-generated CLAUDE.md | ✅ | ❌ | ❌ | ❌ | ❌ | | Doc freshness scoring | ✅ | ❌ | ❌ | ⚠️ staleness only | ❌ | | Incremental updates on commit | ✅ <30s | ✅ | ❌ | ✅ | ✅ | diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index c93f737..9025c3c 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -16,7 +16,7 @@ For per-package detail (installation, full API reference, all CLI flags, file ma |---------|--------|----------------| | `packages/core` | [`packages/core/README.md`](../packages/core/README.md) | Ingestion, generation, persistence, providers — all key classes with code examples | | `packages/cli` | [`packages/cli/README.md`](../packages/cli/README.md) | All 10 CLI commands with every flag documented | -| `packages/server` | [`packages/server/README.md`](../packages/server/README.md) | All REST API endpoints, 8 MCP tools, webhook setup, scheduler jobs | +| `packages/server` | [`packages/server/README.md`](../packages/server/README.md) | All REST API endpoints, 10 MCP tools, webhook setup, scheduler jobs | | `packages/web` | [`packages/web/README.md`](../packages/web/README.md) | Every frontend file with purpose — API client, hooks, components, pages | --- @@ -78,7 +78,7 @@ For per-package detail (installation, full API reference, all CLI flags, file ma │ Three Stores │ │ Consumers │ │ │ │ │ │ SQL (wiki pages, │ │ Web UI MCP Server GitHub Action │ -│ jobs, symbols, │ │ (Next.js) (9 tools) (CI/CD) │ +│ jobs, symbols, │ │ (Next.js) (10 tools) (CI/CD) │ │ versions) │ │ │ │ │ │ repowise CLI │ │ Vector (LanceDB / │ │ (init, update, watch, │ @@ -167,7 
+167,7 @@ repowise/ │ ├── server/ # Python: FastAPI REST API + MCP server │ │ └── src/repowise/server/ │ │ ├── routers/ # FastAPI routers (repos, pages, jobs, symbols, graph, git, dead-code, decisions, search, claude-md) -│ │ ├── mcp_server/ # MCP server package (8 tools, split into focused modules) +│ │ ├── mcp_server/ # MCP server package (10 tools, split into focused modules) │ │ ├── webhooks/ # GitHub + GitLab handlers │ │ ├── job_executor.py # Background pipeline executor — bridges REST endpoints to core pipeline │ │ └── scheduler.py # APScheduler background jobs @@ -219,9 +219,10 @@ Key tables: | Table | Purpose | |-------|---------| | `repos` | Registered repositories, sync state, provider config | -| `wiki_pages` | All generated wiki pages with content, metadata, confidence score | +| `wiki_pages` | All generated wiki pages with content, metadata, confidence score, and a short LLM-extracted `summary` (1–3 sentences) used by `get_context` to keep responses bounded | | `page_versions` | Full version history of every page (for diff view) | | `symbols` | Symbol index: every function, class, method across all files | +| `answer_cache` | Memoised `get_answer` responses keyed by `(repository_id, question_hash)` plus the provider/model used. Repeated questions return at zero LLM cost; cache entries are invalidated by repository re-indexing. | | `generation_jobs` | Job state machine with checkpoint fields for resumability | | `webhook_events` | Every received webhook event (deduplication, audit, retry) | | `symbol_rename_history` | Detected renames for auditing and targeted text patching | @@ -424,6 +425,14 @@ cross-package edges tracked in the graph. Each `FileInfo` is tagged with: `language`, `is_test`, `is_config`, `is_api_contract`, `is_entry_point`, `git_hash`. These tags influence generation priority and prompt choice. 
+**Test files are first-class wiki targets.** The page generator includes any file +tagged `is_test=True` that has at least one extracted symbol, even if the file's +PageRank is near zero (which is typical: nothing imports test files back, so +graph-centrality metrics never select them on their own). Test files answer +questions of the form *"what test exercises X"* / *"where is Y verified"*, and +the doc layer is the right place to surface those. Users who want to exclude +tests from the wiki entirely can pass `--skip-tests` to `repowise init`. + ### 5.2 AST Parsing `ASTParser` is a single class that handles all supported languages. There are no @@ -1103,7 +1112,7 @@ file, tokens used, estimated cost, estimated time remaining). repowise includes an interactive chat interface that lets users ask questions about their codebase and receive answers grounded in the wiki, dependency graph, git history, and architectural decisions. The chat agent uses whichever LLM provider -the user has configured and has access to all 8 MCP tools. +the user has configured and has access to all 10 MCP tools. See [`docs/CHAT.md`](CHAT.md) for the full technical reference covering the backend agentic loop, SSE streaming protocol, provider abstraction extensions, @@ -1114,7 +1123,7 @@ database schema, frontend component architecture, and artifact rendering system. - **Provider-agnostic** — the chat agent goes through the same provider abstraction as documentation generation. A `ChatProvider` protocol extends `BaseProvider` with `stream_chat()` for streaming + tool use without breaking existing callers. -- **Tool reuse** — the 8 MCP tools are called directly as Python functions (no +- **Tool reuse** — the 10 MCP tools are called directly as Python functions (no subprocess round-trip). Tool schemas are defined once in `chat_tools.py` and fed to both the LLM and the executor. 
- **SSE streaming** — `POST /api/repos/{repo_id}/chat/messages` runs the agentic diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 4614ab6..194d7dd 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -12,6 +12,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- **`get_answer` MCP tool** (`tool_answer.py`) — single-call RAG over the wiki layer. Runs retrieval, gates synthesis on top-hit dominance ratio, and returns a 2–5 sentence answer with concrete file/symbol citations plus a `confidence` label. High-confidence responses can be cited directly without verification reads. Backed by an `AnswerCache` table so repeated questions on the same repository cost nothing on the second call. +- **`get_symbol` MCP tool** (`tool_symbol.py`) — resolves a fully-qualified symbol id (`path::Class::method`, also accepts `Class.method`) to its source body, signature, file location, line range, and docstring. Returns the rich source-line signature (with base classes, decorators, and full type annotations preserved) instead of the stripped DB form. +- **`Page.summary` column** — short LLM-extracted summary (1–3 sentences) attached to every wiki page during generation. Used by `get_context` to keep context payloads bounded on dense files. Added by alembic migration `0012_page_summary`. +- **`AnswerCache` table** — memoised `get_answer` responses keyed by `(repository_id, question_hash)` plus the provider/model used. Added by alembic migration `0013_answer_cache`. Cache entries are repository-scoped and invalidated by re-indexing. +- **Test files in the wiki** — `page_generator._is_significant_file()` now treats any file tagged `is_test=True` (with at least one extracted symbol) as significant, regardless of PageRank. Test files have near-zero centrality because nothing imports them back, but they answer "what test exercises X" / "where is Y verified" questions; the doc layer is the right place to surface those. 
Filtering remains available via `--skip-tests`. - **Overview dashboard** (`/repos/[id]/overview`) — new landing page for each repository with: - Health score ring (composite of doc coverage, freshness, dead code, hotspot density, silo risk) - Attention panel highlighting items needing action (stale docs, high-risk hotspots, dead code) @@ -27,6 +32,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **Health score utility** (`web/src/lib/utils/health-score.ts`) — composite health score computation, attention item builder, and language aggregation for the overview dashboard ### Changed +- **`get_context` default is now `compact=True`** — drops the `structure` block, the `imported_by` list, and per-symbol docstring/end-line fields to keep the response under ~10K characters. Pass `compact=False` for the full payload (e.g. when you specifically need import-graph dependents on a large file). - `init_cmd.py` refactored to use shared `persist_pipeline_result()` instead of inline persistence logic - Pipeline orchestrator uses async-friendly patterns to keep the event loop responsive during ingestion - Sidebar and mobile nav updated to include "Overview" link diff --git a/docs/CHAT.md b/docs/CHAT.md index ae84b96..4e9d616 100644 --- a/docs/CHAT.md +++ b/docs/CHAT.md @@ -2,7 +2,7 @@ The codebase chat feature lets users have an interactive conversation with their codebase. The agent uses whichever LLM provider the user has configured, has -access to all 8 MCP tools, and streams responses back to the browser in real time +access to all 10 MCP tools, and streams responses back to the browser in real time showing tool calls as they happen and rendering results in an artifact panel. --- @@ -158,7 +158,7 @@ class ChatProvider(Protocol): Defined in `packages/server/src/repowise/server/chat_tools.py`. -Single source of truth for tool schemas and execution. Imports the 8 MCP tool +Single source of truth for tool schemas and execution. 
Imports the 10 MCP tool functions directly from `repowise.server.mcp_server`. ```python diff --git a/docs/USER_GUIDE.md b/docs/USER_GUIDE.md index 0625b96..665eb79 100644 --- a/docs/USER_GUIDE.md +++ b/docs/USER_GUIDE.md @@ -315,12 +315,14 @@ This is how you connect repowise to Claude Code, Cursor, Cline, Windsurf, and ot | `--transport` | Protocol: `stdio` (default, for editors) or `sse` (for web clients) | | `--port` | Port for SSE transport (default: 7338) | -**MCP tools exposed (8 tools):** +**MCP tools exposed (10 tools):** | Tool | What it does | |------|-------------| +| `get_answer` | One-call RAG: confidence-gated synthesis over the wiki, with cited 2–5 sentence answers and a per-repository question cache | +| `get_symbol` | Resolve a qualified symbol id (`path::Class::method`) to its source body, signature, and docstring | | `get_overview` | Repository architecture summary, key modules, entry points, git health | -| `get_context` | Complete context for files/modules/symbols — docs, ownership, decisions, freshness | +| `get_context` | Complete context for files/modules/symbols — docs, ownership, decisions, freshness. Defaults to `compact=True`; pass `compact=False` for the full structure block and importer list. | | `get_risk` | Modification risk assessment — hotspot score, dependents, bus factor, trend | | `get_why` | Why code is structured the way it is — architectural decisions, git archaeology | | `search_codebase` | Semantic search over wiki with git freshness boosting | diff --git a/docs/architecture-guide.md b/docs/architecture-guide.md index e17dea3..1b0ed3c 100644 --- a/docs/architecture-guide.md +++ b/docs/architecture-guide.md @@ -890,7 +890,7 @@ The chat endpoint runs an agentic loop where the LLM can call Repowise tools: User: "How does auth work in this codebase?" 
│ ▼ - LLM receives: system prompt (with repo context) + 8 tool schemas + LLM receives: system prompt (with repo context) + 10 tool schemas │ ▼ Iteration 1: LLM calls search_codebase("authentication") @@ -914,7 +914,7 @@ Max 10 iterations per request. Streamed via SSE (Server-Sent Events). ## 8. MCP Tools -MCP (Model Context Protocol) lets AI coding assistants (Claude Code, Cursor, Windsurf, Cline) call Repowise tools directly. There are 8 tools, each answering a specific question. +MCP (Model Context Protocol) lets AI coding assistants (Claude Code, Cursor, Windsurf, Cline) call Repowise tools directly. There are 10 tools, each answering a specific question. ### Tool 1: `get_overview` — "What is this codebase?" diff --git a/packages/cli/src/repowise/cli/main.py b/packages/cli/src/repowise/cli/main.py index 7fa3650..2cc741d 100644 --- a/packages/cli/src/repowise/cli/main.py +++ b/packages/cli/src/repowise/cli/main.py @@ -41,3 +41,9 @@ def cli() -> None: cli.add_command(serve_command) cli.add_command(mcp_command) cli.add_command(reindex_command) + + +if __name__ == "__main__": + # Allow `python -m repowise.cli.main` (used by repowise-bench when running + # against a local source checkout instead of a pip-installed package). + cli() diff --git a/packages/core/alembic/versions/0012_page_summary.py b/packages/core/alembic/versions/0012_page_summary.py new file mode 100644 index 0000000..aecfc72 --- /dev/null +++ b/packages/core/alembic/versions/0012_page_summary.py @@ -0,0 +1,35 @@ +"""Add summary column to wiki_pages. + +Stores a 1–3 sentence purpose blurb per page so MCP get_context can return +narrative file-level descriptions without shipping the full content_md to the +agent on every turn. Always populated (LLM-extracted in full mode, deterministic +in index-only mode). 
+ +Revision ID: 0012 +Revises: 0011 +Create Date: 2026-04-08 +""" + +from __future__ import annotations + +from collections.abc import Sequence + +import sqlalchemy as sa +from alembic import op + +# revision identifiers +revision: str = "0012" +down_revision: str | None = "0011" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + op.add_column( + "wiki_pages", + sa.Column("summary", sa.Text(), nullable=False, server_default=""), + ) + + +def downgrade() -> None: + op.drop_column("wiki_pages", "summary") diff --git a/packages/core/alembic/versions/0013_answer_cache.py b/packages/core/alembic/versions/0013_answer_cache.py new file mode 100644 index 0000000..25518f5 --- /dev/null +++ b/packages/core/alembic/versions/0013_answer_cache.py @@ -0,0 +1,60 @@ +"""Add answer_cache table for get_answer LLM synthesis caching. + +Caches the full JSON payload of a get_answer response keyed by repository +and question hash. Repeat questions from the agent return zero-LLM-cost +hits. 
+ +Revision ID: 0013 +Revises: 0012 +Create Date: 2026-04-08 +""" + +from __future__ import annotations + +from collections.abc import Sequence + +import sqlalchemy as sa +from alembic import op + +# revision identifiers +revision: str = "0013" +down_revision: str | None = "0012" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + op.create_table( + "answer_cache", + sa.Column("id", sa.String(32), primary_key=True), + sa.Column( + "repository_id", + sa.String(32), + sa.ForeignKey("repositories.id", ondelete="CASCADE"), + nullable=False, + ), + sa.Column("question_hash", sa.String(64), nullable=False), + sa.Column("question", sa.Text(), nullable=False), + sa.Column("payload_json", sa.Text(), nullable=False), + sa.Column("provider_name", sa.String(64), nullable=False, server_default=""), + sa.Column("model_name", sa.String(128), nullable=False, server_default=""), + sa.Column( + "created_at", + sa.DateTime(timezone=True), + nullable=False, + server_default=sa.func.now(), + ), + sa.UniqueConstraint( + "repository_id", "question_hash", name="uq_answer_cache_q" + ), + ) + op.create_index( + "ix_answer_cache_repo", + "answer_cache", + ["repository_id"], + ) + + +def downgrade() -> None: + op.drop_index("ix_answer_cache_repo", table_name="answer_cache") + op.drop_table("answer_cache") diff --git a/packages/core/src/repowise/core/generation/models.py b/packages/core/src/repowise/core/generation/models.py index 5630ec7..f326b36 100644 --- a/packages/core/src/repowise/core/generation/models.py +++ b/packages/core/src/repowise/core/generation/models.py @@ -130,6 +130,10 @@ class GeneratedPage: confidence: float = 1.0 freshness_status: str = "fresh" # FreshnessStatus literal metadata: dict[str, object] = field(default_factory=dict) + # 1–3 sentence purpose blurb extracted from the rendered content. 
Used by + # MCP get_context as the default narrative payload (content is gated behind + # include=["full_doc"]). + summary: str = "" @property def total_tokens(self) -> int: diff --git a/packages/core/src/repowise/core/generation/page_generator.py b/packages/core/src/repowise/core/generation/page_generator.py index 4f0c9d3..97f5050 100644 --- a/packages/core/src/repowise/core/generation/page_generator.py +++ b/packages/core/src/repowise/core/generation/page_generator.py @@ -964,6 +964,7 @@ def _build_generated_page( page_type=page_type, title=title, content=response.content, + summary=_extract_summary(response.content), source_hash=source_hash, model_name=self._provider.model_name, provider_name=self._provider.provider_name, @@ -987,6 +988,40 @@ def _render(self, template_name: str, **kwargs: Any) -> str: # --------------------------------------------------------------------------- +def _extract_summary(content: str, max_chars: int = 320) -> str: + """Extract a 1–3 sentence purpose blurb from rendered wiki markdown. + + Strategy: walk lines top-to-bottom, skip blanks/headings/list-markers/HTML + comments, and take the first prose paragraph. Truncate at sentence boundary + near max_chars. Fully deterministic — no extra LLM call. + """ + if not content: + return "" + para_lines: list[str] = [] + for raw in content.splitlines(): + line = raw.strip() + if not line: + if para_lines: + break + continue + if line.startswith(("#", ">", "```", "---", "