From fe715fa5dce7f6b50a1832a48c75cc54d32083bd Mon Sep 17 00:00:00 2001
From: imer <s3574240@rz.uni-frankfurt.de>
Date: Fri, 15 May 2026 16:54:57 +0200
Subject: [PATCH] feat(code-audit): add PR auditor as second code-audit slice
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`openworkers audit pr <github-url>` extracts atomic claims from a PR
description and verdicts each against the actual unified diff. Same
pipeline shape as the README auditor (planner → deterministic
researcher → checker + trust gate → critic), proving that the
`SourceAdapter` abstraction generalises to a second backend.

New modules:
- core/sources/github.py — GitHubAdapter over a unified diff via a
  PrSpec value object; live fetch (fetch_pr_from_github) is a sibling
  helper, decoupled from the adapter so tests stay network-free.
  parse_pr_url + load_pr_fixture as supporting helpers.
- core/orchestrator/pr_flow.py — PrAuditOrchestrator.
- core/orchestrator/audit_prompts.py — shared audit-prompt loader
  (extracted from readme_flow.py so each new auditor registers its
  templates in one place).
- prompts/code_audit/pr_planner.md + pr_checker.md — PR-specific claim
  types (add | remove | fix | refactor | test | behavior | doc | other)
  and diff-aware verdict rules.

Schema:
- core/schemas_audit.py exposes AuditClaim / AuditClaimList as aliases
  of ReadmeClaim / ReadmeClaimList so PR code reads cleanly without
  churning the README slice. AuditReport.target captures the audited
  artefact id (PR URL, etc.).

Agents:
- providers/code_audit_agents.py adds PrPlannerAgent + PrCheckerAgent
  alongside the README versions. The same _enforce_trust_gate runs
  after the LLM responds, so any claim with no diff evidence is
  forced to `unsupported` regardless of LLM output. AuditCriticAgent
  is reused as-is.

CLI:
- `openworkers audit pr <url>` (or `--fixture <dir>` for offline runs)
  with optional `--token` falling back to GITHUB_TOKEN / GH_TOKEN.

Tests:
- tests/fixtures/sample_pr/ — canned PR JSON + unified diff with one
  verified, one drifted, one contradicted, one fabricated (no-evidence)
  claim.
- tests/code_audit/test_pr_flow.py — URL parser, fixture loader,
  adapter grep, end-to-end audit with verdict distribution, and an
  explicit trust-gate-override assertion for the hallucinated
  WIDGETLIB_DEBUG claim.

Docs: README.md, ROADMAP.md, CHANGELOG.md, AGENTS.md updated with the
new slice, shared pipeline section, and PR-audit-flow walkthrough.

Verification: 159/159 tests pass (153 existing + 6 new PR tests),
mypy strict clean on new modules, ruff clean on new files, black
formatted. CLI smoke-runs in text and JSON modes against the fixture.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 AGENTS.md                           |  38 +++-
 CHANGELOG.md                        |   1 +
 README.md                           |   8 +-
 ROADMAP.md                          |   2 +-
 apps/cli/main.py                    |  49 +++++
 core/orchestrator/audit_prompts.py  |  45 ++++
 core/orchestrator/pr_flow.py        | 206 ++++++++++++++++++
 core/orchestrator/readme_flow.py    |  36 +---
 core/schemas_audit.py               |  13 ++
 core/sources/__init__.py            |  18 +-
 core/sources/github.py              | 318 ++++++++++++++++++++++++++++
 prompts/code_audit/pr_checker.md    |  53 +++++
 prompts/code_audit/pr_planner.md    |  52 +++++
 providers/code_audit_agents.py      | 119 +++++++++++
 tests/code_audit/test_pr_flow.py    | 279 ++++++++++++++++++++++++
 tests/fixtures/sample_pr/diff.patch |  43 ++++
 tests/fixtures/sample_pr/pr.json    |  16 ++
 17 files changed, 1249 insertions(+), 47 deletions(-)
 create mode 100644 core/orchestrator/audit_prompts.py
 create mode 100644 core/orchestrator/pr_flow.py
 create mode 100644 core/sources/github.py
 create mode 100644 prompts/code_audit/pr_checker.md
 create mode 100644 prompts/code_audit/pr_planner.md
 create mode 100644 tests/code_audit/test_pr_flow.py
 create mode 100644 tests/fixtures/sample_pr/diff.patch
 create mode 100644 tests/fixtures/sample_pr/pr.json
diff --git a/AGENTS.md b/AGENTS.md
index e7eb004..159836c 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -18,24 +18,29 @@ core/
   blackboard/        # Redis-backed shared state (thesis-only for now)
   orchestrator/
     thesis_flow.py   # ThesisOrchestrator — legacy, do not break
-    readme_flow.py   # ReadmeAuditOrchestrator — new code-audit slice
+    readme_flow.py   # ReadmeAuditOrchestrator — README slice
+    pr_flow.py       # PrAuditOrchestrator — PR slice
+    audit_prompts.py # Shared template loader for all audit prompts
     compiler.py      # PromptCompiler for thesis (blackboard → prompt vars)
   router/            # Provider tier routing (quality/balanced/cheap)
   memory/episodic.py # Qdrant episodic memory (thesis)
   schemas.py         # Thesis Pydantic models
-  schemas_audit.py   # Code-audit Pydantic models (kept separate on purpose)
+  schemas_audit.py   # Code-audit Pydantic models (AuditClaim aliases ReadmeClaim)
   sources/
     base.py          # SourceAdapter ABC — the new evidence-backend contract
     local_repo.py    # LocalRepoAdapter — grep over a local repo
+    github.py        # GitHubAdapter (over PR diff) + PrSpec + live fetcher
 providers/
   unified.py         # UnifiedLLM: provider fallback, breakers, DRY_RUN path
   thesis_agents.py   # Thesis agent suite — untouched, keep passing
-  code_audit_agents.py # README planner, checker, critic + trust gate
+  code_audit_agents.py # README + PR planners/checkers + trust gate + critic
   budget.py          # BudgetGuard (contextvars-scoped session ceiling)
   resilience.py      # Tenacity + pybreaker glue
 prompts/
-  *.md               # Thesis templates (head_planner, specialist_*, ...)
-  code_audit/*.md    # Audit templates (readme_planner, readme_checker, ...)
+  *.md                       # Thesis templates (head_planner, specialist_*, ...)
+  code_audit/readme_*.md     # README auditor templates
+  code_audit/pr_*.md         # PR auditor templates
+  code_audit/audit_critic.md # Shared critic template (used by both)
 tools/mcp/           # Literature MCP tools; will migrate behind SourceAdapter
 apps/
   cli/main.py        # Single argparse CLI for both `thesis ...` and `audit ...`
@@ -65,7 +70,18 @@ This overwrites whatever the LLM said. A confidently hallucinating checker that
 
 Mirror this pattern when you add new auditors (PR auditor, compliance auditor, etc.): keep the LLM creative, but enforce the trust invariant in Python.
 
-## The README-audit flow (current slice)
+## Shared audit pipeline (README and PR slices)
+
+Both auditors follow the same four-stage shape, parameterised by adapter and prompts:
+
+1. **Planner (LLM)** — extracts atomic factual claims as `AuditClaim` (alias of `ReadmeClaim`). The README planner reads `claim_type ∈ {feature, install, usage, requirement, metric, api, other}`; the PR planner reads `claim_type ∈ {add, remove, fix, refactor, test, behavior, doc, other}`. Same schema, domain-specific enums in the prompt.
+2. **Researcher (deterministic Python)** — runs `adapter.search_any(claim.search_hints, limit=N)` over whichever `SourceAdapter` the orchestrator is bound to. README slice uses `LocalRepoAdapter`; PR slice uses `GitHubAdapter`. Adapters never call an LLM and never reach the network at audit time (the PR adapter does its fetch ahead, returning a `PrSpec`).
+3. **Checker (LLM + trust gate)** — judges each `(claim, evidence)` pair. Trust gate runs after the LLM responds.
+4. **Critic (LLM)** — adversarial pass. `AuditCriticAgent` is shared across slices.
+
+`core/orchestrator/audit_prompts.py` holds the template registry — adding a new auditor just means registering its prompts there. `core/schemas_audit.py` exposes `AuditClaim` / `AuditClaimList` as aliases of the (legacy-named) `ReadmeClaim` / `ReadmeClaimList`. Rename can happen when the third auditor lands; until then the alias keeps PR code readable.
+
+## The README-audit flow (slice 1)
 
 1. **Planner (LLM)** — reads the README, extracts atomic factual claims with verbatim quotes + grep-friendly search hints. Schema: `ReadmeClaimList`.
 2. **Researcher (deterministic Python)** — uses `LocalRepoAdapter.search_any(hints)` to retrieve evidence snippets from the repo. **No LLM call here** — it's just a filesystem grep with safety rails (path traversal guard, file-size cap, dir excludes).
@@ -74,6 +90,13 @@ Mirror this pattern when you add new auditors (PR auditor, compliance auditor, e
 
 The audited README is **excluded** from its own evidence pool — otherwise every fabricated claim could "verify itself" against the README quote. See `ReadmeAuditOrchestrator.audit` for the exclusion logic.
 
+## The PR-audit flow (slice 2)
+
+1. **Fetcher** — `fetch_pr_from_github(url, token)` hits the GitHub REST API for PR metadata + unified diff + changed files, returning a `PrSpec`. Tests skip this and use `load_pr_fixture(directory)` to build `PrSpec` from `pr.json` + `diff.patch`. Anonymous requests work for public repos but hit the 60/hour rate limit fast; pass a token explicitly or via `GITHUB_TOKEN` / `GH_TOKEN`.
+2. **Adapter** — `GitHubAdapter(pr_spec)` parses the diff once into `DiffHunk` objects (path + new-file line offset + hunk text). `search_any(terms)` greps hunk bodies, stripping the leading `+`/`-`/space marker before matching so a hint of `+` doesn't spuriously match every added line. One hit per hunk to avoid duplicate snippets.
+3. **Planner / Checker** — `PrPlannerAgent` + `PrCheckerAgent` mirror their README cousins but use PR-specific prompts. The checker's prompt instructs it to interpret `+` lines as additions, `-` as removals — semantic decisions live in the prompt; mechanical retrieval stays in the adapter.
+4. **No artefact-exclusion needed** — the PR description is never part of the diff, so the self-evidence problem doesn't arise here. Different shape from README, same trust invariant.
+
 ## Coexistence rules
 
 - **Do not break the thesis path.** The full thesis test suite (`tests/test_*.py` minus `tests/code_audit/`) must stay green. Thesis is being deprecated *gradually*, not yanked.
@@ -100,7 +123,8 @@ The audited README is **excluded** from its own evidence pool — otherwise ever
 See `ROADMAP.md` for the full picture. Short version:
 
 - ✅ Slice 1 (shipped): README auditor.
-- 🚧 Next slices: PR auditor (`audit pr <url>`), compliance auditor, architecture auditor. All slot in behind the same `SourceAdapter` + agent-suite + trust-gate pattern.
+- ✅ Slice 2 (shipped): PR auditor (`audit pr <url>`).
+- 🚧 Next slices: compliance auditor, architecture auditor. All slot in behind the same `SourceAdapter` + agent-suite + trust-gate pattern.
 - 🚧 Layered source adapters: repo (highest trust) → language specs / RFCs → dependency source. The literature MCP tools will migrate behind the same contract.
 - 🚧 Cherry-picked from the v1.0 plan: tool/source registry, light provider-registry abstraction (Ollama later for local inference on private repos), structlog audit trail.
 - ⏸️ Deferred: PyPI packaging, Typer CLI rewrite, OTel, Smart truncation, Ollama. Not blocking the audit-track expansion.
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5e708e4..ec6d024 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@ All notable changes to OpenWorkers are documented here. The format is loosely ba
 ## [Unreleased]
 
 ### Added
+- **Code-audit track — PR auditor (second slice).** New `openworkers audit pr <github-url>` CLI subcommand verifies a pull request description against its actual diff. Same pipeline shape as the README auditor (planner → deterministic researcher → checker + trust gate → critic), with two new layers: `core/sources/github.py` (`GitHubAdapter` implementing `SourceAdapter` over a unified diff, plus `PrSpec` value object, `parse_pr_url`, `fetch_pr_from_github` for live GitHub fetch with optional `GITHUB_TOKEN`/`GH_TOKEN`, and `load_pr_fixture` for offline tests) and `core/orchestrator/pr_flow.py` (`PrAuditOrchestrator`). New agents `PrPlannerAgent` and `PrCheckerAgent` in `providers/code_audit_agents.py`; `AuditCriticAgent` reused as-is. New prompts `prompts/code_audit/pr_planner.md` + `pr_checker.md` with PR-specific claim types (`add | remove | fix | refactor | test | behavior | doc | other`) and diff-aware verdict rules. Audit-prompt rendering extracted from `readme_flow.py` into `core/orchestrator/audit_prompts.py` so each new auditor can register templates without touching unrelated modules. `tests/fixtures/sample_pr/` contains a canned PR (json + unified diff) with verified / drifted / contradicted / fabricated claims; `tests/code_audit/test_pr_flow.py` asserts verdict distribution and an explicit trust-gate override.
 - **Code-audit track — README auditor (first slice).** New `openworkers audit readme <repo>` CLI subcommand verifies every factual claim in a README against the actual repository, emitting `verified | drifted | unsupported | contradicted` verdicts with cited file paths. Pipeline: planner (LLM) → researcher (deterministic grep via new `LocalRepoAdapter`) → checker (LLM + post-LLM trust gate) → critic (LLM adversarial pass). New modules: `core/sources/` (`SourceAdapter` ABC + `LocalRepoAdapter`), `core/schemas_audit.py` (Pydantic audit models), `core/orchestrator/readme_flow.py` (`ReadmeAuditOrchestrator`), `providers/code_audit_agents.py` (planner / checker / critic + `_enforce_trust_gate` invariant), `prompts/code_audit/*.md` (audit templates). The trust gate is enforced in code, not delegated to prompts: any claim with no retrieved evidence is forced to `unsupported` regardless of LLM output. The audited README is excluded from its own evidence pool so fabricated claims cannot self-verify. `tests/code_audit/test_readme_flow.py` exercises the full flow with a stubbed `UnifiedLLM.generate_fn` and an `tests/fixtures/sample_repo/` containing a deliberate mix of verified / drifted / contradicted / fabricated claims. Thesis pipeline untouched.
 - **Contributor onboarding doc** `AGENTS.md` capturing project DNA, code-audit slice design, trust-gate invariant, conventions, and the recipe for adding new auditors.
 - **RAG over user PDFs** (first incremental v1.0 slice). New `tools/mcp/rag.py` with sentence-aware chunker, `RAGIndexer` (PDF/text → Qdrant via PyMuPDF + FastEmbed `BAAI/bge-small-en-v1.5`), and `RAGSearchTool` (registered as `rag_search` in `ToolRegistry`). Collections namespaced under `rag_*` so they cannot collide with `thesis_corpus` or `episodes`. New CLI: `thesis ingest add|list|delete`. New flag: `thesis research ... --rag-collection <name>` makes the researcher pull from the user collection alongside arXiv/SS. New field: `ResearchContext.rag_collection`. `tests/test_rag.py` covers chunking edge cases, BOM/text extraction, collection naming, indexer round-trip, privacy gating, and idempotent re-ingest.
diff --git a/README.md b/README.md
index 1864570..de4ac04 100644
--- a/README.md
+++ b/README.md
@@ -17,7 +17,7 @@ Both domains share the same DNA: a hierarchical pipeline (planner → researcher
 
 ## Code audit *(new track)*
 
-`openworkers audit readme <repo>` extracts every factual claim from a README and verdicts each one against the actual repository:
+`openworkers audit readme <repo>` and `openworkers audit pr <github-pr-url>` extract every factual claim from a README or PR description and verdict each one against the actual repository / diff:
 
 | Verdict | Meaning |
 |---|---|
@@ -26,9 +26,11 @@ Both domains share the same DNA: a hierarchical pipeline (planner → researcher
 | `contradicted` | Code directly disproves the claim |
 | `unsupported` | No evidence in the repo — enforced in code, not delegated to the LLM |
 
-The pipeline is planner (LLM extracts claims) → researcher (deterministic grep via `LocalRepoAdapter`) → checker (LLM judges + trust gate forces `unsupported` when evidence is empty) → critic (adversarial pass). The audited README is excluded from its own evidence pool, so fabricated claims cannot verify themselves.
+Both auditors use the same pipeline: planner (LLM extracts claims) → researcher (deterministic grep via a `SourceAdapter` — `LocalRepoAdapter` for repos, `GitHubAdapter` for PR diffs) → checker (LLM judges + trust gate forces `unsupported` when evidence is empty) → critic (adversarial pass). The audited artefact is excluded from its own evidence pool, so fabricated claims cannot verify themselves.
 
-Roadmap for this track: PR auditor (PR description vs. diff), compliance auditor (security/policy claims vs. code), architecture auditor (design doc vs. implementation). See [AGENTS.md](AGENTS.md) for the contributor recipe.
+`audit pr` reads `GITHUB_TOKEN` or `GH_TOKEN` for higher rate limits; offline testing uses canned fixtures via `--fixture <dir>` (see `tests/fixtures/sample_pr/`).
+
+Roadmap for this track: compliance auditor (security/policy claims vs. code), architecture auditor (design doc vs. implementation), layered source adapters (specs / RFCs / dependency source), tool/source registry, Ollama for local inference on private repos. See [AGENTS.md](AGENTS.md) for the contributor recipe.
 
 ## What it does
 
diff --git a/ROADMAP.md b/ROADMAP.md
index 1a6ef3d..6ba201a 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -24,7 +24,7 @@
 A second domain alongside the thesis assistant: audit factual claims in technical artefacts against the codebase. Same DNA — multi-agent, structured JSON, never fabricates, trust gate refuses verdicts without evidence — applied to a domain where trustworthy automated review matters to OSS maintainers and contributors. See [AGENTS.md](AGENTS.md) for the contributor recipe and trust-gate invariant.
 
 - ✅ **README auditor** *(first slice, shipped)*. `openworkers audit readme <repo>` extracts atomic claims from a README and verdicts each one against the actual codebase as `verified | drifted | unsupported | contradicted`. Trust gate is enforced in `providers/code_audit_agents.py::_enforce_trust_gate`, not in prompts. The audited README is excluded from its own evidence pool. New `SourceAdapter` abstraction (`core/sources/`) with `LocalRepoAdapter`.
-- 🚧 **PR auditor** — `openworkers audit pr <url>`. Verify the PR description against the actual diff; flag scope creep, missing tests, undocumented changes. Needs a `GitHubAdapter` implementing `SourceAdapter`.
+- ✅ **PR auditor** *(second slice, shipped)*. `openworkers audit pr <github-url>` extracts atomic claims from a PR description and verdicts each against the actual unified diff. New `GitHubAdapter` (in `core/sources/github.py`) implements `SourceAdapter` over a unified diff via a `PrSpec` value object; live fetch lives in `fetch_pr_from_github` and is decoupled from the adapter for offline testing. PR-specific prompts under `prompts/code_audit/pr_*.md` use diff-aware verdict rules (e.g., `add` claims must match `+`-prefixed hunks).
 - 🚧 **Compliance auditor** — `openworkers audit compliance <repo>`. Verify security/policy claims ("inputs sanitized", "no secrets", "auth required on X") against the code.
 - 🚧 **Architecture auditor** — verify RFC / design-doc claims against implementation, language specs, and dependency source.
 - 🚧 **Layered source adapters** — repo (highest trust) → language specs / RFCs (`SpecAdapter`) → dependency source (`DependencyAdapter`). The existing literature MCP tools (arXiv / Semantic Scholar / CrossRef) will migrate behind the same `SourceAdapter` contract.
diff --git a/apps/cli/main.py b/apps/cli/main.py
index 5998922..7f23796 100644
--- a/apps/cli/main.py
+++ b/apps/cli/main.py
@@ -9,6 +9,7 @@
     format_session_text,
 )
 from core.memory.episodic import EpisodicMemory
+from core.orchestrator.pr_flow import PrAuditOrchestrator, format_pr_report_text
 from core.orchestrator.readme_flow import ReadmeAuditOrchestrator, format_report_text
 from core.orchestrator.thesis_flow import ThesisOrchestrator
 from core.router.engine import Router
@@ -235,9 +236,32 @@ async def cmd_audit_dispatch(args):
     """Route `audit <subcommand>` to its handler."""
     if args.audit_action == "readme":
         return await cmd_audit_readme(args)
+    if args.audit_action == "pr":
+        return await cmd_audit_pr(args)
     raise SystemExit(f"Unknown audit action: {args.audit_action}")
 
 
+async def cmd_audit_pr(args):
+    """Run the PR auditor against a GitHub PR URL (or a local fixture dir)."""
+    from core.sources.github import fetch_pr_from_github, load_pr_fixture
+
+    unified = create_unified_llm()
+    orch = PrAuditOrchestrator(unified=unified)
+
+    if args.fixture:
+        pr_spec = load_pr_fixture(args.fixture)
+    else:
+        pr_spec = await asyncio.to_thread(fetch_pr_from_github, args.url, args.token)
+
+    report, critique = await orch.audit(pr_spec)
+    if args.format == "json":
+        payload = {"report": report.model_dump(), "critique": critique.model_dump()}
+        _output(payload, "json", args.output)
+    else:
+        _output(format_pr_report_text(report, critique), "text", args.output)
+    return report
+
+
 async def cmd_audit_readme(args):
     """Run the README auditor on a local repo."""
     unified = create_unified_llm()
@@ -399,6 +423,31 @@ def build_parser() -> argparse.ArgumentParser:
     )
     add_output_args(p_audit_readme)
 
+    p_audit_pr = audit_sub.add_parser(
+        "pr",
+        help="Verify a PR description against the actual diff",
+    )
+    p_audit_pr.add_argument(
+        "url",
+        type=str,
+        nargs="?",
+        default="",
+        help="GitHub PR URL (https://github.com/owner/repo/pull/N)",
+    )
+    p_audit_pr.add_argument(
+        "--fixture",
+        type=str,
+        default=None,
+        help="Audit a fixture directory containing pr.json + diff.patch instead of hitting GitHub",
+    )
+    p_audit_pr.add_argument(
+        "--token",
+        type=str,
+        default=None,
+        help="GitHub token (default: $GITHUB_TOKEN or $GH_TOKEN)",
+    )
+    add_output_args(p_audit_pr)
+
     p_ingest = sub.add_parser("ingest", help="Manage user RAG collections (PDF/text -> Qdrant)")
     ingest_sub = p_ingest.add_subparsers(dest="ingest_action", required=True)
 
diff --git a/core/orchestrator/audit_prompts.py b/core/orchestrator/audit_prompts.py
new file mode 100644
index 0000000..edc6f44
--- /dev/null
+++ b/core/orchestrator/audit_prompts.py
@@ -0,0 +1,45 @@
+"""Shared prompt loader for the code-audit family of orchestrators.
+
+Lives outside any single flow module so each new auditor can register
+its templates without circular imports. Deliberately not reusing
+``PromptCompiler``: that compiler is wired to extract thesis blackboard
+state, which audit flows don't use. Audit templates only need
+``{{ var }}`` substitution.
+"""
+
+from __future__ import annotations
+
+import os
+from typing import Any
+
+_AUDIT_PROMPT_DIR = os.path.join(
+    os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
+    "prompts",
+    "code_audit",
+)
+
+# Registry of template name → filename under ``prompts/code_audit/``.
+# Each new auditor appends its entries here; the renderer accepts any
+# registered name without further changes elsewhere.
+TEMPLATE_FILES: dict[str, str] = {
+    "readme_planner": "readme_planner.md",
+    "readme_checker": "readme_checker.md",
+    "pr_planner": "pr_planner.md",
+    "pr_checker": "pr_checker.md",
+    "audit_critic": "audit_critic.md",
+}
+
+
+def render_audit_prompt(name: str, variables: dict[str, Any]) -> str:
+    filename = TEMPLATE_FILES.get(name)
+    if not filename:
+        raise ValueError(f"Unknown audit template: {name}")
+    path = os.path.join(_AUDIT_PROMPT_DIR, filename)
+    try:
+        with open(path, encoding="utf-8") as f:
+            template = f.read()
+    except OSError:
+        return f"[Template {name} not found at {path}]"
+    for key, value in variables.items():
+        template = template.replace("{{ " + key + " }}", str(value))
+    return template
diff --git a/core/orchestrator/pr_flow.py b/core/orchestrator/pr_flow.py
new file mode 100644
index 0000000..852bb4c
--- /dev/null
+++ b/core/orchestrator/pr_flow.py
@@ -0,0 +1,206 @@
+"""PR audit orchestrator.
+
+Verifies a pull request's description against its actual diff. Same
+pipeline shape as ``ReadmeAuditOrchestrator``:
+
+    planner (LLM)  →  researcher (deterministic grep over diff)
+                   →  checker (LLM + trust gate)
+                   →  critic (LLM adversarial pass)
+
+The "researcher" is a content-aware grep on the unified diff via
+``GitHubAdapter`` — no LLM call there, no network call once we have the
+``PrSpec``. Token-free for tests.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import time
+from collections import Counter
+from typing import Any, Callable
+
+from core.orchestrator.audit_prompts import render_audit_prompt as _render_audit_prompt
+from core.schemas_audit import (
+    ALL_VERDICTS,
+    VERDICT_UNSUPPORTED,
+    AuditClaim,
+    AuditCritique,
+    AuditReport,
+    ClaimEvidence,
+    ClaimVerdict,
+    EvidenceRef,
+)
+from core.sources.github import GitHubAdapter, PrSpec
+from providers.code_audit_agents import (
+    AuditCriticAgent,
+    PrCheckerAgent,
+    PrPlannerAgent,
+)
+from providers.unified import UnifiedLLM
+
+logger = logging.getLogger(__name__)
+
+
+class PrAuditOrchestrator:
+    """Audit a PR description against the diff it carries."""
+
+    def __init__(
+        self,
+        unified: UnifiedLLM,
+        prompt_renderer: Callable[[str, dict[str, Any]], str] | None = None,
+        max_evidence_per_claim: int = 5,
+    ) -> None:
+        self.unified = unified
+        self.render = prompt_renderer or _render_audit_prompt
+        self.max_evidence_per_claim = max_evidence_per_claim
+        self.planner = PrPlannerAgent(unified=unified, prompt_renderer=self.render)
+        self.checker = PrCheckerAgent(unified=unified, prompt_renderer=self.render)
+        self.critic = AuditCriticAgent(unified=unified, prompt_renderer=self.render)
+
+    async def audit(self, pr: PrSpec) -> tuple[AuditReport, AuditCritique]:
+        start = time.time()
+        adapter = GitHubAdapter(pr)
+        errors: list[str] = []
+
+        description = pr.description
+        target = pr.url or f"{pr.owner}/{pr.repo}#{pr.number}"
+
+        if not description.strip():
+            empty = AuditReport(
+                repo_path=f"{pr.owner}/{pr.repo}",
+                target=target,
+                verdicts=[],
+                summary=dict.fromkeys(ALL_VERDICTS, 0),
+                errors=["PR description is empty — nothing to audit."],
+            )
+            return empty, AuditCritique()
+
+        # ── Stage 1: Planner extracts claims from PR title + body ──
+        try:
+            planner_result = await self.planner.execute(
+                pr_description=description,
+                pr_url=pr.url,
+                changed_files=pr.changed_files,
+            )
+            claim_list = planner_result["output"]
+        except Exception as e:
+            errors.append(f"planner: {e}")
+            from core.schemas_audit import AuditClaimList
+
+            claim_list = AuditClaimList(claims=[], readme_path=target)
+
+        # ── Stage 2: Researcher retrieves diff evidence (deterministic) ──
+        evidence = await asyncio.to_thread(self._retrieve_all_evidence, claim_list.claims, adapter)
+
+        # ── Stage 3: Checker renders verdicts (with trust gate) ──
+        if claim_list.claims:
+            try:
+                checker_result = await self.checker.execute(claim_list.claims, evidence)
+                verdicts: list[ClaimVerdict] = list(checker_result["output"].verdicts)
+            except Exception as e:
+                errors.append(f"checker: {e}")
+                verdicts = [
+                    ClaimVerdict(
+                        claim_id=c.claim_id,
+                        claim_text=c.claim_text,
+                        verdict=VERDICT_UNSUPPORTED,
+                        confidence=0.0,
+                        evidence_paths=[],
+                        notes=f"Checker failed: {e}",
+                    )
+                    for c in claim_list.claims
+                ]
+        else:
+            verdicts = []
+
+        # ── Stage 4: Critic adversarial pass ──
+        try:
+            critic_result = await self.critic.execute(verdicts, description)
+            critique: AuditCritique = critic_result["output"]
+        except Exception as e:
+            errors.append(f"critic: {e}")
+            critique = AuditCritique()
+
+        summary = Counter(v.verdict for v in verdicts)
+        report = AuditReport(
+            repo_path=f"{pr.owner}/{pr.repo}",
+            target=target,
+            verdicts=verdicts,
+            summary={v: int(summary.get(v, 0)) for v in ALL_VERDICTS},
+            errors=errors,
+        )
+
+        elapsed_ms = int((time.time() - start) * 1000)
+        logger.info(
+            "pr_audit completed pr=%s claims=%d elapsed_ms=%d errors=%d",
+            target,
+            len(verdicts),
+            elapsed_ms,
+            len(errors),
+        )
+        return report, critique
+
+    def _retrieve_all_evidence(
+        self,
+        claims: list[AuditClaim],
+        adapter: GitHubAdapter,
+    ) -> list[ClaimEvidence]:
+        out: list[ClaimEvidence] = []
+        for claim in claims:
+            snippets = adapter.search_any(claim.search_hints, limit=self.max_evidence_per_claim)
+            refs = [
+                EvidenceRef(
+                    path=s.path,
+                    line_start=s.line_start,
+                    line_end=s.line_end,
+                    text=s.text,
+                    source=s.source,
+                )
+                for s in snippets
+            ]
+            out.append(ClaimEvidence(claim_id=claim.claim_id, snippets=refs))
+        return out
+
+
+def format_pr_report_text(
+    report: AuditReport,
+    critique: AuditCritique | None = None,
+) -> str:
+    """Pretty-print a PR audit report for terminal output."""
+    lines: list[str] = []
+    lines.append(f"PR audit — {report.target or report.repo_path}")
+    lines.append("")
+    if report.summary:
+        summary_line = "  ".join(f"{k}={v}" for k, v in report.summary.items())
+        lines.append(f"Summary: {summary_line}")
+        lines.append("")
+    for v in report.verdicts:
+        marker = {
+            "verified": "✓",
+            "drifted": "≠",
+            "contradicted": "✗",
+            "unsupported": "?",
+        }.get(v.verdict, "·")
+        lines.append(f"{marker} [{v.verdict.upper():12s}] {v.claim_id}: {v.claim_text}")
+        for p in v.evidence_paths:
+            lines.append(f"      ↳ {p}")
+        if v.notes:
+            lines.append(f"      note: {v.notes}")
+    if report.errors:
+        lines.append("")
+        lines.append("Errors:")
+        for e in report.errors:
+            lines.append(f"  - {e}")
+    if critique is not None:
+        lines.append("")
+        lines.append("Critic pass:")
+        for wv in critique.weak_verdicts:
+            lines.append(f"  weak:    {wv}")
+        for mc in critique.missed_claims:
+            lines.append(f"  missed:  {mc}")
+        for sg in critique.suggestions:
+            lines.append(f"  suggest: {sg}")
+        if critique.overall_assessment:
+            lines.append(f"  → {critique.overall_assessment}")
+    return "\n".join(lines)
diff --git a/core/orchestrator/readme_flow.py b/core/orchestrator/readme_flow.py
index 503419c..d446de9 100644
--- a/core/orchestrator/readme_flow.py
+++ b/core/orchestrator/readme_flow.py
@@ -20,12 +20,12 @@
 
 import asyncio
 import logging
-import os
 import time
 from collections import Counter
 from pathlib import Path
 from typing import Any, Callable
 
+from core.orchestrator.audit_prompts import render_audit_prompt as _render_audit_prompt
 from core.schemas_audit import (
     ALL_VERDICTS,
     VERDICT_UNSUPPORTED,
@@ -48,40 +48,6 @@
 logger = logging.getLogger(__name__)
 
 
-_AUDIT_PROMPT_DIR = os.path.join(
-    os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
-    "prompts",
-    "code_audit",
-)
-
-_TEMPLATE_FILES = {
-    "readme_planner": "readme_planner.md",
-    "readme_checker": "readme_checker.md",
-    "audit_critic": "audit_critic.md",
-}
-
-
-def _render_audit_prompt(name: str, variables: dict[str, Any]) -> str:
-    """Tiny placeholder-substitution renderer for audit prompts.
-
-    Deliberately not reusing PromptCompiler: that compiler is wired to
-    extract blackboard state, which this slice doesn't use. Audit
-    templates only need ``{{ var }}`` substitution.
-    """
-    filename = _TEMPLATE_FILES.get(name)
-    if not filename:
-        raise ValueError(f"Unknown audit template: {name}")
-    path = os.path.join(_AUDIT_PROMPT_DIR, filename)
-    try:
-        with open(path, encoding="utf-8") as f:
-            template = f.read()
-    except OSError:
-        return f"[Template {name} not found at {path}]"
-    for key, value in variables.items():
-        template = template.replace("{{ " + key + " }}", str(value))
-    return template
-
-
 class ReadmeAuditOrchestrator:
     """Run a README audit end-to-end against a local repo."""
 
diff --git a/core/schemas_audit.py b/core/schemas_audit.py
index 577b195..aadd022 100644
--- a/core/schemas_audit.py
+++ b/core/schemas_audit.py
@@ -41,6 +41,15 @@ class ReadmeClaimList(BaseModel):
     readme_path: str = ""
 
 
+# Cross-domain aliases. README and PR claims share the same shape; the
+# differing-in-name dance is purely for call-site readability. When a
+# third auditor lands we'll rename ReadmeClaim → AuditClaim cleanly;
+# until then the alias keeps PR code self-explanatory without churning
+# the README slice's imports.
+AuditClaim = ReadmeClaim
+AuditClaimList = ReadmeClaimList
+
+
 class EvidenceRef(BaseModel):
     """Adapter-agnostic citation handle. Mirrors ``EvidenceSnippet``
     in shape but is the JSON-serialisable form passed across the
@@ -77,6 +86,10 @@ class ClaimVerdictList(BaseModel):
 class AuditReport(BaseModel):
     repo_path: str
     readme_path: str = ""
+    # Free-form identifier of the audited artefact (PR URL, doc path, …).
+    # Kept optional so the README slice can leave it blank and stay
+    # backwards-compatible with the prior shape.
+    target: str = ""
     verdicts: list[ClaimVerdict] = Field(default_factory=list)
     summary: dict[str, int] = Field(default_factory=dict)
     errors: list[str] = Field(default_factory=list)
diff --git a/core/sources/__init__.py b/core/sources/__init__.py
index c384f96..fbb0660 100644
--- a/core/sources/__init__.py
+++ b/core/sources/__init__.py
@@ -8,6 +8,22 @@
 """
 
 from core.sources.base import EvidenceSnippet, SourceAdapter
+from core.sources.github import (
+    GitHubAdapter,
+    PrSpec,
+    fetch_pr_from_github,
+    load_pr_fixture,
+    parse_pr_url,
+)
 from core.sources.local_repo import LocalRepoAdapter
 
-__all__ = ["EvidenceSnippet", "LocalRepoAdapter", "SourceAdapter"]
+__all__ = [
+    "EvidenceSnippet",
+    "GitHubAdapter",
+    "LocalRepoAdapter",
+    "PrSpec",
+    "SourceAdapter",
+    "fetch_pr_from_github",
+    "load_pr_fixture",
+    "parse_pr_url",
+]
diff --git a/core/sources/github.py b/core/sources/github.py
new file mode 100644
index 0000000..5cf41e8
--- /dev/null
+++ b/core/sources/github.py
@@ -0,0 +1,318 @@
+"""GitHub PR adapter.
+
+Implements ``SourceAdapter`` over the **unified diff** of a pull
+request. The adapter is intentionally constructed from a ``PrSpec``
+value object rather than reaching out to the GitHub API in its
+constructor:
+
+- Tests stay pure: build a ``PrSpec`` from a canned fixture and audit
+  it without touching the network.
+- Future-proof: a GitLab / Gerrit adapter can share the same diff
+  grepping logic by handing us a ``PrSpec``.
+
+The network fetch (``fetch_pr_from_github``) lives in this module as a
+sibling helper, separate from the adapter, so the adapter has no
+dependency on httpx and remains trivially testable.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import re
+from collections.abc import Iterable
+from dataclasses import dataclass, field
+
+import httpx
+
+from core.sources.base import EvidenceSnippet, SourceAdapter
+
+_PR_URL_RE = re.compile(
+    r"^https?://github\.com/(?P<owner>[^/]+)/(?P<repo>[^/]+)/pull/(?P<number>\d+)(?:[/?#].*)?$",
+    re.IGNORECASE,
+)
+
+_HUNK_HEADER_RE = re.compile(r"^@@ -\d+(?:,\d+)? \+(?P<new_start>\d+)(?:,\d+)? @@")
+_DIFF_FILE_HEADER_RE = re.compile(r"^diff --git a/(?P<old>.+) b/(?P<new>.+)$")
+
+_MAX_DIFF_BYTES = 1_000_000  # GitHub already caps; this is a defence-in-depth bound.
+
+
+@dataclass
+class DiffHunk:
+    """A single hunk lifted from a unified diff.
+
+    ``new_start`` is the 1-based line number the hunk applies to in the
+    new file — useful for citation handles like ``foo.py:42``.
+    """
+
+    path: str
+    new_start: int
+    text: str
+
+
+@dataclass
+class PrSpec:
+    """The minimal projection of a PR that the auditor needs.
+
+    Network-free: construct directly in tests; ``fetch_pr_from_github``
+    populates it in production.
+    """
+
+    owner: str
+    repo: str
+    number: int
+    title: str
+    body: str
+    base_sha: str = ""
+    head_sha: str = ""
+    diff: str = ""
+    changed_files: list[str] = field(default_factory=list)
+    url: str = ""
+
+    @property
+    def description(self) -> str:
+        """Title + body joined for claim extraction."""
+        if not self.body:
+            return self.title
+        return f"{self.title}\n\n{self.body}"
+
+
+def parse_pr_url(url: str) -> tuple[str, str, int]:
+    """Parse a GitHub PR URL into (owner, repo, number).
+
+    Raises ``ValueError`` on anything that isn't a PR URL. We keep this
+    strict so a typo (e.g., ``/issues/`` instead of ``/pull/``) fails
+    loud rather than hitting a 404 mid-pipeline.
+    """
+    match = _PR_URL_RE.match(url.strip())
+    if not match:
+        raise ValueError(f"Not a GitHub PR URL: {url!r}")
+    return match["owner"], match["repo"], int(match["number"])
+
+
+class GitHubAdapter(SourceAdapter):
+    """Searches the unified diff of a PR for evidence of claims.
+
+    The "repo" the adapter sees is the *diff*, not the working tree.
+    Claims phrased as "adds X" need to match additions; "removes Y"
+    needs to match deletions. The checker prompt is responsible for
+    that semantic interpretation — the adapter's job is just to
+    surface relevant hunk excerpts.
+    """
+
+    name = "github_pr"
+
+    def __init__(self, pr: PrSpec) -> None:
+        self.pr = pr
+        self._hunks: list[DiffHunk] = list(_iter_hunks(pr.diff))
+
+    def search(self, query: str, limit: int = 5) -> list[EvidenceSnippet]:
+        query = (query or "").strip()
+        if not query:
+            return []
+        pattern = re.compile(re.escape(query), re.IGNORECASE)
+        return self._search_pattern(pattern, limit)
+
+    def search_any(self, terms: Iterable[str], limit: int = 5) -> list[EvidenceSnippet]:
+        cleaned = [re.escape(t.strip()) for t in terms if t and t.strip()]
+        if not cleaned:
+            return []
+        pattern = re.compile("|".join(cleaned), re.IGNORECASE)
+        return self._search_pattern(pattern, limit)
+
+    def _search_pattern(self, pattern: re.Pattern[str], limit: int) -> list[EvidenceSnippet]:
+        hits: list[EvidenceSnippet] = []
+        for hunk in self._hunks:
+            if len(hits) >= limit:
+                break
+            # Drop the leading +/-/space marker before matching so a
+            # hint that happens to be ``+`` (rare) does not spuriously
+            # match every added line.
+            for offset, line in enumerate(hunk.text.splitlines()):
+                payload = line[1:] if line[:1] in "+- " else line
+                if pattern.search(payload):
+                    hits.append(
+                        EvidenceSnippet(
+                            path=hunk.path,
+                            line_start=hunk.new_start + offset,
+                            line_end=hunk.new_start + offset,
+                            text=hunk.text,
+                            source=self.name,
+                        )
+                    )
+                    # One hit per hunk: the whole hunk is already in
+                    # ``text``, so multiple line matches in the same
+                    # hunk would just duplicate the snippet.
+                    break
+        return hits
+
+    def fetch(self, path: str, line_start: int = 0, line_end: int = 0) -> EvidenceSnippet:
+        for hunk in self._hunks:
+            if hunk.path != path:
+                continue
+            if line_start and not (
+                hunk.new_start <= line_start <= hunk.new_start + len(hunk.text.splitlines())
+            ):
+                continue
+            return EvidenceSnippet(
+                path=hunk.path,
+                line_start=hunk.new_start,
+                line_end=hunk.new_start + len(hunk.text.splitlines()) - 1,
+                text=hunk.text,
+                source=self.name,
+            )
+        return EvidenceSnippet(path=path, line_start=0, line_end=0, text="", source=self.name)
+
+
+def _iter_hunks(diff_text: str) -> Iterable[DiffHunk]:
+    """Yield ``DiffHunk`` from a unified diff.
+
+    The parser is deliberately small and accepts the GitHub-flavoured
+    output (``diff --git`` headers, ``@@`` hunk headers, ``+``/``-``
+    prefixed lines). It tolerates unfamiliar lines by skipping them —
+    binary-file markers, mode change lines, etc.
+    """
+    current_path = ""
+    current_hunk: list[str] | None = None
+    current_new_start = 0
+    for raw_line in diff_text.splitlines():
+        file_match = _DIFF_FILE_HEADER_RE.match(raw_line)
+        if file_match:
+            if current_hunk is not None and current_path:
+                yield DiffHunk(
+                    path=current_path,
+                    new_start=current_new_start,
+                    text="\n".join(current_hunk),
+                )
+            current_path = file_match["new"]
+            current_hunk = None
+            continue
+        hunk_match = _HUNK_HEADER_RE.match(raw_line)
+        if hunk_match:
+            if current_hunk is not None and current_path:
+                yield DiffHunk(
+                    path=current_path,
+                    new_start=current_new_start,
+                    text="\n".join(current_hunk),
+                )
+            current_new_start = int(hunk_match["new_start"])
+            current_hunk = []
+            continue
+        if current_hunk is not None and raw_line[:1] in {"+", "-", " "}:
+            current_hunk.append(raw_line)
+    if current_hunk is not None and current_path:
+        yield DiffHunk(
+            path=current_path,
+            new_start=current_new_start,
+            text="\n".join(current_hunk),
+        )
+
+
+def fetch_pr_from_github(
+    url: str,
+    token: str | None = None,
+    client: httpx.Client | None = None,
+    timeout: float = 15.0,
+) -> PrSpec:
+    """Pull PR metadata + unified diff from the GitHub REST API.
+
+    Token resolution order: explicit ``token`` arg → ``GITHUB_TOKEN``
+    env → ``GH_TOKEN`` env. Anonymous requests work for public repos
+    but bump into the 60/hour rate limit fast, so we always surface
+    the token miss in the logs rather than hiding it.
+    """
+    owner, repo, number = parse_pr_url(url)
+    token = token or os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN") or ""
+    headers = {
+        "Accept": "application/vnd.github+json",
+        "X-GitHub-Api-Version": "2022-11-28",
+        "User-Agent": "openworkers-pr-auditor",
+    }
+    if token:
+        headers["Authorization"] = f"Bearer {token}"
+
+    owns_client = client is None
+    http = client or httpx.Client(timeout=timeout, headers=headers)
+    try:
+        api_root = f"https://api.github.com/repos/{owner}/{repo}/pulls/{number}"
+        pr_resp = http.get(api_root, headers=headers if owns_client else None)
+        pr_resp.raise_for_status()
+        pr_data = pr_resp.json()
+
+        diff_resp = http.get(
+            api_root,
+            headers={
+                **(headers if owns_client else {}),
+                "Accept": "application/vnd.github.v3.diff",
+            },
+        )
+        diff_resp.raise_for_status()
+        diff_text = diff_resp.text[:_MAX_DIFF_BYTES]
+
+        files_resp = http.get(
+            f"{api_root}/files",
+            headers=headers if owns_client else None,
+            params={"per_page": 100},
+        )
+        files_resp.raise_for_status()
+        files_payload = files_resp.json()
+    finally:
+        if owns_client:
+            http.close()
+
+    changed_files = [f.get("filename", "") for f in files_payload if f.get("filename")]
+    return PrSpec(
+        owner=owner,
+        repo=repo,
+        number=number,
+        title=str(pr_data.get("title", "")),
+        body=str(pr_data.get("body") or ""),
+        base_sha=str(pr_data.get("base", {}).get("sha", "")),
+        head_sha=str(pr_data.get("head", {}).get("sha", "")),
+        diff=diff_text,
+        changed_files=changed_files,
+        url=url,
+    )
+
+
+def load_pr_fixture(directory: str) -> PrSpec:
+    """Build a ``PrSpec`` from a canned fixture directory.
+
+    Layout:
+        <directory>/pr.json   # subset of the GitHub PR API response
+        <directory>/diff.patch # unified diff
+
+    Useful for tests and offline demos; mirrors the shape returned by
+    ``fetch_pr_from_github`` without the network.
+    """
+    base = directory.rstrip("/")
+    with open(f"{base}/pr.json", encoding="utf-8") as f:
+        pr_data = json.load(f)
+    try:
+        with open(f"{base}/diff.patch", encoding="utf-8") as f:
+            diff_text = f.read()
+    except FileNotFoundError:
+        diff_text = ""
+    return PrSpec(
+        owner=str(pr_data.get("owner", "fixture")),
+        repo=str(pr_data.get("repo", "fixture")),
+        number=int(pr_data.get("number", 0)),
+        title=str(pr_data.get("title", "")),
+        body=str(pr_data.get("body", "")),
+        base_sha=str(pr_data.get("base_sha", "")),
+        head_sha=str(pr_data.get("head_sha", "")),
+        diff=diff_text,
+        changed_files=list(pr_data.get("changed_files", [])),
+        url=str(pr_data.get("url", "")),
+    )
+
+
+__all__ = [
+    "DiffHunk",
+    "GitHubAdapter",
+    "PrSpec",
+    "fetch_pr_from_github",
+    "load_pr_fixture",
+    "parse_pr_url",
+]
diff --git a/prompts/code_audit/pr_checker.md b/prompts/code_audit/pr_checker.md
new file mode 100644
index 0000000..a7dab7e
--- /dev/null
+++ b/prompts/code_audit/pr_checker.md
@@ -0,0 +1,53 @@
+# SPECIALIST: PR CHECKER
+
+You are the PR CHECKER agent for the openworkers code-audit pipeline.
+
+## Your Role
+For each PR claim, decide whether the retrieved **diff evidence** supports, drifts from, contradicts, or fails to support it. You are the trust gate: if there is no evidence, the verdict is `unsupported` — never `verified`.
+
+## Input
+The user message contains:
+- `CLAIMS`: JSON list of `{claim_id, claim_text, claim_type, search_hints}`.
+- `DIFF EVIDENCE`: JSON list of `{claim_id, snippets: [{path, line_start, line_end, text, source}]}`.
+
+Each snippet is a diff hunk from the PR. Lines beginning with `+` are added, `-` removed, ` ` unchanged context.
+
+Each claim has exactly one evidence entry (possibly with an empty `snippets` list).
+
+## Output: ClaimVerdictList (JSON)
+Return one JSON object with this exact schema. No prose, no markdown fences.
+
+```json
+{
+  "verdicts": [
+    {
+      "claim_id": "claim-01",
+      "claim_text": "<verbatim from input>",
+      "verdict": "{{ verdict_values }}",
+      "confidence": 0.0,
+      "evidence_paths": ["<path:line_start-line_end>"],
+      "notes": "<one sentence why>"
+    }
+  ]
+}
+```
+
+## Verdict Rules (diff-aware)
+- **`verified`**: the diff shows exactly what the claim says.
+  - `claim_type=add` → snippets contain `+`-prefixed lines introducing the named thing.
+  - `claim_type=remove` → snippets contain `-`-prefixed lines removing it.
+  - `claim_type=refactor` → snippets contain both `+` and `-` lines consistent with the rename / signature change.
+  - `claim_type=test` → snippets are in test files.
+  - `claim_type=fix` → snippets touch the area implicated by the bug.
+- **`drifted`**: the diff is related but **does not match the claim's specifics**. README says "adds `--port` flag" but diff adds `--bind`. Note must state PR-text vs. diff divergence.
+- **`contradicted`**: the diff directly disproves the claim. PR says "no telemetry added" but diff adds a telemetry emitter; PR says "removes X" but X is untouched.
+- **`unsupported`**: snippet list empty, or snippets are irrelevant boilerplate (whitespace, lockfile noise, unrelated files). **You must emit `unsupported` if the snippet list is empty.** Confidence 0.0.
+
+## Trustworthiness Gate
+You **never** fabricate evidence. You **never** mark a claim `verified` without at least one snippet that materially supports it. If unsure, prefer `unsupported` over `verified`.
+
+## Format
+- One verdict per input claim, same `claim_id`.
+- `confidence` in [0.0, 1.0].
+- `evidence_paths` lists `path:line_start-line_end` strings drawn only from the provided snippets.
+- No commentary outside the JSON object.
diff --git a/prompts/code_audit/pr_planner.md b/prompts/code_audit/pr_planner.md
new file mode 100644
index 0000000..6415e32
--- /dev/null
+++ b/prompts/code_audit/pr_planner.md
@@ -0,0 +1,52 @@
+# SPECIALIST: PR PLANNER
+
+You are the PR PLANNER agent for the openworkers code-audit pipeline.
+
+## Your Role
+Read a pull request title + body and extract every **atomic factual claim** the author makes about the diff. Each claim must be independently verifiable against the actual changes. Quote verbatim — do not paraphrase.
+
+## Input
+The user message contains the PR's URL, the list of changed files, and the title + body between `---BEGIN PR DESCRIPTION---` / `---END PR DESCRIPTION---`.
+
+PR URL: `{{ pr_url }}`
+Files changed: `{{ files_changed }}`
+
+## Output: ReadmeClaimList (JSON)
+Return one JSON object with this exact schema. No prose, no markdown fences.
+
+```json
+{
+  "readme_path": "{{ pr_url }}",
+  "claims": [
+    {
+      "claim_id": "claim-01",
+      "claim_text": "<verbatim quote from the PR title or body>",
+      "claim_type": "add | remove | fix | refactor | test | behavior | doc | other",
+      "search_hints": ["<identifier>", "<filename>", "<function name>"]
+    }
+  ]
+}
+```
+
+## What counts as a claim
+- **add**: "Adds X", "introduces Y", "new module Z"
+- **remove**: "Removes X", "deletes Y", "drops support for Z"
+- **fix**: "Fixes bug in X", "resolves Y", "fixes #123"
+- **refactor**: "Refactors X to use Y", "renames Z to W", "extracts X"
+- **test**: "Adds tests for X", "covers Y in tests"
+- **behavior**: "Default changes from A to B", "X now returns Y"
+- **doc**: "Documents X", "updates README for Y"
+
+## Rules
+- **Atomic**: split compound sentences. "Adds X and removes Y" → two claims.
+- **Verbatim**: `claim_text` must quote the PR text directly (you may trim leading bullets / numbering).
+- **Skip**: marketing prose, motivation paragraphs ("we should care because…"), thank-yous, screenshots, reviewer call-outs.
+- **Include**: every concrete add/remove/refactor/fix/test/behavior/doc statement that the diff is expected to demonstrate.
+- **Hints**: 2–6 grep-friendly tokens — module names, function names, file paths, flag names, env var names. Skip generic English words.
+- **Stable IDs**: sequential `claim-01`, `claim-02`, … so downstream agents can reference them.
+- If the PR description contains zero verifiable claims (e.g., it's empty or pure motivation), return `"claims": []`.
+
+## Forbidden
+- Inventing claims not present in the description.
+- Producing prose or summary outside the JSON.
+- Wrapping JSON in markdown code fences.
diff --git a/providers/code_audit_agents.py b/providers/code_audit_agents.py
index f84f801..f537106 100644
--- a/providers/code_audit_agents.py
+++ b/providers/code_audit_agents.py
@@ -38,6 +38,8 @@
 
 __all__ = [
     "AuditCriticAgent",
+    "PrCheckerAgent",
+    "PrPlannerAgent",
     "ReadmeCheckerAgent",
     "ReadmePlannerAgent",
     "_schema_for",
@@ -377,3 +379,120 @@ async def execute(
 
 def new_claim_id() -> str:
     return f"claim-{uuid.uuid4().hex[:8]}"
+
+
+# ──────────────────────────────────────────────────────────────────────
+# PR-audit agents
+#
+# Parallel to the README ones rather than a shared base class: per
+# AGENTS.md, two examples is not yet a pattern. When the third auditor
+# (compliance) lands, ``ClaimPlannerAgent`` / ``ClaimCheckerAgent`` can
+# absorb both — but premature extraction here would commit us to an
+# interface we'd inevitably regret.
+# ──────────────────────────────────────────────────────────────────────
+
+
+class PrPlannerAgent:
+    """Extracts atomic factual claims from a PR title + body."""
+
+    def __init__(self, unified: UnifiedLLM, prompt_renderer: Callable[[str, dict[str, Any]], str]):
+        self.unified = unified
+        self.render = prompt_renderer
+
+    async def execute(
+        self,
+        pr_description: str,
+        pr_url: str = "",
+        changed_files: list[str] | None = None,
+    ) -> dict[str, Any]:
+        changed_files = changed_files or []
+        system_prompt = self.render(
+            "pr_planner",
+            {"pr_url": pr_url, "files_changed": ", ".join(changed_files[:25])},
+        )
+        user_prompt = (
+            "Extract every atomic factual claim from the PR description below. "
+            "Return a JSON object matching the ReadmeClaimList schema (same shape, "
+            "claim_type drawn from add | remove | fix | refactor | test | behavior | "
+            "doc | other).\n\n"
+            f"PR URL: {pr_url}\n"
+            f"Changed files ({len(changed_files)}): "
+            f"{', '.join(changed_files[:25]) if changed_files else '(none)'}\n\n"
+            "---BEGIN PR DESCRIPTION---\n"
+            f"{pr_description}\n"
+            "---END PR DESCRIPTION---"
+        )
+        response = await self.unified.generate(
+            prompt=user_prompt,
+            system_prompt=system_prompt,
+            mode="quality",
+            response_schema=_schema_for(ReadmeClaimList),
+        )
+        parsed = _parse_structured(response.content, ReadmeClaimList)
+        parsed = _normalise_claim_list(parsed, pr_url)
+        return {
+            "agent": "pr_planner",
+            "tier": "head",
+            "status": "success",
+            "output": parsed,
+            "provider": response.provider_used,
+            "model": response.model,
+            "latency_ms": response.latency_ms,
+            "cost_estimate_usd": response.cost_estimate_usd,
+            "dry_run": response.dry_run,
+            "fallback_used": response.fallback_used,
+        }
+
+
+class PrCheckerAgent:
+    """Renders verdicts for PR claims against diff evidence.
+
+    Identical trust-gate semantics as ``ReadmeCheckerAgent``: any claim
+    with no retrieved hunk evidence is forced to ``unsupported`` after
+    the LLM responds.
+    """
+
+    def __init__(self, unified: UnifiedLLM, prompt_renderer: Callable[[str, dict[str, Any]], str]):
+        self.unified = unified
+        self.render = prompt_renderer
+
+    async def execute(
+        self,
+        claims: list[ReadmeClaim],
+        evidence: list[ClaimEvidence],
+    ) -> dict[str, Any]:
+        evidence_by_claim = {e.claim_id: e for e in evidence}
+        evidence_payload = [e.model_dump() for e in evidence]
+        claims_payload = [c.model_dump() for c in claims]
+
+        system_prompt = self.render(
+            "pr_checker",
+            {"verdict_values": " | ".join(ALL_VERDICTS)},
+        )
+        user_prompt = (
+            "Judge each PR claim against its retrieved diff evidence and return "
+            "a ClaimVerdictList JSON object.\n\n"
+            f"CLAIMS:\n{json.dumps(claims_payload, indent=2)}\n\n"
+            f"DIFF EVIDENCE:\n{json.dumps(evidence_payload, indent=2)}"
+        )
+        response = await self.unified.generate(
+            prompt=user_prompt,
+            system_prompt=system_prompt,
+            mode="balanced",
+            response_schema=_schema_for(ClaimVerdictList),
+        )
+        parsed = _parse_structured(response.content, ClaimVerdictList)
+        verdicts = _enforce_trust_gate(parsed.verdicts, claims, evidence_by_claim)
+        parsed.verdicts = verdicts
+        return {
+            "agent": "pr_checker",
+            "tier": "middle",
+            "status": "success",
+            "output": parsed,
+            "provider": response.provider_used,
+            "model": response.model,
+            "latency_ms": response.latency_ms,
+            "cost_estimate_usd": response.cost_estimate_usd,
+            "dry_run": response.dry_run,
+            "fallback_used": response.fallback_used,
+        }
diff --git a/tests/code_audit/test_pr_flow.py b/tests/code_audit/test_pr_flow.py
new file mode 100644
index 0000000..5869e1e
--- /dev/null
+++ b/tests/code_audit/test_pr_flow.py
@@ -0,0 +1,279 @@
+"""End-to-end test for the PR auditor.
+
+Same stub-LLM pattern as ``test_readme_flow.py``: we drive the full
+flow (planner → diff-grep → checker + trust gate → critic) against a
+canned PR fixture so we can assert verdict distribution deterministically.
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any
+
+import pytest
+
+from core.orchestrator.pr_flow import PrAuditOrchestrator
+from core.schemas_audit import (
+    VERDICT_CONTRADICTED,
+    VERDICT_DRIFTED,
+    VERDICT_UNSUPPORTED,
+    VERDICT_VERIFIED,
+)
+from core.sources.github import (
+    GitHubAdapter,
+    PrSpec,
+    load_pr_fixture,
+    parse_pr_url,
+)
+from providers.unified import UnifiedLLM
+
+FIXTURE_PR = Path(__file__).resolve().parent.parent / "fixtures" / "sample_pr"
+
+
+_PLANNER_CLAIMS = {
+    "readme_path": "https://github.com/example/widgetlib/pull/42",
+    "claims": [
+        {
+            "claim_id": "claim-01",
+            "claim_text": "adds a `widgetctl --port 9000` CLI for launching the dashboard",
+            "claim_type": "add",
+            "search_hints": ["widgetctl", "--port", "9000", "argparse"],
+        },
+        {
+            "claim_id": "claim-02",
+            "claim_text": "adds a new `Widget.render_html` method",
+            "claim_type": "add",
+            "search_hints": ["render_html", "Widget"],
+        },
+        {
+            "claim_id": "claim-03",
+            "claim_text": "adds tests covering the new render path",
+            "claim_type": "test",
+            "search_hints": ["test_render_html", "render_html"],
+        },
+        {
+            "claim_id": "claim-04",
+            "claim_text": "removes the deprecated `legacy_render` helper",
+            "claim_type": "remove",
+            "search_hints": ["legacy_render"],
+        },
+        {
+            "claim_id": "claim-05",
+            "claim_text": "introduces no telemetry",
+            "claim_type": "behavior",
+            "search_hints": ["telemetry", "emit_telemetry"],
+        },
+        {
+            "claim_id": "claim-06",
+            "claim_text": "Adds WIDGETLIB_DEBUG=1 flag for verbose logging",
+            "claim_type": "add",
+            "search_hints": ["WIDGETLIB_DEBUG"],
+        },
+        {
+            "claim_id": "claim-07",
+            "claim_text": "Performance: render is now 3x faster on the bench",
+            "claim_type": "behavior",
+            "search_hints": ["bench", "benchmark", "perf"],
+        },
+    ],
+}
+
+
+_CHECKER_VERDICTS = {
+    "verdicts": [
+        {
+            "claim_id": "claim-01",
+            "claim_text": _PLANNER_CLAIMS["claims"][0]["claim_text"],
+            "verdict": VERDICT_DRIFTED,
+            "confidence": 0.8,
+            "evidence_paths": ["widgetlib/cli.py"],
+            "notes": "PR claims --port 9000; diff adds --bind defaulting to 127.0.0.1:8000.",
+        },
+        {
+            "claim_id": "claim-02",
+            "claim_text": _PLANNER_CLAIMS["claims"][1]["claim_text"],
+            "verdict": VERDICT_VERIFIED,
+            "confidence": 0.95,
+            "evidence_paths": ["widgetlib/widget.py"],
+            "notes": "render_html method added on Widget.",
+        },
+        {
+            "claim_id": "claim-03",
+            "claim_text": _PLANNER_CLAIMS["claims"][2]["claim_text"],
+            "verdict": VERDICT_VERIFIED,
+            "confidence": 0.9,
+            "evidence_paths": ["tests/test_render.py"],
+            "notes": "Test file added asserting render_html output.",
+        },
+        {
+            "claim_id": "claim-04",
+            "claim_text": _PLANNER_CLAIMS["claims"][3]["claim_text"],
+            "verdict": VERDICT_DRIFTED,
+            "confidence": 0.7,
+            "evidence_paths": ["widgetlib/cli.py"],
+            "notes": "Diff touches cli.py near _legacy() but the helper is not removed.",
+        },
+        {
+            "claim_id": "claim-05",
+            "claim_text": _PLANNER_CLAIMS["claims"][4]["claim_text"],
+            "verdict": VERDICT_CONTRADICTED,
+            "confidence": 0.95,
+            "evidence_paths": ["widgetlib/telemetry.py"],
+            "notes": "Diff adds a telemetry emitter that posts to telemetry.example.com.",
+        },
+        # claim-06: LLM hallucinates verified — trust gate must override
+        {
+            "claim_id": "claim-06",
+            "claim_text": _PLANNER_CLAIMS["claims"][5]["claim_text"],
+            "verdict": VERDICT_VERIFIED,
+            "confidence": 0.85,
+            "evidence_paths": ["widgetlib/cli.py"],
+            "notes": "Hallucinated by checker — trust gate must overwrite.",
+        },
+        {
+            "claim_id": "claim-07",
+            "claim_text": _PLANNER_CLAIMS["claims"][6]["claim_text"],
+            "verdict": VERDICT_UNSUPPORTED,
+            "confidence": 0.0,
+            "evidence_paths": [],
+            "notes": "No benchmark or measurement in the diff.",
+        },
+    ]
+}
+
+
+_CRITIC_RESPONSE = {
+    "weak_verdicts": [],
+    "missed_claims": [],
+    "suggestions": ["Require benchmarks for performance claims in CI."],
+    "overall_assessment": "Caught one drift in CLI flag, one contradicted no-telemetry, and two unsupported claims.",
+}
+
+
+def _make_stub_unified() -> UnifiedLLM:
+    llm = UnifiedLLM()
+    llm.dry_run = False
+    llm.set_available_providers(["anthropic"])
+
+    async def fake_generate(
+        provider: str,
+        model: str,
+        prompt: str,
+        system_prompt: str,
+        response_schema: Any,
+    ) -> str:
+        if "PR PLANNER" in system_prompt:
+            return json.dumps(_PLANNER_CLAIMS)
+        if "PR CHECKER" in system_prompt:
+            return json.dumps(_CHECKER_VERDICTS)
+        if "AUDIT CRITIC" in system_prompt:
+            return json.dumps(_CRITIC_RESPONSE)
+        return "{}"
+
+    llm.set_generate_fn(fake_generate)
+    return llm
+
+
+@pytest.fixture
+def stubbed_unified(monkeypatch) -> UnifiedLLM:
+    monkeypatch.setenv("DRY_RUN", "false")
+    for tier in ("QUALITY", "BALANCED", "CHEAP"):
+        monkeypatch.setenv(f"THESIS_{tier}_PROVIDER", "anthropic")
+        monkeypatch.setenv(f"THESIS_{tier}_MODEL", "claude-sonnet-4-20250514")
+    return _make_stub_unified()
+
+
+def test_parse_pr_url_happy_path():
+    owner, repo, number = parse_pr_url("https://github.com/example/widgetlib/pull/42")
+    assert (owner, repo, number) == ("example", "widgetlib", 42)
+
+
+def test_parse_pr_url_rejects_non_pr():
+    with pytest.raises(ValueError):
+        parse_pr_url("https://github.com/example/widgetlib/issues/42")
+
+
+def test_load_pr_fixture_round_trips():
+    pr = load_pr_fixture(str(FIXTURE_PR))
+    assert pr.number == 42
+    assert pr.title.startswith("feat:")
+    assert "widgetctl" in pr.body
+    assert pr.diff, "fixture diff should be non-empty"
+    assert "telemetry.py" in pr.diff
+
+
+def test_github_adapter_finds_hits_in_diff_only():
+    pr = load_pr_fixture(str(FIXTURE_PR))
+    adapter = GitHubAdapter(pr)
+    # Real addition in the diff
+    hits = adapter.search_any(["render_html"], limit=10)
+    assert any(h.path == "widgetlib/widget.py" for h in hits)
+    # Hallucinated env var: nowhere in the diff
+    assert adapter.search_any(["WIDGETLIB_DEBUG"], limit=10) == []
+    # Telemetry contradicts the "no telemetry" claim — should surface
+    telemetry_hits = adapter.search_any(["telemetry", "emit_telemetry"], limit=10)
+    assert any(h.path == "widgetlib/telemetry.py" for h in telemetry_hits)
+
+
+@pytest.mark.asyncio
+async def test_pr_audit_end_to_end(stubbed_unified):
+    pr = load_pr_fixture(str(FIXTURE_PR))
+    orch = PrAuditOrchestrator(unified=stubbed_unified)
+    report, critique = await orch.audit(pr)
+
+    by_id = {v.claim_id: v for v in report.verdicts}
+    assert len(report.verdicts) == 7
+
+    # Verified claims have evidence in the diff
+    assert by_id["claim-02"].verdict == VERDICT_VERIFIED
+    assert by_id["claim-02"].evidence_paths
+    assert by_id["claim-03"].verdict == VERDICT_VERIFIED
+
+    # Drifted: PR says --port, diff adds --bind
+    assert by_id["claim-01"].verdict == VERDICT_DRIFTED
+
+    # Contradicted: PR says "no telemetry" but diff adds it
+    assert by_id["claim-05"].verdict == VERDICT_CONTRADICTED
+
+    # Trust gate: hallucinated WIDGETLIB_DEBUG verdict overwritten
+    assert by_id["claim-06"].verdict == VERDICT_UNSUPPORTED, (
+        "Trust gate failed: stub hallucinated 'verified' for WIDGETLIB_DEBUG; "
+        "trust gate should have overridden because the diff has zero hits."
+    )
+    assert by_id["claim-06"].evidence_paths == []
+    assert by_id["claim-06"].confidence == 0.0
+
+    # Unsupported: performance claim with no benchmark evidence
+    assert by_id["claim-07"].verdict == VERDICT_UNSUPPORTED
+
+    # Summary tallies
+    assert sum(report.summary.values()) == len(report.verdicts)
+    assert report.summary[VERDICT_VERIFIED] >= 2
+    assert report.summary[VERDICT_UNSUPPORTED] >= 2
+
+    # Target field carries the PR URL
+    assert "pull/42" in report.target
+
+    # Critic ran
+    assert critique.suggestions
+
+
+@pytest.mark.asyncio
+async def test_pr_audit_empty_description(stubbed_unified):
+    """An empty PR description must produce a structured no-op report."""
+    pr = PrSpec(
+        owner="example",
+        repo="empty",
+        number=1,
+        title="",
+        body="",
+        diff="",
+        changed_files=[],
+        url="https://github.com/example/empty/pull/1",
+    )
+    orch = PrAuditOrchestrator(unified=stubbed_unified)
+    report, critique = await orch.audit(pr)
+    assert report.verdicts == []
+    assert report.errors and "empty" in report.errors[0].lower()
+    assert critique.weak_verdicts == []
diff --git a/tests/fixtures/sample_pr/diff.patch b/tests/fixtures/sample_pr/diff.patch
new file mode 100644
index 0000000..e4b67e7
--- /dev/null
+++ b/tests/fixtures/sample_pr/diff.patch
@@ -0,0 +1,43 @@
+diff --git a/widgetlib/cli.py b/widgetlib/cli.py
+@@ -1,5 +1,12 @@
+ import argparse
+
++def main():
++    parser = argparse.ArgumentParser(prog="widgetctl")
++    parser.add_argument("--bind", default="127.0.0.1:8000")
++    args = parser.parse_args()
++    print(f"Starting widgetctl on {args.bind}")
++
++
+ def _legacy():
+     return None
+diff --git a/widgetlib/widget.py b/widgetlib/widget.py
+@@ -1,7 +1,12 @@
+ class Widget:
+     def __init__(self, payload):
+         self.payload = payload
+
+     def render(self):
+         return f"<div>{self.payload}</div>"
++
++    def render_html(self):
++        return f"<section>{self.payload}</section>"
+diff --git a/widgetlib/telemetry.py b/widgetlib/telemetry.py
+@@ -0,0 +1,6 @@
++import urllib.request
++
++
++def emit_telemetry(event):
++    urllib.request.urlopen("https://telemetry.example.com", data=event.encode())
++
+diff --git a/tests/test_render.py b/tests/test_render.py
+@@ -0,0 +1,9 @@
++from widgetlib import Widget
++
++
++def test_render_html_returns_section_tag():
++    w = Widget("hi")
++    assert "<section>" in w.render_html()
++
++
++
diff --git a/tests/fixtures/sample_pr/pr.json b/tests/fixtures/sample_pr/pr.json
new file mode 100644
index 0000000..d832a61
--- /dev/null
+++ b/tests/fixtures/sample_pr/pr.json
@@ -0,0 +1,16 @@
+{
+  "owner": "example",
+  "repo": "widgetlib",
+  "number": 42,
+  "title": "feat: dashboard CLI + render performance",
+  "body": "## Summary\n\nThis PR adds a `widgetctl --port 9000` CLI for launching the dashboard. It also adds a new `Widget.render_html` method and adds tests covering the new render path. The PR removes the deprecated `legacy_render` helper. Importantly, this PR introduces **no telemetry**.\n\n## Notes\n\n- Performance: render is now 3x faster on the bench.\n- Adds WIDGETLIB_DEBUG=1 flag for verbose logging.",
+  "base_sha": "aaaaaaa",
+  "head_sha": "bbbbbbb",
+  "changed_files": [
+    "widgetlib/cli.py",
+    "widgetlib/widget.py",
+    "widgetlib/telemetry.py",
+    "tests/test_render.py"
+  ],
+  "url": "https://github.com/example/widgetlib/pull/42"
+}