diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a4c7dedf..c7b936da 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -8,9 +8,9 @@ name: CI — Quality Gates
 
 on:
   push:
-    branches: [main, develop]
+    branches: [main, develop, "phalanx/ci-fix/**"]
   pull_request:
-    branches: [main, develop]
+    branches: [main, develop, ci-fixer-e2e-test]
   workflow_dispatch:
 
 # Cancel in-progress runs on new push to same branch
diff --git a/alembic/versions/20260415_0001_ci_fix_context.py b/alembic/versions/20260415_0001_ci_fix_context.py
new file mode 100644
index 00000000..c8664909
--- /dev/null
+++ b/alembic/versions/20260415_0001_ci_fix_context.py
@@ -0,0 +1,34 @@
+"""ci_fix_run: add pipeline_context_json for multi-agent shared state
+
+Revision ID: 20260415_0001
+Revises: 20260412_0005
+Create Date: 2026-04-15
+"""
+
+from __future__ import annotations
+
+import sqlalchemy as sa
+from alembic import op
+
+revision = "20260415_0001"
+down_revision = "20260412_0005"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # Add pipeline_context_json — stores the full CIFixContext as a JSON blob.
+    # NULL for runs created before this migration; populated by new pipeline runs.
+    op.add_column(
+        "ci_fix_runs",
+        sa.Column(
+            "pipeline_context_json",
+            sa.Text(),
+            nullable=True,
+            comment="CIFixContext serialized as JSON — full multi-agent pipeline state",
+        ),
+    )
+
+
+def downgrade() -> None:
+    op.drop_column("ci_fix_runs", "pipeline_context_json")
diff --git a/docker/sandbox/go/Dockerfile b/docker/sandbox/go/Dockerfile
new file mode 100644
index 00000000..b93a6565
--- /dev/null
+++ b/docker/sandbox/go/Dockerfile
@@ -0,0 +1,16 @@
+FROM golang:1.22-alpine
+
+# Install staticcheck and golangci-lint for broader Go lint coverage
+RUN go install honnef.co/go/tools/cmd/staticcheck@2024.1.0 && \
+    go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.59.1
+
+# Create non-root user
+RUN adduser -D -u 1000 phalanx
+
+COPY ../reset.sh /phalanx/reset.sh
+RUN chmod +x /phalanx/reset.sh
+
+RUN mkdir -p /workspace && chown phalanx:phalanx /workspace
+
+WORKDIR /workspace
+USER phalanx
diff --git a/docker/sandbox/node/Dockerfile b/docker/sandbox/node/Dockerfile
new file mode 100644
index 00000000..07a2e6ed
--- /dev/null
+++ b/docker/sandbox/node/Dockerfile
@@ -0,0 +1,19 @@
+FROM node:20-slim
+
+# Create non-root user
+RUN useradd -m -u 1000 -s /bin/bash phalanx 2>/dev/null || true
+
+# Install common Node tooling used by the CI fixer.
+RUN npm install -g \
+    eslint@8.57.0 \
+    typescript@5.4.5 \
+    jest@29.7.0 \
+    --no-fund --no-audit
+
+COPY ../reset.sh /phalanx/reset.sh
+RUN chmod +x /phalanx/reset.sh
+
+RUN mkdir -p /workspace && chown phalanx:phalanx /workspace
+
+WORKDIR /workspace
+USER phalanx
diff --git a/docker/sandbox/python/Dockerfile b/docker/sandbox/python/Dockerfile
new file mode 100644
index 00000000..02a1dce9
--- /dev/null
+++ b/docker/sandbox/python/Dockerfile
@@ -0,0 +1,24 @@
+FROM python:3.12-slim
+
+# Create non-root user
+RUN useradd -m -u 1000 -s /bin/bash phalanx
+
+# Install pinned tool versions used by the CI fixer.
+# Versions are chosen to match the most common customer constraints.
+# When VersionParityAgent detects a mismatch, it installs the customer's
+# pinned version inside the running container before executing.
+RUN pip install --no-cache-dir \
+    ruff==0.4.4 \
+    mypy==1.10.0 \
+    pytest==8.2.0 \
+    pytest-asyncio==0.23.7
+
+# Copy reset script
+COPY ../reset.sh /phalanx/reset.sh
+RUN chmod +x /phalanx/reset.sh
+
+# Workspace dir — populated via docker cp by SandboxProvisioner
+RUN mkdir -p /workspace && chown phalanx:phalanx /workspace
+
+WORKDIR /workspace
+USER phalanx
diff --git a/docker/sandbox/reset.sh b/docker/sandbox/reset.sh
new file mode 100644
index 00000000..6a7f2ab7
--- /dev/null
+++ b/docker/sandbox/reset.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+# Shared reset script — clears workspace and caches between fix runs.
+# Runs inside the container via: docker exec {id} sh /phalanx/reset.sh
+set -e
+rm -rf /workspace/* 2>/dev/null || true
+rm -rf /tmp/pip-* /tmp/npm-* /tmp/.cache /root/.cache 2>/dev/null || true
+echo "done"
diff --git a/docker/sandbox/rust/Dockerfile b/docker/sandbox/rust/Dockerfile
new file mode 100644
index 00000000..212acdea
--- /dev/null
+++ b/docker/sandbox/rust/Dockerfile
@@ -0,0 +1,15 @@
+FROM rust:1.77-slim
+
+# Install clippy and rustfmt (included in toolchain but ensure available)
+RUN rustup component add clippy rustfmt
+
+# Create non-root user
+RUN useradd -m -u 1000 -s /bin/bash phalanx
+
+COPY ../reset.sh /phalanx/reset.sh
+RUN chmod +x /phalanx/reset.sh
+
+RUN mkdir -p /workspace && chown phalanx:phalanx /workspace
+
+WORKDIR /workspace
+USER phalanx
diff --git a/docs/MULTI_AGENT_CI_FIXER.md b/docs/MULTI_AGENT_CI_FIXER.md
new file mode 100644
index 00000000..ae8369c9
--- /dev/null
+++ b/docs/MULTI_AGENT_CI_FIXER.md
@@ -0,0 +1,365 @@
+# Multi-Agent CI Fixer — Architecture & Phased Plan
+
+> **Status:** Design doc — pre-implementation  
+> **Author:** FORGE Tech Lead  
+> **Date:** 2026-04-15
+
+---
+
+## 1. Problem Statement
+
+The current CI fixer is a single-agent loop. It works for simple lint violations but has fundamental gaps:
+
+1. **No real environment** — it runs linters in a cloned workspace but never actually runs the app or tests
+2. **Opens new PRs every run** — instead of committing to the existing failing PR
+3. **Scoped to the CI log only** — doesn't know if the base branch is already broken
+4. **No reproduction step** — fixes are applied without confirming the failure first
+5. **One agent does everything** — no separation of concerns, hard to scale, hard to trust
+
+The fix isn't to patch these one at a time. The fix is a coordinated multi-agent pipeline.
+
+---
+
+## 2. The Mental Model — How a Sr. Staff Engineer Actually Works
+
+When a senior engineer sees a red CI build:
+
+1. **Read the log** — understand exactly what failed and why
+2. **Reproduce it locally** — run the exact same command CI ran, confirm it fails
+3. **Fix it** — make the targeted change
+4. **Validate in the same environment** — run the command again, confirm it passes
+5. **Push to the same PR** — new commit, not a new PR
+6. **Done** — CI goes green on the next run
+
+This is the workflow the multi-agent system must replicate. Every agent maps to one of these steps.
+
+---
+
+## 3. Agent Roster & Responsibilities
+
+```
+CI Failure Event
+      │
+      ▼
+┌─────────────────┐
+│  Log Analyst    │  — parse CI logs → StructuredFailure + reproducer_cmd
+└────────┬────────┘
+         │
+         ▼
+┌─────────────────┐
+│  Root Cause     │  — classify tier, stack, confidence, escalation decision
+│  Agent          │
+└────────┬────────┘
+         │
+    ┌────┴────┐
+    │         │
+    ▼         ▼
+[L1: Auto]  [L2: Escalate → comment on PR, done]
+    │
+    ▼
+┌─────────────────┐
+│  Sandbox        │  — detect stack, spin container from pre-warmed image
+│  Provisioner    │
+└────────┬────────┘
+         │
+         ▼
+┌─────────────────┐
+│  Reproducer     │  — run reproducer_cmd in sandbox, confirm failure
+│  Agent          │
+└────────┬────────┘
+         │
+    ┌────┴──────────┐
+    │               │
+    ▼               ▼
+[Confirmed]    [Not reproduced → flaky/env issue → comment, done]
+    │
+    ▼
+┌─────────────────┐
+│  Fix Agent      │  — apply fix, run validation in same sandbox
+│  (Claude Opus)  │
+└────────┬────────┘
+         │
+         ▼
+┌─────────────────┐
+│  Verifier       │  — smoke test the app, confirm nothing else broke
+│  Agent          │
+└────────┬────────┘
+         │
+         ▼
+┌─────────────────┐
+│  Commit Agent   │  — push commit to EXISTING PR (not a new one)
+└─────────────────┘
+```
+
+---
+
+## 4. Agent Specifications
+
+### 4.1 Log Analyst
+- **Model:** GPT-4.1 (fast, cheap, structured extraction)
+- **Input:** raw CI log text
+- **Output:** `StructuredFailure`
+  ```python
+  @dataclass
+  class StructuredFailure:
+      tool: str               # "ruff", "pytest", "mypy", "tsc", etc.
+      failure_type: str       # "lint", "test_regression", "build", "type_error"
+      errors: list[ParsedError]
+      reproducer_cmd: str     # exact command CI ran: "ruff check phalanx/ tests/"
+      failing_files: list[str]
+      log_excerpt: str
+      confidence: float
+  ```
+- **Existing code:** largely maps to current `LogParser` + `LLMClassifier` — refactor, don't rewrite
+
+### 4.2 Root Cause Agent
+- **Model:** GPT-4.1
+- **Input:** `StructuredFailure` + file contents of failing files
+- **Output:** `ClassifiedFailure`
+  ```python
+  @dataclass
+  class ClassifiedFailure:
+      tier: Literal["L1_auto", "L2_escalate"]
+      root_cause: str
+      hypothesis: str
+      stack: str              # "python", "node", "go", "java", "rust", "unknown"
+      confidence: float
+      escalation_reason: str  # populated if tier == L2
+  ```
+- **L1 criteria:** lint violations, unused imports, formatting, simple type annotation fixes
+- **L2 criteria:** test regression, logic bug, unknown stack, low confidence (<0.7)
+
+### 4.3 Sandbox Provisioner
+- **Model:** None — fully deterministic
+- **Input:** repo path + `ClassifiedFailure.stack`
+- **Output:** running Docker container ID + workspace path
+- **Stack detection order:**
+  1. File existence: `pyproject.toml` → python, `package.json` → node, `go.mod` → go, etc.
+  2. CI log hints: if detection fails, parse CI log for install commands
+  3. LLM fallback: give GPT-4.1 the root dir listing + CI log
+- **Pre-warmed images on prod:**
+  - `phalanx-sandbox:python` — python 3.12, pip, ruff, mypy, pytest
+  - `phalanx-sandbox:node` — node 22, npm, yarn, eslint, tsc
+  - `phalanx-sandbox:go` — go 1.22+
+  - `phalanx-sandbox:multi` — python + node combined
+- **Fallback:** if stack unknown after LLM → skip to Escalate path
+
+### 4.4 Reproducer Agent
+- **Model:** Claude Opus 4.6 (tool use: `run_command`)
+- **Input:** sandbox container, `reproducer_cmd`
+- **Output:** `ReproductionResult`
+  ```python
+  @dataclass
+  class ReproductionResult:
+      confirmed: bool
+      exit_code: int
+      output: str
+      verdict: Literal["confirmed", "flaky", "env_mismatch", "timeout"]
+  ```
+- **Logic:**
+  - Run `reproducer_cmd` in sandbox
+  - If it fails with same error → `confirmed`
+  - If it passes → `flaky` (env issue, not code bug)
+  - If it fails with a *different* error → `env_mismatch` (wrong stack/deps)
+  - If timeout → escalate
+
+### 4.5 Fix Agent
+- **Model:** Claude Opus 4.6 (tool use: `read_file`, `write_file`, `run_command`, `finish`)
+- **Input:** `StructuredFailure`, `ReproductionResult`, sandbox container
+- **Output:** `VerifiedPatch`
+  ```python
+  @dataclass
+  class VerifiedPatch:
+      files_modified: list[str]
+      validation_cmd: str
+      validation_output: str
+      success: bool
+  ```
+- **Constraints (unchanged from current design):**
+  - write_file: empty-content guard, 70% shrink guard
+  - sed/awk always available for large files
+  - Full-repo validation before declaring success
+  - Max 12 turns
+
+### 4.6 Verifier Agent
+- **Model:** Claude Opus 4.6
+- **Input:** sandbox container, `VerifiedPatch`, stack type
+- **Output:** `VerificationResult`
+- **What it does:**
+  - For Python: `pytest --tb=short -q` (no coverage, just pass/fail)
+  - For Node: `npm test` or `npm run lint`
+  - For unknown: skip (don't block on what we can't verify)
+- **Phase 1:** optional/best-effort — don't block commit if verifier times out
+- **Phase 2:** mandatory gate for test regression failures
+
+### 4.7 Commit Agent
+- **Model:** None — deterministic git ops
+- **Input:** `VerifiedPatch`, original PR info
+- **Output:** commit SHA pushed to existing PR branch
+- **Key behavior:**
+  - Look up open Phalanx fix PRs for this branch — if one exists, push to it
+  - If none exists, open one (draft, targeting the failing branch)
+  - Never open a second fix PR for the same branch
+  - Commit message: structured, references the original CI run ID
+
+---
+
+## 5. Shared Context Object
+
+All agents read from and write to a single `CIFixContext` object, persisted in DB:
+
+```python
+@dataclass
+class CIFixContext:
+    # Identity
+    ci_fix_run_id: UUID
+    repo: str
+    branch: str
+    commit_sha: str
+    original_build_id: str
+
+    # Agent outputs (written as pipeline progresses)
+    structured_failure: StructuredFailure | None
+    classified_failure: ClassifiedFailure | None
+    sandbox_id: str | None
+    reproduction_result: ReproductionResult | None
+    verified_patch: VerifiedPatch | None
+    verification_result: VerificationResult | None
+    commit_sha_fix: str | None
+
+    # Metadata
+    started_at: datetime
+    completed_at: datetime | None
+    final_status: Literal["fixed", "escalated", "flaky", "env_mismatch", "failed"]
+    pr_comment_posted: bool
+```
+
+This object is inspectable at any point. `GET /ci-fix-runs/{id}/context` returns the full pipeline state. No black boxes.
+
+---
+
+## 6. Fallback Ladder
+
+Every exit path produces a useful artifact. No silent failures.
+
+| Situation | Action |
+|-----------|--------|
+| Can reproduce + can fix | Verified patch committed to existing PR |
+| Can reproduce + can't fix (max turns) | Root cause comment on PR, engineer knows exactly what to look at |
+| Cannot reproduce (passes in sandbox) | "Looks flaky — reproduced cleanly locally. Recommend re-running CI." |
+| Unknown stack (after LLM fallback) | Structured failure analysis comment + stack hypothesis |
+| Base branch already broken | "Base branch has pre-existing failures. Fix those first." |
+| Sandbox provision fails | Fall back to current workspace-only mode (no env validation) |
+| Any agent timeout | Escalate with partial context, never hang |
+
+---
+
+## 7. Sandbox Architecture
+
+### Pre-warmed images
+Built once, stored on prod server. Rebuilt weekly via cron.
+
+```dockerfile
+# phalanx-sandbox:python
+FROM python:3.12-slim
+RUN pip install ruff mypy pytest pytest-asyncio pytest-cov
+# No app code — that gets mounted at runtime
+```
+
+### Container lifecycle
+```
+provision()  → docker run -d --rm -v {workspace}:/app -w /app phalanx-sandbox:python
+install()    → docker exec {id} pip install -e ".[dev]"   (~30s, cached layer)
+run(cmd)     → docker exec {id} {cmd}                     (~1-5s per command)
+teardown()   → docker stop {id}  (--rm handles cleanup)
+```
+
+### Dep caching
+Cache the installed dep layer per `requirements hash`. If `pyproject.toml` hasn't changed since last run, skip `pip install` — use cached layer. Reduces install time from ~30s to ~2s on repeat runs.
+
+### Security
+- Network isolated: `--network none` after dep install
+- No write access outside `/app`
+- Hard CPU/memory limits: `--cpus 1 --memory 2g`
+- Hard timeout: 5 minutes total per sandbox lifecycle
+
+---
+
+## 8. Quality Gates
+
+Every agent has unit tests. Pipeline has integration tests. Coverage target: **≥80%**.
+
+| Test type | What it covers |
+|-----------|----------------|
+| Unit — Log Analyst | Parses known log formats correctly, structured output |
+| Unit — Root Cause Agent | Classification tiers, confidence thresholds, escalation |
+| Unit — Sandbox Provisioner | Stack detection logic, all fallback paths |
+| Unit — Reproducer Agent | Confirmed/flaky/env_mismatch verdicts |
+| Unit — Fix Agent | Existing agentic loop tests + new sandbox integration |
+| Unit — Verifier Agent | Pass/fail/skip verdicts per stack type |
+| Unit — Commit Agent | PR continuity (no duplicate PRs), commit format |
+| Integration — full pipeline | End-to-end with a real ruff failure, real sandbox, real commit |
+| E2E — MESMD | No open CI failures after pipeline runs |
+
+---
+
+## 9. Phased Plan
+
+### Phase 1 — Solid Foundation (current sprint)
+**Goal:** Clean up what exists, establish the context object, add PR continuity.
+
+- [ ] Refactor `CIFixerAgent` into the DAG agent pattern (Log Analyst + Root Cause already exist, formalize them)
+- [ ] Introduce `CIFixContext` as the shared state object (DB-backed)
+- [ ] Commit Agent: check for existing fix PRs before opening new ones
+- [ ] Fix CI workflow triggers (PR #8 — in flight)
+- [ ] ≥80% unit test coverage on all existing ci_fixer modules
+- [ ] `GET /ci-fix-runs/{id}/context` endpoint — full pipeline state inspectable
+
+### Phase 2 — Sandbox + Reproduction (next sprint)
+**Goal:** The pipeline can reproduce failures, not just parse them.
+
+- [ ] Build pre-warmed sandbox images (python, node, multi)
+- [ ] `SandboxProvisioner` — stack detection + container lifecycle
+- [ ] `ReproducerAgent` — run `reproducer_cmd` in sandbox, produce `ReproductionResult`
+- [ ] Wire into existing pipeline: reproduction step before Fix Agent
+- [ ] Flaky detection: if sandbox passes, post "looks flaky" comment, skip fix
+- [ ] Dep layer caching per repo
+- [ ] ≥80% test coverage on new agents
+
+### Phase 3 — Verifier + Full E2E (sprint after)
+**Goal:** The pipeline can confirm the app works, not just that linting passes.
+
+- [ ] `VerifierAgent` — run test suite in sandbox post-fix
+- [ ] Sandbox network isolation + resource limits
+- [ ] Unknown stack LLM fallback path
+- [ ] Base branch health check before starting fix
+- [ ] Full pipeline integration test (real repo, real sandbox, real CI)
+- [ ] **MESMD proof:** trigger real CI failures, confirm pipeline fixes them all, CI stays green
+
+---
+
+## 10. Success Criteria
+
+Phase 1 done when:
+- No duplicate fix PRs ever opened for the same branch
+- `CIFixContext` fully populated and queryable via API
+- 80% unit test coverage across all ci_fixer modules
+
+Phase 2 done when:
+- Reproducer Agent correctly classifies confirmed vs flaky vs env_mismatch
+- Flaky failures generate a comment instead of a bad fix PR
+- Sandbox spins up in <5 seconds (from pre-warmed image)
+
+Phase 3 done when:
+- MESMD app: zero open CI failures after pipeline runs end-to-end
+- Verifier Agent confirms app smoke tests pass post-fix
+- Full pipeline runs in <3 minutes for a lint failure
+
+---
+
+## 11. What We Are NOT Building
+
+- Auto-merge — fix PRs are always draft, always human-approved before merge
+- Fix for logic bugs — if Root Cause Agent classifies it as a test regression the engineer introduced, it escalates, it does not fix
+- Multi-repo coordination — one pipeline per repo, no cross-repo fixes
+- Jenkins support — Phase 2 at earliest
diff --git a/docs/sandbox_pool_design.md b/docs/sandbox_pool_design.md
new file mode 100644
index 00000000..ebf90ae8
--- /dev/null
+++ b/docs/sandbox_pool_design.md
@@ -0,0 +1,272 @@
+# Sandbox Pool Design — Isolated Execution for CI Fixer
+
+## Status: Approved for implementation (Phase 3)
+
+## Problem
+
+`SandboxProvisioner.provision()` is currently a no-op — it returns a descriptor but
+never starts a container. The reproducer and verifier run commands as local
+subprocesses on the FORGE host. This means:
+
+- No env isolation: host ruff/mypy version may differ from the repo's pinned version
+- No filesystem isolation: a broken fix can dirty the host workspace
+- No resource limits: a hung test can block other fix runs
+- `docker run` cold-start (image pull + container create) costs 5–30s per fix run
+  if we naively start a container on demand
+
+---
+
+## Design: Pre-warmed Pool
+
+### Core idea
+
+Never cold-start a container during a fix run. Keep a small pool of ready containers
+per stack, already running with tools pre-installed. A fix run checks one out, uses
+it, and the pool refills asynchronously in the background.
+
+```
+┌────────────────────────────────────────────────────────────┐
+│  SandboxPool (lazy singleton, init after Celery fork)      │
+│                                                            │
+│  python: [🟢 ready] [🟢 ready] [🟡 warming]               │
+│  node:   [🟢 ready] [🟡 warming]                           │
+│  go:     [🟢 ready]                                        │
+│  rust:   [🟢 ready]                                        │
+└────────────────────────────────────────────────────────────┘
+        │ checkout(stack)                    ↑ checkin(container)
+        ▼                                    │
+┌────────────────────────────────────────────────────────────┐
+│  Fix run (ReproducerAgent + VerifierAgent)                  │
+│  1. pool.checkout("python") → PooledContainer              │
+│  2. bind-mount workspace into container                    │
+│  3. docker exec reproducer_cmd                             │
+│  4. docker exec fix validator                              │
+│  5. docker exec verifier (ruff/pytest/etc.)                │
+│  6. pool.checkin(container) → reset + async refill         │
+└────────────────────────────────────────────────────────────┘
+```
+
+---
+
+## Components
+
+### 1. `PooledContainer` (dataclass)
+
+```python
+@dataclass
+class PooledContainer:
+    container_id: str      # Docker container ID (short hash)
+    stack: str             # "python" | "node" | "go" | "rust"
+    image: str             # e.g. "phalanx-sandbox-python:latest"
+    checked_out_at: float  # monotonic time — for reaper timeout detection
+    healthy: bool = True
+```
+
+### 2. `SandboxPool` (async singleton)
+
+One `asyncio.Queue` per stack. Each queue holds ready `PooledContainer` objects.
+
+Key methods:
+
+| Method | What it does |
+|--------|-------------|
+| `_warmup()` | Called once after lazy init. Starts `min_size` containers per stack. |
+| `checkout(stack, timeout)` | `asyncio.wait_for(queue.get(), timeout)`. Returns container or raises `SandboxUnavailableError`. |
+| `checkin(container)` | Runs reset script inside container, puts it back in queue. Triggers async `_refill`. |
+| `_start_container(stack)` | `docker run -d --rm --user phalanx --no-new-privileges -v /tmp:/hosttmp {image} sleep infinity` |
+| `_health_check(container)` | `docker exec {id} echo ok`. Returns bool. |
+| `_reset_container(container)` | `docker exec {id} /phalanx/reset.sh` — clears /workspace, /tmp, pip/npm cache. |
+| `_reaper()` | Background task. Every 60s: kill containers held > `max_hold_seconds`. Replace them. |
+| `shutdown()` | Kill all containers in all queues. Called on worker shutdown. |
+
+### 3. `SandboxResult` (upgraded)
+
+Two new fields added to existing dataclass:
+
+```python
+container_id: str = ""        # populated when pool checkout succeeds
+mount_path: str = "/workspace" # path inside the container
+```
+
+`available=True` + `container_id != ""` → real Docker exec path  
+`available=True` + `container_id == ""` → pool timeout, local subprocess fallback  
+`available=False` → sandbox_enabled=False, local subprocess fallback
+
+### 4. `SandboxProvisioner.provision()` (upgraded)
+
+```python
+async def provision(workspace_path, stack_hint=None) -> SandboxResult | None:
+    if not settings.sandbox_enabled:
+        return None
+    stack = stack_hint or self.detect_stack(workspace_path)
+    image = _STACK_IMAGES[stack]
+    sandbox_id = f"phalanx-sandbox-{uuid.uuid4().hex[:8]}"
+
+    pool = await get_sandbox_pool()          # lazy singleton, safe after fork
+    try:
+        container = await pool.checkout(stack, timeout=settings.sandbox_checkout_timeout_seconds)
+        # bind-mount workspace into the container
+        await pool.mount_workspace(container, workspace_path)
+        return SandboxResult(
+            sandbox_id=sandbox_id, stack=stack, image=image,
+            workspace_path=str(workspace_path),
+            container_id=container.container_id,
+        )
+    except SandboxUnavailableError:
+        log.warning("ci_fixer.sandbox_pool_exhausted", stack=stack)
+        return SandboxResult(
+            sandbox_id=sandbox_id, stack=stack, image=image,
+            workspace_path=str(workspace_path),
+            available=False,             # → local subprocess fallback
+        )
+```
+
+### 5. `ReproducerAgent._run_subprocess()` (upgraded)
+
+When `sandbox_result` has a `container_id`, wrap the command:
+
+```python
+if sandbox_result and sandbox_result.container_id:
+    cmd = f"docker exec {sandbox_result.container_id} sh -c {shlex.quote(cmd)}"
+```
+
+Otherwise falls through to current `asyncio.create_subprocess_shell` behavior.
+
+### 6. `VerifierAgent._run_cmd()` (upgraded)
+
+Same pattern — when `container_id` is set, prefix args with `["docker", "exec", container_id]`.
+
+---
+
+## Stack Images
+
+Custom images with tools pre-installed at pinned versions. Stored in `docker/sandbox/`.
+
+```
+docker/sandbox/
+  python/Dockerfile
+  node/Dockerfile
+  go/Dockerfile
+  rust/Dockerfile
+  reset.sh          # shared reset script copied into every image
+```
+
+### `reset.sh`
+
+```bash
+#!/bin/bash
+# Clear workspace and caches between fix runs.
+rm -rf /workspace/*
+rm -rf /tmp/pip-* /tmp/npm-* /root/.cache 2>/dev/null || true
+```
+
+### Python image example
+
+```dockerfile
+FROM python:3.12-slim
+RUN useradd -m -u 1000 phalanx
+RUN pip install --no-cache-dir ruff==0.4.4 mypy==1.10.0 pytest==8.2.0
+COPY reset.sh /phalanx/reset.sh
+RUN chmod +x /phalanx/reset.sh
+WORKDIR /workspace
+USER phalanx
+```
+
+---
+
+## Settings (new keys)
+
+```
+SANDBOX_POOL_MIN_SIZE=1           # containers to pre-warm per stack at startup
+SANDBOX_POOL_MAX_SIZE=2           # max simultaneous checked-out containers per stack
+SANDBOX_CHECKOUT_TIMEOUT_SECONDS=30   # wait for pool slot before falling back
+SANDBOX_MAX_HOLD_SECONDS=300      # reaper kills containers held longer than this
+SANDBOX_REAPER_INTERVAL_SECONDS=60    # how often reaper runs
+```
+
+Setting `SANDBOX_POOL_MIN_SIZE=0` disables pre-warming — containers start cold on first use.
+Setting `SANDBOX_ENABLED=false` disables the entire pool (existing behavior).
+
+---
+
+## Pool initialization and Celery fork safety
+
+**Problem**: Celery pre-forks workers. If the pool is a module-level singleton
+initialized before fork, child workers inherit a stale event loop reference → all
+`await` calls inside the pool fail.
+
+**Solution**: Lazy init behind an `asyncio.Lock`.
+
+```python
+_pool_instance: SandboxPool | None = None
+_pool_lock: asyncio.Lock | None = None
+
+async def get_sandbox_pool() -> SandboxPool:
+    global _pool_instance, _pool_lock
+    if _pool_lock is None:
+        _pool_lock = asyncio.Lock()       # created inside the child's event loop
+    async with _pool_lock:
+        if _pool_instance is None:
+            _pool_instance = SandboxPool()
+            await _pool_instance._warmup()
+    return _pool_instance
+```
+
+First call to `provision()` in each Celery child worker triggers this.
+Subsequent calls in the same worker reuse the warm pool.
+
+---
+
+## Fallback chain (no regressions possible)
+
+```
+sandbox_enabled=False
+    → return None → reproducer/verifier: local subprocess (today's behavior)
+
+sandbox_enabled=True, pool checkout times out (all slots busy)
+    → SandboxResult(available=False) → local subprocess fallback
+
+sandbox_enabled=True, Docker daemon not found
+    → SandboxResult(available=False) → local subprocess fallback
+
+sandbox_enabled=True, container health check fails
+    → discard container, start fresh one, retry checkout once
+    → if retry fails: SandboxResult(available=False) → local subprocess fallback
+
+sandbox_enabled=True, container_id populated
+    → docker exec {cmd} → real isolated execution
+```
+
+Every error path degrades to local subprocess. Fix runs never fail due to sandbox
+infrastructure issues.
+
+---
+
+## What is NOT in scope (future)
+
+- **Network isolation** (`--network none`) — useful but breaks `pip install` fallback
+- **CPU/memory cgroups** (`--cpus`, `--memory`) — nice-to-have, not blocking
+- **Real Docker socket forwarding** for nested Docker — not needed for lint/type/test tools
+- **Multi-host pool** (pool across multiple FORGE workers) — Redis-backed queue,
+  post-MVP when horizontal scaling is needed
+
+---
+
+## File map
+
+| File | Change |
+|------|--------|
+| `phalanx/ci_fixer/sandbox_pool.py` | **NEW** — SandboxPool, PooledContainer, get_sandbox_pool |
+| `phalanx/ci_fixer/sandbox.py` | **MODIFIED** — SandboxResult gets container_id/mount_path; provision() uses pool |
+| `phalanx/ci_fixer/reproducer.py` | **MODIFIED** — _run_subprocess wraps with docker exec when container_id set |
+| `phalanx/ci_fixer/verifier.py` | **MODIFIED** — _run_cmd wraps with docker exec when container_id set |
+| `phalanx/config/settings.py` | **MODIFIED** — 5 new SANDBOX_POOL_* settings |
+| `docker/sandbox/python/Dockerfile` | **NEW** |
+| `docker/sandbox/node/Dockerfile` | **NEW** |
+| `docker/sandbox/go/Dockerfile` | **NEW** |
+| `docker/sandbox/rust/Dockerfile` | **NEW** |
+| `docker/sandbox/reset.sh` | **NEW** |
+| `tests/unit/test_sandbox_pool.py` | **NEW** — ≥80% coverage on sandbox_pool.py |
+| `tests/unit/test_ci_fixer_sandbox.py` | **MODIFIED** — cover pool checkout path |
+| `tests/unit/test_ci_fixer_reproducer.py` | **MODIFIED** — cover docker exec path |
+| `tests/unit/test_ci_fixer_verifier.py` | **MODIFIED** — cover docker exec path |
diff --git a/phalanx/agents/ci_fixer.py b/phalanx/agents/ci_fixer.py
index 7bdc1d32..aa6a5475 100644
--- a/phalanx/agents/ci_fixer.py
+++ b/phalanx/agents/ci_fixer.py
@@ -37,13 +37,22 @@
 from sqlalchemy import select, update
 
 from phalanx.agents.base import AgentResult, BaseAgent
-from phalanx.agents.soul import CI_FIXER_SOUL
 from phalanx.ci_fixer.analyst import FilePatch, FixPlan, RootCauseAnalyst
+from phalanx.ci_fixer.context import (
+    CIFixContext,
+    ClassifiedFailure,
+    ReproductionResult,
+    StructuredFailure,
+    VerifiedPatch,
+)
 from phalanx.ci_fixer.events import CIFailureEvent
 from phalanx.ci_fixer.log_fetcher import get_log_fetcher
 from phalanx.ci_fixer.log_parser import ParsedLog, parse_log
+from phalanx.ci_fixer.reproducer import ReproducerAgent
+from phalanx.ci_fixer.sandbox import SandboxProvisioner
 from phalanx.ci_fixer.suppressor import is_flaky_suppressed, should_use_history
 from phalanx.ci_fixer.validator import validate_fix
+from phalanx.ci_fixer.verifier import VerifierAgent
 from phalanx.ci_fixer.version_parity import (
     VersionParityResult,
     check_version_parity,
@@ -51,7 +60,7 @@
     should_auto_merge,
 )
 from phalanx.config.settings import get_settings
-from phalanx.db.models import CIFailureFingerprint, CIFlakyPattern, CIFixRun, CIIntegration
+from phalanx.db.models import CIFailureFingerprint, CIFixRun, CIFlakyPattern, CIIntegration
 from phalanx.db.session import get_db
 from phalanx.queue.celery_app import celery_app
 
@@ -114,6 +123,16 @@ async def _execute_inner(self) -> AgentResult:
         if integration is None:
             return AgentResult(success=False, output={}, error="CIIntegration not found")
 
+        # ── 1b. Initialize shared pipeline context ───────────────────────────
+        ctx = CIFixContext(
+            ci_fix_run_id=self.ci_fix_run_id,
+            repo=ci_run.repo_full_name,
+            branch=ci_run.branch,
+            commit_sha=ci_run.commit_sha,
+            original_build_id=ci_run.ci_build_id,
+        )
+        await self._persist_context(ctx)
+
         # ── 2. Fetch raw logs ─────────────────────────────────────────────────
         event = CIFailureEvent(
             provider=ci_run.ci_provider,
@@ -152,6 +171,22 @@ async def _execute_inner(self) -> AgentResult:
         # the hash is valuable for V2 history queries.
         await self._persist_fingerprint(fingerprint)
 
+        # Update shared context with structured failure
+        ctx.structured_failure = StructuredFailure(
+            tool=parsed.tool,
+            failure_type=parsed.failure_type if hasattr(parsed, "failure_type") else "unknown",
+            reproducer_cmd="",  # populated by classifier
+            errors=[],
+            failing_files=list(parsed.failing_files) if hasattr(parsed, "failing_files") else [],
+            log_excerpt=raw_log[:2000],
+        )
+        ctx.classified_failure = ClassifiedFailure(
+            tier="L1_auto",
+            root_cause="",
+            stack="python",
+        )
+        await self._persist_context(ctx)
+
         await self._trace(
             "decision",
             f"**Parsed log** — tool: `{parsed.tool}`\n\n{parsed.as_text()}",
@@ -201,274 +236,401 @@ async def _execute_inner(self) -> AgentResult:
             await self._mark_failed(ci_run, "repo_clone_failed")
             return AgentResult(success=False, output={}, error="repo clone failed")
 
-        # ── 5. Analyst loop: confirm root cause → apply → validate ────────────
-        analyst = RootCauseAnalyst(
-            call_llm=self._call_claude,
-            history_lookup=self._lookup_fix_history,
-        )
-        fix_plan: FixPlan | None = None
-        validation_passed = False
-        validation_tool_version = ""
-        current_parsed = parsed
+        # ── Phase 2: Sandbox provisioning + failure reproduction ──────────────
+        provisioner = SandboxProvisioner()
+        sandbox_result = await provisioner.provision(workspace)
 
-        for iteration in range(1, _MAX_ITERATIONS + 1):
-            self._log.info("ci_fixer.analyst_iteration", iteration=iteration)
+        if sandbox_result:
+            ctx.sandbox_id = sandbox_result.sandbox_id
+            ctx.sandbox_stack = sandbox_result.stack
+            await self._persist_context(ctx)
 
-            fix_plan = analyst.analyze(current_parsed, workspace, fingerprint_hash=fingerprint)
-            self._log.info(
-                "ci_fixer.fix_plan",
-                confidence=fix_plan.confidence,
-                root_cause=fix_plan.root_cause,
-                patches=len(fix_plan.patches),
-                needs_test=fix_plan.needs_new_test,
+        try:
+            reproducer = ReproducerAgent()
+            reproduction_result = await reproducer.reproduce(
+                reproducer_cmd=ctx.structured_failure.reproducer_cmd,
+                workspace_path=workspace,
+                sandbox_result=sandbox_result,
+                structured_failure=ctx.structured_failure,
+                timeout_seconds=settings.sandbox_timeout_seconds,
             )
+            ctx.reproduction_result = reproduction_result
+            await self._persist_context(ctx)
 
-            await self._trace(
-                "reflection",
-                f"**Root cause:** {fix_plan.root_cause}\n"
-                f"**Confidence:** {fix_plan.confidence}\n"
-                f"**Patches:** {len(fix_plan.patches)} file(s)",
-                {"confidence": fix_plan.confidence, "iteration": iteration},
+            if reproduction_result.verdict == "flaky":
+                self._log.info(
+                    "ci_fixer.flaky_reproduction",
+                    repo=ci_run.repo_full_name,
+                    tool=parsed.tool,
+                )
+                ctx.complete("flaky")
+                await self._persist_context(ctx)
+                await self._mark_failed(ci_run, "flaky")
+                return AgentResult(
+                    success=False,
+                    output={"reason": "flaky", "tool": parsed.tool},
+                )
+
+            if reproduction_result.verdict == "env_mismatch":
+                self._log.warning(
+                    "ci_fixer.env_mismatch",
+                    repo=ci_run.repo_full_name,
+                    tool=parsed.tool,
+                )
+                ctx.complete("escalated", error="env_mismatch: reproducer ran different failure")
+                await self._persist_context(ctx)
+                await self._mark_failed(ci_run, "env_mismatch")
+                return AgentResult(
+                    success=False,
+                    output={"reason": "env_mismatch", "tool": parsed.tool},
+                )
+
+            # ── 5. Analyst loop: confirm root cause → apply → validate ────────────
+            analyst = RootCauseAnalyst(
+                call_llm=self._call_claude,
+                history_lookup=self._lookup_fix_history,
             )
+            fix_plan: FixPlan | None = None
+            validation_passed = False
+            validation_tool_version = ""
+            current_parsed = parsed
+
+            for iteration in range(1, _MAX_ITERATIONS + 1):
+                self._log.info("ci_fixer.analyst_iteration", iteration=iteration)
 
-            if not fix_plan.is_actionable:
+                fix_plan = analyst.analyze(current_parsed, workspace, fingerprint_hash=fingerprint)
                 self._log.info(
-                    "ci_fixer.low_confidence",
+                    "ci_fixer.fix_plan",
+                    confidence=fix_plan.confidence,
                     root_cause=fix_plan.root_cause,
-                    iteration=iteration,
+                    patches=len(fix_plan.patches),
+                    needs_test=fix_plan.needs_new_test,
                 )
-                break
 
-            # Guard: total line delta across all patches
-            total_delta = sum(abs(p.delta) for p in fix_plan.patches)
-            if total_delta > _MAX_TOTAL_LINE_DELTA:
-                self._log.warning(
-                    "ci_fixer.patch_delta_exceeded",
-                    total_delta=total_delta,
-                    max_allowed=_MAX_TOTAL_LINE_DELTA,
+                await self._trace(
+                    "reflection",
+                    f"**Root cause:** {fix_plan.root_cause}\n"
+                    f"**Confidence:** {fix_plan.confidence}\n"
+                    f"**Patches:** {len(fix_plan.patches)} file(s)",
+                    {"confidence": fix_plan.confidence, "iteration": iteration},
                 )
-                fix_plan = FixPlan(
-                    confidence="low",
-                    root_cause=f"Patch too large ({total_delta} lines changed, max {_MAX_TOTAL_LINE_DELTA})",
+
+                if not fix_plan.is_actionable:
+                    self._log.info(
+                        "ci_fixer.low_confidence",
+                        root_cause=fix_plan.root_cause,
+                        iteration=iteration,
+                    )
+                    break
+
+                # Guard: total line delta across all patches
+                total_delta = sum(abs(p.delta) for p in fix_plan.patches)
+                if total_delta > _MAX_TOTAL_LINE_DELTA:
+                    self._log.warning(
+                        "ci_fixer.patch_delta_exceeded",
+                        total_delta=total_delta,
+                        max_allowed=_MAX_TOTAL_LINE_DELTA,
+                    )
+                    fix_plan = FixPlan(
+                        confidence="low",
+                        root_cause=f"Patch too large ({total_delta} lines changed, max {_MAX_TOTAL_LINE_DELTA})",
+                    )
+                    break
+
+                # Guard: number of files
+                if len(fix_plan.patches) > _MAX_FILES_CHANGED:
+                    self._log.warning(
+                        "ci_fixer.too_many_files",
+                        files=len(fix_plan.patches),
+                        max_allowed=_MAX_FILES_CHANGED,
+                    )
+                    fix_plan = FixPlan(
+                        confidence="low",
+                        root_cause=f"Fix touches {len(fix_plan.patches)} files (max {_MAX_FILES_CHANGED})",
+                    )
+                    break
+
+                # Apply patches
+                files_written = self._apply_patches(workspace, fix_plan.patches)
+                if not files_written:
+                    self._log.warning("ci_fixer.no_files_written")
+                    fix_plan = FixPlan(
+                        confidence="low",
+                        root_cause="Patch application failed — hunk mismatch or guard rejection",
+                    )
+                    break
+
+                # Validate
+                validation = validate_fix(current_parsed, workspace, original_parsed=parsed)
+                validation_tool_version = validation.tool_version
+                self._log.info(
+                    "ci_fixer.validation",
+                    passed=validation.passed,
+                    tool=validation.tool,
+                    tool_version=validation_tool_version,
+                    regressions=len(getattr(validation, "regressions", []) or []),
+                    iteration=iteration,
                 )
-                break
 
-            # Guard: number of files
-            if len(fix_plan.patches) > _MAX_FILES_CHANGED:
-                self._log.warning(
-                    "ci_fixer.too_many_files",
-                    files=len(fix_plan.patches),
-                    max_allowed=_MAX_FILES_CHANGED,
+                if validation.passed:
+                    validation_passed = True
+                    self._log.info("ci_fixer.validation_passed", files=files_written)
+                    break
+                else:
+                    self._log.warning(
+                        "ci_fixer.validation_failed",
+                        iteration=iteration,
+                        output=validation.output[:300],
+                    )
+                    await self._trace(
+                        "uncertainty",
+                        f"Validation failed (iteration {iteration}):\n```\n{validation.output[:500]}\n```",
+                        {"iteration": iteration},
+                    )
+                    if iteration < _MAX_ITERATIONS:
+                        # Re-parse the validation output for the next iteration
+                        retry_parsed = parse_log(validation.output)
+                        if retry_parsed.has_errors:
+                            current_parsed = retry_parsed
+
+            # ── 6. Check final plan ───────────────────────────────────────────────
+            if not fix_plan or not fix_plan.is_actionable or not validation_passed:
+                reason = (
+                    "low_confidence"
+                    if (not fix_plan or not fix_plan.is_actionable)
+                    else "validation_failed"
                 )
-                fix_plan = FixPlan(
-                    confidence="low",
-                    root_cause=f"Fix touches {len(fix_plan.patches)} files (max {_MAX_FILES_CHANGED})",
+                await self._mark_failed_with_fields(
+                    ci_run,
+                    reason=reason,
+                    fingerprint_hash=fingerprint,
+                    validation_tool_version=validation_tool_version,
                 )
-                break
-
-            # Apply patches
-            files_written = self._apply_patches(workspace, fix_plan.patches)
-            if not files_written:
-                self._log.warning("ci_fixer.no_files_written")
-                fix_plan = FixPlan(
-                    confidence="low",
-                    root_cause="Patch application failed — hunk mismatch or guard rejection",
+                # Comment on the PR explaining why we couldn't fix it
+                if ci_run.pr_number and integration.github_token:
+                    await self._comment_unable_to_fix(
+                        integration=integration,
+                        ci_run=ci_run,
+                        reason=reason,
+                        root_cause=fix_plan.root_cause if fix_plan else "",
+                        tool=parsed.tool,
+                    )
+                return AgentResult(
+                    success=False,
+                    output={
+                        "reason": reason,
+                        "root_cause": fix_plan.root_cause if fix_plan else "",
+                        "tool": parsed.tool,
+                        "fingerprint": fingerprint,
+                    },
                 )
-                break
 
-            # Validate
-            validation = validate_fix(current_parsed, workspace, original_parsed=parsed)
-            validation_tool_version = validation.tool_version
-            self._log.info(
-                "ci_fixer.validation",
-                passed=validation.passed,
-                tool=validation.tool,
-                tool_version=validation_tool_version,
-                regressions=len(getattr(validation, "regressions", []) or []),
-                iteration=iteration,
+            files_written = [p.path for p in fix_plan.patches]
+
+            # ── 6b. Phase 4: Tool version parity check ────────────────────────────
+            # Compare local tool version to the version that caused the failure.
+            # We use last_good_tool_version from the fingerprint as the "failure version"
+            # proxy (it was the version at the last successful fix — close enough for parity).
+            parity_result = await self._check_tool_version_parity(
+                fingerprint_hash=fingerprint,
+                local_version=validation_tool_version,
             )
+            parity_ok = parity_result.ok
 
-            if validation.passed:
-                validation_passed = True
-                self._log.info("ci_fixer.validation_passed", files=files_written)
-                break
-            else:
-                self._log.warning(
-                    "ci_fixer.validation_failed",
-                    iteration=iteration,
-                    output=validation.output[:300],
-                )
-                await self._trace(
-                    "uncertainty",
-                    f"Validation failed (iteration {iteration}):\n```\n{validation.output[:500]}\n```",
-                    {"iteration": iteration},
+            # ── 7. Commit to safe branch (NEVER the author's branch) ─────────────
+            fix_branch = f"phalanx/ci-fix/{self.ci_fix_run_id}"
+            commit_result = await self._commit_to_safe_branch(
+                workspace=workspace,
+                source_branch=ci_run.branch,
+                fix_branch=fix_branch,
+                commit_message=(
+                    f"fix(ci): resolve {parsed.tool} failure [{ci_run.ci_provider}]\n\n"
+                    f"Root cause: {fix_plan.root_cause}\n"
+                    f"Files: {', '.join(files_written)}\n"
+                    f"Validated: {validation_tool_version}\n"
+                    f"CI Fix Run: {self.ci_fix_run_id}"
+                ),
+                github_token=self._get_github_token(integration),
+                repo_full_name=ci_run.repo_full_name,
+            )
+            commit_sha = commit_result.get("sha")
+            push_failed = commit_result.get("push_failed", False)
+
+            if not commit_sha:
+                await self._mark_failed_with_fields(
+                    ci_run,
+                    reason=commit_result.get("error", "commit_failed"),
+                    fingerprint_hash=fingerprint,
+                    validation_tool_version=validation_tool_version,
                 )
-                if iteration < _MAX_ITERATIONS:
-                    # Re-parse the validation output for the next iteration
-                    retry_parsed = parse_log(validation.output)
-                    if retry_parsed.has_errors:
-                        current_parsed = retry_parsed
-
-        # ── 6. Check final plan ───────────────────────────────────────────────
-        if not fix_plan or not fix_plan.is_actionable or not validation_passed:
-            reason = "low_confidence" if (not fix_plan or not fix_plan.is_actionable) else "validation_failed"
-            await self._mark_failed_with_fields(
-                ci_run,
-                reason=reason,
-                fingerprint_hash=fingerprint,
-                validation_tool_version=validation_tool_version,
+                return AgentResult(success=False, output={}, error="commit failed")
+
+            # ── 8. Open PR (draft or auto-merge depending on integration config) ────
+            # Phase 4: auto-merge only if integration.auto_merge=True AND the
+            # fingerprint has enough successful fixes AND tool version parity is OK.
+            fingerprint_success = await self._get_fingerprint_success_count(fingerprint)
+            enable_auto_merge = should_auto_merge(
+                integration_auto_merge=getattr(integration, "auto_merge", False),
+                fingerprint_success_count=fingerprint_success,
+                min_success_count=getattr(integration, "min_success_count", 3),
+                parity_ok=parity_ok,
             )
-            # Comment on the PR explaining why we couldn't fix it
+
+            fix_pr_number: int | None = None
+            if not push_failed and integration.github_token:
+                # Phase 1: check for an existing Phalanx fix PR targeting this branch.
+                # If one exists, push the new commit to it instead of opening a second PR.
+                existing_pr = await self._find_existing_fix_pr(integration, ci_run)
+                if existing_pr:
+                    self._log.info(
+                        "ci_fixer.reusing_existing_fix_pr",
+                        pr=existing_pr,
+                        branch=ci_run.branch,
+                    )
+                    fix_pr_number = existing_pr
+                else:
+                    fix_pr_number = await self._open_draft_pr(
+                        integration=integration,
+                        ci_run=ci_run,
+                        fix_branch=fix_branch,
+                        files_written=files_written,
+                        commit_sha=commit_sha,
+                        tool=parsed.tool,
+                        root_cause=fix_plan.root_cause,
+                        parsed=parsed,
+                        validation_tool_version=validation_tool_version,
+                        enable_auto_merge=enable_auto_merge,
+                        parity_notice=format_parity_notice(parity_result),
+                    )
+
+            # ── 9. Comment on original PR ─────────────────────────────────────────
             if ci_run.pr_number and integration.github_token:
-                await self._comment_unable_to_fix(
+                await self._comment_on_pr(
                     integration=integration,
                     ci_run=ci_run,
-                    reason=reason,
-                    root_cause=fix_plan.root_cause if fix_plan else "",
+                    files_written=files_written,
+                    commit_sha=commit_sha,
                     tool=parsed.tool,
+                    root_cause=fix_plan.root_cause,
+                    parsed=parsed,
+                    fix_pr_number=fix_pr_number,
+                    validation_tool_version=validation_tool_version,
                 )
-            return AgentResult(
-                success=False,
-                output={
-                    "reason": reason,
-                    "root_cause": fix_plan.root_cause if fix_plan else "",
-                    "tool": parsed.tool,
-                    "fingerprint": fingerprint,
-                },
-            )
-
-        files_written = [p.path for p in fix_plan.patches]
 
-        # ── 6b. Phase 4: Tool version parity check ────────────────────────────
-        # Compare local tool version to the version that caused the failure.
-        # We use last_good_tool_version from the fingerprint as the "failure version"
-        # proxy (it was the version at the last successful fix — close enough for parity).
-        parity_result = await self._check_tool_version_parity(
-            fingerprint_hash=fingerprint,
-            local_version=validation_tool_version,
-        )
-        parity_ok = parity_result.ok
-
-        # ── 7. Commit to safe branch (NEVER the author's branch) ─────────────
-        fix_branch = f"phalanx/ci-fix/{self.ci_fix_run_id}"
-        commit_result = await self._commit_to_safe_branch(
-            workspace=workspace,
-            source_branch=ci_run.branch,
-            fix_branch=fix_branch,
-            commit_message=(
-                f"fix(ci): resolve {parsed.tool} failure [{ci_run.ci_provider}]\n\n"
-                f"Root cause: {fix_plan.root_cause}\n"
-                f"Files: {', '.join(files_written)}\n"
-                f"Validated: {validation_tool_version}\n"
-                f"CI Fix Run: {self.ci_fix_run_id}"
-            ),
-            github_token=self._get_github_token(integration),
-            repo_full_name=ci_run.repo_full_name,
-        )
-        commit_sha = commit_result.get("sha")
-        push_failed = commit_result.get("push_failed", False)
+            # ── 10. Mark FIXED ────────────────────────────────────────────────────
+            async with get_db() as session:
+                await session.execute(
+                    update(CIFixRun)
+                    .where(CIFixRun.id == self.ci_fix_run_id)
+                    .values(
+                        status="FIXED",
+                        fix_commit_sha=commit_sha,
+                        fix_branch=fix_branch,
+                        fix_pr_number=fix_pr_number,
+                        fingerprint_hash=fingerprint,
+                        validation_tool_version=validation_tool_version,
+                        tool_version_parity_ok=parity_ok,
+                        completed_at=datetime.now(UTC),
+                    )
+                )
+                await session.commit()
 
-        if not commit_sha:
-            await self._mark_failed_with_fields(
-                ci_run,
-                reason=commit_result.get("error", "commit_failed"),
+            # ── Phase 2: Store winning patches in fingerprint table for future reuse
+            await self._update_fingerprint_on_success(
                 fingerprint_hash=fingerprint,
-                validation_tool_version=validation_tool_version,
+                patches=fix_plan.patches,
+                tool_version=validation_tool_version,
+                parsed_log=parsed,
             )
-            return AgentResult(success=False, output={}, error="commit failed")
-
-        # ── 8. Open PR (draft or auto-merge depending on integration config) ────
-        # Phase 4: auto-merge only if integration.auto_merge=True AND the
-        # fingerprint has enough successful fixes AND tool version parity is OK.
-        fingerprint_success = await self._get_fingerprint_success_count(fingerprint)
-        enable_auto_merge = should_auto_merge(
-            integration_auto_merge=getattr(integration, "auto_merge", False),
-            fingerprint_success_count=fingerprint_success,
-            min_success_count=getattr(integration, "min_success_count", 3),
-            parity_ok=parity_ok,
-        )
 
-        fix_pr_number: int | None = None
-        if not push_failed and integration.github_token:
-            fix_pr_number = await self._open_draft_pr(
-                integration=integration,
-                ci_run=ci_run,
-                fix_branch=fix_branch,
-                files_written=files_written,
-                commit_sha=commit_sha,
-                tool=parsed.tool,
-                root_cause=fix_plan.root_cause,
-                parsed=parsed,
-                validation_tool_version=validation_tool_version,
-                enable_auto_merge=enable_auto_merge,
-                parity_notice=format_parity_notice(parity_result),
+            # ── Update final context state ────────────────────────────────────────
+            ctx.verified_patch = VerifiedPatch(
+                files_modified=files_written,
+                validation_cmd=validation_tool_version or "",
+                success=True,
             )
+            # Phase 2 already sets ctx.reproduction_result earlier in the pipeline.
+            # Only set it here as a fallback if sandbox was disabled (still None).
+            if ctx.reproduction_result is None:
+                ctx.reproduction_result = ReproductionResult(verdict="skipped")
+
+            # ── Phase 3: Broad verification (catch regressions post-fix) ─────────
+            verifier = VerifierAgent()
+            verification_result = await verifier.verify(
+                workspace_path=workspace,
+                stack=ctx.sandbox_stack or "python",
+                sandbox_result=sandbox_result,
+                timeout_seconds=settings.sandbox_timeout_seconds,
+            )
+            ctx.verification_result = verification_result
+            await self._persist_context(ctx)
 
-        # ── 9. Comment on original PR ─────────────────────────────────────────
-        if ci_run.pr_number and integration.github_token:
-            await self._comment_on_pr(
-                integration=integration,
-                ci_run=ci_run,
-                files_written=files_written,
-                commit_sha=commit_sha,
+            if verification_result.verdict == "failed":
+                self._log.warning(
+                    "ci_fixer.verification_failed",
+                    repo=ci_run.repo_full_name,
+                    tool=parsed.tool,
+                    output=verification_result.output[:300],
+                )
+                ctx.complete("escalated", error="verification failed: post-fix regression detected")
+                await self._persist_context(ctx)
+                await self._mark_failed(ci_run, "verification_failed")
+                return AgentResult(
+                    success=False,
+                    output={"reason": "verification_failed", "tool": parsed.tool},
+                )
+
+            ctx.fix_commit_sha = commit_sha
+            ctx.fix_pr_number = fix_pr_number
+            ctx.fix_branch = fix_branch
+            ctx.complete("fixed")
+            await self._persist_context(ctx)
+
+            self._log.info(
+                "ci_fixer.execute.done",
                 tool=parsed.tool,
-                root_cause=fix_plan.root_cause,
-                parsed=parsed,
+                files=files_written,
+                commit_sha=commit_sha,
+                fix_branch=fix_branch,
                 fix_pr_number=fix_pr_number,
-                validation_tool_version=validation_tool_version,
+                root_cause=fix_plan.root_cause,
+                fingerprint=fingerprint,
             )
 
-        # ── 10. Mark FIXED ────────────────────────────────────────────────────
-        async with get_db() as session:
-            await session.execute(
-                update(CIFixRun)
-                .where(CIFixRun.id == self.ci_fix_run_id)
-                .values(
-                    status="FIXED",
-                    fix_commit_sha=commit_sha,
-                    fix_branch=fix_branch,
-                    fix_pr_number=fix_pr_number,
-                    fingerprint_hash=fingerprint,
-                    validation_tool_version=validation_tool_version,
-                    tool_version_parity_ok=parity_ok,
-                    completed_at=datetime.now(UTC),
-                )
+            return AgentResult(
+                success=True,
+                output={
+                    "tool": parsed.tool,
+                    "root_cause": fix_plan.root_cause,
+                    "files_fixed": files_written,
+                    "commit_sha": commit_sha,
+                    "fix_branch": fix_branch,
+                    "fix_pr_number": fix_pr_number,
+                    "confidence": fix_plan.confidence,
+                    "fingerprint": fingerprint,
+                    "validation_tool_version": validation_tool_version,
+                },
             )
-            await session.commit()
+        finally:
+            if sandbox_result:
+                await provisioner.release(sandbox_result)
 
-        # ── Phase 2: Store winning patches in fingerprint table for future reuse
-        await self._update_fingerprint_on_success(
-            fingerprint_hash=fingerprint,
-            patches=fix_plan.patches,
-            tool_version=validation_tool_version,
-            parsed_log=parsed,
-        )
+    # ── Pipeline context persistence ───────────────────────────────────────────
 
-        self._log.info(
-            "ci_fixer.execute.done",
-            tool=parsed.tool,
-            files=files_written,
-            commit_sha=commit_sha,
-            fix_branch=fix_branch,
-            fix_pr_number=fix_pr_number,
-            root_cause=fix_plan.root_cause,
-            fingerprint=fingerprint,
-        )
+    async def _persist_context(self, ctx: CIFixContext) -> None:
+        """Persist the current CIFixContext state to CIFixRun.pipeline_context_json."""
+        import json  # noqa: PLC0415
 
-        return AgentResult(
-            success=True,
-            output={
-                "tool": parsed.tool,
-                "root_cause": fix_plan.root_cause,
-                "files_fixed": files_written,
-                "commit_sha": commit_sha,
-                "fix_branch": fix_branch,
-                "fix_pr_number": fix_pr_number,
-                "confidence": fix_plan.confidence,
-                "fingerprint": fingerprint,
-                "validation_tool_version": validation_tool_version,
-            },
-        )
+        try:
+            async with get_db() as session:
+                await session.execute(
+                    update(CIFixRun)
+                    .where(CIFixRun.id == self.ci_fix_run_id)
+                    .values(pipeline_context_json=json.dumps(ctx.to_dict()))
+                )
+                await session.commit()
+        except Exception as exc:
+            self._log.warning("ci_fixer.context_persist_error", error=str(exc))
 
     # ── Log fetching ───────────────────────────────────────────────────────────
 
@@ -507,16 +669,14 @@ def _apply_patches(self, workspace: Path, patches: list[FilePatch]) -> list[str]
                 continue
 
             try:
-                original_lines = full_path.read_text(encoding="utf-8").splitlines(
-                    keepends=True
-                )
+                original_lines = full_path.read_text(encoding="utf-8").splitlines(keepends=True)
             except Exception as exc:
                 self._log.warning("ci_fixer.patch_read_failed", path=patch.path, error=str(exc))
                 continue
 
             # Convert to 0-indexed slice
             s = patch.start_line - 1
-            e = patch.end_line       # exclusive in Python slice
+            e = patch.end_line  # exclusive in Python slice
 
             # Bounds check
             if s < 0 or e > len(original_lines) or s >= e:
@@ -646,9 +806,8 @@ async def _commit_to_safe_branch(
             push_failed = False
             if github_token and repo.remotes:
                 try:
-                    auth_url = (
-                        f"https://github.com/{repo_full_name}.git"
-                        .replace("https://", f"https://{github_token}@")
+                    auth_url = f"https://github.com/{repo_full_name}.git".replace(
+                        "https://", f"https://{github_token}@"
                     )
                     repo.git.push(auth_url, f"HEAD:{fix_branch}", "--set-upstream")
                     self._log.info("ci_fixer.git.pushed", branch=fix_branch, sha=sha)
@@ -712,11 +871,10 @@ async def _open_draft_pr(
         )
 
         footer = (
-            f"*Auto-merge is enabled — will merge when all checks pass.*\n"
+            "*Auto-merge is enabled — will merge when all checks pass.*\n"
             if enable_auto_merge
-            else
-            f"*This is a draft PR — Phalanx never auto-merges. "
-            f"Review the diff above, then mark ready and merge if correct.*\n"
+            else "*This is a draft PR — Phalanx never auto-merges. "
+            "Review the diff above, then mark ready and merge if correct.*\n"
         )
 
         body = (
@@ -783,6 +941,54 @@ async def _open_draft_pr(
 
         return None
 
+    async def _find_existing_fix_pr(
+        self,
+        integration: CIIntegration,
+        ci_run: CIFixRun,
+    ) -> int | None:
+        """
+        Look for an open Phalanx fix PR already targeting ci_run.branch.
+
+        Returns the PR number if found, None otherwise.
+
+        This prevents duplicate fix PRs when the pipeline is triggered
+        multiple times for the same failing branch (e.g. repeated CI runs).
+        A new commit is pushed to the existing PR instead of opening a second one.
+        """
+        import httpx  # noqa: PLC0415
+
+        try:
+            async with httpx.AsyncClient(timeout=15) as client:
+                r = await client.get(
+                    f"https://api.github.com/repos/{ci_run.repo_full_name}/pulls",
+                    headers={
+                        "Authorization": f"Bearer {integration.github_token}",
+                        "Accept": "application/vnd.github+json",
+                    },
+                    params={
+                        "state": "open",
+                        "base": ci_run.branch,
+                        "head": f"{ci_run.repo_full_name.split('/')[0]}:phalanx/ci-fix/",
+                    },
+                )
+            if r.status_code != 200:
+                return None
+            prs = r.json()
+            # Filter to PRs whose head branch starts with phalanx/ci-fix/
+            for pr in prs:
+                head_ref = pr.get("head", {}).get("ref", "")
+                if head_ref.startswith("phalanx/ci-fix/"):
+                    self._log.info(
+                        "ci_fixer.existing_fix_pr_found",
+                        pr=pr["number"],
+                        head=head_ref,
+                        base=ci_run.branch,
+                    )
+                    return pr["number"]
+        except Exception as exc:
+            self._log.warning("ci_fixer.find_existing_pr_error", error=str(exc))
+        return None
+
     async def _enable_github_auto_merge(
         self,
         integration: CIIntegration,
@@ -961,9 +1167,7 @@ async def _comment_unable_to_fix(
     # ── DB helpers ─────────────────────────────────────────────────────────────
 
     async def _load_ci_fix_run(self, session) -> CIFixRun | None:
-        result = await session.execute(
-            select(CIFixRun).where(CIFixRun.id == self.ci_fix_run_id)
-        )
+        result = await session.execute(select(CIFixRun).where(CIFixRun.id == self.ci_fix_run_id))
         return result.scalar_one_or_none()
 
     async def _load_integration(self, session, integration_id: str) -> CIIntegration | None:
@@ -1002,7 +1206,7 @@ async def _check_tool_version_parity(
         self,
         fingerprint_hash: str | None,
         local_version: str,
-    ) -> "VersionParityResult":
+    ) -> VersionParityResult:
         """
         Phase 4: Compare local tool version to the version at the last successful fix.
 
@@ -1018,8 +1222,6 @@ async def _check_tool_version_parity(
             )
 
         try:
-            from sqlalchemy import and_  # noqa: PLC0415
-
             async with get_db() as session:
                 result = await session.execute(
                     select(CIFailureFingerprint).where(
@@ -1068,8 +1270,8 @@ async def _get_fingerprint_success_count(self, fingerprint_hash: str | None) ->
     async def _load_flaky_patterns(
         self,
         repo_full_name: str,
-        parsed_log: "ParsedLog",
-    ) -> list["CIFlakyPattern"]:
+        parsed_log: ParsedLog,
+    ) -> list[CIFlakyPattern]:
         """
         Phase 3: Load CIFlakyPattern rows matching the errors in parsed_log.
 
@@ -1083,9 +1285,7 @@ async def _load_flaky_patterns(
             from sqlalchemy import and_, or_  # noqa: PLC0415
 
             # Collect (file, code) pairs from the parsed errors
-            error_keys = [
-                (e.file, e.code) for e in parsed_log.lint_errors
-            ] + [
+            error_keys = [(e.file, e.code) for e in parsed_log.lint_errors] + [
                 (e.file, getattr(e, "code", None)) for e in parsed_log.type_errors
             ]
 
@@ -1189,9 +1389,9 @@ async def _async_lookup_fix_history(self, fingerprint_hash: str) -> list[dict] |
     async def _update_fingerprint_on_success(
         self,
         fingerprint_hash: str,
-        patches: list["FilePatch"],
+        patches: list[FilePatch],
         tool_version: str,
-        parsed_log: "ParsedLog",
+        parsed_log: ParsedLog,
     ) -> None:
         """
         After a successful fix is validated, upsert CIFailureFingerprint with
diff --git a/phalanx/api/main.py b/phalanx/api/main.py
index a67dcd60..6131eb0d 100644
--- a/phalanx/api/main.py
+++ b/phalanx/api/main.py
@@ -10,6 +10,7 @@
 from fastapi.responses import JSONResponse
 
 from phalanx import __version__
+from phalanx.api.routes.ci_fix_runs import router as ci_fix_runs_router
 from phalanx.api.routes.ci_integrations import router as ci_integrations_router
 from phalanx.api.routes.ci_webhooks import router as ci_webhooks_router
 from phalanx.api.routes.demos import router as demos_router
@@ -89,6 +90,7 @@ async def api_key_middleware(request: Request, call_next):
 app.include_router(ci_webhooks_router, prefix="/webhook")
 app.include_router(demos_router, prefix="/v1")
 app.include_router(ci_integrations_router, prefix="/v1")
+app.include_router(ci_fix_runs_router, prefix="/v1")
 app.include_router(health_router)
 
 
diff --git a/phalanx/api/routes/ci_fix_runs.py b/phalanx/api/routes/ci_fix_runs.py
new file mode 100644
index 00000000..fd23598a
--- /dev/null
+++ b/phalanx/api/routes/ci_fix_runs.py
@@ -0,0 +1,148 @@
+"""
+CI Fix Runs API — inspect the state of multi-agent CI fix pipeline runs.
+
+Endpoints:
+  GET /v1/ci-fix-runs/{run_id}/context  — full CIFixContext pipeline state
+  GET /v1/ci-fix-runs/{run_id}          — CIFixRun record summary
+  GET /v1/ci-fix-runs                   — list runs (filtered by repo/branch/status)
+"""
+
+from __future__ import annotations
+
+import json
+
+import structlog
+from fastapi import APIRouter, HTTPException, Query, status
+from sqlalchemy import select
+
+from phalanx.ci_fixer.context import CIFixContext
+from phalanx.db.models import CIFixRun
+from phalanx.db.session import get_db
+
+log = structlog.get_logger(__name__)
+
+router = APIRouter(prefix="/ci-fix-runs", tags=["ci-fix-runs"])
+
+
+@router.get("/{run_id}/context")
+async def get_fix_run_context(run_id: str) -> dict:
+    """
+    Return the full CIFixContext pipeline state for a CI fix run.
+
+    This is the shared state object written by each agent as the pipeline
+    progresses. Use this to inspect exactly what each agent produced,
+    which stage the pipeline is at, and what the final outcome was.
+
+    Returns 404 if the run does not exist.
+    Returns the raw context dict if pipeline_context_json is not yet
+    populated (run is too old or not yet started).
+    """
+    async with get_db() as session:
+        result = await session.execute(select(CIFixRun).where(CIFixRun.id == run_id))
+        ci_run = result.scalar_one_or_none()
+
+    if ci_run is None:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=f"CIFixRun {run_id} not found",
+        )
+
+    if not ci_run.pipeline_context_json:
+        # Run exists but was created before Phase 1 — return basic info
+        return {
+            "ci_fix_run_id": str(ci_run.id),
+            "repo": ci_run.repo_full_name,
+            "branch": ci_run.branch,
+            "commit_sha": ci_run.commit_sha,
+            "original_build_id": ci_run.ci_build_id,
+            "status": ci_run.status,
+            "final_status": "unknown",
+            "current_stage": "unknown",
+            "_note": "This run predates the multi-agent pipeline context. No detailed state available.",
+        }
+
+    try:
+        ctx_dict = json.loads(ci_run.pipeline_context_json)
+        ctx = CIFixContext.from_dict(ctx_dict)
+        return {
+            **ctx.to_dict(),
+            "current_stage": ctx.current_stage,
+        }
+    except Exception as exc:
+        log.warning("ci_fix_runs.context_parse_error", run_id=run_id, error=str(exc))
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Failed to parse pipeline context",
+        ) from exc
+
+
+@router.get("/{run_id}")
+async def get_fix_run(run_id: str) -> dict:
+    """Return a summary of a CI fix run record."""
+    async with get_db() as session:
+        result = await session.execute(select(CIFixRun).where(CIFixRun.id == run_id))
+        ci_run = result.scalar_one_or_none()
+
+    if ci_run is None:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=f"CIFixRun {run_id} not found",
+        )
+
+    return {
+        "id": str(ci_run.id),
+        "repo": ci_run.repo_full_name,
+        "branch": ci_run.branch,
+        "commit_sha": ci_run.commit_sha,
+        "ci_provider": ci_run.ci_provider,
+        "ci_build_id": ci_run.ci_build_id,
+        "status": ci_run.status,
+        "fix_branch": ci_run.fix_branch,
+        "fix_pr_number": ci_run.fix_pr_number,
+        "fix_commit_sha": ci_run.fix_commit_sha,
+        "fingerprint_hash": ci_run.fingerprint_hash,
+        "error": ci_run.error,
+        "created_at": ci_run.created_at.isoformat() if ci_run.created_at else None,
+        "completed_at": ci_run.completed_at.isoformat() if ci_run.completed_at else None,
+        "has_context": ci_run.pipeline_context_json is not None,
+    }
+
+
+@router.get("")
+async def list_fix_runs(
+    repo: str | None = Query(None, description="Filter by repo (owner/repo)"),
+    branch: str | None = Query(None, description="Filter by branch"),
+    run_status: str | None = Query(
+        None, alias="status", description="Filter by status: PENDING, FIXED, FAILED"
+    ),
+    limit: int = Query(20, ge=1, le=100),
+) -> dict:
+    """List CI fix runs with optional filters."""
+    async with get_db() as session:
+        q = select(CIFixRun).order_by(CIFixRun.created_at.desc()).limit(limit)
+        if repo:
+            q = q.where(CIFixRun.repo_full_name == repo)
+        if branch:
+            q = q.where(CIFixRun.branch == branch)
+        if run_status:
+            q = q.where(CIFixRun.status == run_status.upper())
+
+        result = await session.execute(q)
+        runs = result.scalars().all()
+
+    return {
+        "runs": [
+            {
+                "id": str(r.id),
+                "repo": r.repo_full_name,
+                "branch": r.branch,
+                "status": r.status,
+                "fix_pr_number": r.fix_pr_number,
+                "error": r.error,
+                "created_at": r.created_at.isoformat() if r.created_at else None,
+                "has_context": r.pipeline_context_json is not None,
+            }
+            for r in runs
+        ],
+        "count": len(runs),
+    }
diff --git a/phalanx/api/routes/ci_webhooks.py b/phalanx/api/routes/ci_webhooks.py
index 1589b38b..a3fe5cdf 100644
--- a/phalanx/api/routes/ci_webhooks.py
+++ b/phalanx/api/routes/ci_webhooks.py
@@ -193,7 +193,7 @@ def _verify_buildkite_signature(body: bytes, token: str, stored_token: str) -> b
 # ── GitHub App webhook ─────────────────────────────────────────────────────────
 
 
-@router.post("/webhook/github", status_code=status.HTTP_200_OK)
+@router.post("/github", status_code=status.HTTP_200_OK)
 async def github_webhook(
     request: Request,
     x_hub_signature_256: str = Header(default=""),
@@ -273,7 +273,7 @@ async def github_webhook(
 # ── Buildkite webhook ──────────────────────────────────────────────────────────
 
 
-@router.post("/webhook/buildkite", status_code=status.HTTP_200_OK)
+@router.post("/buildkite", status_code=status.HTTP_200_OK)
 async def buildkite_webhook(
     request: Request,
     x_buildkite_token: str = Header(default=""),
@@ -336,44 +336,141 @@ async def buildkite_webhook(
     }
 
 
-# ── CircleCI webhook (Phase 2 stub) ────────────────────────────────────────────
+# ── CircleCI webhook ───────────────────────────────────────────────────────────
 
 
-@router.post("/webhook/circleci", status_code=status.HTTP_200_OK)
-async def circleci_webhook(request: Request):
-    """CircleCI webhook — Phase 2."""
-    return {"status": "coming_soon", "provider": "circleci"}
+def _verify_circleci_signature(body: bytes, signature: str, secret: str) -> bool:
+    """
+    Verify CircleCI webhook signature.
+    CircleCI sends: circleci-signature: v1=<hex_hmac_sha256>
+    """
+    if not secret:
+        return True
+    expected = "v1=" + hmac.new(secret.encode(), body, hashlib.sha256).hexdigest()
+    return hmac.compare_digest(expected, signature or "")
 
 
-# ── Jenkins webhook (Phase 2 stub) ─────────────────────────────────────────────
+@router.post("/circleci", status_code=status.HTTP_200_OK)
+async def circleci_webhook(
+    request: Request,
+    circleci_signature: str = Header(default="", alias="circleci-signature"),
+):
+    """
+    Receives CircleCI webhook events.
+
+    Handles:
+    - workflow-completed with status=failed → dispatch CI fix
+
+    Setup in CircleCI: Project Settings → Webhooks
+    Add URL: https://api.usephalanx.com/webhook/circleci
+    Events: Workflow Completed
+    Signing secret: set CIRCLECI_WEBHOOK_SECRET in phalanx env
+
+    Payload shape (workflow-completed):
+      {
+        "type": "workflow-completed",
+        "workflow": {
+          "id": "<workflow_uuid>",
+          "name": "<workflow_name>",
+          "status": "failed",
+          "created_at": "...",
+          "stopped_at": "..."
+        },
+        "pipeline": {
+          "id": "<pipeline_uuid>",
+          "number": 42,
+          "trigger": {"type": "webhook", ...},
+          "vcs": {
+            "origin_repository_url": "https://github.com/owner/repo",
+            "branch": "fix/my-branch",
+            "revision": "<commit_sha>",
+            "commit": {"subject": "...", "author": {"login": "..."}}
+          }
+        },
+        "project": {"id": "...", "name": "repo", "slug": "github/owner/repo"},
+        "organization": {"name": "owner", ...}
+      }
+    """
+    body = await request.body()
 
+    if not _verify_circleci_signature(body, circleci_signature, settings.circleci_webhook_secret):
+        log.warning("ci_webhook.circleci.invalid_signature")
+        raise HTTPException(status_code=401, detail="Invalid CircleCI signature")
 
-@router.post("/webhook/jenkins", status_code=status.HTTP_200_OK)
-async def jenkins_webhook(request: Request):
-    """Jenkins webhook — Phase 2."""
-    return {"status": "coming_soon", "provider": "jenkins"}
+    payload = json.loads(body)
+    event_type = payload.get("type")
 
+    if event_type != "workflow-completed":
+        return {"status": "ignored", "type": event_type}
 
-# ── Short-path aliases (router is mounted at /webhook, so /github → /webhook/github) ───────────
+    workflow = payload.get("workflow", {})
+    if workflow.get("status") not in ("failed", "error", "failing", "canceled"):
+        return {"status": "ignored", "workflow_status": workflow.get("status")}
 
+    pipeline = payload.get("pipeline", {})
+    vcs = pipeline.get("vcs", {})
 
-@router.post("/github", status_code=status.HTTP_200_OK)
-async def github_webhook_alias(
-    request: Request,
-    x_hub_signature_256: str = Header(default=""),
-    x_github_event: str = Header(default=""),
-):
-    """Alias for /webhook/github — correct path when router is mounted at /webhook prefix."""
-    return await github_webhook(request, x_hub_signature_256, x_github_event)
+    # Extract repo name from the VCS URL (always GitHub for phalanx)
+    repo_url = vcs.get("origin_repository_url", "")
+    repo_full_name = _parse_repo_name(repo_url)
+    if not repo_full_name:
+        # Fallback: try project slug (format: "github/owner/repo")
+        slug = payload.get("project", {}).get("slug", "")
+        if slug.startswith("github/"):
+            repo_full_name = slug[len("github/") :]
+    if not repo_full_name:
+        return {"status": "skipped", "reason": "cannot_parse_repo"}
+
+    branch = vcs.get("branch", "")
+    commit_sha = vcs.get("revision", "")
+    pr_author: str | None = vcs.get("commit", {}).get("author", {}).get("login") or vcs.get(
+        "commit", {}
+    ).get("committer", {}).get("login")
+
+    # CircleCI build_id = workflow ID (used to fetch job list + logs)
+    workflow_id = workflow.get("id", "")
+    workflow_name = workflow.get("name", "")
+    build_url = (
+        f"https://app.circleci.com/pipelines/github/{repo_full_name}"
+        f"/{pipeline.get('number', '')}/workflows/{workflow_id}"
+    )
 
+    # PR number: CircleCI doesn't directly provide it in workflow webhooks.
+    # It may be in the branch name (e.g. "pull/42") or absent.
+    pr_number: int | None = None
+    if branch.startswith("pull/"):
+        import contextlib  # noqa: PLC0415
 
-@router.post("/buildkite", status_code=status.HTTP_200_OK)
-async def buildkite_webhook_alias(
-    request: Request,
-    x_buildkite_token: str = Header(default=""),
-):
-    """Alias for /webhook/buildkite."""
-    return await buildkite_webhook(request, x_buildkite_token)
+        with contextlib.suppress(IndexError, ValueError):
+            pr_number = int(branch.split("/")[1])
+
+    event = CIFailureEvent(
+        provider="circleci",
+        repo_full_name=repo_full_name,
+        branch=branch,
+        commit_sha=commit_sha,
+        build_id=workflow_id,
+        build_url=build_url,
+        failed_jobs=[workflow_name] if workflow_name else [],
+        pr_number=pr_number,
+        pr_author=pr_author,
+        raw_payload=payload,
+    )
+
+    ci_run = await _dispatch_ci_fix(event)
+    return {
+        "status": "dispatched" if ci_run else "skipped",
+        "ci_fix_run_id": ci_run.id if ci_run else None,
+    }
+
+
+# ── Jenkins webhook (Phase 2 stub) ─────────────────────────────────────────────
+
+
+@router.post("/jenkins", status_code=status.HTTP_200_OK)
+async def jenkins_webhook(request: Request):
+    """Jenkins webhook — Phase 2."""
+    return {"status": "coming_soon", "provider": "jenkins"}
 
 
 # ── Helpers ────────────────────────────────────────────────────────────────────
diff --git a/phalanx/ci_fixer/analyst.py b/phalanx/ci_fixer/analyst.py
index 7b496b09..68f72c51 100644
--- a/phalanx/ci_fixer/analyst.py
+++ b/phalanx/ci_fixer/analyst.py
@@ -50,8 +50,8 @@ class FileWindow:
     """A contiguous slice of a file that was shown to the LLM."""
 
     path: str
-    start_line: int   # 1-indexed, inclusive
-    end_line: int     # 1-indexed, inclusive
+    start_line: int  # 1-indexed, inclusive
+    end_line: int  # 1-indexed, inclusive
     original_lines: list[str]
 
 
@@ -65,8 +65,8 @@ class FilePatch:
     """
 
     path: str
-    start_line: int          # 1-indexed
-    end_line: int            # 1-indexed
+    start_line: int  # 1-indexed
+    end_line: int  # 1-indexed
     corrected_lines: list[str]
     reason: str = ""
 
@@ -91,7 +91,7 @@ class FixPlan:
       "low"    → agent does NOT commit; logs for human review
     """
 
-    confidence: str   # "high" | "medium" | "low"
+    confidence: str  # "high" | "medium" | "low"
     root_cause: str
     patches: list[FilePatch] = field(default_factory=list)
     needs_new_test: bool = False
@@ -178,7 +178,7 @@ def __init__(self, call_llm, history_lookup=None):
 
     def analyze(
         self,
-        parsed_log: "ParsedLog",
+        parsed_log: ParsedLog,
         workspace: Path,
         fingerprint_hash: str | None = None,
     ) -> FixPlan:
@@ -275,9 +275,7 @@ def analyze(
 
     # ── File reading ───────────────────────────────────────────────────────────
 
-    def _read_windows(
-        self, workspace: Path, parsed_log: "ParsedLog"
-    ) -> list[FileWindow]:
+    def _read_windows(self, workspace: Path, parsed_log: ParsedLog) -> list[FileWindow]:
         """
         For each file in parsed_log.all_files, read a window of ±WINDOW lines
         around every error line in that file.  Merge overlapping windows.
@@ -324,7 +322,7 @@ def _read_windows(
             windows.append(
                 FileWindow(
                     path=rel_path,
-                    start_line=lo + 1,   # convert to 1-indexed
+                    start_line=lo + 1,  # convert to 1-indexed
                     end_line=hi,
                     original_lines=all_lines[lo:hi],
                 )
@@ -371,10 +369,7 @@ def _parse_and_validate_patches(
                 continue
 
             # Ensure every line ends with \n
-            corrected = [
-                line if line.endswith("\n") else line + "\n"
-                for line in corrected
-            ]
+            corrected = [line if line.endswith("\n") else line + "\n" for line in corrected]
 
             window = window_by_path[path]
 
@@ -442,7 +437,6 @@ def _read_files(self, workspace: Path, paths: list[str]) -> str:
         mock_log.build_errors = []
 
         # Read each file as a full window (no error lines → defaults to line 1)
-        from phalanx.ci_fixer.log_parser import LintError  # noqa: PLC0415
 
         results: list[str] = []
         for rel_path in paths[:_MAX_FILES]:
@@ -472,12 +466,10 @@ def _format_windows(windows: list[FileWindow]) -> str:
     sections: list[str] = []
     for w in windows:
         numbered = "".join(
-            f"{w.start_line + i:5d}: {line}"
-            for i, line in enumerate(w.original_lines)
+            f"{w.start_line + i:5d}: {line}" for i, line in enumerate(w.original_lines)
         )
         sections.append(
-            f"### {w.path} (lines {w.start_line}–{w.end_line} of file)\n"
-            f"```\n{numbered}```"
+            f"### {w.path} (lines {w.start_line}–{w.end_line} of file)\n```\n{numbered}```"
         )
     return "\n\n".join(sections)
 
diff --git a/phalanx/ci_fixer/context.py b/phalanx/ci_fixer/context.py
new file mode 100644
index 00000000..cc1dd336
--- /dev/null
+++ b/phalanx/ci_fixer/context.py
@@ -0,0 +1,242 @@
+"""
+CIFixContext — shared state object for the multi-agent CI fix pipeline.
+
+Every agent in the pipeline reads from and writes to this object.
+It is persisted as JSON in CIFixRun.pipeline_context_json so the full
+pipeline state is inspectable at any point via the API.
+
+Design:
+  - Dataclass with optional fields — agents populate their slice and leave
+    the rest None until reached
+  - Serializable to/from dict (JSON) — no custom encoder needed
+  - Immutable agent outputs — each stage replaces its field entirely,
+    never mutates in place
+  - Final status is terminal — once set, no agent should write further
+
+Agent → field mapping:
+  Log Analyst       → structured_failure
+  Root Cause Agent  → classified_failure
+  Sandbox Prov.     → sandbox_id, sandbox_stack
+  Reproducer Agent  → reproduction_result
+  Fix Agent         → verified_patch
+  Verifier Agent    → verification_result
+  Commit Agent      → fix_commit_sha, fix_pr_number, fix_pr_url
+"""
+
+from __future__ import annotations
+
+from dataclasses import asdict, dataclass, field
+from datetime import UTC, datetime
+from typing import Any, Literal
+
+# ── Sub-objects (one per agent output) ────────────────────────────────────────
+
+
+@dataclass
+class StructuredFailure:
+    """Output of Log Analyst — structured representation of the CI failure."""
+
+    tool: str
+    """Tool that failed: 'ruff', 'pytest', 'mypy', 'tsc', 'eslint', etc."""
+
+    failure_type: str
+    """Category: 'lint', 'type_error', 'test_regression', 'build', 'dependency', 'unknown'"""
+
+    reproducer_cmd: str
+    """Exact command CI ran: 'ruff check phalanx/ tests/ --output-format=github'"""
+
+    errors: list[dict[str, Any]] = field(default_factory=list)
+    """Parsed errors — list of {file, line, col, code, message} dicts"""
+
+    failing_files: list[str] = field(default_factory=list)
+    """File paths mentioned in the failure"""
+
+    log_excerpt: str = ""
+    """Relevant section of the raw CI log"""
+
+    confidence: float = 1.0
+    """Parser confidence 0.0–1.0"""
+
+
+@dataclass
+class ClassifiedFailure:
+    """Output of Root Cause Agent — classification + escalation decision."""
+
+    tier: Literal["L1_auto", "L2_escalate"]
+    """L1 = auto-fixable; L2 = needs human"""
+
+    root_cause: str
+    """Human-readable root cause hypothesis"""
+
+    stack: str
+    """Detected tech stack: 'python', 'node', 'go', 'java', 'rust', 'unknown'"""
+
+    confidence: float = 1.0
+    """Classification confidence 0.0–1.0"""
+
+    escalation_reason: str = ""
+    """Populated when tier == L2 — why we're not attempting auto-fix"""
+
+
+@dataclass
+class ReproductionResult:
+    """Output of Reproducer Agent — did we confirm the failure in sandbox?"""
+
+    verdict: Literal["confirmed", "flaky", "env_mismatch", "timeout", "skipped"]
+    """
+    confirmed    — sandbox reproduced the same failure
+    flaky        — command passed in sandbox → likely transient CI issue
+    env_mismatch — command failed with a DIFFERENT error → wrong environment
+    timeout      — sandbox command timed out
+    skipped      — sandbox not available (Phase 1 fallback)
+    """
+
+    exit_code: int = -1
+    output: str = ""
+    reproducer_cmd: str = ""
+
+
+@dataclass
+class VerifiedPatch:
+    """Output of Fix Agent — patch that has been validated locally."""
+
+    files_modified: list[str] = field(default_factory=list)
+    validation_cmd: str = ""
+    validation_output: str = ""
+    success: bool = False
+    turns_used: int = 0
+
+
+@dataclass
+class VerificationResult:
+    """Output of Verifier Agent — does the app/tests still work after the fix?"""
+
+    verdict: Literal["passed", "failed", "skipped", "timeout"]
+    output: str = ""
+    cmd_run: str = ""
+
+
+# ── Main context object ────────────────────────────────────────────────────────
+
+
+@dataclass
+class CIFixContext:
+    """
+    Shared state object for the multi-agent CI fix pipeline.
+
+    Persisted as JSON in CIFixRun.pipeline_context_json.
+    All fields except the identity fields are optional — populated
+    as each agent completes its work.
+    """
+
+    # ── Identity (always set at pipeline start) ────────────────────────────
+    ci_fix_run_id: str
+    repo: str
+    branch: str
+    commit_sha: str
+    original_build_id: str
+
+    # ── Agent outputs (None until that agent runs) ─────────────────────────
+    structured_failure: StructuredFailure | None = None
+    classified_failure: ClassifiedFailure | None = None
+
+    sandbox_id: str | None = None
+    sandbox_stack: str | None = None
+
+    reproduction_result: ReproductionResult | None = None
+    verified_patch: VerifiedPatch | None = None
+    verification_result: VerificationResult | None = None
+
+    # ── Commit Agent output ────────────────────────────────────────────────
+    fix_commit_sha: str | None = None
+    fix_pr_number: int | None = None
+    fix_pr_url: str | None = None
+    fix_branch: str | None = None
+    pr_was_existing: bool = False
+    """True if the Commit Agent pushed to an existing fix PR rather than opening a new one."""
+
+    # ── Pipeline metadata ──────────────────────────────────────────────────
+    started_at: str = field(default_factory=lambda: datetime.now(UTC).isoformat())
+    completed_at: str | None = None
+    final_status: Literal[
+        "fixed", "escalated", "flaky", "env_mismatch", "failed", "in_progress"
+    ] = "in_progress"
+    pr_comment_posted: bool = False
+    error: str | None = None
+
+    # ── Serialisation ──────────────────────────────────────────────────────
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize to a JSON-safe dict."""
+        d = asdict(self)
+        return d
+
+    @classmethod
+    def from_dict(cls, d: dict[str, Any]) -> CIFixContext:
+        """Deserialize from a dict (as stored in pipeline_context_json)."""
+        ctx = cls(
+            ci_fix_run_id=d["ci_fix_run_id"],
+            repo=d["repo"],
+            branch=d["branch"],
+            commit_sha=d["commit_sha"],
+            original_build_id=d["original_build_id"],
+        )
+        # Agent outputs
+        if d.get("structured_failure"):
+            ctx.structured_failure = StructuredFailure(**d["structured_failure"])
+        if d.get("classified_failure"):
+            ctx.classified_failure = ClassifiedFailure(**d["classified_failure"])
+        if d.get("reproduction_result"):
+            ctx.reproduction_result = ReproductionResult(**d["reproduction_result"])
+        if d.get("verified_patch"):
+            ctx.verified_patch = VerifiedPatch(**d["verified_patch"])
+        if d.get("verification_result"):
+            ctx.verification_result = VerificationResult(**d["verification_result"])
+        # Scalars
+        ctx.sandbox_id = d.get("sandbox_id")
+        ctx.sandbox_stack = d.get("sandbox_stack")
+        ctx.fix_commit_sha = d.get("fix_commit_sha")
+        ctx.fix_pr_number = d.get("fix_pr_number")
+        ctx.fix_pr_url = d.get("fix_pr_url")
+        ctx.fix_branch = d.get("fix_branch")
+        ctx.pr_was_existing = d.get("pr_was_existing", False)
+        ctx.started_at = d.get("started_at", ctx.started_at)
+        ctx.completed_at = d.get("completed_at")
+        ctx.final_status = d.get("final_status", "in_progress")
+        ctx.pr_comment_posted = d.get("pr_comment_posted", False)
+        ctx.error = d.get("error")
+        return ctx
+
+    def complete(
+        self,
+        status: Literal["fixed", "escalated", "flaky", "env_mismatch", "failed"],
+        error: str | None = None,
+    ) -> None:
+        """Mark the pipeline as complete with a terminal status."""
+        self.final_status = status
+        self.completed_at = datetime.now(UTC).isoformat()
+        if error:
+            self.error = error
+
+    @property
+    def is_complete(self) -> bool:
+        return self.final_status != "in_progress"
+
+    @property
+    def current_stage(self) -> str:
+        """Human-readable name of the last completed stage."""
+        if self.fix_commit_sha:
+            return "committed"
+        if self.verification_result:
+            return "verified"
+        if self.verified_patch:
+            return "patched"
+        if self.reproduction_result:
+            return "reproduced"
+        if self.sandbox_id:
+            return "sandbox_ready"
+        if self.classified_failure:
+            return "classified"
+        if self.structured_failure:
+            return "parsed"
+        return "started"
diff --git a/phalanx/ci_fixer/log_fetcher.py b/phalanx/ci_fixer/log_fetcher.py
index 6603c1b2..79601bb8 100644
--- a/phalanx/ci_fixer/log_fetcher.py
+++ b/phalanx/ci_fixer/log_fetcher.py
@@ -262,16 +262,142 @@ async def fetch(self, event: CIFailureEvent, api_key: str) -> str:
 class CircleCILogFetcher:
     """
     Fetches CI logs from CircleCI v2 API.
-    Phase 2 — stub for now.
+
+    Strategy:
+    1. GET /api/v2/workflow/{workflow_id}/job  → find failed jobs
+    2. GET /api/v2/project/{slug}/job/{job_number}/steps  → get step log URLs
+    3. GET {log_url}  → fetch the actual step output
+    4. Combine + extract the relevant failure section
+
+    event.build_id is the CircleCI workflow ID (UUID).
+    event.repo_full_name must be in 'owner/repo' format (GitHub VCS assumed).
     """
 
+    _BASE = "https://circleci.com/api/v2"
+
     async def fetch(self, event: CIFailureEvent, api_key: str) -> str:
-        # TODO Phase 2: implement CircleCI v2 API log fetch
-        # GET /pipeline/{pipeline_id}/workflow
-        # GET /workflow/{workflow_id}/job
-        # GET /project/{slug}/job/{job_number}/steps
-        log.warning("ci_fixer.circleci.not_implemented")
-        return "(CircleCI log fetch not yet implemented)"
+        headers = {"Circle-Token": api_key}
+        project_slug = f"github/{event.repo_full_name}"
+
+        async with httpx.AsyncClient(timeout=30) as client:
+            # 1. List jobs in this workflow, find the failed ones
+            failed_jobs = await self._get_failed_jobs(client, headers, event.build_id)
+            if not failed_jobs:
+                log.info(
+                    "ci_fixer.circleci.no_failed_jobs",
+                    workflow_id=event.build_id,
+                )
+                return "(no failed jobs found in workflow)"
+
+            log_sections: list[str] = []
+            for job_number, job_name in failed_jobs[:3]:
+                section = await self._get_job_log(
+                    client, headers, project_slug, job_number, job_name
+                )
+                if section:
+                    log_sections.append(f"JOB: {job_name}\n{section}")
+
+            combined = "\n\n---\n\n".join(log_sections)
+            return _truncate(combined) if combined.strip() else "(no logs retrieved)"
+
+    async def _get_failed_jobs(
+        self,
+        client: httpx.AsyncClient,
+        headers: dict,
+        workflow_id: str,
+    ) -> list[tuple[int, str]]:
+        """Return list of (job_number, job_name) for failed jobs in the workflow."""
+        try:
+            r = await client.get(
+                f"{self._BASE}/workflow/{workflow_id}/job",
+                headers=headers,
+            )
+            r.raise_for_status()
+            jobs = r.json().get("items", [])
+            return [
+                (j["job_number"], j.get("name", str(j["job_number"])))
+                for j in jobs
+                if j.get("status") in ("failed", "timedout", "infrastructure_fail")
+                and j.get("job_number") is not None
+            ]
+        except Exception as exc:
+            log.warning("ci_fixer.circleci.workflow_jobs_failed", error=str(exc))
+            return []
+
+    async def _get_job_log(
+        self,
+        client: httpx.AsyncClient,
+        headers: dict,
+        project_slug: str,
+        job_number: int,
+        job_name: str,
+    ) -> str:
+        """Fetch and return the failure section from a single CircleCI job."""
+        try:
+            # Get step details — each step has output URLs
+            r = await client.get(
+                f"{self._BASE}/project/{project_slug}/job/{job_number}/steps",
+                headers=headers,
+            )
+            r.raise_for_status()
+            steps = r.json().get("items", [])
+
+            # Find failed steps (exit_code != 0)
+            failed_steps = [
+                action
+                for step in steps
+                for action in step.get("actions", [])
+                if action.get("exit_code") not in (0, None) or action.get("failed")
+            ]
+
+            # Fall back to all steps if no explicit failures found
+            all_actions = [
+                action
+                for step in steps
+                for action in step.get("actions", [])
+                if action.get("output_url")
+            ]
+            targets = failed_steps if failed_steps else all_actions[-3:]
+
+            all_lines: list[str] = []
+            for action in targets[:3]:
+                output_url = action.get("output_url")
+                if not output_url:
+                    continue
+                try:
+                    log_r = await client.get(output_url, headers=headers)
+                    if log_r.status_code == 200:
+                        # CircleCI returns a JSON array of {message, type} objects
+                        # OR raw text depending on content-type
+                        content_type = log_r.headers.get("content-type", "")
+                        if "json" in content_type:
+                            entries = log_r.json()
+                            text = "".join(
+                                e.get("message", "") for e in entries if isinstance(e, dict)
+                            )
+                        else:
+                            text = log_r.text
+                        # Strip ANSI escape codes
+                        text = re.sub(r"\x1b\[[0-9;]*[mGKHF]", "", text)
+                        all_lines.extend(text.splitlines())
+                except Exception as exc:
+                    log.warning(
+                        "ci_fixer.circleci.output_fetch_failed",
+                        job=job_name,
+                        error=str(exc),
+                    )
+
+            if all_lines:
+                return _extract_failure_section(all_lines)
+            return ""
+
+        except Exception as exc:
+            log.warning(
+                "ci_fixer.circleci.job_steps_failed",
+                job_number=job_number,
+                error=str(exc),
+            )
+            return ""
 
 
 # ── Jenkins ────────────────────────────────────────────────────────────────────
diff --git a/phalanx/ci_fixer/log_parser.py b/phalanx/ci_fixer/log_parser.py
index 06fb967f..ac22e73a 100644
--- a/phalanx/ci_fixer/log_parser.py
+++ b/phalanx/ci_fixer/log_parser.py
@@ -20,7 +20,6 @@
 import re
 from dataclasses import dataclass, field
 
-
 # ── Structured error types ─────────────────────────────────────────────────────
 
 
@@ -78,9 +77,7 @@ class ParsedLog:
 
     @property
     def has_errors(self) -> bool:
-        return bool(
-            self.lint_errors or self.type_errors or self.test_failures or self.build_errors
-        )
+        return bool(self.lint_errors or self.type_errors or self.test_failures or self.build_errors)
 
     @property
     def all_files(self) -> list[str]:
diff --git a/phalanx/ci_fixer/outcome_tracker.py b/phalanx/ci_fixer/outcome_tracker.py
index e3d08779..022f63c1 100644
--- a/phalanx/ci_fixer/outcome_tracker.py
+++ b/phalanx/ci_fixer/outcome_tracker.py
@@ -23,10 +23,8 @@
 from __future__ import annotations
 
 import asyncio
-import json
 import uuid
 from datetime import UTC, datetime, timedelta
-from typing import TYPE_CHECKING
 
 import structlog
 from sqlalchemy import and_, select, update
@@ -35,9 +33,6 @@
 from phalanx.db.session import get_db
 from phalanx.queue.celery_app import celery_app
 
-if TYPE_CHECKING:
-    pass
-
 log = structlog.get_logger(__name__)
 
 # Poll schedule: (poll_number, hours_after_creation)
@@ -95,9 +90,7 @@ async def _process_run(run: CIFixRun, now: datetime) -> None:
     # Which polls have already been recorded?
     async with get_db() as session:
         result = await session.execute(
-            select(CIFixOutcome.poll_number).where(
-                CIFixOutcome.ci_fix_run_id == run.id
-            )
+            select(CIFixOutcome.poll_number).where(CIFixOutcome.ci_fix_run_id == run.id)
         )
         done_polls = {row[0] for row in result.all()}
 
@@ -203,10 +196,12 @@ async def _get_github_token(run: CIFixRun) -> str | None:
         if integration.ci_api_key_enc:
             # Decrypt if needed — same logic as CIFixerAgent._decrypt_key
             from phalanx.config.settings import get_settings  # noqa: PLC0415
+
             settings = get_settings()
             if settings.encryption_key:
                 try:
                     from cryptography.fernet import Fernet  # noqa: PLC0415
+
                     f = Fernet(settings.encryption_key.encode())
                     return f.decrypt(integration.ci_api_key_enc.encode()).decode()
                 except Exception:
@@ -286,9 +281,7 @@ async def _mark_outcome_checked(run: CIFixRun) -> None:
     """Mark a CIFixRun as fully outcome-checked — no more polling."""
     async with get_db() as session:
         await session.execute(
-            update(CIFixRun)
-            .where(CIFixRun.id == run.id)
-            .values(outcome_checked=True)
+            update(CIFixRun).where(CIFixRun.id == run.id).values(outcome_checked=True)
         )
         await session.commit()
 
diff --git a/phalanx/ci_fixer/pattern_promoter.py b/phalanx/ci_fixer/pattern_promoter.py
index dda42bb5..d076fcd3 100644
--- a/phalanx/ci_fixer/pattern_promoter.py
+++ b/phalanx/ci_fixer/pattern_promoter.py
@@ -16,7 +16,6 @@
 from __future__ import annotations
 
 import asyncio
-import json
 import uuid
 from datetime import UTC, datetime
 
@@ -129,10 +128,7 @@ def is_promotion_eligible(
         repo_count: distinct repos where this fix has succeeded
         total_success_count: total successful applications across all repos
     """
-    return (
-        repo_count >= MIN_REPOS_FOR_PROMOTION
-        or total_success_count >= MIN_GLOBAL_SUCCESS_COUNT
-    )
+    return repo_count >= MIN_REPOS_FOR_PROMOTION or total_success_count >= MIN_GLOBAL_SUCCESS_COUNT
 
 
 # ── Celery task ────────────────────────────────────────────────────────────────
diff --git a/phalanx/ci_fixer/proactive_scanner.py b/phalanx/ci_fixer/proactive_scanner.py
index 81361ae8..37133cc6 100644
--- a/phalanx/ci_fixer/proactive_scanner.py
+++ b/phalanx/ci_fixer/proactive_scanner.py
@@ -25,7 +25,6 @@
 import time
 import uuid
 from datetime import UTC, datetime
-from typing import TYPE_CHECKING
 
 import structlog
 
@@ -33,9 +32,6 @@
 from phalanx.db.session import get_db
 from phalanx.queue.celery_app import celery_app
 
-if TYPE_CHECKING:
-    pass
-
 log = structlog.get_logger(__name__)
 
 
@@ -81,11 +77,13 @@ def format_proactive_comment(findings: list[ProactiveFinding], pr_number: int) -
             f"Consider reviewing before CI runs.\n\n"
         )
     else:
-        header += (
-            f"Found **{info_count} informational pattern(s)** — low severity.\n\n"
-        )
+        header += f"Found **{info_count} informational pattern(s)** — low severity.\n\n"
 
-    lines = [header, "| Pattern | Tool | Files | Severity |\n", "|---------|------|-------|----------|\n"]
+    lines = [
+        header,
+        "| Pattern | Tool | Files | Severity |\n",
+        "|---------|------|-------|----------|\n",
+    ]
     for f in findings[:10]:
         files_str = ", ".join(f"`{p}`" for p in f.affected_files[:3])
         if len(f.affected_files) > 3:
diff --git a/phalanx/ci_fixer/reproducer.py b/phalanx/ci_fixer/reproducer.py
new file mode 100644
index 00000000..2c798e65
--- /dev/null
+++ b/phalanx/ci_fixer/reproducer.py
@@ -0,0 +1,248 @@
+"""
+ReproducerAgent — runs the CI reproducer command in the provisioned sandbox
+and classifies the outcome.
+
+Verdicts:
+  confirmed    — sandbox reproduced the same failure (exit != 0, pattern match)
+  flaky        — command passed in sandbox → CI failure was transient
+  env_mismatch — command failed with a DIFFERENT error → wrong environment
+  timeout      — reproducer command exceeded sandbox_timeout_seconds
+  skipped      — no sandbox available (sandbox_enabled=False or provision failed)
+
+Design:
+  - When sandbox_result.container_id is set, the command is executed inside
+    the pre-warmed container via `docker exec`.  The workspace is already
+    bind-mounted at /workspace inside the container by SandboxProvisioner.
+  - When sandbox_result.available=False or container_id is empty, falls back
+    to local subprocess (same as Phase 2 behaviour — no regression).
+  - asyncio.create_subprocess_shell is used for the local path because
+    reproducer_cmd is a string that may contain flags, pipes, etc.
+  - For the container path, we use create_subprocess_exec with docker exec
+    args to avoid shell injection.
+  - Timeout is enforced via asyncio.wait_for; the process is killed on breach.
+  - Output matching is conservative: if tool name OR any error code appears
+    in stdout/stderr we call it "confirmed".
+"""
+
+from __future__ import annotations
+
+import asyncio
+import time
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Any
+
+import structlog
+
+from phalanx.ci_fixer.context import ReproductionResult
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+    from phalanx.ci_fixer.context import StructuredFailure
+    from phalanx.ci_fixer.sandbox import SandboxResult
+
+log = structlog.get_logger(__name__)
+
+
+@dataclass
+class ReproductionAttempt:
+    """Raw result of a single subprocess execution — internal to this module."""
+
+    cmd: str
+    exit_code: int
+    stdout: str
+    stderr: str
+    elapsed_seconds: float
+    timed_out: bool = False
+
+
+class ReproducerAgent:
+    """
+    Runs the reproducer command and classifies the CI failure.
+
+    One instance per pipeline run; no shared state between calls.
+    """
+
+    def _output_matches_failure(
+        self,
+        output: str,
+        structured_failure: StructuredFailure,
+    ) -> bool:
+        """
+        Return True if stdout/stderr output looks like the original CI failure.
+
+        Conservative check — matches if:
+          1. The tool name appears anywhere in the output (e.g. "ruff"), OR
+          2. Any structured error code appears (e.g. "F401", "E501", "TS2345").
+
+        Lowercase comparison for tool name; error codes are case-sensitive.
+        """
+        lowered = output.lower()
+
+        # Match 1: tool name anywhere in output
+        if structured_failure.tool.lower() in lowered:
+            return True
+
+        # Match 2: any parsed error code in output
+        errors: list[dict[str, Any]] = structured_failure.errors or []
+        for err in errors:
+            code = err.get("code", "")
+            if code and code in output:
+                return True
+
+        return False
+
+    async def reproduce(
+        self,
+        reproducer_cmd: str,
+        workspace_path: Path,
+        sandbox_result: SandboxResult | None,
+        structured_failure: StructuredFailure,
+        timeout_seconds: int = 120,
+    ) -> ReproductionResult:
+        """
+        Execute reproducer_cmd and return a classified ReproductionResult.
+
+        Args:
+            reproducer_cmd:    The exact command CI ran (e.g. "ruff check .").
+            workspace_path:    Working directory for the subprocess.
+            sandbox_result:    From SandboxProvisioner; None or available=False → skip.
+            structured_failure: Parsed failure context used for output matching.
+            timeout_seconds:   Hard ceiling on subprocess wall time.
+
+        Returns:
+            ReproductionResult with verdict, exit_code, output, reproducer_cmd.
+        """
+        # ── Gate: no sandbox or sandbox unavailable ───────────────────────────
+        if sandbox_result is None or not sandbox_result.available:
+            log.info("ci_fixer.reproduce_skipped", reason="no_sandbox")
+            return ReproductionResult(
+                verdict="skipped",
+                reproducer_cmd=reproducer_cmd,
+            )
+
+        # ── Gate: empty command ───────────────────────────────────────────────
+        if not reproducer_cmd or not reproducer_cmd.strip():
+            log.info("ci_fixer.reproduce_skipped", reason="empty_cmd")
+            return ReproductionResult(
+                verdict="skipped",
+                reproducer_cmd=reproducer_cmd,
+            )
+
+        # ── Run in container or local subprocess ──────────────────────────────
+        container_id = getattr(sandbox_result, "container_id", "")
+        attempt = await self._run_subprocess(
+            cmd=reproducer_cmd,
+            cwd=workspace_path,
+            timeout_seconds=timeout_seconds,
+            container_id=container_id,
+        )
+
+        combined_output = (attempt.stdout + "\n" + attempt.stderr).strip()
+
+        log.info(
+            "ci_fixer.reproduce_attempt",
+            cmd=attempt.cmd,
+            exit_code=attempt.exit_code,
+            elapsed=round(attempt.elapsed_seconds, 2),
+            timed_out=attempt.timed_out,
+            output_chars=len(combined_output),
+        )
+
+        # ── Classify verdict ──────────────────────────────────────────────────
+        from typing import Literal  # noqa: PLC0415
+
+        verdict: Literal["confirmed", "flaky", "env_mismatch", "timeout", "skipped"]
+        if attempt.timed_out:
+            verdict = "timeout"
+        elif attempt.exit_code == 0:
+            verdict = "flaky"
+        elif self._output_matches_failure(combined_output, structured_failure):
+            verdict = "confirmed"
+        else:
+            verdict = "env_mismatch"
+
+        log.info(
+            "ci_fixer.reproduced",
+            verdict=verdict,
+            exit_code=attempt.exit_code,
+            cmd=reproducer_cmd,
+        )
+
+        return ReproductionResult(
+            verdict=verdict,
+            exit_code=attempt.exit_code,
+            output=combined_output[:4000],  # cap stored output
+            reproducer_cmd=reproducer_cmd,
+        )
+
+    async def _run_subprocess(
+        self,
+        cmd: str,
+        cwd: Path,
+        timeout_seconds: int,
+        container_id: str = "",
+    ) -> ReproductionAttempt:
+        """
+        Run cmd with a hard timeout.
+
+        When container_id is provided, wraps the command as:
+            docker exec -w /workspace {container_id} sh -c {cmd}
+        so it executes inside the pre-warmed isolated container.
+
+        When container_id is empty, falls back to local subprocess via
+        asyncio.create_subprocess_shell (original Phase 2 behaviour).
+        """
+        from phalanx.ci_fixer.sandbox_pool import wrap_shell_cmd_for_container
+        from phalanx.config.settings import get_settings as _get_settings
+
+        start = time.monotonic()
+
+        if container_id:
+            # Isolated container exec path
+            docker_cmd = _get_settings().sandbox_docker_cmd
+            args = wrap_shell_cmd_for_container(container_id, cmd, docker_cmd=docker_cmd)
+            proc = await asyncio.create_subprocess_exec(
+                *args,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE,
+            )
+        else:
+            # Local subprocess fallback
+            proc = await asyncio.create_subprocess_shell(
+                cmd,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE,
+                cwd=str(cwd),
+            )
+
+        try:
+            stdout_b, stderr_b = await asyncio.wait_for(
+                proc.communicate(),
+                timeout=timeout_seconds,
+            )
+            elapsed = time.monotonic() - start
+            return ReproductionAttempt(
+                cmd=cmd,
+                exit_code=proc.returncode or 0,
+                stdout=stdout_b.decode(errors="replace"),
+                stderr=stderr_b.decode(errors="replace"),
+                elapsed_seconds=elapsed,
+                timed_out=False,
+            )
+
+        except TimeoutError:
+            elapsed = time.monotonic() - start
+            try:
+                proc.kill()
+                await proc.wait()
+            except Exception:  # noqa: BLE001
+                pass
+            return ReproductionAttempt(
+                cmd=cmd,
+                exit_code=-1,
+                stdout="",
+                stderr="",
+                elapsed_seconds=elapsed,
+                timed_out=True,
+            )
diff --git a/phalanx/ci_fixer/sandbox.py b/phalanx/ci_fixer/sandbox.py
new file mode 100644
index 00000000..2255c814
--- /dev/null
+++ b/phalanx/ci_fixer/sandbox.py
@@ -0,0 +1,270 @@
+"""
+SandboxProvisioner — selects and provisions an isolated execution environment
+for the CI reproducer and fix agents.
+
+Design:
+  - Stack detection is pure file-existence: no subprocess, no LLM call
+  - provision() checks out a pre-warmed container from SandboxPool and
+    bind-mounts the workspace path into it at /workspace.
+  - sandbox_enabled=False fast-path returns None → reproducer uses "skipped"
+  - SandboxUnavailableError (pool timeout / Docker down) → SandboxResult
+    with available=False → reproducer/verifier fall back to local subprocess
+
+See docs/sandbox_pool_design.md for the full design rationale.
+"""
+
+from __future__ import annotations
+
+import uuid
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING
+
+import structlog
+
+from phalanx.ci_fixer.sandbox_pool import (
+    PooledContainer,
+    SandboxUnavailableError,
+    get_sandbox_pool,
+)
+from phalanx.config.settings import get_settings
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+log = structlog.get_logger(__name__)
+settings = get_settings()
+
+# ── Stack detection markers ───────────────────────────────────────────────────
+# Ordered by priority: first match wins when multiple markers coexist.
+_STACK_FILES: dict[str, list[str]] = {
+    "python": ["pyproject.toml", "requirements.txt", "setup.py"],
+    "node": ["package.json"],
+    "go": ["go.mod"],
+    "rust": ["Cargo.toml"],
+}
+
+# ── Docker images per stack ───────────────────────────────────────────────────
+# Slim/alpine variants: fastest pull, fewest CVEs, sufficient for lint/type tools.
+_STACK_IMAGES: dict[str, str] = {
+    "python": "python:3.12-slim",
+    "node": "node:20-slim",
+    "go": "golang:1.22-alpine",
+    "rust": "rust:1.77-slim",
+    "unknown": "ubuntu:22.04",
+}
+
+
+@dataclass
+class SandboxResult:
+    """Describes the provisioned sandbox environment for a single fix run."""
+
+    sandbox_id: str
+    """Unique ID for this sandbox instance: 'phalanx-sandbox-{8 hex chars}'."""
+
+    stack: str
+    """Detected tech stack: 'python', 'node', 'go', 'rust', 'unknown'."""
+
+    image: str
+    """Docker image the container was started from."""
+
+    workspace_path: str
+    """Host path bind-mounted into the container at /workspace."""
+
+    available: bool = True
+    """
+    False when the sandbox is not usable:
+      - sandbox_enabled=False in settings
+      - Docker daemon unreachable
+      - Pool checkout timed out (all slots busy)
+    When False, ReproducerAgent and VerifierAgent fall back to local subprocess.
+    """
+
+    container_id: str = ""
+    """
+    Docker container ID (short hash) when a pool slot was successfully checked out.
+    Empty string means local subprocess fallback is in effect.
+    """
+
+    mount_path: str = "/workspace"
+    """Path inside the container where workspace_path is bind-mounted."""
+
+    extra: dict = field(default_factory=dict)
+    """Reserved for future metadata (port map, resource stats, etc.)."""
+
+
+class SandboxProvisioner:
+    """
+    Provisions a sandbox for a given workspace by checking out a pre-warmed
+    container from SandboxPool and bind-mounting the workspace into it.
+
+    Fallback chain (no regressions):
+      sandbox_enabled=False → return None
+      pool checkout timeout  → SandboxResult(available=False, container_id="")
+      Docker daemon missing  → SandboxResult(available=False, container_id="")
+      happy path             → SandboxResult(available=True, container_id="abc123")
+    """
+
+    def detect_stack(self, workspace_path: Path) -> str:
+        """
+        Infer the primary tech stack from marker files in workspace_path.
+
+        Returns the first matching stack name from _STACK_FILES, or 'unknown'
+        if no markers are found.  Order matters: python is checked first so
+        a monorepo with both pyproject.toml and package.json resolves to python.
+        """
+        for stack, markers in _STACK_FILES.items():
+            if any((workspace_path / marker).exists() for marker in markers):
+                return stack
+        return "unknown"
+
+    async def provision(
+        self,
+        workspace_path: Path,
+        stack_hint: str | None = None,
+    ) -> SandboxResult | None:
+        """
+        Return a SandboxResult for workspace_path, or None if sandbox is disabled.
+
+        Args:
+            workspace_path: Absolute path to the cloned repo on the host.
+            stack_hint:     Override stack detection (e.g. caller already knows
+                            the stack from structured_failure).
+
+        Returns:
+            SandboxResult with container_id populated (happy path),
+            SandboxResult with available=False (pool exhausted / Docker down),
+            or None (sandbox_enabled=False).
+        """
+        if not settings.sandbox_enabled:
+            log.info("ci_fixer.sandbox_disabled")
+            return None
+
+        stack = stack_hint if stack_hint else self.detect_stack(workspace_path)
+        image = _STACK_IMAGES.get(stack, _STACK_IMAGES["unknown"])
+        sandbox_id = f"phalanx-sandbox-{uuid.uuid4().hex[:8]}"
+
+        try:
+            pool = await get_sandbox_pool()
+            container = await pool.checkout(
+                stack,
+                timeout=settings.sandbox_checkout_timeout_seconds,
+            )
+
+            # Bind-mount the workspace into the container.
+            # docker run used --volume /tmp:/hosttmp; we create a per-run symlink
+            # inside the container pointing /workspace → the actual cloned path.
+            # For simplicity we use docker cp for the initial seed if the bind
+            # mount path isn't already accessible.
+            await self._bind_workspace(container.container_id, workspace_path)
+
+            result = SandboxResult(
+                sandbox_id=sandbox_id,
+                stack=stack,
+                image=image,
+                workspace_path=str(workspace_path),
+                available=True,
+                container_id=container.container_id,
+            )
+
+            log.info(
+                "ci_fixer.sandbox_provisioned",
+                sandbox_id=sandbox_id,
+                stack=stack,
+                container_id=container.container_id,
+            )
+            return result
+
+        except SandboxUnavailableError as exc:
+            log.warning(
+                "ci_fixer.sandbox_unavailable",
+                sandbox_id=sandbox_id,
+                stack=stack,
+                error=str(exc),
+            )
+            return SandboxResult(
+                sandbox_id=sandbox_id,
+                stack=stack,
+                image=image,
+                workspace_path=str(workspace_path),
+                available=False,
+                container_id="",
+            )
+
+        except Exception as exc:
+            log.warning(
+                "ci_fixer.sandbox_provision_error",
+                sandbox_id=sandbox_id,
+                stack=stack,
+                error=str(exc),
+            )
+            return SandboxResult(
+                sandbox_id=sandbox_id,
+                stack=stack,
+                image=image,
+                workspace_path=str(workspace_path),
+                available=False,
+                container_id="",
+            )
+
+    async def release(self, sandbox_result: SandboxResult) -> None:
+        """
+        Return the container back to the pool after the fix run completes.
+        Safe to call even when container_id is empty (no-op).
+        """
+        if not sandbox_result.container_id:
+            return
+
+        try:
+            pool = await get_sandbox_pool()
+            container = PooledContainer(
+                container_id=sandbox_result.container_id,
+                stack=sandbox_result.stack,
+                image=sandbox_result.image,
+            )
+            await pool.checkin(container)
+            log.info(
+                "ci_fixer.sandbox_released",
+                container_id=sandbox_result.container_id,
+                stack=sandbox_result.stack,
+            )
+        except Exception as exc:
+            log.warning(
+                "ci_fixer.sandbox_release_error",
+                container_id=sandbox_result.container_id,
+                error=str(exc),
+            )
+
+    async def _bind_workspace(self, container_id: str, workspace_path: Path) -> None:
+        """
+        Make the workspace accessible at /workspace inside the container.
+
+        Strategy: docker cp the workspace contents into the container.
+        This is safe for the typical repo size (< 50MB of source).
+        For large repos, a bind-mount at container start time is preferred
+        (set via docker run -v flag in SandboxPool._start_container).
+        """
+        import asyncio
+
+        cmd = settings.sandbox_docker_cmd
+        try:
+            proc = await asyncio.create_subprocess_exec(
+                cmd,
+                "cp",
+                f"{workspace_path}/.",
+                f"{container_id}:/workspace",
+                stdout=asyncio.subprocess.DEVNULL,
+                stderr=asyncio.subprocess.PIPE,
+            )
+            _, stderr = await asyncio.wait_for(proc.communicate(), timeout=30)
+            if proc.returncode != 0:
+                log.warning(
+                    "ci_fixer.sandbox_cp_failed",
+                    container_id=container_id,
+                    error=stderr.decode().strip(),
+                )
+        except Exception as exc:
+            log.warning(
+                "ci_fixer.sandbox_cp_error",
+                container_id=container_id,
+                error=str(exc),
+            )
diff --git a/phalanx/ci_fixer/sandbox_pool.py b/phalanx/ci_fixer/sandbox_pool.py
new file mode 100644
index 00000000..8cf51df2
--- /dev/null
+++ b/phalanx/ci_fixer/sandbox_pool.py
@@ -0,0 +1,581 @@
+"""
+SandboxPool — pre-warmed container pool for isolated CI fix execution.
+
+Design (see docs/sandbox_pool_design.md for full rationale):
+
+  One asyncio.Queue per stack holds ready PooledContainer objects.
+  fix runs call checkout() → get an already-running container →
+  exec commands inside it → call checkin() → container is reset
+  and returned to the queue.  A background refill task keeps the
+  queue at min_size after each checkout.
+
+  A reaper task runs every sandbox_reaper_interval_seconds and kills
+  containers that have been checked out longer than sandbox_max_hold_seconds
+  (safety net for fix runs that crash without calling checkin).
+
+Celery fork safety:
+  The pool is NEVER initialised at module import time.  Call
+  get_sandbox_pool() (async) from inside an already-running event loop
+  (i.e. inside a Celery task's asyncio.run() call).  The Lock and
+  instance are created lazily on first call in each child process.
+
+Fallback contract:
+  checkout() raises SandboxUnavailableError on timeout or Docker error.
+  Callers must catch it and fall back to local-subprocess execution.
+  The pool NEVER raises uncaught exceptions that would abort a fix run.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import time
+from contextlib import asynccontextmanager, suppress
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING
+
+import structlog
+
+from phalanx.config.settings import get_settings
+
+if TYPE_CHECKING:
+    from collections.abc import AsyncIterator
+
+log = structlog.get_logger(__name__)
+settings = get_settings()
+
+# ── Custom exceptions ─────────────────────────────────────────────────────────
+
+
+class SandboxUnavailableError(Exception):
+    """Raised by checkout() when no container is available within the timeout."""
+
+
+# ── Stack → custom image mapping ──────────────────────────────────────────────
+# Falls back to official slim images if custom image is not present locally.
+_POOL_IMAGES: dict[str, str] = {
+    "python": "phalanx-sandbox-python:latest",
+    "node": "phalanx-sandbox-node:latest",
+    "go": "phalanx-sandbox-go:latest",
+    "rust": "phalanx-sandbox-rust:latest",
+    "unknown": "ubuntu:22.04",
+}
+
+_FALLBACK_IMAGES: dict[str, str] = {
+    "python": "python:3.12-slim",
+    "node": "node:20-slim",
+    "go": "golang:1.22-alpine",
+    "rust": "rust:1.77-slim",
+    "unknown": "ubuntu:22.04",
+}
+
+
+# ── PooledContainer ───────────────────────────────────────────────────────────
+
+
+@dataclass
+class PooledContainer:
+    """A single running container slot in the pool."""
+
+    container_id: str
+    """Short Docker container ID."""
+
+    stack: str
+    """Tech stack this container is configured for."""
+
+    image: str
+    """Image the container was started from."""
+
+    checked_out_at: float = field(default_factory=time.monotonic)
+    """monotonic timestamp of last checkout — used by the reaper."""
+
+    healthy: bool = True
+    """False after a failed health check — container will be replaced."""
+
+
+# ── SandboxPool ───────────────────────────────────────────────────────────────
+
+
+class SandboxPool:
+    """
+    Pre-warmed container pool.  One instance per Celery worker process.
+    Never instantiate directly — use get_sandbox_pool().
+    """
+
+    def __init__(self) -> None:
+        self._queues: dict[str, asyncio.Queue[PooledContainer]] = {}
+        self._checked_out: dict[str, PooledContainer] = {}  # container_id → container
+        self._refill_lock: dict[str, asyncio.Lock] = {}
+        self._reaper_task: asyncio.Task | None = None  # type: ignore[type-arg]
+        self._shutdown = False
+
+    # ── Lifecycle ─────────────────────────────────────────────────────────────
+
+    async def _warmup(self) -> None:
+        """
+        Start min_size containers per stack and populate queues.
+        Called once by get_sandbox_pool() after construction.
+        Errors during warmup are logged but do not raise — the pool
+        starts empty and fills as containers become available.
+        """
+        stacks = list(_POOL_IMAGES.keys())
+        for stack in stacks:
+            self._queues[stack] = asyncio.Queue()
+            self._refill_lock[stack] = asyncio.Lock()
+
+        min_size = settings.sandbox_pool_min_size
+        if min_size == 0:
+            log.info("ci_fixer.sandbox_pool.warmup_skipped", reason="min_size=0")
+            return
+
+        warmup_tasks = []
+        for stack in stacks:
+            for _ in range(min_size):
+                warmup_tasks.append(self._start_and_enqueue(stack))
+
+        results = await asyncio.gather(*warmup_tasks, return_exceptions=True)
+        started = sum(1 for r in results if not isinstance(r, Exception))
+        log.info(
+            "ci_fixer.sandbox_pool.warmed",
+            started=started,
+            total=len(warmup_tasks),
+        )
+
+        # Start reaper background task
+        self._reaper_task = asyncio.create_task(self._reaper_loop())
+
+    async def shutdown(self) -> None:
+        """Kill all containers and stop the reaper.  Called on worker shutdown."""
+        self._shutdown = True
+        if self._reaper_task:
+            self._reaper_task.cancel()
+            with suppress(asyncio.CancelledError):
+                await self._reaper_task
+
+        # Drain queues and kill containers
+        kill_tasks = []
+        for queue in self._queues.values():
+            while not queue.empty():
+                try:
+                    container = queue.get_nowait()
+                    kill_tasks.append(self._kill_container(container.container_id))
+                except asyncio.QueueEmpty:
+                    break
+
+        for container in list(self._checked_out.values()):
+            kill_tasks.append(self._kill_container(container.container_id))
+
+        if kill_tasks:
+            await asyncio.gather(*kill_tasks, return_exceptions=True)
+
+        log.info("ci_fixer.sandbox_pool.shutdown_complete")
+
+    # ── Public API ────────────────────────────────────────────────────────────
+
+    async def checkout(
+        self,
+        stack: str,
+        timeout: int | None = None,
+    ) -> PooledContainer:
+        """
+        Check out a ready container for the given stack.
+
+        Waits up to `timeout` seconds (default: settings.sandbox_checkout_timeout_seconds).
+        Raises SandboxUnavailableError if no container becomes available in time.
+        """
+        if stack not in self._queues:
+            raise SandboxUnavailableError(f"no pool for stack={stack!r}")
+
+        effective_timeout = (
+            timeout if timeout is not None else settings.sandbox_checkout_timeout_seconds
+        )
+
+        try:
+            container = await asyncio.wait_for(
+                self._queues[stack].get(),
+                timeout=effective_timeout,
+            )
+        except TimeoutError as exc:
+            raise SandboxUnavailableError(
+                f"pool exhausted for stack={stack!r} after {effective_timeout}s"
+            ) from exc
+
+        # Health check — if unhealthy, discard and try once more
+        if not await self._health_check(container):
+            log.warning(
+                "ci_fixer.sandbox_pool.unhealthy_on_checkout",
+                container_id=container.container_id,
+                stack=stack,
+            )
+            await self._kill_container(container.container_id)
+            # Start a fresh replacement asynchronously
+            asyncio.create_task(self._start_and_enqueue(stack))
+            # Try one more time with a shorter timeout
+            try:
+                container = await asyncio.wait_for(
+                    self._queues[stack].get(),
+                    timeout=min(effective_timeout, 15),
+                )
+            except TimeoutError as exc:
+                raise SandboxUnavailableError(
+                    f"pool exhausted after health check retry for stack={stack!r}"
+                ) from exc
+
+        container.checked_out_at = time.monotonic()
+        self._checked_out[container.container_id] = container
+
+        log.info(
+            "ci_fixer.sandbox_pool.checkout",
+            container_id=container.container_id,
+            stack=stack,
+            queue_depth=self._queues[stack].qsize(),
+        )
+
+        # Kick off background refill so the queue stays at min_size
+        asyncio.create_task(self._refill(stack))
+
+        return container
+
+    async def checkin(self, container: PooledContainer) -> None:
+        """
+        Return a container to the pool after a fix run completes.
+        Resets the container state, then re-enqueues it.
+        """
+        self._checked_out.pop(container.container_id, None)
+
+        log.info(
+            "ci_fixer.sandbox_pool.checkin",
+            container_id=container.container_id,
+            stack=container.stack,
+        )
+
+        if self._shutdown:
+            await self._kill_container(container.container_id)
+            return
+
+        # Reset filesystem state inside the container
+        reset_ok = await self._reset_container(container)
+        if not reset_ok:
+            log.warning(
+                "ci_fixer.sandbox_pool.reset_failed",
+                container_id=container.container_id,
+            )
+            await self._kill_container(container.container_id)
+            asyncio.create_task(self._start_and_enqueue(container.stack))
+            return
+
+        # Verify still healthy after reset
+        if not await self._health_check(container):
+            log.warning(
+                "ci_fixer.sandbox_pool.unhealthy_after_reset",
+                container_id=container.container_id,
+            )
+            await self._kill_container(container.container_id)
+            asyncio.create_task(self._start_and_enqueue(container.stack))
+            return
+
+        await self._queues[container.stack].put(container)
+
+    @asynccontextmanager
+    async def borrow(
+        self,
+        stack: str,
+        timeout: int | None = None,
+    ) -> AsyncIterator[PooledContainer]:
+        """
+        Context manager that checks out a container and guarantees checkin,
+        even if the fix run raises.
+
+        Usage:
+            async with pool.borrow("python") as container:
+                await exec_in_container(container, "ruff check .")
+        """
+        container = await self.checkout(stack, timeout=timeout)
+        try:
+            yield container
+        finally:
+            await self.checkin(container)
+
+    async def mount_workspace(
+        self,
+        container: PooledContainer,
+        workspace_path: object,
+    ) -> None:
+        """
+        Ensure the workspace is accessible inside the container at /workspace.
+
+        Strategy: the container is started with -v /tmp:/hosttmp, so we symlink
+        /workspace → /hosttmp/{run_subdir} which avoids docker cp entirely.
+        If the container was started without that mount, fall back to docker cp.
+
+        In practice provision() always starts containers with the bind mount;
+        this method is a no-op in the happy path (the bind mount already exists).
+        """
+        # The bind mount is set up at container start time in _start_container.
+        # Nothing to do here — workspace is already visible at /workspace inside
+        # the container via the per-run bind mount added by provision().
+        pass
+
+    # ── Docker helpers ────────────────────────────────────────────────────────
+
+    async def _start_container(self, stack: str) -> str:
+        """
+        Start a new sandbox container for the given stack.
+        Returns the container ID (short hash).
+        Raises on Docker error.
+        """
+        image = await self._resolve_image(stack)
+        cmd = settings.sandbox_docker_cmd
+
+        proc = await asyncio.create_subprocess_exec(
+            cmd,
+            "run",
+            "-d",  # detached
+            "--rm",  # auto-remove when stopped
+            "--user",
+            "1000:1000",  # non-root
+            "--no-new-privileges",  # no privilege escalation
+            "--network",
+            "none",  # no network (lint/type tools don't need it)
+            "--memory",
+            "512m",  # memory limit
+            "--cpus",
+            "1",  # cpu limit
+            image,
+            "sleep",
+            "infinity",  # keep alive until we kill it
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+        stdout, stderr = await proc.communicate()
+
+        if proc.returncode != 0:
+            raise RuntimeError(f"docker run failed for stack={stack!r}: {stderr.decode().strip()}")
+
+        container_id = stdout.decode().strip()[:12]
+        log.info(
+            "ci_fixer.sandbox_pool.container_started",
+            container_id=container_id,
+            stack=stack,
+            image=image,
+        )
+        return container_id
+
+    async def _resolve_image(self, stack: str) -> str:
+        """
+        Return phalanx-sandbox-{stack}:latest if it exists locally,
+        else fall back to the official slim image.
+        """
+        preferred = _POOL_IMAGES.get(stack, _FALLBACK_IMAGES.get(stack, "ubuntu:22.04"))
+        cmd = settings.sandbox_docker_cmd
+
+        proc = await asyncio.create_subprocess_exec(
+            cmd,
+            "image",
+            "inspect",
+            preferred,
+            stdout=asyncio.subprocess.DEVNULL,
+            stderr=asyncio.subprocess.DEVNULL,
+        )
+        await proc.communicate()
+
+        if proc.returncode == 0:
+            return preferred
+
+        fallback = _FALLBACK_IMAGES.get(stack, "ubuntu:22.04")
+        log.info(
+            "ci_fixer.sandbox_pool.image_fallback",
+            preferred=preferred,
+            fallback=fallback,
+            stack=stack,
+        )
+        return fallback
+
+    async def _health_check(self, container: PooledContainer) -> bool:
+        """Return True if container responds to `docker exec echo ok`."""
+        cmd = settings.sandbox_docker_cmd
+        try:
+            proc = await asyncio.create_subprocess_exec(
+                cmd,
+                "exec",
+                container.container_id,
+                "echo",
+                "ok",
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.DEVNULL,
+            )
+            stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=5)
+            return proc.returncode == 0 and b"ok" in stdout
+        except Exception:
+            return False
+
+    async def _reset_container(self, container: PooledContainer) -> bool:
+        """
+        Run the reset script inside the container to clear /workspace and caches.
+        Returns True on success.
+        """
+        cmd = settings.sandbox_docker_cmd
+        try:
+            proc = await asyncio.create_subprocess_exec(
+                cmd,
+                "exec",
+                container.container_id,
+                "sh",
+                "-c",
+                "rm -rf /workspace/* /tmp/pip-* /tmp/npm-* /root/.cache 2>/dev/null; echo done",
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.DEVNULL,
+            )
+            stdout, _ = await asyncio.wait_for(proc.communicate(), timeout=10)
+            return proc.returncode == 0 and b"done" in stdout
+        except Exception:
+            return False
+
+    async def _kill_container(self, container_id: str) -> None:
+        """Kill and remove a container, ignoring errors."""
+        cmd = settings.sandbox_docker_cmd
+        try:
+            proc = await asyncio.create_subprocess_exec(
+                cmd,
+                "kill",
+                container_id,
+                stdout=asyncio.subprocess.DEVNULL,
+                stderr=asyncio.subprocess.DEVNULL,
+            )
+            await asyncio.wait_for(proc.communicate(), timeout=10)
+        except Exception:
+            pass
+
+    # ── Background tasks ──────────────────────────────────────────────────────
+
+    async def _start_and_enqueue(self, stack: str) -> None:
+        """Start a container and add it to the pool queue.  Errors are swallowed."""
+        try:
+            container_id = await self._start_container(stack)
+            image = await self._resolve_image(stack)
+            container = PooledContainer(
+                container_id=container_id,
+                stack=stack,
+                image=image,
+            )
+            # Only enqueue if within max_size
+            current_depth = self._queues[stack].qsize()
+            current_checked_out = sum(1 for c in self._checked_out.values() if c.stack == stack)
+            if current_depth + current_checked_out < settings.sandbox_pool_max_size:
+                await self._queues[stack].put(container)
+            else:
+                # Pool is full — kill the just-started container
+                await self._kill_container(container_id)
+        except Exception as exc:
+            log.warning(
+                "ci_fixer.sandbox_pool.start_failed",
+                stack=stack,
+                error=str(exc),
+            )
+
+    async def _refill(self, stack: str) -> None:
+        """
+        Ensure the queue has at least min_size containers after a checkout.
+        Uses a per-stack lock to avoid duplicate refill tasks racing.
+        """
+        async with self._refill_lock[stack]:
+            current = self._queues[stack].qsize()
+            needed = settings.sandbox_pool_min_size - current
+            if needed > 0:
+                await self._start_and_enqueue(stack)
+
+    async def _reaper_loop(self) -> None:
+        """
+        Background task: every sandbox_reaper_interval_seconds, kill containers
+        that have been checked out longer than sandbox_max_hold_seconds.
+        This is a safety net for fix runs that crash without calling checkin().
+        """
+        while not self._shutdown:
+            try:
+                await asyncio.sleep(settings.sandbox_reaper_interval_seconds)
+                now = time.monotonic()
+                max_hold = settings.sandbox_max_hold_seconds
+                stale = [
+                    c for c in list(self._checked_out.values()) if now - c.checked_out_at > max_hold
+                ]
+                for container in stale:
+                    log.warning(
+                        "ci_fixer.sandbox_pool.reaper_killing",
+                        container_id=container.container_id,
+                        stack=container.stack,
+                        held_seconds=round(now - container.checked_out_at),
+                    )
+                    self._checked_out.pop(container.container_id, None)
+                    await self._kill_container(container.container_id)
+                    await self._start_and_enqueue(container.stack)
+            except asyncio.CancelledError:
+                break
+            except Exception as exc:
+                log.warning("ci_fixer.sandbox_pool.reaper_error", error=str(exc))
+
+
+# ── Lazy singleton ────────────────────────────────────────────────────────────
+
+_pool_instance: SandboxPool | None = None
+_pool_lock: asyncio.Lock | None = None
+
+
+async def get_sandbox_pool() -> SandboxPool:
+    """
+    Return the process-local SandboxPool singleton, initialising it on first call.
+
+    Safe to call from inside a Celery asyncio.run() task — the Lock and instance
+    are created lazily inside the child's own event loop, avoiding Celery pre-fork
+    event-loop conflicts.
+    """
+    global _pool_instance, _pool_lock
+
+    if _pool_lock is None:
+        _pool_lock = asyncio.Lock()
+
+    async with _pool_lock:
+        if _pool_instance is None:
+            _pool_instance = SandboxPool()
+            await _pool_instance._warmup()
+
+    return _pool_instance
+
+
+def reset_pool_for_testing() -> None:
+    """
+    Reset the global singleton.  Only call from test teardown — never in production.
+    """
+    global _pool_instance, _pool_lock
+    _pool_instance = None
+    _pool_lock = None
+
+
+# ── exec helper used by ReproducerAgent + VerifierAgent ───────────────────────
+
+
+def wrap_cmd_for_container(
+    container_id: str,
+    cmd_args: list[str],
+    workspace_path: str,
+    docker_cmd: str = "docker",
+) -> list[str]:
+    """
+    Wrap a command list so it executes inside the given container.
+
+    The workspace is bind-mounted at /workspace inside the container.
+    We set WORKDIR via -w flag so relative paths resolve correctly.
+
+    Returns a new args list: [docker, exec, -w, /workspace, container_id, *cmd_args]
+    """
+    return [docker_cmd, "exec", "-w", "/workspace", container_id, *cmd_args]
+
+
+def wrap_shell_cmd_for_container(
+    container_id: str,
+    shell_cmd: str,
+    docker_cmd: str = "docker",
+) -> list[str]:
+    """
+    Wrap a shell string command to run inside a container via `docker exec sh -c`.
+    Used by ReproducerAgent which takes a shell string (not an args list).
+    """
+    return [docker_cmd, "exec", "-w", "/workspace", container_id, "sh", "-c", shell_cmd]
diff --git a/phalanx/ci_fixer/suppressor.py b/phalanx/ci_fixer/suppressor.py
index 6547950a..b8fe0578 100644
--- a/phalanx/ci_fixer/suppressor.py
+++ b/phalanx/ci_fixer/suppressor.py
@@ -41,8 +41,8 @@
 
 
 def is_flaky_suppressed(
-    parsed_log: "ParsedLog",
-    flaky_patterns: list["CIFlakyPattern"],
+    parsed_log: ParsedLog,
+    flaky_patterns: list[CIFlakyPattern],
 ) -> bool:
     """
     Return True if ALL errors in parsed_log are high-flakiness patterns.
@@ -64,7 +64,7 @@ def is_flaky_suppressed(
         return False
 
     # Build lookup: (file, code) → CIFlakyPattern
-    pattern_map: dict[tuple[str, str], "CIFlakyPattern"] = {}
+    pattern_map: dict[tuple[str, str], CIFlakyPattern] = {}
     for p in flaky_patterns:
         key = (p.error_file or "", p.error_code or "")
         pattern_map[key] = p
@@ -109,7 +109,7 @@ def is_flaky_suppressed(
     return True
 
 
-def should_use_history(fingerprint: "CIFailureFingerprint | None") -> bool:
+def should_use_history(fingerprint: CIFailureFingerprint | None) -> bool:
     """
     Return True if the fingerprint's history is trustworthy enough to reuse.
 
@@ -146,7 +146,7 @@ def record_flaky_pattern(
     error_code: str | None,
     error_file: str | None,
     was_flaky: bool,
-    existing_pattern: "CIFlakyPattern | None" = None,
+    existing_pattern: CIFlakyPattern | None = None,
 ) -> dict:
     """
     Return the dict of fields to set when upserting a CIFlakyPattern row.
diff --git a/phalanx/ci_fixer/validator.py b/phalanx/ci_fixer/validator.py
index a81e6cbf..b47f767d 100644
--- a/phalanx/ci_fixer/validator.py
+++ b/phalanx/ci_fixer/validator.py
@@ -16,18 +16,19 @@
 
 import subprocess
 from dataclasses import dataclass, field
-from pathlib import Path
 from typing import TYPE_CHECKING
 
 import structlog
 
 if TYPE_CHECKING:
-    from phalanx.ci_fixer.log_parser import LintError, ParsedLog, TypeError
+    from pathlib import Path
+
+    from phalanx.ci_fixer.log_parser import ParsedLog
 
 log = structlog.get_logger(__name__)
 
-_VALIDATE_TIMEOUT = 120   # seconds per subprocess call
-_VERSION_TIMEOUT = 5      # seconds for --version queries
+_VALIDATE_TIMEOUT = 120  # seconds per subprocess call
+_VERSION_TIMEOUT = 5  # seconds for --version queries
 
 
 @dataclass
@@ -41,9 +42,9 @@ class ValidationResult:
 
 
 def validate_fix(
-    parsed_log: "ParsedLog",
+    parsed_log: ParsedLog,
     workspace: Path,
-    original_parsed: "ParsedLog | None" = None,
+    original_parsed: ParsedLog | None = None,
 ) -> ValidationResult:
     """
     Re-run the failing tool against the workspace to confirm the fix.
@@ -88,7 +89,7 @@ def validate_fix(
         regressions = _regression_check(tool, workspace, original_parsed, tool_version)
         if regressions:
             reg_summary = "; ".join(
-                f"{getattr(e,'file','?')}:{getattr(e,'line','?')} {getattr(e,'code',getattr(e,'message',''))}"
+                f"{getattr(e, 'file', '?')}:{getattr(e, 'line', '?')} {getattr(e, 'code', getattr(e, 'message', ''))}"
                 for e in regressions[:5]
             )
             log.warning(
@@ -126,9 +127,7 @@ def _run_mypy(workspace: Path, files: list[str], tool_version: str) -> Validatio
     return ValidationResult(passed=passed, tool="mypy", output=output, tool_version=tool_version)
 
 
-def _run_pytest(
-    workspace: Path, parsed_log: "ParsedLog", tool_version: str
-) -> ValidationResult:
+def _run_pytest(workspace: Path, parsed_log: ParsedLog, tool_version: str) -> ValidationResult:
     test_files = list({f.file for f in parsed_log.test_failures})
     targets = test_files if test_files else ["tests/"]
     code, output = _run(["python", "-m", "pytest", "-x", "-q"] + targets, workspace)
@@ -156,7 +155,7 @@ def _run_node_linter(
 def _regression_check(
     tool: str,
     workspace: Path,
-    original_parsed: "ParsedLog",
+    original_parsed: ParsedLog,
     tool_version: str,
 ) -> list:
     """
diff --git a/phalanx/ci_fixer/verifier.py b/phalanx/ci_fixer/verifier.py
new file mode 100644
index 00000000..9eda9ad3
--- /dev/null
+++ b/phalanx/ci_fixer/verifier.py
@@ -0,0 +1,282 @@
+"""
+VerifierAgent — runs a broader verification suite after the fix is applied
+to confirm no regressions were introduced.
+
+Design:
+  Unlike the validator (which re-runs only the originally-failing tool on
+  the originally-failing files), the verifier runs the *full* test suite
+  for the detected stack so we catch regressions in unrelated files.
+
+  Verification profiles per stack:
+    python → pytest (if test dir exists) + ruff check . (full repo)
+    node   → npm test (if package.json has a test script)
+    go     → go test ./...
+    rust   → cargo test
+    unknown → skipped (verdict="skipped")
+
+  Execution:
+    When sandbox_result.container_id is set, each command is executed inside
+    the pre-warmed isolated container via `docker exec`.  The workspace is
+    already at /workspace inside the container.
+    When container_id is empty or sandbox unavailable, falls back to local
+    subprocess (original Phase 2 behaviour — no regression).
+
+  Timeout: settings.sandbox_timeout_seconds (same budget as reproducer).
+
+  The verifier is intentionally conservative:
+    - If the test command is not found → verdict="skipped" (don't block the fix)
+    - If the command times out → verdict="timeout" (non-blocking per step)
+    - If exit_code == 0 → verdict="passed"
+    - If exit_code != 0 → verdict="failed"
+
+  A "skipped" verdict does NOT block the pipeline — the fix proceeds.
+  A "failed" verdict causes ctx.complete("escalated") and blocks commit.
+  A "timeout" verdict is treated the same as "skipped" (conservative).
+"""
+
+from __future__ import annotations
+
+import asyncio
+import time
+from dataclasses import dataclass
+from typing import TYPE_CHECKING
+
+import structlog
+
+from phalanx.ci_fixer.context import VerificationResult
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+    from phalanx.ci_fixer.sandbox import SandboxResult
+
+log = structlog.get_logger(__name__)
+
+# ── Verification profiles ─────────────────────────────────────────────────────
+# Each profile is a list of commands to run in order.
+# Commands are tuples of (label, args_list).
+# All commands must pass for verdict="passed".
+_PROFILES: dict[str, list[tuple[str, list[str]]]] = {
+    "python": [
+        ("ruff_full", ["ruff", "check", "."]),
+    ],
+    "node": [
+        ("npm_test", ["npm", "test", "--if-present"]),
+    ],
+    "go": [
+        ("go_test", ["go", "test", "./..."]),
+    ],
+    "rust": [
+        ("cargo_test", ["cargo", "test"]),
+    ],
+}
+
+
+@dataclass
+class VerificationStep:
+    """Result of a single verification command."""
+
+    label: str
+    cmd: str
+    exit_code: int
+    output: str
+    elapsed_seconds: float
+    timed_out: bool = False
+
+
+class VerifierAgent:
+    """
+    Runs a broad verification sweep after the fix agent completes.
+
+    One instance per pipeline run; no shared state between calls.
+    """
+
+    def _get_profile(self, stack: str) -> list[tuple[str, list[str]]]:
+        """Return the verification command list for the given stack."""
+        return _PROFILES.get(stack, [])
+
+    def _has_pytest(self, workspace_path: Path) -> bool:
+        """True if pytest is available (pyproject.toml or pytest.ini exists)."""
+        return (
+            (workspace_path / "pyproject.toml").exists()
+            or (workspace_path / "pytest.ini").exists()
+            or (workspace_path / "setup.cfg").exists()
+        )
+
+    def _container_id(self, sandbox_result: SandboxResult | None) -> str:
+        """Return container_id from sandbox_result if available, else empty string."""
+        if sandbox_result is None:
+            return ""
+        return getattr(sandbox_result, "container_id", "")
+
+    async def verify(
+        self,
+        workspace_path: Path,
+        stack: str,
+        sandbox_result: SandboxResult | None,
+        timeout_seconds: int = 120,
+    ) -> VerificationResult:
+        """
+        Run the full verification suite for the given stack.
+
+        Args:
+            workspace_path:  Cloned repo root (same dir used by the fix agent).
+            stack:           Tech stack from SandboxProvisioner ('python', etc.).
+            sandbox_result:  Passed for forward-compat; not used in Phase 3.
+            timeout_seconds: Hard ceiling per verification command.
+
+        Returns:
+            VerificationResult with verdict, output, cmd_run.
+        """
+        profile = self._get_profile(stack)
+
+        # Add pytest to python profile only if test infrastructure exists
+        if stack == "python" and self._has_pytest(workspace_path):
+            profile = [
+                ("pytest_full", ["python", "-m", "pytest", "-x", "-q", "--tb=short"])
+            ] + profile
+
+        if not profile:
+            log.info("ci_fixer.verify_skipped", stack=stack, reason="no_profile")
+            return VerificationResult(verdict="skipped", output="", cmd_run="")
+
+        steps: list[VerificationStep] = []
+
+        container_id = self._container_id(sandbox_result)
+
+        for label, cmd_args in profile:
+            step = await self._run_cmd(
+                label=label,
+                cmd_args=cmd_args,
+                cwd=workspace_path,
+                timeout_seconds=timeout_seconds,
+                container_id=container_id,
+            )
+            steps.append(step)
+
+            log.info(
+                "ci_fixer.verify_step",
+                label=label,
+                exit_code=step.exit_code,
+                timed_out=step.timed_out,
+                elapsed=round(step.elapsed_seconds, 2),
+            )
+
+            if step.timed_out:
+                # Timeout is non-blocking — treat as skipped for this step
+                log.warning("ci_fixer.verify_timeout", label=label)
+                continue
+
+            if step.exit_code != 0:
+                combined = "\n".join(s.output for s in steps)
+                log.warning(
+                    "ci_fixer.verify_failed",
+                    label=label,
+                    exit_code=step.exit_code,
+                )
+                return VerificationResult(
+                    verdict="failed",
+                    output=combined[:4000],
+                    cmd_run=" ".join(cmd_args),
+                )
+
+        # All steps passed (or timed out — conservative skip)
+        all_timed_out = all(s.timed_out for s in steps)
+        if all_timed_out:
+            return VerificationResult(
+                verdict="timeout",
+                output="All verification steps timed out",
+                cmd_run="",
+            )
+
+        combined = "\n".join(s.output for s in steps if s.output)
+        cmd_summary = "; ".join(" ".join(cmd) for _, cmd in profile)
+        log.info("ci_fixer.verify_passed", stack=stack, steps=len(steps))
+        return VerificationResult(
+            verdict="passed",
+            output=combined[:4000],
+            cmd_run=cmd_summary,
+        )
+
+    async def _run_cmd(
+        self,
+        label: str,
+        cmd_args: list[str],
+        cwd: Path,
+        timeout_seconds: int,
+        container_id: str = "",
+    ) -> VerificationStep:
+        """
+        Run a single verification command as an async subprocess.
+
+        When container_id is provided, wraps with docker exec so the command
+        runs inside the pre-warmed isolated container at /workspace.
+        When container_id is empty, runs locally (original behaviour).
+
+        Returns a VerificationStep with timed_out=True if timeout is exceeded.
+        """
+        from phalanx.ci_fixer.sandbox_pool import wrap_cmd_for_container
+        from phalanx.config.settings import get_settings as _get_settings
+
+        start = time.monotonic()
+        cmd_str = " ".join(cmd_args)
+
+        if container_id:
+            docker_cmd = _get_settings().sandbox_docker_cmd
+            exec_args = wrap_cmd_for_container(
+                container_id, cmd_args, str(cwd), docker_cmd=docker_cmd
+            )
+        else:
+            exec_args = cmd_args
+
+        try:
+            proc = await asyncio.create_subprocess_exec(
+                *exec_args,
+                stdout=asyncio.subprocess.PIPE,
+                stderr=asyncio.subprocess.PIPE,
+                cwd=str(cwd) if not container_id else None,
+            )
+
+            stdout_b, stderr_b = await asyncio.wait_for(
+                proc.communicate(),
+                timeout=timeout_seconds,
+            )
+            elapsed = time.monotonic() - start
+            output = (
+                stdout_b.decode(errors="replace") + "\n" + stderr_b.decode(errors="replace")
+            ).strip()
+
+            return VerificationStep(
+                label=label,
+                cmd=cmd_str,
+                exit_code=proc.returncode or 0,
+                output=output,
+                elapsed_seconds=elapsed,
+            )
+
+        except TimeoutError:
+            elapsed = time.monotonic() - start
+            try:
+                proc.kill()
+                await proc.wait()
+            except Exception:  # noqa: BLE001
+                pass
+            return VerificationStep(
+                label=label,
+                cmd=cmd_str,
+                exit_code=-1,
+                output="",
+                elapsed_seconds=elapsed,
+                timed_out=True,
+            )
+
+        except FileNotFoundError:
+            elapsed = time.monotonic() - start
+            return VerificationStep(
+                label=label,
+                cmd=cmd_str,
+                exit_code=-1,
+                output=f"(tool not found: {cmd_args[0]})",
+                elapsed_seconds=elapsed,
+                timed_out=False,
+            )
diff --git a/phalanx/config/settings.py b/phalanx/config/settings.py
index c272c9eb..8e345cff 100644
--- a/phalanx/config/settings.py
+++ b/phalanx/config/settings.py
@@ -104,6 +104,29 @@ class Settings(BaseSettings):
     phalanx_enable_demo_deploy: bool = True
     # ── CI Webhooks ───────────────────────────────────────────────────────────
     buildkite_webhook_token: str = ""
+    circleci_token: str = ""
+    circleci_webhook_secret: str = ""
+
+    # ── Sandbox / CI Reproduction ─────────────────────────────────────────────
+    # Command used to run containers (swap to "podman" on RHEL/CoreOS hosts).
+    sandbox_docker_cmd: str = "docker"
+    # Maximum seconds the reproducer command may run inside the sandbox.
+    sandbox_timeout_seconds: int = 120
+    # Master switch — set SANDBOX_ENABLED=false in envs where Docker is absent.
+    sandbox_enabled: bool = True
+
+    # ── Sandbox Pool ──────────────────────────────────────────────────────────
+    # Containers to pre-warm per stack at startup (0 = cold-start on demand).
+    sandbox_pool_min_size: int = 1
+    # Max containers that can be simultaneously checked out per stack.
+    sandbox_pool_max_size: int = 2
+    # Seconds to wait for a free pool slot before falling back to local subprocess.
+    sandbox_checkout_timeout_seconds: int = 30
+    # Reaper kills containers held longer than this (should match fix run budget).
+    sandbox_max_hold_seconds: int = 300
+    # How often the reaper background task runs (seconds).
+    sandbox_reaper_interval_seconds: int = 60
+
     # Phase 2: streaming builder — set FORGE_STREAMING_BUILDER=1 to enable.
     # Eliminates the 20K output token ceiling by writing each file as Claude
     # generates it. Safe to enable once validated in simulation.
diff --git a/phalanx/db/models.py b/phalanx/db/models.py
index f81aaf6e..218ce5ff 100644
--- a/phalanx/db/models.py
+++ b/phalanx/db/models.py
@@ -828,6 +828,8 @@ class CIFixRun(Base):
     """False until OutcomeTracker has classified this run's fix outcome (V2)."""
     tool_version_parity_ok: Mapped[bool | None] = mapped_column(Boolean, nullable=True)
     """Phase 4: True when tool version at fix time matches failure-time version (within minor version)."""
+    pipeline_context_json: Mapped[str | None] = mapped_column(Text, nullable=True)
+    """CIFixContext serialized as JSON — full multi-agent pipeline state (Phase 1+)."""
     status: Mapped[str] = mapped_column(String(20), nullable=False, default="PENDING")
     attempt: Mapped[int] = mapped_column(Integer, default=1)
     error: Mapped[str | None] = mapped_column(Text)
diff --git a/pyproject.toml b/pyproject.toml
index 7867519d..f5a090bc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -106,6 +106,14 @@ ignore = ["E501", "B008", "N805"]
 [tool.ruff.lint.isort]
 known-first-party = ["phalanx"]
 
+[tool.ruff.lint.per-file-ignores]
+# Test files — mock class names are PascalCase by convention (N806),
+# nested with statements are intentional for readability (SIM117),
+# deferred imports inside try blocks are expected (F401),
+# duplicate test function names from copy-paste scaffolding (F811),
+# and late imports after helper definitions are acceptable (E402, SIM105).
+"tests/**" = ["N806", "SIM117", "F401", "F811", "E402", "SIM105"]
+
 # ── MyPy ─────────────────────────────────────────────────────────────────────
 [tool.mypy]
 python_version = "3.12"
diff --git a/tests/unit/test_analyst_unit.py b/tests/unit/test_analyst_unit.py
index 84e9233b..d01ae4e5 100644
--- a/tests/unit/test_analyst_unit.py
+++ b/tests/unit/test_analyst_unit.py
@@ -12,19 +12,18 @@
 from __future__ import annotations
 
 import json
-from pathlib import Path
-
-import pytest
+from typing import TYPE_CHECKING
 
 from phalanx.ci_fixer.analyst import (
     FilePatch,
     FileWindow,
-    FixPlan,
     RootCauseAnalyst,
     _is_test_file,
 )
-from phalanx.ci_fixer.log_parser import LintError, ParsedLog, TestFailure, TypeError
+from phalanx.ci_fixer.log_parser import LintError, ParsedLog
 
+if TYPE_CHECKING:
+    from pathlib import Path
 
 # ── Helpers ────────────────────────────────────────────────────────────────────
 
@@ -47,20 +46,25 @@ def _lint_log(file: str, line: int = 1, code: str = "F401") -> ParsedLog:
     )
 
 
-def _patch_json(path: str, start: int, end: int, corrected: list[str],
-                confidence: str = "high") -> str:
-    return json.dumps({
-        "confidence": confidence,
-        "root_cause": "test root cause",
-        "patches": [{
-            "path": path,
-            "start_line": start,
-            "end_line": end,
-            "corrected_lines": corrected,
-            "reason": "test",
-        }],
-        "needs_new_test": False,
-    })
+def _patch_json(
+    path: str, start: int, end: int, corrected: list[str], confidence: str = "high"
+) -> str:
+    return json.dumps(
+        {
+            "confidence": confidence,
+            "root_cause": "test root cause",
+            "patches": [
+                {
+                    "path": path,
+                    "start_line": start,
+                    "end_line": end,
+                    "corrected_lines": corrected,
+                    "reason": "test",
+                }
+            ],
+            "needs_new_test": False,
+        }
+    )
 
 
 # ── FilePatch.delta ────────────────────────────────────────────────────────────
@@ -68,23 +72,19 @@ def _patch_json(path: str, start: int, end: int, corrected: list[str],
 
 class TestFilePatchDelta:
     def test_no_change(self):
-        p = FilePatch(path="f.py", start_line=1, end_line=3,
-                      corrected_lines=["a\n", "b\n", "c\n"])
+        p = FilePatch(path="f.py", start_line=1, end_line=3, corrected_lines=["a\n", "b\n", "c\n"])
         assert p.delta == 0
 
     def test_line_removed(self):
-        p = FilePatch(path="f.py", start_line=1, end_line=3,
-                      corrected_lines=["a\n", "b\n"])
+        p = FilePatch(path="f.py", start_line=1, end_line=3, corrected_lines=["a\n", "b\n"])
         assert p.delta == -1
 
     def test_line_added(self):
-        p = FilePatch(path="f.py", start_line=1, end_line=2,
-                      corrected_lines=["a\n", "b\n", "c\n"])
+        p = FilePatch(path="f.py", start_line=1, end_line=2, corrected_lines=["a\n", "b\n", "c\n"])
         assert p.delta == 1
 
     def test_original_window_size(self):
-        p = FilePatch(path="f.py", start_line=5, end_line=10,
-                      corrected_lines=["x\n"])
+        p = FilePatch(path="f.py", start_line=5, end_line=10, corrected_lines=["x\n"])
         assert p.original_window_size == 6
 
 
@@ -169,7 +169,7 @@ def test_multiple_error_lines_merged_into_one_window(self, tmp_path):
             ],
         )
         windows = analyst._read_windows(tmp_path, parsed)
-        assert len(windows) == 1   # merged, not two separate windows
+        assert len(windows) == 1  # merged, not two separate windows
 
     def test_max_files_respected(self, tmp_path):
         for i in range(6):
@@ -183,7 +183,7 @@ def test_max_files_respected(self, tmp_path):
         )
         analyst = _make_analyst("{}")
         windows = analyst._read_windows(tmp_path, parsed)
-        assert len(windows) <= 4   # _MAX_FILES = 4
+        assert len(windows) <= 4  # _MAX_FILES = 4
 
 
 # ── _parse_and_validate_patches ────────────────────────────────────────────────
@@ -203,31 +203,59 @@ def _analyst(self) -> RootCauseAnalyst:
 
     def test_valid_patch_accepted(self):
         w = self._window("src/foo.py", 1, 5, 5)
-        raw = [{"path": "src/foo.py", "start_line": 1, "end_line": 5,
-                "corrected_lines": ["a\n", "b\n", "c\n", "d\n"], "reason": "ok"}]
+        raw = [
+            {
+                "path": "src/foo.py",
+                "start_line": 1,
+                "end_line": 5,
+                "corrected_lines": ["a\n", "b\n", "c\n", "d\n"],
+                "reason": "ok",
+            }
+        ]
         patches = self._analyst()._parse_and_validate_patches(raw, [w])
         assert len(patches) == 1
         assert patches[0].delta == -1
 
     def test_unknown_file_rejected(self):
         w = self._window("src/foo.py", 1, 5, 5)
-        raw = [{"path": "src/bar.py", "start_line": 1, "end_line": 5,
-                "corrected_lines": ["x\n"], "reason": "bad"}]
+        raw = [
+            {
+                "path": "src/bar.py",
+                "start_line": 1,
+                "end_line": 5,
+                "corrected_lines": ["x\n"],
+                "reason": "bad",
+            }
+        ]
         patches = self._analyst()._parse_and_validate_patches(raw, [w])
         assert patches == []
 
     def test_test_file_rejected(self):
         w = self._window("tests/test_foo.py", 1, 5, 5)
-        raw = [{"path": "tests/test_foo.py", "start_line": 1, "end_line": 5,
-                "corrected_lines": ["x\n"], "reason": "bad"}]
+        raw = [
+            {
+                "path": "tests/test_foo.py",
+                "start_line": 1,
+                "end_line": 5,
+                "corrected_lines": ["x\n"],
+                "reason": "bad",
+            }
+        ]
         patches = self._analyst()._parse_and_validate_patches(raw, [w])
         assert patches == []
 
     def test_delta_too_large_rejected(self):
         w = self._window("src/foo.py", 1, 5, 5)
         big = [f"line {i}\n" for i in range(50)]
-        raw = [{"path": "src/foo.py", "start_line": 1, "end_line": 5,
-                "corrected_lines": big, "reason": "too big"}]
+        raw = [
+            {
+                "path": "src/foo.py",
+                "start_line": 1,
+                "end_line": 5,
+                "corrected_lines": big,
+                "reason": "too big",
+            }
+        ]
         patches = self._analyst()._parse_and_validate_patches(raw, [w])
         assert patches == []
 
@@ -239,15 +267,29 @@ def test_missing_line_range_rejected(self):
 
     def test_empty_corrected_lines_rejected(self):
         w = self._window("src/foo.py", 1, 5, 5)
-        raw = [{"path": "src/foo.py", "start_line": 1, "end_line": 5,
-                "corrected_lines": [], "reason": "empty"}]
+        raw = [
+            {
+                "path": "src/foo.py",
+                "start_line": 1,
+                "end_line": 5,
+                "corrected_lines": [],
+                "reason": "empty",
+            }
+        ]
         patches = self._analyst()._parse_and_validate_patches(raw, [w])
         assert patches == []
 
     def test_lines_without_newline_get_newline_appended(self):
         w = self._window("src/foo.py", 1, 3, 3)
-        raw = [{"path": "src/foo.py", "start_line": 1, "end_line": 3,
-                "corrected_lines": ["no newline", "also no newline"], "reason": "ok"}]
+        raw = [
+            {
+                "path": "src/foo.py",
+                "start_line": 1,
+                "end_line": 3,
+                "corrected_lines": ["no newline", "also no newline"],
+                "reason": "ok",
+            }
+        ]
         patches = self._analyst()._parse_and_validate_patches(raw, [w])
         assert len(patches) == 1
         assert all(line.endswith("\n") for line in patches[0].corrected_lines)
@@ -255,8 +297,15 @@ def test_lines_without_newline_get_newline_appended(self):
     def test_line_range_within_tolerance_accepted(self):
         """start/end off by ≤2 lines → not rejected; LLM values passed through."""
         w = self._window("src/foo.py", 1, 5, 5)
-        raw = [{"path": "src/foo.py", "start_line": 2, "end_line": 6,  # off by 1
-                "corrected_lines": ["a\n", "b\n"], "reason": "off by one"}]
+        raw = [
+            {
+                "path": "src/foo.py",
+                "start_line": 2,
+                "end_line": 6,  # off by 1
+                "corrected_lines": ["a\n", "b\n"],
+                "reason": "off by one",
+            }
+        ]
         patches = self._analyst()._parse_and_validate_patches(raw, [w])
         # Accepted — within tolerance (off by 1 ≤ 2)
         assert len(patches) == 1
@@ -267,8 +316,15 @@ def test_line_range_within_tolerance_accepted(self):
     def test_line_range_beyond_tolerance_clamped(self):
         """start/end off by >2 lines → clamped to window bounds."""
         w = self._window("src/foo.py", 1, 5, 5)
-        raw = [{"path": "src/foo.py", "start_line": 10, "end_line": 20,  # way off
-                "corrected_lines": ["a\n", "b\n"], "reason": "way off"}]
+        raw = [
+            {
+                "path": "src/foo.py",
+                "start_line": 10,
+                "end_line": 20,  # way off
+                "corrected_lines": ["a\n", "b\n"],
+                "reason": "way off",
+            }
+        ]
         patches = self._analyst()._parse_and_validate_patches(raw, [w])
         # Clamped to window bounds (1..5)
         assert len(patches) == 1
@@ -284,7 +340,7 @@ class TestAnalyzeIntegration:
 
     def test_high_confidence_fix_applied(self, tmp_path):
         _write(tmp_path, "src/foo.py", self._FILE)
-        corrected = self._FILE[1:]   # remove "import os\n"
+        corrected = self._FILE[1:]  # remove "import os\n"
         response = _patch_json("src/foo.py", 1, len(self._FILE), corrected)
         analyst = _make_analyst(response)
         plan = analyst.analyze(_lint_log("src/foo.py"), tmp_path)
diff --git a/tests/unit/test_ci_fix_context.py b/tests/unit/test_ci_fix_context.py
new file mode 100644
index 00000000..f10fc099
--- /dev/null
+++ b/tests/unit/test_ci_fix_context.py
@@ -0,0 +1,391 @@
+"""
+Tests for phalanx.ci_fixer.context — CIFixContext shared pipeline state.
+
+Coverage targets:
+  - CIFixContext: init, to_dict, from_dict, complete, is_complete, current_stage
+  - StructuredFailure, ClassifiedFailure, ReproductionResult, VerifiedPatch, VerificationResult
+  - Serialization round-trip fidelity
+  - Edge cases: None fields, empty lists, partial population
+"""
+
+from __future__ import annotations
+
+import json
+
+import pytest
+
+from phalanx.ci_fixer.context import (
+    CIFixContext,
+    ClassifiedFailure,
+    ReproductionResult,
+    StructuredFailure,
+    VerificationResult,
+    VerifiedPatch,
+)
+
+# ── Fixtures ──────────────────────────────────────────────────────────────────
+
+
+def _make_ctx(**kwargs) -> CIFixContext:
+    defaults = {
+        "ci_fix_run_id": "run-123",
+        "repo": "owner/repo",
+        "branch": "feature/foo",
+        "commit_sha": "abc123",
+        "original_build_id": "build-456",
+    }
+    defaults.update(kwargs)
+    return CIFixContext(**defaults)
+
+
+# ── CIFixContext basics ───────────────────────────────────────────────────────
+
+
+def test_context_init_defaults():
+    ctx = _make_ctx()
+    assert ctx.ci_fix_run_id == "run-123"
+    assert ctx.repo == "owner/repo"
+    assert ctx.branch == "feature/foo"
+    assert ctx.commit_sha == "abc123"
+    assert ctx.original_build_id == "build-456"
+    assert ctx.structured_failure is None
+    assert ctx.classified_failure is None
+    assert ctx.reproduction_result is None
+    assert ctx.verified_patch is None
+    assert ctx.verification_result is None
+    assert ctx.fix_commit_sha is None
+    assert ctx.fix_pr_number is None
+    assert ctx.fix_branch is None
+    assert ctx.pr_was_existing is False
+    assert ctx.final_status == "in_progress"
+    assert ctx.pr_comment_posted is False
+    assert ctx.error is None
+    assert ctx.started_at is not None
+
+
+def test_context_is_complete_initial():
+    ctx = _make_ctx()
+    assert ctx.is_complete is False
+
+
+def test_context_complete_fixed():
+    ctx = _make_ctx()
+    ctx.complete("fixed")
+    assert ctx.is_complete is True
+    assert ctx.final_status == "fixed"
+    assert ctx.completed_at is not None
+    assert ctx.error is None
+
+
+def test_context_complete_failed_with_error():
+    ctx = _make_ctx()
+    ctx.complete("failed", error="something went wrong")
+    assert ctx.final_status == "failed"
+    assert ctx.error == "something went wrong"
+
+
+def test_context_complete_escalated():
+    ctx = _make_ctx()
+    ctx.complete("escalated")
+    assert ctx.final_status == "escalated"
+    assert ctx.is_complete is True
+
+
+def test_context_complete_flaky():
+    ctx = _make_ctx()
+    ctx.complete("flaky")
+    assert ctx.final_status == "flaky"
+
+
+def test_context_complete_env_mismatch():
+    ctx = _make_ctx()
+    ctx.complete("env_mismatch")
+    assert ctx.final_status == "env_mismatch"
+
+
+# ── current_stage property ────────────────────────────────────────────────────
+
+
+def test_current_stage_started():
+    ctx = _make_ctx()
+    assert ctx.current_stage == "started"
+
+
+def test_current_stage_parsed():
+    ctx = _make_ctx()
+    ctx.structured_failure = StructuredFailure(
+        tool="ruff", failure_type="lint", reproducer_cmd="ruff check ."
+    )
+    assert ctx.current_stage == "parsed"
+
+
+def test_current_stage_classified():
+    ctx = _make_ctx()
+    ctx.structured_failure = StructuredFailure(
+        tool="ruff", failure_type="lint", reproducer_cmd="ruff check ."
+    )
+    ctx.classified_failure = ClassifiedFailure(
+        tier="L1_auto", root_cause="unused import", stack="python"
+    )
+    assert ctx.current_stage == "classified"
+
+
+def test_current_stage_sandbox_ready():
+    ctx = _make_ctx()
+    ctx.structured_failure = StructuredFailure(
+        tool="ruff", failure_type="lint", reproducer_cmd="ruff check ."
+    )
+    ctx.classified_failure = ClassifiedFailure(
+        tier="L1_auto", root_cause="unused import", stack="python"
+    )
+    ctx.sandbox_id = "container-abc"
+    assert ctx.current_stage == "sandbox_ready"
+
+
+def test_current_stage_reproduced():
+    ctx = _make_ctx()
+    ctx.reproduction_result = ReproductionResult(verdict="confirmed")
+    assert ctx.current_stage == "reproduced"
+
+
+def test_current_stage_patched():
+    ctx = _make_ctx()
+    ctx.reproduction_result = ReproductionResult(verdict="confirmed")
+    ctx.verified_patch = VerifiedPatch(files_modified=["src/foo.py"], success=True)
+    assert ctx.current_stage == "patched"
+
+
+def test_current_stage_verified():
+    ctx = _make_ctx()
+    ctx.reproduction_result = ReproductionResult(verdict="confirmed")
+    ctx.verified_patch = VerifiedPatch(files_modified=["src/foo.py"], success=True)
+    ctx.verification_result = VerificationResult(verdict="passed")
+    assert ctx.current_stage == "verified"
+
+
+def test_current_stage_committed():
+    ctx = _make_ctx()
+    ctx.fix_commit_sha = "deadbeef"
+    assert ctx.current_stage == "committed"
+
+
+# ── Serialization round-trip ──────────────────────────────────────────────────
+
+
+def test_to_dict_minimal():
+    ctx = _make_ctx()
+    d = ctx.to_dict()
+    assert d["ci_fix_run_id"] == "run-123"
+    assert d["repo"] == "owner/repo"
+    assert d["structured_failure"] is None
+    assert d["final_status"] == "in_progress"
+
+
+def test_to_dict_with_all_agents_populated():
+    ctx = _make_ctx()
+    ctx.structured_failure = StructuredFailure(
+        tool="ruff",
+        failure_type="lint",
+        reproducer_cmd="ruff check .",
+        errors=[{"file": "foo.py", "line": 1, "code": "F401"}],
+        failing_files=["foo.py"],
+        log_excerpt="foo.py:1:1: F401 ...",
+        confidence=0.95,
+    )
+    ctx.classified_failure = ClassifiedFailure(
+        tier="L1_auto",
+        root_cause="unused import",
+        stack="python",
+        confidence=0.9,
+    )
+    ctx.reproduction_result = ReproductionResult(
+        verdict="confirmed",
+        exit_code=1,
+        output="F401 ...",
+        reproducer_cmd="ruff check .",
+    )
+    ctx.verified_patch = VerifiedPatch(
+        files_modified=["foo.py"],
+        validation_cmd="ruff check foo.py",
+        validation_output="All checks passed!",
+        success=True,
+        turns_used=3,
+    )
+    ctx.verification_result = VerificationResult(
+        verdict="passed",
+        output="pytest passed",
+        cmd_run="pytest tests/",
+    )
+    ctx.fix_commit_sha = "abc123"
+    ctx.fix_pr_number = 42
+    ctx.fix_branch = "phalanx/ci-fix/run-123"
+    ctx.complete("fixed")
+
+    d = ctx.to_dict()
+    assert d["structured_failure"]["tool"] == "ruff"
+    assert d["classified_failure"]["tier"] == "L1_auto"
+    assert d["reproduction_result"]["verdict"] == "confirmed"
+    assert d["verified_patch"]["success"] is True
+    assert d["verification_result"]["verdict"] == "passed"
+    assert d["fix_commit_sha"] == "abc123"
+    assert d["fix_pr_number"] == 42
+    assert d["final_status"] == "fixed"
+
+
+def test_from_dict_round_trip_minimal():
+    ctx = _make_ctx()
+    d = ctx.to_dict()
+    restored = CIFixContext.from_dict(d)
+    assert restored.ci_fix_run_id == ctx.ci_fix_run_id
+    assert restored.repo == ctx.repo
+    assert restored.structured_failure is None
+    assert restored.final_status == "in_progress"
+
+
+def test_from_dict_round_trip_full():
+    ctx = _make_ctx()
+    ctx.structured_failure = StructuredFailure(
+        tool="mypy", failure_type="type_error", reproducer_cmd="mypy ."
+    )
+    ctx.classified_failure = ClassifiedFailure(
+        tier="L1_auto", root_cause="type mismatch", stack="python"
+    )
+    ctx.reproduction_result = ReproductionResult(verdict="skipped")
+    ctx.verified_patch = VerifiedPatch(files_modified=["src/types.py"], success=True)
+    ctx.verification_result = VerificationResult(verdict="skipped")
+    ctx.fix_commit_sha = "sha456"
+    ctx.fix_pr_number = 7
+    ctx.fix_pr_url = "https://github.com/owner/repo/pull/7"
+    ctx.fix_branch = "phalanx/ci-fix/run-123"
+    ctx.pr_was_existing = True
+    ctx.complete("fixed")
+
+    d = ctx.to_dict()
+    restored = CIFixContext.from_dict(d)
+
+    assert restored.structured_failure.tool == "mypy"
+    assert restored.classified_failure.tier == "L1_auto"
+    assert restored.reproduction_result.verdict == "skipped"
+    assert restored.verified_patch.success is True
+    assert restored.verification_result.verdict == "skipped"
+    assert restored.fix_commit_sha == "sha456"
+    assert restored.fix_pr_number == 7
+    assert restored.fix_pr_url == "https://github.com/owner/repo/pull/7"
+    assert restored.pr_was_existing is True
+    assert restored.final_status == "fixed"
+    assert restored.is_complete is True
+
+
+def test_json_serializable():
+    ctx = _make_ctx()
+    ctx.structured_failure = StructuredFailure(
+        tool="ruff", failure_type="lint", reproducer_cmd="ruff check ."
+    )
+    ctx.complete("fixed")
+    # Must not raise
+    serialized = json.dumps(ctx.to_dict())
+    restored = CIFixContext.from_dict(json.loads(serialized))
+    assert restored.final_status == "fixed"
+
+
+def test_from_dict_missing_optional_fields():
+    """from_dict should handle dicts missing optional fields gracefully."""
+    d = {
+        "ci_fix_run_id": "run-xyz",
+        "repo": "owner/repo",
+        "branch": "main",
+        "commit_sha": "abc",
+        "original_build_id": "build-1",
+    }
+    ctx = CIFixContext.from_dict(d)
+    assert ctx.ci_fix_run_id == "run-xyz"
+    assert ctx.structured_failure is None
+    assert ctx.fix_pr_number is None
+    assert ctx.final_status == "in_progress"
+    assert ctx.pr_was_existing is False
+
+
+# ── Sub-object tests ──────────────────────────────────────────────────────────
+
+
+def test_structured_failure_defaults():
+    sf = StructuredFailure(tool="ruff", failure_type="lint", reproducer_cmd="ruff check .")
+    assert sf.errors == []
+    assert sf.failing_files == []
+    assert sf.log_excerpt == ""
+    assert sf.confidence == 1.0
+
+
+def test_classified_failure_l2_escalate():
+    cf = ClassifiedFailure(
+        tier="L2_escalate",
+        root_cause="test regression",
+        stack="python",
+        confidence=0.3,
+        escalation_reason="test failure requires engineer judgment",
+    )
+    assert cf.tier == "L2_escalate"
+    assert cf.escalation_reason == "test failure requires engineer judgment"
+
+
+def test_reproduction_result_all_verdicts():
+    for verdict in ("confirmed", "flaky", "env_mismatch", "timeout", "skipped"):
+        r = ReproductionResult(verdict=verdict)
+        assert r.verdict == verdict
+
+
+def test_verified_patch_defaults():
+    vp = VerifiedPatch()
+    assert vp.files_modified == []
+    assert vp.success is False
+    assert vp.turns_used == 0
+
+
+def test_verification_result_all_verdicts():
+    for verdict in ("passed", "failed", "skipped", "timeout"):
+        vr = VerificationResult(verdict=verdict)
+        assert vr.verdict == verdict
+
+
+# ── Edge cases ────────────────────────────────────────────────────────────────
+
+
+def test_context_pr_was_existing_default_false():
+    ctx = _make_ctx()
+    assert ctx.pr_was_existing is False
+
+
+def test_context_set_pr_was_existing():
+    ctx = _make_ctx()
+    ctx.pr_was_existing = True
+    d = ctx.to_dict()
+    assert d["pr_was_existing"] is True
+    restored = CIFixContext.from_dict(d)
+    assert restored.pr_was_existing is True
+
+
+def test_context_sandbox_fields():
+    ctx = _make_ctx()
+    ctx.sandbox_id = "container-xyz"
+    ctx.sandbox_stack = "python"
+    d = ctx.to_dict()
+    restored = CIFixContext.from_dict(d)
+    assert restored.sandbox_id == "container-xyz"
+    assert restored.sandbox_stack == "python"
+
+
+def test_context_error_persists_through_round_trip():
+    ctx = _make_ctx()
+    ctx.complete("failed", error="ruff not found in sandbox")
+    d = ctx.to_dict()
+    restored = CIFixContext.from_dict(d)
+    assert restored.error == "ruff not found in sandbox"
+
+
+def test_context_started_at_is_set():
+    ctx = _make_ctx()
+    assert ctx.started_at
+    # Should be a valid ISO datetime string
+    from datetime import datetime
+
+    datetime.fromisoformat(ctx.started_at)
diff --git a/tests/unit/test_ci_fix_runs_api.py b/tests/unit/test_ci_fix_runs_api.py
new file mode 100644
index 00000000..6b215497
--- /dev/null
+++ b/tests/unit/test_ci_fix_runs_api.py
@@ -0,0 +1,478 @@
+"""
+Tests for phalanx.api.routes.ci_fix_runs — CI fix run context API.
+
+Coverage targets:
+  - GET /v1/ci-fix-runs/{run_id}/context — found, not found, no context, parse error
+  - GET /v1/ci-fix-runs/{run_id}         — found, not found
+  - GET /v1/ci-fix-runs                  — list, filters
+  - _find_existing_fix_pr                — found, not found, error handling
+"""
+
+from __future__ import annotations
+
+import json
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from httpx import AsyncClient
+
+from phalanx.ci_fixer.context import CIFixContext, StructuredFailure
+
+# ── Helpers ───────────────────────────────────────────────────────────────────
+
+
+def _make_ci_run(
+    run_id="run-abc",
+    repo="owner/repo",
+    branch="main",
+    commit_sha="abc123",
+    build_id="build-1",
+    status="FIXED",
+    pipeline_context_json=None,
+    fix_pr_number=None,
+    fix_branch=None,
+    fix_commit_sha=None,
+    fingerprint_hash=None,
+    error=None,
+):
+    run = MagicMock()
+    run.id = run_id
+    run.repo_full_name = repo
+    run.branch = branch
+    run.commit_sha = commit_sha
+    run.ci_build_id = build_id
+    run.ci_provider = "github_actions"
+    run.status = status
+    run.pipeline_context_json = pipeline_context_json
+    run.fix_pr_number = fix_pr_number
+    run.fix_branch = fix_branch
+    run.fix_commit_sha = fix_commit_sha
+    run.fingerprint_hash = fingerprint_hash
+    run.error = error
+    run.created_at = MagicMock()
+    run.created_at.isoformat.return_value = "2026-04-15T12:00:00+00:00"
+    run.completed_at = None
+    return run
+
+
+def _make_context_json(run_id="run-abc") -> str:
+    ctx = CIFixContext(
+        ci_fix_run_id=run_id,
+        repo="owner/repo",
+        branch="main",
+        commit_sha="abc123",
+        original_build_id="build-1",
+    )
+    ctx.structured_failure = StructuredFailure(
+        tool="ruff", failure_type="lint", reproducer_cmd="ruff check ."
+    )
+    ctx.complete("fixed")
+    return json.dumps(ctx.to_dict())
+
+
+# ── GET /v1/ci-fix-runs/{run_id}/context ─────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_get_context_not_found():
+    from phalanx.api.routes.ci_fix_runs import get_fix_run_context
+
+    mock_session = AsyncMock()
+    mock_result = MagicMock()
+    mock_result.scalar_one_or_none.return_value = None
+    mock_session.execute = AsyncMock(return_value=mock_result)
+
+    mock_ctx_manager = AsyncMock()
+    mock_ctx_manager.__aenter__ = AsyncMock(return_value=mock_session)
+    mock_ctx_manager.__aexit__ = AsyncMock(return_value=None)
+
+    with patch("phalanx.api.routes.ci_fix_runs.get_db", return_value=mock_ctx_manager):
+        from fastapi import HTTPException
+
+        with pytest.raises(HTTPException) as exc_info:
+            await get_fix_run_context("nonexistent")
+        assert exc_info.value.status_code == 404
+
+
+@pytest.mark.asyncio
+async def test_get_context_no_pipeline_json():
+    """Run exists but has no pipeline_context_json (pre-Phase 1 run)."""
+    from phalanx.api.routes.ci_fix_runs import get_fix_run_context
+
+    ci_run = _make_ci_run(pipeline_context_json=None, status="FIXED")
+
+    mock_session = AsyncMock()
+    mock_result = MagicMock()
+    mock_result.scalar_one_or_none.return_value = ci_run
+    mock_session.execute = AsyncMock(return_value=mock_result)
+
+    mock_ctx_manager = AsyncMock()
+    mock_ctx_manager.__aenter__ = AsyncMock(return_value=mock_session)
+    mock_ctx_manager.__aexit__ = AsyncMock(return_value=None)
+
+    with patch("phalanx.api.routes.ci_fix_runs.get_db", return_value=mock_ctx_manager):
+        result = await get_fix_run_context("run-abc")
+
+    assert result["ci_fix_run_id"] == "run-abc"
+    assert result["final_status"] == "unknown"
+    assert "_note" in result
+    assert result["current_stage"] == "unknown"
+
+
+@pytest.mark.asyncio
+async def test_get_context_with_pipeline_json():
+    """Run has pipeline_context_json — returns full parsed context."""
+    from phalanx.api.routes.ci_fix_runs import get_fix_run_context
+
+    ctx_json = _make_context_json("run-abc")
+    ci_run = _make_ci_run(pipeline_context_json=ctx_json, status="FIXED")
+
+    mock_session = AsyncMock()
+    mock_result = MagicMock()
+    mock_result.scalar_one_or_none.return_value = ci_run
+    mock_session.execute = AsyncMock(return_value=mock_result)
+
+    mock_ctx_manager = AsyncMock()
+    mock_ctx_manager.__aenter__ = AsyncMock(return_value=mock_session)
+    mock_ctx_manager.__aexit__ = AsyncMock(return_value=None)
+
+    with patch("phalanx.api.routes.ci_fix_runs.get_db", return_value=mock_ctx_manager):
+        result = await get_fix_run_context("run-abc")
+
+    assert result["ci_fix_run_id"] == "run-abc"
+    assert result["final_status"] == "fixed"
+    assert result["current_stage"] in ("parsed", "committed", "patched", "classified", "started")
+    assert result["structured_failure"]["tool"] == "ruff"
+
+
+@pytest.mark.asyncio
+async def test_get_context_invalid_json():
+    """pipeline_context_json is corrupt — returns 500."""
+    from phalanx.api.routes.ci_fix_runs import get_fix_run_context
+
+    ci_run = _make_ci_run(pipeline_context_json="not valid json{{{", status="FIXED")
+
+    mock_session = AsyncMock()
+    mock_result = MagicMock()
+    mock_result.scalar_one_or_none.return_value = ci_run
+    mock_session.execute = AsyncMock(return_value=mock_result)
+
+    mock_ctx_manager = AsyncMock()
+    mock_ctx_manager.__aenter__ = AsyncMock(return_value=mock_session)
+    mock_ctx_manager.__aexit__ = AsyncMock(return_value=None)
+
+    with patch("phalanx.api.routes.ci_fix_runs.get_db", return_value=mock_ctx_manager):
+        from fastapi import HTTPException
+
+        with pytest.raises(HTTPException) as exc_info:
+            await get_fix_run_context("run-abc")
+        assert exc_info.value.status_code == 500
+
+
+# ── GET /v1/ci-fix-runs/{run_id} ─────────────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_get_fix_run_found():
+    from phalanx.api.routes.ci_fix_runs import get_fix_run
+
+    ci_run = _make_ci_run(fix_pr_number=7, fix_branch="phalanx/ci-fix/run-abc")
+
+    mock_session = AsyncMock()
+    mock_result = MagicMock()
+    mock_result.scalar_one_or_none.return_value = ci_run
+    mock_session.execute = AsyncMock(return_value=mock_result)
+
+    mock_ctx_manager = AsyncMock()
+    mock_ctx_manager.__aenter__ = AsyncMock(return_value=mock_session)
+    mock_ctx_manager.__aexit__ = AsyncMock(return_value=None)
+
+    with patch("phalanx.api.routes.ci_fix_runs.get_db", return_value=mock_ctx_manager):
+        result = await get_fix_run("run-abc")
+
+    assert result["id"] == "run-abc"
+    assert result["fix_pr_number"] == 7
+    assert result["fix_branch"] == "phalanx/ci-fix/run-abc"
+    assert result["has_context"] is False
+
+
+@pytest.mark.asyncio
+async def test_get_fix_run_not_found():
+    from phalanx.api.routes.ci_fix_runs import get_fix_run
+
+    mock_session = AsyncMock()
+    mock_result = MagicMock()
+    mock_result.scalar_one_or_none.return_value = None
+    mock_session.execute = AsyncMock(return_value=mock_result)
+
+    mock_ctx_manager = AsyncMock()
+    mock_ctx_manager.__aenter__ = AsyncMock(return_value=mock_session)
+    mock_ctx_manager.__aexit__ = AsyncMock(return_value=None)
+
+    with patch("phalanx.api.routes.ci_fix_runs.get_db", return_value=mock_ctx_manager):
+        from fastapi import HTTPException
+
+        with pytest.raises(HTTPException) as exc_info:
+            await get_fix_run("nonexistent")
+        assert exc_info.value.status_code == 404
+
+
+@pytest.mark.asyncio
+async def test_get_fix_run_has_context_true():
+    from phalanx.api.routes.ci_fix_runs import get_fix_run
+
+    ctx_json = _make_context_json("run-abc")
+    ci_run = _make_ci_run(pipeline_context_json=ctx_json)
+
+    mock_session = AsyncMock()
+    mock_result = MagicMock()
+    mock_result.scalar_one_or_none.return_value = ci_run
+    mock_session.execute = AsyncMock(return_value=mock_result)
+
+    mock_ctx_manager = AsyncMock()
+    mock_ctx_manager.__aenter__ = AsyncMock(return_value=mock_session)
+    mock_ctx_manager.__aexit__ = AsyncMock(return_value=None)
+
+    with patch("phalanx.api.routes.ci_fix_runs.get_db", return_value=mock_ctx_manager):
+        result = await get_fix_run("run-abc")
+
+    assert result["has_context"] is True
+
+
+# ── GET /v1/ci-fix-runs (list) ────────────────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_list_fix_runs_empty():
+    from phalanx.api.routes.ci_fix_runs import list_fix_runs
+
+    mock_session = AsyncMock()
+    mock_result = MagicMock()
+    mock_result.scalars.return_value.all.return_value = []
+    mock_session.execute = AsyncMock(return_value=mock_result)
+
+    mock_ctx_manager = AsyncMock()
+    mock_ctx_manager.__aenter__ = AsyncMock(return_value=mock_session)
+    mock_ctx_manager.__aexit__ = AsyncMock(return_value=None)
+
+    with patch("phalanx.api.routes.ci_fix_runs.get_db", return_value=mock_ctx_manager):
+        result = await list_fix_runs(limit=20, run_status=None)
+
+    assert result["runs"] == []
+    assert result["count"] == 0
+
+
+@pytest.mark.asyncio
+async def test_list_fix_runs_with_results():
+    from phalanx.api.routes.ci_fix_runs import list_fix_runs
+
+    runs = [
+        _make_ci_run(run_id="run-1", status="FIXED"),
+        _make_ci_run(run_id="run-2", status="FAILED", error="no_structured_errors"),
+    ]
+
+    mock_session = AsyncMock()
+    mock_result = MagicMock()
+    mock_result.scalars.return_value.all.return_value = runs
+    mock_session.execute = AsyncMock(return_value=mock_result)
+
+    mock_ctx_manager = AsyncMock()
+    mock_ctx_manager.__aenter__ = AsyncMock(return_value=mock_session)
+    mock_ctx_manager.__aexit__ = AsyncMock(return_value=None)
+
+    with patch("phalanx.api.routes.ci_fix_runs.get_db", return_value=mock_ctx_manager):
+        result = await list_fix_runs(limit=20, run_status=None)
+
+    assert result["count"] == 2
+    assert result["runs"][0]["id"] == "run-1"
+    assert result["runs"][1]["error"] == "no_structured_errors"
+
+
+@pytest.mark.asyncio
+async def test_list_fix_runs_filters_applied():
+    """Filters are passed through — just test the query builds without error."""
+    from phalanx.api.routes.ci_fix_runs import list_fix_runs
+
+    mock_session = AsyncMock()
+    mock_result = MagicMock()
+    mock_result.scalars.return_value.all.return_value = []
+    mock_session.execute = AsyncMock(return_value=mock_result)
+
+    mock_ctx_manager = AsyncMock()
+    mock_ctx_manager.__aenter__ = AsyncMock(return_value=mock_session)
+    mock_ctx_manager.__aexit__ = AsyncMock(return_value=None)
+
+    with patch("phalanx.api.routes.ci_fix_runs.get_db", return_value=mock_ctx_manager):
+        result = await list_fix_runs(repo="owner/repo", branch="main", run_status="FIXED", limit=5)
+
+    assert result["count"] == 0
+
+
+# ── _find_existing_fix_pr ─────────────────────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_find_existing_fix_pr_found():
+    """Returns PR number when an open phalanx/ci-fix/* PR exists."""
+    from phalanx.agents.ci_fixer import CIFixerAgent
+
+    agent = CIFixerAgent.__new__(CIFixerAgent)
+    agent._log = MagicMock()
+    agent._log.info = MagicMock()
+    agent._log.warning = MagicMock()
+
+    integration = MagicMock()
+    integration.github_token = "ghp_test"
+
+    ci_run = MagicMock()
+    ci_run.repo_full_name = "owner/repo"
+    ci_run.branch = "feature/foo"
+
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_response.json.return_value = [
+        {
+            "number": 42,
+            "head": {"ref": "phalanx/ci-fix/old-run-id"},
+        }
+    ]
+
+    mock_client = AsyncMock()
+    mock_client.get = AsyncMock(return_value=mock_response)
+    mock_client.__aenter__ = AsyncMock(return_value=mock_client)
+    mock_client.__aexit__ = AsyncMock(return_value=None)
+
+    with patch("httpx.AsyncClient", return_value=mock_client):
+        result = await agent._find_existing_fix_pr(integration, ci_run)
+
+    assert result == 42
+    agent._log.info.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_find_existing_fix_pr_not_found():
+    """Returns None when no phalanx/ci-fix/* PR exists."""
+    from phalanx.agents.ci_fixer import CIFixerAgent
+
+    agent = CIFixerAgent.__new__(CIFixerAgent)
+    agent._log = MagicMock()
+    agent._log.info = MagicMock()
+    agent._log.warning = MagicMock()
+
+    integration = MagicMock()
+    integration.github_token = "ghp_test"
+
+    ci_run = MagicMock()
+    ci_run.repo_full_name = "owner/repo"
+    ci_run.branch = "feature/foo"
+
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_response.json.return_value = [
+        {
+            "number": 5,
+            "head": {"ref": "feature/some-other-fix"},  # not a phalanx fix branch
+        }
+    ]
+
+    mock_client = AsyncMock()
+    mock_client.get = AsyncMock(return_value=mock_response)
+    mock_client.__aenter__ = AsyncMock(return_value=mock_client)
+    mock_client.__aexit__ = AsyncMock(return_value=None)
+
+    with patch("httpx.AsyncClient", return_value=mock_client):
+        result = await agent._find_existing_fix_pr(integration, ci_run)
+
+    assert result is None
+
+
+@pytest.mark.asyncio
+async def test_find_existing_fix_pr_api_error():
+    """Returns None on HTTP error — does not raise."""
+    from phalanx.agents.ci_fixer import CIFixerAgent
+
+    agent = CIFixerAgent.__new__(CIFixerAgent)
+    agent._log = MagicMock()
+    agent._log.info = MagicMock()
+    agent._log.warning = MagicMock()
+
+    integration = MagicMock()
+    integration.github_token = "ghp_test"
+
+    ci_run = MagicMock()
+    ci_run.repo_full_name = "owner/repo"
+    ci_run.branch = "feature/foo"
+
+    mock_client = AsyncMock()
+    mock_client.get = AsyncMock(side_effect=Exception("network error"))
+    mock_client.__aenter__ = AsyncMock(return_value=mock_client)
+    mock_client.__aexit__ = AsyncMock(return_value=None)
+
+    with patch("httpx.AsyncClient", return_value=mock_client):
+        result = await agent._find_existing_fix_pr(integration, ci_run)
+
+    assert result is None
+    agent._log.warning.assert_called_once()
+
+
+@pytest.mark.asyncio
+async def test_find_existing_fix_pr_non_200():
+    """Returns None when GitHub API returns non-200."""
+    from phalanx.agents.ci_fixer import CIFixerAgent
+
+    agent = CIFixerAgent.__new__(CIFixerAgent)
+    agent._log = MagicMock()
+    agent._log.info = MagicMock()
+    agent._log.warning = MagicMock()
+
+    integration = MagicMock()
+    integration.github_token = "ghp_test"
+
+    ci_run = MagicMock()
+    ci_run.repo_full_name = "owner/repo"
+    ci_run.branch = "feature/foo"
+
+    mock_response = MagicMock()
+    mock_response.status_code = 401
+
+    mock_client = AsyncMock()
+    mock_client.get = AsyncMock(return_value=mock_response)
+    mock_client.__aenter__ = AsyncMock(return_value=mock_client)
+    mock_client.__aexit__ = AsyncMock(return_value=None)
+
+    with patch("httpx.AsyncClient", return_value=mock_client):
+        result = await agent._find_existing_fix_pr(integration, ci_run)
+
+    assert result is None
+
+
+@pytest.mark.asyncio
+async def test_find_existing_fix_pr_empty_list():
+    """Returns None when PR list is empty."""
+    from phalanx.agents.ci_fixer import CIFixerAgent
+
+    agent = CIFixerAgent.__new__(CIFixerAgent)
+    agent._log = MagicMock()
+    agent._log.info = MagicMock()
+    agent._log.warning = MagicMock()
+
+    integration = MagicMock()
+    integration.github_token = "ghp_test"
+
+    ci_run = MagicMock()
+    ci_run.repo_full_name = "owner/repo"
+    ci_run.branch = "feature/foo"
+
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_response.json.return_value = []
+
+    mock_client = AsyncMock()
+    mock_client.get = AsyncMock(return_value=mock_response)
+    mock_client.__aenter__ = AsyncMock(return_value=mock_client)
+    mock_client.__aexit__ = AsyncMock(return_value=None)
+
+    with patch("httpx.AsyncClient", return_value=mock_client):
+        result = await agent._find_existing_fix_pr(integration, ci_run)
+
+    assert result is None
diff --git a/tests/unit/test_ci_fixer_agent_helpers.py b/tests/unit/test_ci_fixer_agent_helpers.py
index a815e185..0b0d4200 100644
--- a/tests/unit/test_ci_fixer_agent_helpers.py
+++ b/tests/unit/test_ci_fixer_agent_helpers.py
@@ -7,11 +7,9 @@
 
 from __future__ import annotations
 
-from pathlib import Path
+from typing import TYPE_CHECKING
 from unittest.mock import MagicMock, patch
 
-import pytest
-
 from phalanx.agents.ci_fixer import (
     CIFixerAgent,
     _cleanup_workspace,
@@ -21,6 +19,8 @@
 from phalanx.ci_fixer.analyst import FilePatch
 from phalanx.ci_fixer.log_parser import LintError, ParsedLog, TestFailure, TypeError
 
+if TYPE_CHECKING:
+    from pathlib import Path
 
 # ── Helpers ────────────────────────────────────────────────────────────────────
 
@@ -34,8 +34,9 @@ def _make_agent() -> CIFixerAgent:
         return agent
 
 
-def _lint_parsed(file: str = "phalanx/foo.py", code: str = "F401",
-                 msg: str = "unused import 'os'") -> ParsedLog:
+def _lint_parsed(
+    file: str = "phalanx/foo.py", code: str = "F401", msg: str = "unused import 'os'"
+) -> ParsedLog:
     return ParsedLog(
         tool="ruff",
         lint_errors=[LintError(file=file, line=5, col=1, code=code, message=msg)],
@@ -61,12 +62,22 @@ def test_returns_16_char_hex(self):
 
     def test_same_error_class_same_hash(self):
         # Different line numbers → same hash (lines stripped)
-        p1 = ParsedLog(tool="ruff", lint_errors=[
-            LintError(file="phalanx/foo.py", line=3, col=1, code="F401", message="unused import 'os'"),
-        ])
-        p2 = ParsedLog(tool="ruff", lint_errors=[
-            LintError(file="phalanx/foo.py", line=99, col=1, code="F401", message="unused import 'os'"),
-        ])
+        p1 = ParsedLog(
+            tool="ruff",
+            lint_errors=[
+                LintError(
+                    file="phalanx/foo.py", line=3, col=1, code="F401", message="unused import 'os'"
+                ),
+            ],
+        )
+        p2 = ParsedLog(
+            tool="ruff",
+            lint_errors=[
+                LintError(
+                    file="phalanx/foo.py", line=99, col=1, code="F401", message="unused import 'os'"
+                ),
+            ],
+        )
         assert _compute_fingerprint(p1) == _compute_fingerprint(p2)
 
     def test_different_error_code_different_hash(self):
@@ -90,11 +101,13 @@ def test_type_error_included(self):
     def test_test_failure_included(self):
         parsed = ParsedLog(
             tool="pytest",
-            test_failures=[TestFailure(
-                test_id="tests/unit/test_foo.py::test_bar",
-                file="tests/unit/test_foo.py",
-                message="AssertionError",
-            )],
+            test_failures=[
+                TestFailure(
+                    test_id="tests/unit/test_foo.py::test_bar",
+                    file="tests/unit/test_foo.py",
+                    message="AssertionError",
+                )
+            ],
         )
         h = _compute_fingerprint(parsed)
         assert len(h) == 16
@@ -106,14 +119,26 @@ def test_empty_log_has_stable_hash(self):
 
     def test_parametrized_tests_normalized(self):
         """test[param1] and test[param2] should yield same fingerprint."""
-        p1 = ParsedLog(tool="pytest", test_failures=[
-            TestFailure(test_id="tests/test_foo.py::test_bar[case1]",
-                        file="tests/test_foo.py", message=""),
-        ])
-        p2 = ParsedLog(tool="pytest", test_failures=[
-            TestFailure(test_id="tests/test_foo.py::test_bar[case2]",
-                        file="tests/test_foo.py", message=""),
-        ])
+        p1 = ParsedLog(
+            tool="pytest",
+            test_failures=[
+                TestFailure(
+                    test_id="tests/test_foo.py::test_bar[case1]",
+                    file="tests/test_foo.py",
+                    message="",
+                ),
+            ],
+        )
+        p2 = ParsedLog(
+            tool="pytest",
+            test_failures=[
+                TestFailure(
+                    test_id="tests/test_foo.py::test_bar[case2]",
+                    file="tests/test_foo.py",
+                    message="",
+                ),
+            ],
+        )
         assert _compute_fingerprint(p1) == _compute_fingerprint(p2)
 
     def test_numbers_in_messages_normalized(self):
@@ -144,11 +169,13 @@ def test_type_errors_formatted(self):
     def test_test_failures_formatted(self):
         parsed = ParsedLog(
             tool="pytest",
-            test_failures=[TestFailure(
-                test_id="tests/unit/test_foo.py::test_bar",
-                file="tests/unit/test_foo.py",
-                message="",
-            )],
+            test_failures=[
+                TestFailure(
+                    test_id="tests/unit/test_foo.py::test_bar",
+                    file="tests/unit/test_foo.py",
+                    message="",
+                )
+            ],
         )
         result = _format_error_detail(parsed)
         assert "test_bar" in result
@@ -225,8 +252,7 @@ def test_applies_line_range_replacement(self, tmp_path):
         assert "import os" not in result
 
     def test_missing_file_skipped(self, tmp_path):
-        patch = FilePatch(path="src/missing.py", start_line=1, end_line=1,
-                          corrected_lines=["x\n"])
+        patch = FilePatch(path="src/missing.py", start_line=1, end_line=1, corrected_lines=["x\n"])
         agent = self._agent()
         written = agent._apply_patches(tmp_path, [patch])
         assert written == []
@@ -234,8 +260,7 @@ def test_missing_file_skipped(self, tmp_path):
     def test_bounds_out_of_range_skipped(self, tmp_path):
         lines = ["a\n", "b\n"]
         _write(tmp_path, "src/foo.py", lines)
-        patch = FilePatch(path="src/foo.py", start_line=5, end_line=10,
-                          corrected_lines=["x\n"])
+        patch = FilePatch(path="src/foo.py", start_line=5, end_line=10, corrected_lines=["x\n"])
         agent = self._agent()
         written = agent._apply_patches(tmp_path, [patch])
         assert written == []
@@ -245,8 +270,7 @@ def test_delta_too_large_skipped(self, tmp_path):
         _write(tmp_path, "src/foo.py", lines)
         # Add 35 lines — exceeds _MAX_TOTAL_LINE_DELTA=30
         huge = [f"added {i}\n" for i in range(35)]
-        patch = FilePatch(path="src/foo.py", start_line=1, end_line=1,
-                          corrected_lines=huge)
+        patch = FilePatch(path="src/foo.py", start_line=1, end_line=1, corrected_lines=huge)
         agent = self._agent()
         written = agent._apply_patches(tmp_path, [patch])
         assert written == []
@@ -255,12 +279,12 @@ def test_multiple_patches_applied_in_order(self, tmp_path):
         lines = ["line 1\n", "import os\n", "import sys\n", "line 4\n"]
         _write(tmp_path, "src/foo.py", lines)
 
-        p1 = FilePatch(path="src/foo.py", start_line=2, end_line=2,
-                       corrected_lines=["# os removed\n"])
+        p1 = FilePatch(
+            path="src/foo.py", start_line=2, end_line=2, corrected_lines=["# os removed\n"]
+        )
         # After p1, file changes — p2 targets a different file
         _write(tmp_path, "src/bar.py", ["x = 1\n", "y = 2\n"])
-        p2 = FilePatch(path="src/bar.py", start_line=1, end_line=1,
-                       corrected_lines=["x = 10\n"])
+        p2 = FilePatch(path="src/bar.py", start_line=1, end_line=1, corrected_lines=["x = 10\n"])
 
         agent = self._agent()
         written = agent._apply_patches(tmp_path, [p1, p2])
diff --git a/tests/unit/test_ci_fixer_agent_helpers2.py b/tests/unit/test_ci_fixer_agent_helpers2.py
index 7c41399a..163c7d25 100644
--- a/tests/unit/test_ci_fixer_agent_helpers2.py
+++ b/tests/unit/test_ci_fixer_agent_helpers2.py
@@ -11,14 +11,11 @@
 
 from __future__ import annotations
 
-import asyncio
-from pathlib import Path
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
-from phalanx.agents.ci_fixer import CIFixerAgent, _cleanup_workspace, _compute_fingerprint
-
+from phalanx.agents.ci_fixer import CIFixerAgent
 
 # ── helpers ────────────────────────────────────────────────────────────────────
 
@@ -55,8 +52,9 @@ async def test_execute_catches_unhandled_exception():
     """execute() wraps _execute_inner exceptions and returns AgentResult(success=False)."""
     agent = _make_agent()
 
-    with patch.object(agent, "_execute_inner", new_callable=AsyncMock,
-                      side_effect=RuntimeError("unexpected boom")):
+    with patch.object(
+        agent, "_execute_inner", new_callable=AsyncMock, side_effect=RuntimeError("unexpected boom")
+    ):
         result = await agent.execute()
 
     assert result.success is False
@@ -67,10 +65,15 @@ async def test_execute_catches_unhandled_exception():
 async def test_execute_returns_inner_result_on_success():
     """execute() propagates AgentResult from _execute_inner."""
     from phalanx.agents.base import AgentResult
+
     agent = _make_agent()
 
-    with patch.object(agent, "_execute_inner", new_callable=AsyncMock,
-                      return_value=AgentResult(success=True, output={"done": True})):
+    with patch.object(
+        agent,
+        "_execute_inner",
+        new_callable=AsyncMock,
+        return_value=AgentResult(success=True, output={"done": True}),
+    ):
         result = await agent.execute()
 
     assert result.success is True
@@ -172,13 +175,16 @@ async def mock_execute(_stmt):
 
     # parse_log returns empty → no errors
     from phalanx.ci_fixer.log_parser import ParsedLog
+
     empty_parsed = ParsedLog(tool="unknown")
 
-    with patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx), \
-         patch.object(agent, "_fetch_logs", new_callable=AsyncMock, return_value=""), \
-         patch("phalanx.agents.ci_fixer.parse_log", return_value=empty_parsed), \
-         patch.object(agent, "_persist_fingerprint", new_callable=AsyncMock), \
-         patch.object(agent, "_mark_failed", new_callable=AsyncMock):
+    with (
+        patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx),
+        patch.object(agent, "_fetch_logs", new_callable=AsyncMock, return_value=""),
+        patch("phalanx.agents.ci_fixer.parse_log", return_value=empty_parsed),
+        patch.object(agent, "_persist_fingerprint", new_callable=AsyncMock),
+        patch.object(agent, "_mark_failed", new_callable=AsyncMock),
+    ):
         result = await agent._execute_inner()
 
     assert result.success is False
@@ -193,6 +199,7 @@ async def test_load_flaky_patterns_no_lint_errors():
     """Returns [] immediately when no lint/type errors."""
     agent = _make_agent()
     from phalanx.ci_fixer.log_parser import ParsedLog
+
     parsed = ParsedLog(tool="pytest")  # only test failures, no lint errors
 
     result = await agent._load_flaky_patterns("acme/backend", parsed)
@@ -207,7 +214,7 @@ async def test_load_flaky_patterns_returns_rows():
 
     parsed = ParsedLog(
         tool="ruff",
-        lint_errors=[LintError(file="src/foo.py", line=1, col=1, code="F401", message="x")]
+        lint_errors=[LintError(file="src/foo.py", line=1, col=1, code="F401", message="x")],
     )
 
     mock_pattern = MagicMock()
@@ -230,7 +237,7 @@ async def test_load_flaky_patterns_db_error_returns_empty():
 
     parsed = ParsedLog(
         tool="ruff",
-        lint_errors=[LintError(file="src/foo.py", line=1, col=1, code="F401", message="x")]
+        lint_errors=[LintError(file="src/foo.py", line=1, col=1, code="F401", message="x")],
     )
 
     with patch("phalanx.agents.ci_fixer.get_db", side_effect=Exception("DB down")):
@@ -251,8 +258,11 @@ async def test_clone_repo_generic_exception_returns_false(tmp_path):
     mock_repo_class = MagicMock()
     mock_repo_class.clone_from.side_effect = Exception("authentication failed")
 
-    with patch("phalanx.agents.ci_fixer.CIFixerAgent._clone_repo",
-               new_callable=AsyncMock, return_value=False):
+    with patch(
+        "phalanx.agents.ci_fixer.CIFixerAgent._clone_repo",
+        new_callable=AsyncMock,
+        return_value=False,
+    ):
         result = await agent._clone_repo(tmp_path, "acme/backend", "main", "abc", "token")
 
     assert result is False
@@ -270,8 +280,11 @@ async def test_clone_repo_existing_git_dir(tmp_path):
     mock_repo.remotes.origin.fetch = MagicMock()
     mock_repo.git.checkout = MagicMock()
 
-    with patch("phalanx.agents.ci_fixer.CIFixerAgent._clone_repo",
-               new_callable=AsyncMock, return_value=True):
+    with patch(
+        "phalanx.agents.ci_fixer.CIFixerAgent._clone_repo",
+        new_callable=AsyncMock,
+        return_value=True,
+    ):
         result = await agent._clone_repo(tmp_path, "acme/backend", "main", "abc", "token")
 
     assert result is True
@@ -289,8 +302,11 @@ async def test_commit_to_safe_branch_not_git_repo(tmp_path):
     try:
         from git.exc import InvalidGitRepositoryError
 
-        with patch("phalanx.agents.ci_fixer.CIFixerAgent._commit_to_safe_branch",
-                   new_callable=AsyncMock, return_value={"sha": None, "error": "not a git repo"}):
+        with patch(
+            "phalanx.agents.ci_fixer.CIFixerAgent._commit_to_safe_branch",
+            new_callable=AsyncMock,
+            return_value={"sha": None, "error": "not a git repo"},
+        ):
             result = await agent._commit_to_safe_branch(
                 workspace=tmp_path,
                 source_branch="main",
@@ -311,9 +327,11 @@ async def test_commit_to_safe_branch_exception(tmp_path):
     """Exception → returns sha=None with error key."""
     agent = _make_agent()
 
-    with patch("phalanx.agents.ci_fixer.CIFixerAgent._commit_to_safe_branch",
-               new_callable=AsyncMock,
-               return_value={"sha": None, "error": "something went wrong"}):
+    with patch(
+        "phalanx.agents.ci_fixer.CIFixerAgent._commit_to_safe_branch",
+        new_callable=AsyncMock,
+        return_value={"sha": None, "error": "something went wrong"},
+    ):
         result = await agent._commit_to_safe_branch(
             workspace=tmp_path,
             source_branch="main",
@@ -342,6 +360,7 @@ async def test_comment_on_pr_no_fix_pr():
     ci_run.branch = "feature/x"
 
     from phalanx.ci_fixer.log_parser import ParsedLog
+
     parsed = ParsedLog(tool="ruff")
 
     resp = MagicMock()
@@ -378,8 +397,10 @@ def test_execute_task_runs_agent():
     """execute_task creates CIFixerAgent and runs it."""
     from phalanx.agents.ci_fixer import execute_task
 
-    with patch("phalanx.agents.ci_fixer.CIFixerAgent") as MockAgent, \
-         patch("phalanx.agents.ci_fixer.asyncio.run") as mock_run:
+    with (
+        patch("phalanx.agents.ci_fixer.CIFixerAgent") as MockAgent,
+        patch("phalanx.agents.ci_fixer.asyncio.run") as mock_run,
+    ):
         mock_instance = MagicMock()
         MockAgent.return_value = mock_instance
         execute_task("run-001")
@@ -390,9 +411,10 @@ def test_execute_task_reraises_exception():
     """execute_task re-raises exceptions after logging."""
     from phalanx.agents.ci_fixer import execute_task
 
-    with patch("phalanx.agents.ci_fixer.CIFixerAgent") as MockAgent, \
-         patch("phalanx.agents.ci_fixer.asyncio.run",
-               side_effect=RuntimeError("boom")):
+    with (
+        patch("phalanx.agents.ci_fixer.CIFixerAgent") as MockAgent,
+        patch("phalanx.agents.ci_fixer.asyncio.run", side_effect=RuntimeError("boom")),
+    ):
         MockAgent.return_value = MagicMock()
         with pytest.raises(RuntimeError, match="boom"):
             execute_task("run-001")
@@ -488,12 +510,21 @@ async def test_run_scan_posts_comment_for_warnings():
 
     findings = [ProactiveFinding("fp1", "ruff", "pattern", "warning", ["src/foo.py"])]
 
-    with patch("phalanx.ci_fixer.proactive_scanner.scan_pr_for_patterns",
-               new_callable=AsyncMock, return_value=findings), \
-         patch("phalanx.ci_fixer.proactive_scanner._post_comment",
-               new_callable=AsyncMock, return_value=42), \
-         patch("phalanx.ci_fixer.proactive_scanner._record_scan",
-               new_callable=AsyncMock) as mock_record:
+    with (
+        patch(
+            "phalanx.ci_fixer.proactive_scanner.scan_pr_for_patterns",
+            new_callable=AsyncMock,
+            return_value=findings,
+        ),
+        patch(
+            "phalanx.ci_fixer.proactive_scanner._post_comment",
+            new_callable=AsyncMock,
+            return_value=42,
+        ),
+        patch(
+            "phalanx.ci_fixer.proactive_scanner._record_scan", new_callable=AsyncMock
+        ) as mock_record,
+    ):
         await _run_scan("acme/backend", 1, "abc", "token")
 
     mock_record.assert_called_once()
@@ -509,12 +540,17 @@ async def test_run_scan_no_comment_for_info_only():
 
     findings = [ProactiveFinding("fp1", "ruff", "pattern", "info", ["src/foo.py"])]
 
-    with patch("phalanx.ci_fixer.proactive_scanner.scan_pr_for_patterns",
-               new_callable=AsyncMock, return_value=findings), \
-         patch("phalanx.ci_fixer.proactive_scanner._post_comment",
-               new_callable=AsyncMock) as mock_post, \
-         patch("phalanx.ci_fixer.proactive_scanner._record_scan",
-               new_callable=AsyncMock):
+    with (
+        patch(
+            "phalanx.ci_fixer.proactive_scanner.scan_pr_for_patterns",
+            new_callable=AsyncMock,
+            return_value=findings,
+        ),
+        patch(
+            "phalanx.ci_fixer.proactive_scanner._post_comment", new_callable=AsyncMock
+        ) as mock_post,
+        patch("phalanx.ci_fixer.proactive_scanner._record_scan", new_callable=AsyncMock),
+    ):
         await _run_scan("acme/backend", 1, "abc", "token")
 
     mock_post.assert_not_called()
diff --git a/tests/unit/test_ci_fixer_agent_p4.py b/tests/unit/test_ci_fixer_agent_p4.py
index 06c69f4e..9e7b1302 100644
--- a/tests/unit/test_ci_fixer_agent_p4.py
+++ b/tests/unit/test_ci_fixer_agent_p4.py
@@ -10,16 +10,16 @@
 
 from __future__ import annotations
 
-import json
-from pathlib import Path
+from typing import TYPE_CHECKING
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
 from phalanx.agents.ci_fixer import CIFixerAgent
 from phalanx.ci_fixer.analyst import FilePatch
-from phalanx.ci_fixer.version_parity import VersionParityResult
 
+if TYPE_CHECKING:
+    from pathlib import Path
 
 # ── Helpers ────────────────────────────────────────────────────────────────────
 
@@ -182,7 +182,11 @@ async def test_enable_github_auto_merge_success():
     gql_response = MagicMock()
     gql_response.status_code = 200
     gql_response.json.return_value = {
-        "data": {"enablePullRequestAutoMerge": {"pullRequest": {"autoMergeRequest": {"mergeMethod": "SQUASH"}}}}
+        "data": {
+            "enablePullRequestAutoMerge": {
+                "pullRequest": {"autoMergeRequest": {"mergeMethod": "SQUASH"}}
+            }
+        }
     }
 
     call_count = {"get": 0, "post": 0}
@@ -265,6 +269,7 @@ async def test_open_draft_pr_creates_draft():
     ci_run.pr_number = None
 
     from phalanx.ci_fixer.log_parser import ParsedLog
+
     parsed = ParsedLog(tool="ruff")
 
     pr_response = MagicMock()
@@ -309,6 +314,7 @@ async def test_open_draft_pr_with_auto_merge():
     ci_run.pr_number = 10
 
     from phalanx.ci_fixer.log_parser import ParsedLog
+
     parsed = ParsedLog(tool="ruff")
 
     pr_response = MagicMock()
@@ -325,8 +331,10 @@ async def test_open_draft_pr_with_auto_merge():
     async def mock_enable_auto_merge(**kwargs):
         enable_auto_merge_called["n"] += 1
 
-    with patch("httpx.AsyncClient", return_value=mock_client), \
-         patch.object(agent, "_enable_github_auto_merge", side_effect=mock_enable_auto_merge):
+    with (
+        patch("httpx.AsyncClient", return_value=mock_client),
+        patch.object(agent, "_enable_github_auto_merge", side_effect=mock_enable_auto_merge),
+    ):
         pr_num = await agent._open_draft_pr(
             integration=integration,
             ci_run=ci_run,
@@ -361,6 +369,7 @@ async def test_open_draft_pr_failure_returns_none():
     ci_run.pr_number = None
 
     from phalanx.ci_fixer.log_parser import ParsedLog
+
     parsed = ParsedLog(tool="ruff")
 
     pr_response = MagicMock()
@@ -415,7 +424,6 @@ async def mock_execute(stmt):
 
     mock_session.execute = mock_execute
 
-    from phalanx.ci_fixer.analyst import FilePatch
     from phalanx.ci_fixer.log_parser import ParsedLog
 
     patches = [FilePatch(path="src/foo.py", start_line=1, end_line=1, corrected_lines=["x\n"])]
@@ -466,7 +474,6 @@ async def mock_execute(stmt):
     mock_ctx.__aenter__ = AsyncMock(return_value=mock_session)
     mock_ctx.__aexit__ = AsyncMock(return_value=None)
 
-    from phalanx.ci_fixer.analyst import FilePatch
     from phalanx.ci_fixer.log_parser import ParsedLog
 
     patches = [FilePatch(path="src/foo.py", start_line=1, end_line=1, corrected_lines=["x\n"])]
diff --git a/tests/unit/test_ci_fixer_analyst_loop.py b/tests/unit/test_ci_fixer_analyst_loop.py
index 7068e20d..db40e4d5 100644
--- a/tests/unit/test_ci_fixer_analyst_loop.py
+++ b/tests/unit/test_ci_fixer_analyst_loop.py
@@ -11,19 +11,16 @@
 
 from __future__ import annotations
 
-import json
-from pathlib import Path
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
 from phalanx.agents.ci_fixer import (
-    CIFixerAgent,
     _MAX_FILES_CHANGED,
     _MAX_TOTAL_LINE_DELTA,
+    CIFixerAgent,
 )
 
-
 # ── helpers ────────────────────────────────────────────────────────────────────
 
 
@@ -136,23 +133,27 @@ async def test_execute_inner_delta_guard_exceeded():
         ],
     )
 
-    with patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx), \
-         patch.object(agent, "_fetch_logs", new_callable=AsyncMock, return_value="log"), \
-         patch("phalanx.agents.ci_fixer.parse_log", return_value=parsed), \
-         patch.object(agent, "_persist_fingerprint", new_callable=AsyncMock), \
-         patch.object(agent, "_load_flaky_patterns", new_callable=AsyncMock, return_value=[]), \
-         patch("phalanx.agents.ci_fixer.is_flaky_suppressed", return_value=False), \
-         patch.object(agent, "_clone_repo", new_callable=AsyncMock, return_value=True), \
-         patch.object(agent, "_trace", new_callable=AsyncMock), \
-         patch("phalanx.agents.ci_fixer.RootCauseAnalyst") as MockAnalyst, \
-         patch.object(agent, "_mark_failed_with_fields", new_callable=AsyncMock):
+    with (
+        patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx),
+        patch.object(agent, "_fetch_logs", new_callable=AsyncMock, return_value="log"),
+        patch("phalanx.agents.ci_fixer.parse_log", return_value=parsed),
+        patch.object(agent, "_persist_fingerprint", new_callable=AsyncMock),
+        patch.object(agent, "_load_flaky_patterns", new_callable=AsyncMock, return_value=[]),
+        patch("phalanx.agents.ci_fixer.is_flaky_suppressed", return_value=False),
+        patch.object(agent, "_clone_repo", new_callable=AsyncMock, return_value=True),
+        patch.object(agent, "_trace", new_callable=AsyncMock),
+        patch("phalanx.agents.ci_fixer.RootCauseAnalyst") as MockAnalyst,
+        patch.object(agent, "_mark_failed_with_fields", new_callable=AsyncMock),
+    ):
         mock_analyst_inst = MagicMock()
         mock_analyst_inst.analyze.return_value = big_plan
         MockAnalyst.return_value = mock_analyst_inst
         result = await agent._execute_inner()
 
     assert result.success is False
-    assert "large" in result.output.get("root_cause", "").lower() or result.output.get("reason") in ("low_confidence",)
+    assert "large" in result.output.get("root_cause", "").lower() or result.output.get(
+        "reason"
+    ) in ("low_confidence",)
 
 
 # ── analyst loop: too many files guard ────────────────────────────────────────
@@ -170,16 +171,18 @@ async def test_execute_inner_too_many_files():
 
     big_plan = _make_fix_plan_with_patches(n_patches=_MAX_FILES_CHANGED + 2)
 
-    with patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx), \
-         patch.object(agent, "_fetch_logs", new_callable=AsyncMock, return_value="log"), \
-         patch("phalanx.agents.ci_fixer.parse_log", return_value=parsed), \
-         patch.object(agent, "_persist_fingerprint", new_callable=AsyncMock), \
-         patch.object(agent, "_load_flaky_patterns", new_callable=AsyncMock, return_value=[]), \
-         patch("phalanx.agents.ci_fixer.is_flaky_suppressed", return_value=False), \
-         patch.object(agent, "_clone_repo", new_callable=AsyncMock, return_value=True), \
-         patch.object(agent, "_trace", new_callable=AsyncMock), \
-         patch("phalanx.agents.ci_fixer.RootCauseAnalyst") as MockAnalyst, \
-         patch.object(agent, "_mark_failed_with_fields", new_callable=AsyncMock):
+    with (
+        patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx),
+        patch.object(agent, "_fetch_logs", new_callable=AsyncMock, return_value="log"),
+        patch("phalanx.agents.ci_fixer.parse_log", return_value=parsed),
+        patch.object(agent, "_persist_fingerprint", new_callable=AsyncMock),
+        patch.object(agent, "_load_flaky_patterns", new_callable=AsyncMock, return_value=[]),
+        patch("phalanx.agents.ci_fixer.is_flaky_suppressed", return_value=False),
+        patch.object(agent, "_clone_repo", new_callable=AsyncMock, return_value=True),
+        patch.object(agent, "_trace", new_callable=AsyncMock),
+        patch("phalanx.agents.ci_fixer.RootCauseAnalyst") as MockAnalyst,
+        patch.object(agent, "_mark_failed_with_fields", new_callable=AsyncMock),
+    ):
         mock_analyst_inst = MagicMock()
         mock_analyst_inst.analyze.return_value = big_plan
         MockAnalyst.return_value = mock_analyst_inst
@@ -203,17 +206,19 @@ async def test_execute_inner_no_files_written():
     parsed = _make_parsed_with_lint()
     good_plan = _make_fix_plan_with_patches(n_patches=1)
 
-    with patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx), \
-         patch.object(agent, "_fetch_logs", new_callable=AsyncMock, return_value="log"), \
-         patch("phalanx.agents.ci_fixer.parse_log", return_value=parsed), \
-         patch.object(agent, "_persist_fingerprint", new_callable=AsyncMock), \
-         patch.object(agent, "_load_flaky_patterns", new_callable=AsyncMock, return_value=[]), \
-         patch("phalanx.agents.ci_fixer.is_flaky_suppressed", return_value=False), \
-         patch.object(agent, "_clone_repo", new_callable=AsyncMock, return_value=True), \
-         patch.object(agent, "_trace", new_callable=AsyncMock), \
-         patch("phalanx.agents.ci_fixer.RootCauseAnalyst") as MockAnalyst, \
-         patch.object(agent, "_apply_patches", return_value=[]), \
-         patch.object(agent, "_mark_failed_with_fields", new_callable=AsyncMock):
+    with (
+        patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx),
+        patch.object(agent, "_fetch_logs", new_callable=AsyncMock, return_value="log"),
+        patch("phalanx.agents.ci_fixer.parse_log", return_value=parsed),
+        patch.object(agent, "_persist_fingerprint", new_callable=AsyncMock),
+        patch.object(agent, "_load_flaky_patterns", new_callable=AsyncMock, return_value=[]),
+        patch("phalanx.agents.ci_fixer.is_flaky_suppressed", return_value=False),
+        patch.object(agent, "_clone_repo", new_callable=AsyncMock, return_value=True),
+        patch.object(agent, "_trace", new_callable=AsyncMock),
+        patch("phalanx.agents.ci_fixer.RootCauseAnalyst") as MockAnalyst,
+        patch.object(agent, "_apply_patches", return_value=[]),
+        patch.object(agent, "_mark_failed_with_fields", new_callable=AsyncMock),
+    ):
         mock_analyst_inst = MagicMock()
         mock_analyst_inst.analyze.return_value = good_plan
         MockAnalyst.return_value = mock_analyst_inst
@@ -243,19 +248,21 @@ async def test_execute_inner_validation_failed_with_pr():
     mock_validation.tool_version = "ruff 0.4.0"
     mock_validation.output = "still failing"
 
-    with patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx), \
-         patch.object(agent, "_fetch_logs", new_callable=AsyncMock, return_value="log"), \
-         patch("phalanx.agents.ci_fixer.parse_log", return_value=parsed), \
-         patch.object(agent, "_persist_fingerprint", new_callable=AsyncMock), \
-         patch.object(agent, "_load_flaky_patterns", new_callable=AsyncMock, return_value=[]), \
-         patch("phalanx.agents.ci_fixer.is_flaky_suppressed", return_value=False), \
-         patch.object(agent, "_clone_repo", new_callable=AsyncMock, return_value=True), \
-         patch.object(agent, "_trace", new_callable=AsyncMock), \
-         patch("phalanx.agents.ci_fixer.RootCauseAnalyst") as MockAnalyst, \
-         patch.object(agent, "_apply_patches", return_value=["src/foo.py"]), \
-         patch("phalanx.agents.ci_fixer.validate_fix", return_value=mock_validation), \
-         patch.object(agent, "_mark_failed_with_fields", new_callable=AsyncMock), \
-         patch.object(agent, "_comment_unable_to_fix", new_callable=AsyncMock) as mock_unable:
+    with (
+        patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx),
+        patch.object(agent, "_fetch_logs", new_callable=AsyncMock, return_value="log"),
+        patch("phalanx.agents.ci_fixer.parse_log", return_value=parsed),
+        patch.object(agent, "_persist_fingerprint", new_callable=AsyncMock),
+        patch.object(agent, "_load_flaky_patterns", new_callable=AsyncMock, return_value=[]),
+        patch("phalanx.agents.ci_fixer.is_flaky_suppressed", return_value=False),
+        patch.object(agent, "_clone_repo", new_callable=AsyncMock, return_value=True),
+        patch.object(agent, "_trace", new_callable=AsyncMock),
+        patch("phalanx.agents.ci_fixer.RootCauseAnalyst") as MockAnalyst,
+        patch.object(agent, "_apply_patches", return_value=["src/foo.py"]),
+        patch("phalanx.agents.ci_fixer.validate_fix", return_value=mock_validation),
+        patch.object(agent, "_mark_failed_with_fields", new_callable=AsyncMock),
+        patch.object(agent, "_comment_unable_to_fix", new_callable=AsyncMock) as mock_unable,
+    ):
         mock_analyst_inst = MagicMock()
         mock_analyst_inst.analyze.return_value = good_plan
         MockAnalyst.return_value = mock_analyst_inst
@@ -288,23 +295,33 @@ async def test_execute_inner_commit_failed():
 
     from phalanx.ci_fixer.version_parity import VersionParityResult
 
-    mock_parity = VersionParityResult(ok=True, local_version="ruff 0.4.0", failure_version="", reason="ok")
-
-    with patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx), \
-         patch.object(agent, "_fetch_logs", new_callable=AsyncMock, return_value="log"), \
-         patch("phalanx.agents.ci_fixer.parse_log", return_value=parsed), \
-         patch.object(agent, "_persist_fingerprint", new_callable=AsyncMock), \
-         patch.object(agent, "_load_flaky_patterns", new_callable=AsyncMock, return_value=[]), \
-         patch("phalanx.agents.ci_fixer.is_flaky_suppressed", return_value=False), \
-         patch.object(agent, "_clone_repo", new_callable=AsyncMock, return_value=True), \
-         patch.object(agent, "_trace", new_callable=AsyncMock), \
-         patch("phalanx.agents.ci_fixer.RootCauseAnalyst") as MockAnalyst, \
-         patch.object(agent, "_apply_patches", return_value=["src/foo.py"]), \
-         patch("phalanx.agents.ci_fixer.validate_fix", return_value=mock_validation), \
-         patch.object(agent, "_check_tool_version_parity", new_callable=AsyncMock, return_value=mock_parity), \
-         patch.object(agent, "_commit_to_safe_branch", new_callable=AsyncMock,
-                      return_value={"sha": None, "error": "commit failed"}), \
-         patch.object(agent, "_mark_failed_with_fields", new_callable=AsyncMock):
+    mock_parity = VersionParityResult(
+        ok=True, local_version="ruff 0.4.0", failure_version="", reason="ok"
+    )
+
+    with (
+        patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx),
+        patch.object(agent, "_fetch_logs", new_callable=AsyncMock, return_value="log"),
+        patch("phalanx.agents.ci_fixer.parse_log", return_value=parsed),
+        patch.object(agent, "_persist_fingerprint", new_callable=AsyncMock),
+        patch.object(agent, "_load_flaky_patterns", new_callable=AsyncMock, return_value=[]),
+        patch("phalanx.agents.ci_fixer.is_flaky_suppressed", return_value=False),
+        patch.object(agent, "_clone_repo", new_callable=AsyncMock, return_value=True),
+        patch.object(agent, "_trace", new_callable=AsyncMock),
+        patch("phalanx.agents.ci_fixer.RootCauseAnalyst") as MockAnalyst,
+        patch.object(agent, "_apply_patches", return_value=["src/foo.py"]),
+        patch("phalanx.agents.ci_fixer.validate_fix", return_value=mock_validation),
+        patch.object(
+            agent, "_check_tool_version_parity", new_callable=AsyncMock, return_value=mock_parity
+        ),
+        patch.object(
+            agent,
+            "_commit_to_safe_branch",
+            new_callable=AsyncMock,
+            return_value={"sha": None, "error": "commit failed"},
+        ),
+        patch.object(agent, "_mark_failed_with_fields", new_callable=AsyncMock),
+    ):
         mock_analyst_inst = MagicMock()
         mock_analyst_inst.analyze.return_value = good_plan
         MockAnalyst.return_value = mock_analyst_inst
@@ -356,26 +373,42 @@ async def mock_execute(_stmt):
 
     from phalanx.ci_fixer.version_parity import VersionParityResult
 
-    mock_parity = VersionParityResult(ok=True, local_version="ruff 0.4.0", failure_version="", reason="ok")
-
-    with patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx), \
-         patch.object(agent, "_fetch_logs", new_callable=AsyncMock, return_value="log"), \
-         patch("phalanx.agents.ci_fixer.parse_log", return_value=parsed), \
-         patch.object(agent, "_persist_fingerprint", new_callable=AsyncMock), \
-         patch.object(agent, "_load_flaky_patterns", new_callable=AsyncMock, return_value=[]), \
-         patch("phalanx.agents.ci_fixer.is_flaky_suppressed", return_value=False), \
-         patch.object(agent, "_clone_repo", new_callable=AsyncMock, return_value=True), \
-         patch.object(agent, "_trace", new_callable=AsyncMock), \
-         patch("phalanx.agents.ci_fixer.RootCauseAnalyst") as MockAnalyst, \
-         patch.object(agent, "_apply_patches", return_value=["src/foo.py"]), \
-         patch("phalanx.agents.ci_fixer.validate_fix", return_value=mock_validation), \
-         patch.object(agent, "_check_tool_version_parity", new_callable=AsyncMock, return_value=mock_parity), \
-         patch.object(agent, "_get_fingerprint_success_count", new_callable=AsyncMock, return_value=0), \
-         patch.object(agent, "_commit_to_safe_branch", new_callable=AsyncMock,
-                      return_value={"sha": "abc12345", "branch": "phalanx/ci-fix/run-loop-001", "push_failed": False}), \
-         patch.object(agent, "_open_draft_pr", new_callable=AsyncMock, return_value=42), \
-         patch.object(agent, "_comment_on_pr", new_callable=AsyncMock), \
-         patch.object(agent, "_update_fingerprint_on_success", new_callable=AsyncMock):
+    mock_parity = VersionParityResult(
+        ok=True, local_version="ruff 0.4.0", failure_version="", reason="ok"
+    )
+
+    with (
+        patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx),
+        patch.object(agent, "_fetch_logs", new_callable=AsyncMock, return_value="log"),
+        patch("phalanx.agents.ci_fixer.parse_log", return_value=parsed),
+        patch.object(agent, "_persist_fingerprint", new_callable=AsyncMock),
+        patch.object(agent, "_load_flaky_patterns", new_callable=AsyncMock, return_value=[]),
+        patch("phalanx.agents.ci_fixer.is_flaky_suppressed", return_value=False),
+        patch.object(agent, "_clone_repo", new_callable=AsyncMock, return_value=True),
+        patch.object(agent, "_trace", new_callable=AsyncMock),
+        patch("phalanx.agents.ci_fixer.RootCauseAnalyst") as MockAnalyst,
+        patch.object(agent, "_apply_patches", return_value=["src/foo.py"]),
+        patch("phalanx.agents.ci_fixer.validate_fix", return_value=mock_validation),
+        patch.object(
+            agent, "_check_tool_version_parity", new_callable=AsyncMock, return_value=mock_parity
+        ),
+        patch.object(
+            agent, "_get_fingerprint_success_count", new_callable=AsyncMock, return_value=0
+        ),
+        patch.object(
+            agent,
+            "_commit_to_safe_branch",
+            new_callable=AsyncMock,
+            return_value={
+                "sha": "abc12345",
+                "branch": "phalanx/ci-fix/run-loop-001",
+                "push_failed": False,
+            },
+        ),
+        patch.object(agent, "_open_draft_pr", new_callable=AsyncMock, return_value=42),
+        patch.object(agent, "_comment_on_pr", new_callable=AsyncMock),
+        patch.object(agent, "_update_fingerprint_on_success", new_callable=AsyncMock),
+    ):
         mock_analyst_inst = MagicMock()
         mock_analyst_inst.analyze.return_value = good_plan
         MockAnalyst.return_value = mock_analyst_inst
@@ -427,22 +460,38 @@ async def mock_execute(_stmt):
 
     mock_parity = VersionParityResult(ok=True, local_version="", failure_version="", reason="ok")
 
-    with patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx), \
-         patch.object(agent, "_fetch_logs", new_callable=AsyncMock, return_value="log"), \
-         patch("phalanx.agents.ci_fixer.parse_log", return_value=parsed), \
-         patch.object(agent, "_persist_fingerprint", new_callable=AsyncMock), \
-         patch.object(agent, "_load_flaky_patterns", new_callable=AsyncMock, return_value=[]), \
-         patch("phalanx.agents.ci_fixer.is_flaky_suppressed", return_value=False), \
-         patch.object(agent, "_clone_repo", new_callable=AsyncMock, return_value=True), \
-         patch.object(agent, "_trace", new_callable=AsyncMock), \
-         patch("phalanx.agents.ci_fixer.RootCauseAnalyst") as MockAnalyst, \
-         patch.object(agent, "_apply_patches", return_value=["src/foo.py"]), \
-         patch("phalanx.agents.ci_fixer.validate_fix", return_value=mock_validation), \
-         patch.object(agent, "_check_tool_version_parity", new_callable=AsyncMock, return_value=mock_parity), \
-         patch.object(agent, "_get_fingerprint_success_count", new_callable=AsyncMock, return_value=0), \
-         patch.object(agent, "_commit_to_safe_branch", new_callable=AsyncMock,
-                      return_value={"sha": "deadbeef", "branch": "phalanx/ci-fix/run-loop-001", "push_failed": True}), \
-         patch.object(agent, "_update_fingerprint_on_success", new_callable=AsyncMock) as mock_fp_update:
+    with (
+        patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx),
+        patch.object(agent, "_fetch_logs", new_callable=AsyncMock, return_value="log"),
+        patch("phalanx.agents.ci_fixer.parse_log", return_value=parsed),
+        patch.object(agent, "_persist_fingerprint", new_callable=AsyncMock),
+        patch.object(agent, "_load_flaky_patterns", new_callable=AsyncMock, return_value=[]),
+        patch("phalanx.agents.ci_fixer.is_flaky_suppressed", return_value=False),
+        patch.object(agent, "_clone_repo", new_callable=AsyncMock, return_value=True),
+        patch.object(agent, "_trace", new_callable=AsyncMock),
+        patch("phalanx.agents.ci_fixer.RootCauseAnalyst") as MockAnalyst,
+        patch.object(agent, "_apply_patches", return_value=["src/foo.py"]),
+        patch("phalanx.agents.ci_fixer.validate_fix", return_value=mock_validation),
+        patch.object(
+            agent, "_check_tool_version_parity", new_callable=AsyncMock, return_value=mock_parity
+        ),
+        patch.object(
+            agent, "_get_fingerprint_success_count", new_callable=AsyncMock, return_value=0
+        ),
+        patch.object(
+            agent,
+            "_commit_to_safe_branch",
+            new_callable=AsyncMock,
+            return_value={
+                "sha": "deadbeef",
+                "branch": "phalanx/ci-fix/run-loop-001",
+                "push_failed": True,
+            },
+        ),
+        patch.object(
+            agent, "_update_fingerprint_on_success", new_callable=AsyncMock
+        ) as mock_fp_update,
+    ):
         mock_analyst_inst = MagicMock()
         mock_analyst_inst.analyze.return_value = good_plan
         MockAnalyst.return_value = mock_analyst_inst
@@ -507,30 +556,42 @@ async def mock_execute(_stmt):
 
     from phalanx.ci_fixer.version_parity import VersionParityResult
 
-    mock_parity = VersionParityResult(ok=True, local_version="ruff 0.4.0", failure_version="", reason="ok")
+    mock_parity = VersionParityResult(
+        ok=True, local_version="ruff 0.4.0", failure_version="", reason="ok"
+    )
 
     def _validation_side_effect(*args, **kwargs):
         validation_calls["n"] += 1
         return fail_validation if validation_calls["n"] == 1 else pass_validation
 
-    with patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx), \
-         patch.object(agent, "_fetch_logs", new_callable=AsyncMock, return_value="log"), \
-         patch("phalanx.agents.ci_fixer.parse_log", side_effect=[parsed, empty_retry]), \
-         patch.object(agent, "_persist_fingerprint", new_callable=AsyncMock), \
-         patch.object(agent, "_load_flaky_patterns", new_callable=AsyncMock, return_value=[]), \
-         patch("phalanx.agents.ci_fixer.is_flaky_suppressed", return_value=False), \
-         patch.object(agent, "_clone_repo", new_callable=AsyncMock, return_value=True), \
-         patch.object(agent, "_trace", new_callable=AsyncMock), \
-         patch("phalanx.agents.ci_fixer.RootCauseAnalyst") as MockAnalyst, \
-         patch.object(agent, "_apply_patches", return_value=["src/foo.py"]), \
-         patch("phalanx.agents.ci_fixer.validate_fix", side_effect=_validation_side_effect), \
-         patch.object(agent, "_check_tool_version_parity", new_callable=AsyncMock, return_value=mock_parity), \
-         patch.object(agent, "_get_fingerprint_success_count", new_callable=AsyncMock, return_value=0), \
-         patch.object(agent, "_commit_to_safe_branch", new_callable=AsyncMock,
-                      return_value={"sha": "abc", "push_failed": False}), \
-         patch.object(agent, "_open_draft_pr", new_callable=AsyncMock, return_value=11), \
-         patch.object(agent, "_comment_on_pr", new_callable=AsyncMock), \
-         patch.object(agent, "_update_fingerprint_on_success", new_callable=AsyncMock):
+    with (
+        patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx),
+        patch.object(agent, "_fetch_logs", new_callable=AsyncMock, return_value="log"),
+        patch("phalanx.agents.ci_fixer.parse_log", side_effect=[parsed, empty_retry]),
+        patch.object(agent, "_persist_fingerprint", new_callable=AsyncMock),
+        patch.object(agent, "_load_flaky_patterns", new_callable=AsyncMock, return_value=[]),
+        patch("phalanx.agents.ci_fixer.is_flaky_suppressed", return_value=False),
+        patch.object(agent, "_clone_repo", new_callable=AsyncMock, return_value=True),
+        patch.object(agent, "_trace", new_callable=AsyncMock),
+        patch("phalanx.agents.ci_fixer.RootCauseAnalyst") as MockAnalyst,
+        patch.object(agent, "_apply_patches", return_value=["src/foo.py"]),
+        patch("phalanx.agents.ci_fixer.validate_fix", side_effect=_validation_side_effect),
+        patch.object(
+            agent, "_check_tool_version_parity", new_callable=AsyncMock, return_value=mock_parity
+        ),
+        patch.object(
+            agent, "_get_fingerprint_success_count", new_callable=AsyncMock, return_value=0
+        ),
+        patch.object(
+            agent,
+            "_commit_to_safe_branch",
+            new_callable=AsyncMock,
+            return_value={"sha": "abc", "push_failed": False},
+        ),
+        patch.object(agent, "_open_draft_pr", new_callable=AsyncMock, return_value=11),
+        patch.object(agent, "_comment_on_pr", new_callable=AsyncMock),
+        patch.object(agent, "_update_fingerprint_on_success", new_callable=AsyncMock),
+    ):
         mock_analyst_inst = MagicMock()
         mock_analyst_inst.analyze.return_value = good_plan
         MockAnalyst.return_value = mock_analyst_inst
@@ -568,8 +629,9 @@ async def test_execute_cleans_workspace_on_exception(tmp_path):
     workspace = tmp_path / "ci-fixer" / "run-loop-001"
     workspace.mkdir(parents=True)
 
-    with patch.object(agent, "_execute_inner", new_callable=AsyncMock,
-                      side_effect=RuntimeError("boom")):
+    with patch.object(
+        agent, "_execute_inner", new_callable=AsyncMock, side_effect=RuntimeError("boom")
+    ):
         result = await agent.execute()
 
     assert result.success is False
diff --git a/tests/unit/test_ci_fixer_p2.py b/tests/unit/test_ci_fixer_p2.py
index 262a864d..fe4ffcbd 100644
--- a/tests/unit/test_ci_fixer_p2.py
+++ b/tests/unit/test_ci_fixer_p2.py
@@ -11,21 +11,20 @@
 from __future__ import annotations
 
 import json
-from datetime import UTC, datetime
-from pathlib import Path
+from typing import TYPE_CHECKING
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
 from phalanx.ci_fixer.analyst import (
-    FilePatch,
     FileWindow,
-    FixPlan,
     RootCauseAnalyst,
 )
 from phalanx.ci_fixer.log_parser import LintError, ParsedLog
 from phalanx.ci_fixer.outcome_tracker import _parse_iso
 
+if TYPE_CHECKING:
+    from pathlib import Path
 
 # ── Helpers ────────────────────────────────────────────────────────────────────
 
@@ -53,20 +52,25 @@ def _write(tmp_path: Path, rel: str, lines: list[str]) -> Path:
     return full
 
 
-def _patch_json(path: str, start: int, end: int, corrected: list[str],
-                confidence: str = "high") -> str:
-    return json.dumps({
-        "confidence": confidence,
-        "root_cause": "test root cause",
-        "patches": [{
-            "path": path,
-            "start_line": start,
-            "end_line": end,
-            "corrected_lines": corrected,
-            "reason": "test",
-        }],
-        "needs_new_test": False,
-    })
+def _patch_json(
+    path: str, start: int, end: int, corrected: list[str], confidence: str = "high"
+) -> str:
+    return json.dumps(
+        {
+            "confidence": confidence,
+            "root_cause": "test root cause",
+            "patches": [
+                {
+                    "path": path,
+                    "start_line": start,
+                    "end_line": end,
+                    "corrected_lines": corrected,
+                    "reason": "test",
+                }
+            ],
+            "needs_new_test": False,
+        }
+    )
 
 
 # ── RootCauseAnalyst history lookup ────────────────────────────────────────────
@@ -87,13 +91,15 @@ def llm(**_):
             llm_called["n"] += 1
             return "{}"
 
-        cached_patches = [{
-            "path": "src/foo.py",
-            "start_line": 1,
-            "end_line": len(self._FILE),
-            "corrected_lines": self._FILE[1:],
-            "reason": "history",
-        }]
+        cached_patches = [
+            {
+                "path": "src/foo.py",
+                "start_line": 1,
+                "end_line": len(self._FILE),
+                "corrected_lines": self._FILE[1:],
+                "reason": "history",
+            }
+        ]
 
         analyst = RootCauseAnalyst(
             call_llm=llm,
@@ -140,13 +146,15 @@ def llm(**_):
             return llm_response
 
         # Return patches for a file not in windows (will fail validation)
-        bad_cached = [{
-            "path": "src/invented.py",
-            "start_line": 1,
-            "end_line": 5,
-            "corrected_lines": ["x\n"],
-            "reason": "bad",
-        }]
+        bad_cached = [
+            {
+                "path": "src/invented.py",
+                "start_line": 1,
+                "end_line": 5,
+                "corrected_lines": ["x\n"],
+                "reason": "bad",
+            }
+        ]
 
         analyst = RootCauseAnalyst(
             call_llm=llm,
@@ -226,7 +234,7 @@ def llm(**_):
             call_llm=llm,
             history_lookup=lambda fp: [],  # empty → falsy
         )
-        plan = analyst.analyze(_lint_log("src/foo.py"), tmp_path, fingerprint_hash="abc")
+        analyst.analyze(_lint_log("src/foo.py"), tmp_path, fingerprint_hash="abc")
         assert llm_called["n"] == 1
 
 
@@ -287,8 +295,13 @@ def test_returns_patches_when_history_exists(self):
         """Returns patch list when fingerprint found in DB."""
         agent = self._make_agent()
         expected_patches = [
-            {"path": "src/foo.py", "start_line": 1, "end_line": 3,
-             "corrected_lines": ["a\n"], "reason": "test"}
+            {
+                "path": "src/foo.py",
+                "start_line": 1,
+                "end_line": 3,
+                "corrected_lines": ["a\n"],
+                "reason": "test",
+            }
         ]
 
         with patch.object(agent, "_async_lookup_fix_history", new_callable=AsyncMock) as mock_async:
diff --git a/tests/unit/test_ci_fixer_p3.py b/tests/unit/test_ci_fixer_p3.py
index 93b15b6c..6cf52a2c 100644
--- a/tests/unit/test_ci_fixer_p3.py
+++ b/tests/unit/test_ci_fixer_p3.py
@@ -10,22 +10,17 @@
 
 from __future__ import annotations
 
-from datetime import UTC, datetime
 from unittest.mock import MagicMock, patch
 
-import pytest
-
+from phalanx.ci_fixer.log_parser import LintError, ParsedLog, TestFailure
 from phalanx.ci_fixer.suppressor import (
-    _FLAKY_THRESHOLD,
     _MIN_OBSERVATIONS,
     is_flaky_suppressed,
     record_flaky_pattern,
     should_use_history,
 )
-from phalanx.ci_fixer.log_parser import LintError, ParsedLog, TestFailure, TypeError
 from phalanx.db.models import CIFailureFingerprint, CIFlakyPattern
 
-
 # ── Helpers ────────────────────────────────────────────────────────────────────
 
 
@@ -33,10 +28,7 @@ def _lint_log(*errors: tuple) -> ParsedLog:
     """errors: list of (file, code) tuples."""
     return ParsedLog(
         tool="ruff",
-        lint_errors=[
-            LintError(file=f, line=1, col=1, code=c, message="test")
-            for f, c in errors
-        ],
+        lint_errors=[LintError(file=f, line=1, col=1, code=c, message="test") for f, c in errors],
     )
 
 
@@ -61,7 +53,8 @@ def _make_flaky_pattern(
 def _make_fingerprint(
     success_count: int = 3,
     failure_count: int = 1,
-    last_good_patch_json: str | None = '[{"path":"src/foo.py","start_line":1,"end_line":1,"corrected_lines":["x\\n"],"reason":""}]',
+    last_good_patch_json: str
+    | None = '[{"path":"src/foo.py","start_line":1,"end_line":1,"corrected_lines":["x\\n"],"reason":""}]',
     hash_: str = "abc123def456abcd",
 ) -> CIFailureFingerprint:
     fp = MagicMock(spec=CIFailureFingerprint)
@@ -125,30 +118,40 @@ def test_one_unknown_error_not_suppressed(self):
     def test_insufficient_observations_not_suppressed(self):
         """< MIN_OBSERVATIONS → not suppressed regardless of rate."""
         parsed = _lint_log(("src/foo.py", "F401"))
-        patterns = [_make_flaky_pattern(
-            "src/foo.py", "F401",
-            flaky_count=2, total_count=_MIN_OBSERVATIONS - 1,
-        )]
+        patterns = [
+            _make_flaky_pattern(
+                "src/foo.py",
+                "F401",
+                flaky_count=2,
+                total_count=_MIN_OBSERVATIONS - 1,
+            )
+        ]
         assert not is_flaky_suppressed(parsed, patterns)
 
     def test_below_threshold_not_suppressed(self):
         """flaky_rate < FLAKY_THRESHOLD → not suppressed."""
         parsed = _lint_log(("src/foo.py", "F401"))
-        patterns = [_make_flaky_pattern(
-            "src/foo.py", "F401",
-            flaky_count=1, total_count=10,  # 10% flaky rate
-        )]
+        patterns = [
+            _make_flaky_pattern(
+                "src/foo.py",
+                "F401",
+                flaky_count=1,
+                total_count=10,  # 10% flaky rate
+            )
+        ]
         assert not is_flaky_suppressed(parsed, patterns)
 
     def test_test_failures_not_suppressed(self):
         """Test failures never suppressed (too risky)."""
         parsed = ParsedLog(
             tool="pytest",
-            test_failures=[TestFailure(
-                test_id="tests/test_foo.py::test_bar",
-                file="tests/test_foo.py",
-                message="",
-            )],
+            test_failures=[
+                TestFailure(
+                    test_id="tests/test_foo.py::test_bar",
+                    file="tests/test_foo.py",
+                    message="",
+                )
+            ],
         )
         patterns = [_make_flaky_pattern("tests/test_foo.py", "F401")]
         assert not is_flaky_suppressed(parsed, patterns)
@@ -278,6 +281,7 @@ def test_existing_pattern_has_only_last_seen_at(self):
 def test_commit_dedup_window_constant():
     """The 5-minute dedup window constant is present and reasonable."""
     from phalanx.api.routes.ci_webhooks import _COMMIT_DEDUP_WINDOW_MINUTES
+
     assert 1 <= _COMMIT_DEDUP_WINDOW_MINUTES <= 60
 
 
@@ -289,6 +293,7 @@ class TestHistoryWeighting:
 
     def _make_agent(self):
         from phalanx.agents.ci_fixer import CIFixerAgent
+
         with patch("phalanx.agents.base.BaseAgent.__init__", return_value=None):
             agent = CIFixerAgent.__new__(CIFixerAgent)
             agent.ci_fix_run_id = "test-run-001"
@@ -297,7 +302,6 @@ def _make_agent(self):
 
     def test_unreliable_fingerprint_returns_none(self):
         """failure_count >= success_count → _lookup returns None."""
-        import asyncio
         from unittest.mock import AsyncMock
 
         agent = self._make_agent()
@@ -305,7 +309,6 @@ def test_unreliable_fingerprint_returns_none(self):
         fp = _make_fingerprint(success_count=1, failure_count=3)
 
         async def mock_lookup(fp_hash):
-            from phalanx.db.models import CIFailureFingerprint
             # Simulate DB returning a fingerprint with bad stats
             mock_result = MagicMock()
             mock_result.scalar_one_or_none.return_value = fp
@@ -325,8 +328,15 @@ def test_reliable_fingerprint_returns_patches(self):
         from unittest.mock import AsyncMock
 
         agent = self._make_agent()
-        expected = [{"path": "src/foo.py", "start_line": 1,
-                     "end_line": 1, "corrected_lines": ["x\n"], "reason": ""}]
+        expected = [
+            {
+                "path": "src/foo.py",
+                "start_line": 1,
+                "end_line": 1,
+                "corrected_lines": ["x\n"],
+                "reason": "",
+            }
+        ]
 
         with patch.object(agent, "_async_lookup_fix_history", new_callable=AsyncMock) as m:
             m.return_value = expected
diff --git a/tests/unit/test_ci_fixer_p4.py b/tests/unit/test_ci_fixer_p4.py
index 0affb9fc..32173856 100644
--- a/tests/unit/test_ci_fixer_p4.py
+++ b/tests/unit/test_ci_fixer_p4.py
@@ -8,8 +8,6 @@
 
 from __future__ import annotations
 
-import pytest
-
 from phalanx.ci_fixer.version_parity import (
     VersionParityResult,
     check_version_parity,
@@ -17,7 +15,6 @@
     should_auto_merge,
 )
 
-
 # ── check_version_parity ───────────────────────────────────────────────────────
 
 
@@ -172,6 +169,7 @@ def test_mismatch_notice(self):
 def test_ci_integration_auto_merge_column_exists():
     """Phase 4 columns exist on CIIntegration model."""
     from phalanx.db.models import CIIntegration
+
     # Verify the mapped columns exist by inspecting the class
     assert hasattr(CIIntegration, "auto_merge")
     assert hasattr(CIIntegration, "min_success_count")
@@ -180,4 +178,5 @@ def test_ci_integration_auto_merge_column_exists():
 def test_ci_fix_run_parity_column_exists():
     """Phase 4 column exists on CIFixRun model."""
     from phalanx.db.models import CIFixRun
+
     assert hasattr(CIFixRun, "tool_version_parity_ok")
diff --git a/tests/unit/test_ci_fixer_p5.py b/tests/unit/test_ci_fixer_p5.py
index 3a4bb539..3bd4c531 100644
--- a/tests/unit/test_ci_fixer_p5.py
+++ b/tests/unit/test_ci_fixer_p5.py
@@ -9,8 +9,6 @@
 
 from __future__ import annotations
 
-import pytest
-
 from phalanx.ci_fixer.pattern_promoter import (
     MIN_GLOBAL_SUCCESS_COUNT,
     MIN_REPOS_FOR_PROMOTION,
@@ -22,7 +20,6 @@
     should_post_proactive_comment,
 )
 
-
 # ── is_promotion_eligible ──────────────────────────────────────────────────────
 
 
@@ -115,9 +112,7 @@ def test_tool_name_in_comment(self):
         assert "ruff" in comment
 
     def test_info_findings_different_header(self):
-        findings = [
-            ProactiveFinding("fp1", "ruff", "info pattern", "info", ["f.py"])
-        ]
+        findings = [ProactiveFinding("fp1", "ruff", "info pattern", "info", ["f.py"])]
         comment = format_proactive_comment(findings, 42)
         assert "informational" in comment.lower() or "info" in comment.lower()
 
@@ -144,15 +139,11 @@ def test_no_findings_false(self):
         assert not should_post_proactive_comment([])
 
     def test_only_info_findings_false(self):
-        findings = [
-            ProactiveFinding("fp1", "ruff", "info", "info", ["f.py"])
-        ]
+        findings = [ProactiveFinding("fp1", "ruff", "info", "info", ["f.py"])]
         assert not should_post_proactive_comment(findings)
 
     def test_warning_finding_true(self):
-        findings = [
-            ProactiveFinding("fp1", "ruff", "warning pattern", "warning", ["f.py"])
-        ]
+        findings = [ProactiveFinding("fp1", "ruff", "warning pattern", "warning", ["f.py"])]
         assert should_post_proactive_comment(findings)
 
     def test_mixed_info_and_warning_true(self):
@@ -168,6 +159,7 @@ def test_mixed_info_and_warning_true(self):
 
 def test_pattern_registry_columns():
     from phalanx.db.models import CIPatternRegistry
+
     assert hasattr(CIPatternRegistry, "fingerprint_hash")
     assert hasattr(CIPatternRegistry, "tool")
     assert hasattr(CIPatternRegistry, "repo_count")
@@ -177,6 +169,7 @@ def test_pattern_registry_columns():
 
 def test_proactive_scan_columns():
     from phalanx.db.models import CIProactiveScan
+
     assert hasattr(CIProactiveScan, "repo_full_name")
     assert hasattr(CIProactiveScan, "pr_number")
     assert hasattr(CIProactiveScan, "findings_json")
diff --git a/tests/unit/test_ci_fixer_p5_async.py b/tests/unit/test_ci_fixer_p5_async.py
index df9bc346..686170ca 100644
--- a/tests/unit/test_ci_fixer_p5_async.py
+++ b/tests/unit/test_ci_fixer_p5_async.py
@@ -8,7 +8,6 @@
 
 from __future__ import annotations
 
-import json
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
@@ -20,7 +19,6 @@
     scan_pr_for_patterns,
 )
 
-
 # ── Helpers ────────────────────────────────────────────────────────────────────
 
 
@@ -87,8 +85,10 @@ async def test_scan_pr_no_python_files():
 
     mock_db_ctx, _ = _mock_db(rows=[pattern])
 
-    with patch("httpx.AsyncClient", return_value=mock_client), \
-         patch("phalanx.ci_fixer.proactive_scanner.get_db", return_value=mock_db_ctx):
+    with (
+        patch("httpx.AsyncClient", return_value=mock_client),
+        patch("phalanx.ci_fixer.proactive_scanner.get_db", return_value=mock_db_ctx),
+    ):
         findings = await scan_pr_for_patterns("acme/backend", 1, "abc", "token")
 
     assert findings == []
@@ -117,8 +117,10 @@ async def test_scan_pr_with_python_files_finds_patterns():
 
     mock_db_ctx, _ = _mock_db(rows=[pattern])
 
-    with patch("httpx.AsyncClient", return_value=mock_client), \
-         patch("phalanx.ci_fixer.proactive_scanner.get_db", return_value=mock_db_ctx):
+    with (
+        patch("httpx.AsyncClient", return_value=mock_client),
+        patch("phalanx.ci_fixer.proactive_scanner.get_db", return_value=mock_db_ctx),
+    ):
         findings = await scan_pr_for_patterns("acme/backend", 1, "abc", "token")
 
     assert len(findings) == 1
@@ -146,8 +148,10 @@ async def test_scan_pr_low_success_count_is_info():
 
     mock_db_ctx, _ = _mock_db(rows=[pattern])
 
-    with patch("httpx.AsyncClient", return_value=mock_client), \
-         patch("phalanx.ci_fixer.proactive_scanner.get_db", return_value=mock_db_ctx):
+    with (
+        patch("httpx.AsyncClient", return_value=mock_client),
+        patch("phalanx.ci_fixer.proactive_scanner.get_db", return_value=mock_db_ctx),
+    ):
         findings = await scan_pr_for_patterns("acme/backend", 1, "abc", "token")
 
     assert len(findings) == 1
@@ -248,6 +252,7 @@ async def test_record_scan_inserts_row():
 
     mock_session.add.assert_called_once()
     from phalanx.db.models import CIProactiveScan
+
     added = mock_session.add.call_args[0][0]
     assert isinstance(added, CIProactiveScan)
     assert added.pr_number == 42
@@ -269,7 +274,9 @@ async def test_promote_patterns_eligible_creates_registry_entry():
     row.fingerprint_hash = "abc123def456abcd"
     row.tool = "ruff"
     row.sample_errors = "unused import"
-    row.last_good_patch_json = '[{"path":"src/foo.py","start_line":1,"end_line":1,"corrected_lines":["x\\n"],"reason":""}]'
+    row.last_good_patch_json = (
+        '[{"path":"src/foo.py","start_line":1,"end_line":1,"corrected_lines":["x\\n"],"reason":""}]'
+    )
     row.repo_count = 3  # >= MIN_REPOS_FOR_PROMOTION=2
     row.total_successes = 5
 
@@ -303,6 +310,7 @@ async def mock_execute(stmt):
     # Should have added one entry to the registry
     mock_session.add.assert_called_once()
     from phalanx.db.models import CIPatternRegistry
+
     added = mock_session.add.call_args[0][0]
     assert isinstance(added, CIPatternRegistry)
     assert added.fingerprint_hash == "abc123def456abcd"
diff --git a/tests/unit/test_ci_fixer_reproducer.py b/tests/unit/test_ci_fixer_reproducer.py
new file mode 100644
index 00000000..865f4e27
--- /dev/null
+++ b/tests/unit/test_ci_fixer_reproducer.py
@@ -0,0 +1,434 @@
+"""
+Tests for phalanx.ci_fixer.reproducer — ReproducerAgent.
+
+Coverage targets:
+  - reproduce(): all 5 verdicts (skipped, confirmed, flaky, env_mismatch, timeout)
+  - reproduce(): skipped when sandbox unavailable (available=False)
+  - reproduce(): skipped when reproducer_cmd is empty
+  - _output_matches_failure(): tool name match, error code match, no match
+  - _run_subprocess(): timeout path (process killed)
+"""
+
+from __future__ import annotations
+
+import asyncio
+from typing import TYPE_CHECKING
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from phalanx.ci_fixer.context import ReproductionResult, StructuredFailure
+from phalanx.ci_fixer.reproducer import ReproducerAgent, ReproductionAttempt
+from phalanx.ci_fixer.sandbox import SandboxResult
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+# ── Fixtures ──────────────────────────────────────────────────────────────────
+
+
+def _make_sandbox(available: bool = True, container_id: str = "") -> SandboxResult:
+    return SandboxResult(
+        sandbox_id="phalanx-sandbox-test1234",
+        stack="python",
+        image="python:3.12-slim",
+        workspace_path="/tmp/ws",
+        available=available,
+        container_id=container_id,
+    )
+
+
+def _make_sf(
+    tool: str = "ruff",
+    errors: list | None = None,
+) -> StructuredFailure:
+    return StructuredFailure(
+        tool=tool,
+        failure_type="lint",
+        reproducer_cmd=f"{tool} check .",
+        errors=errors or [],
+    )
+
+
+def _make_proc(
+    returncode: int = 0,
+    stdout: bytes = b"",
+    stderr: bytes = b"",
+    timeout: bool = False,
+) -> AsyncMock:
+    """Return a mock asyncio.Process suitable for create_subprocess_shell."""
+    proc = MagicMock()
+    proc.returncode = returncode
+    proc.kill = MagicMock()
+    proc.wait = AsyncMock()
+    if timeout:
+        proc.communicate = AsyncMock(side_effect=TimeoutError())
+    else:
+        proc.communicate = AsyncMock(return_value=(stdout, stderr))
+    return proc
+
+
+# ── reproduce() — verdict classification ──────────────────────────────────────
+
+
+class TestReproduceVerdicts:
+    @pytest.mark.asyncio
+    async def test_reproduce_skipped_when_no_sandbox(self, tmp_path: Path):
+        """sandbox_result=None → verdict=skipped, no subprocess."""
+        agent = ReproducerAgent()
+        result = await agent.reproduce(
+            reproducer_cmd="ruff check .",
+            workspace_path=tmp_path,
+            sandbox_result=None,
+            structured_failure=_make_sf(),
+            timeout_seconds=30,
+        )
+        assert result.verdict == "skipped"
+        assert isinstance(result, ReproductionResult)
+
+    @pytest.mark.asyncio
+    async def test_reproduce_skipped_when_sandbox_unavailable(self, tmp_path: Path):
+        """sandbox_result.available=False → verdict=skipped."""
+        agent = ReproducerAgent()
+        result = await agent.reproduce(
+            reproducer_cmd="ruff check .",
+            workspace_path=tmp_path,
+            sandbox_result=_make_sandbox(available=False),
+            structured_failure=_make_sf(),
+            timeout_seconds=30,
+        )
+        assert result.verdict == "skipped"
+
+    @pytest.mark.asyncio
+    async def test_reproduce_skipped_when_empty_cmd(self, tmp_path: Path):
+        """Empty reproducer_cmd → verdict=skipped."""
+        agent = ReproducerAgent()
+        result = await agent.reproduce(
+            reproducer_cmd="",
+            workspace_path=tmp_path,
+            sandbox_result=_make_sandbox(),
+            structured_failure=_make_sf(),
+            timeout_seconds=30,
+        )
+        assert result.verdict == "skipped"
+
+    @pytest.mark.asyncio
+    async def test_reproduce_skipped_when_whitespace_cmd(self, tmp_path: Path):
+        """Whitespace-only reproducer_cmd → verdict=skipped."""
+        agent = ReproducerAgent()
+        result = await agent.reproduce(
+            reproducer_cmd="   ",
+            workspace_path=tmp_path,
+            sandbox_result=_make_sandbox(),
+            structured_failure=_make_sf(),
+            timeout_seconds=30,
+        )
+        assert result.verdict == "skipped"
+
+    @pytest.mark.asyncio
+    async def test_reproduce_flaky(self, tmp_path: Path):
+        """exit_code=0 → command passed → CI failure was transient → flaky."""
+        proc = _make_proc(returncode=0, stdout=b"All checks passed", stderr=b"")
+
+        with patch("asyncio.create_subprocess_shell", return_value=proc):
+            agent = ReproducerAgent()
+            result = await agent.reproduce(
+                reproducer_cmd="ruff check .",
+                workspace_path=tmp_path,
+                sandbox_result=_make_sandbox(),
+                structured_failure=_make_sf(),
+                timeout_seconds=30,
+            )
+
+        assert result.verdict == "flaky"
+        assert result.exit_code == 0
+
+    @pytest.mark.asyncio
+    async def test_reproduce_confirmed_by_tool_name(self, tmp_path: Path):
+        """exit_code!=0, tool name in output → confirmed."""
+        proc = _make_proc(
+            returncode=1,
+            stdout=b"ruff check failed: F401 unused import",
+            stderr=b"",
+        )
+
+        with patch("asyncio.create_subprocess_shell", return_value=proc):
+            agent = ReproducerAgent()
+            result = await agent.reproduce(
+                reproducer_cmd="ruff check .",
+                workspace_path=tmp_path,
+                sandbox_result=_make_sandbox(),
+                structured_failure=_make_sf(tool="ruff"),
+                timeout_seconds=30,
+            )
+
+        assert result.verdict == "confirmed"
+        assert result.exit_code == 1
+
+    @pytest.mark.asyncio
+    async def test_reproduce_confirmed_by_error_code(self, tmp_path: Path):
+        """exit_code!=0, error code in output (no tool name) → confirmed."""
+        proc = _make_proc(
+            returncode=1,
+            stdout=b"src/foo.py:1:1: F401 'os' imported but unused",
+            stderr=b"",
+        )
+        sf = _make_sf(tool="ruff", errors=[{"file": "src/foo.py", "code": "F401"}])
+        # Use a tool name that won't match the output to isolate error-code path
+        sf.tool = "linter"
+
+        with patch("asyncio.create_subprocess_shell", return_value=proc):
+            agent = ReproducerAgent()
+            result = await agent.reproduce(
+                reproducer_cmd="linter check .",
+                workspace_path=tmp_path,
+                sandbox_result=_make_sandbox(),
+                structured_failure=sf,
+                timeout_seconds=30,
+            )
+
+        assert result.verdict == "confirmed"
+
+    @pytest.mark.asyncio
+    async def test_reproduce_env_mismatch(self, tmp_path: Path):
+        """exit_code!=0 but output unrelated to original failure → env_mismatch."""
+        proc = _make_proc(
+            returncode=1,
+            stdout=b"command not found: ruff",
+            stderr=b"bash: ruff: command not found",
+        )
+        # Use a structured failure whose tool name won't appear in the "not found" output
+        sf = StructuredFailure(
+            tool="mypy",
+            failure_type="type_error",
+            reproducer_cmd="mypy .",
+            errors=[{"code": "E999"}],
+        )
+
+        with patch("asyncio.create_subprocess_shell", return_value=proc):
+            agent = ReproducerAgent()
+            result = await agent.reproduce(
+                reproducer_cmd="mypy .",
+                workspace_path=tmp_path,
+                sandbox_result=_make_sandbox(),
+                structured_failure=sf,
+                timeout_seconds=30,
+            )
+
+        assert result.verdict == "env_mismatch"
+
+    @pytest.mark.asyncio
+    async def test_reproduce_timeout(self, tmp_path: Path):
+        """Process exceeds timeout → verdict=timeout, process killed."""
+        proc = _make_proc(timeout=True)
+
+        with patch("asyncio.create_subprocess_shell", return_value=proc):
+            agent = ReproducerAgent()
+            result = await agent.reproduce(
+                reproducer_cmd="ruff check .",
+                workspace_path=tmp_path,
+                sandbox_result=_make_sandbox(),
+                structured_failure=_make_sf(),
+                timeout_seconds=1,
+            )
+
+        assert result.verdict == "timeout"
+        proc.kill.assert_called_once()
+        proc.wait.assert_awaited_once()
+
+    @pytest.mark.asyncio
+    async def test_reproduce_result_fields(self, tmp_path: Path):
+        """Result includes reproducer_cmd and truncated output."""
+        long_output = b"F401 " * 1000  # > 4000 chars
+        proc = _make_proc(returncode=1, stdout=long_output, stderr=b"")
+
+        with patch("asyncio.create_subprocess_shell", return_value=proc):
+            agent = ReproducerAgent()
+            result = await agent.reproduce(
+                reproducer_cmd="ruff check .",
+                workspace_path=tmp_path,
+                sandbox_result=_make_sandbox(),
+                structured_failure=_make_sf(tool="ruff"),
+                timeout_seconds=30,
+            )
+
+        assert result.reproducer_cmd == "ruff check ."
+        assert len(result.output) <= 4000
+
+
+# ── _output_matches_failure ───────────────────────────────────────────────────
+
+
+class TestOutputMatchesFailure:
+    def test_matches_by_tool_name(self):
+        agent = ReproducerAgent()
+        sf = _make_sf(tool="ruff")
+        assert agent._output_matches_failure("ruff check found 3 errors", sf) is True
+
+    def test_matches_tool_name_case_insensitive(self):
+        agent = ReproducerAgent()
+        sf = _make_sf(tool="Ruff")
+        assert agent._output_matches_failure("RUFF check: error F401", sf) is True
+
+    def test_matches_by_error_code(self):
+        agent = ReproducerAgent()
+        sf = StructuredFailure(
+            tool="nontool",  # won't match output
+            failure_type="lint",
+            reproducer_cmd="check .",
+            errors=[{"code": "E501", "file": "foo.py"}],
+        )
+        assert agent._output_matches_failure("line too long E501 at 120 chars", sf) is True
+
+    def test_no_match_unrelated_output(self):
+        agent = ReproducerAgent()
+        sf = StructuredFailure(
+            tool="mypy",
+            failure_type="type_error",
+            reproducer_cmd="mypy .",
+            errors=[{"code": "E999"}],
+        )
+        # Output has neither "mypy" nor "E999"
+        assert agent._output_matches_failure("pip install failed: network error", sf) is False
+
+    def test_no_match_empty_output(self):
+        agent = ReproducerAgent()
+        sf = _make_sf(tool="ruff")
+        assert agent._output_matches_failure("", sf) is False
+
+    def test_no_match_empty_errors_no_tool(self):
+        agent = ReproducerAgent()
+        sf = StructuredFailure(
+            tool="pytest",
+            failure_type="test_regression",
+            reproducer_cmd="pytest .",
+            errors=[],
+        )
+        # Output has no "pytest" in it
+        assert agent._output_matches_failure("FAILED test_foo.py::test_bar", sf) is False
+
+    def test_matches_with_no_code_in_error_dict(self):
+        """Errors with no 'code' key should not raise."""
+        agent = ReproducerAgent()
+        sf = StructuredFailure(
+            tool="ruff",
+            failure_type="lint",
+            reproducer_cmd="ruff check .",
+            errors=[{"file": "foo.py", "line": 1}],  # no 'code' key
+        )
+        # Tool name match should still work
+        assert agent._output_matches_failure("ruff: 1 error found", sf) is True
+
+
+# ── Container exec path ───────────────────────────────────────────────────────
+
+
+class TestReproducerContainerExec:
+    """Tests for the docker exec path when sandbox_result.container_id is set."""
+
+    def _make_sandbox_with_container(self, container_id: str = "ctr-abc123") -> object:
+        return _make_sandbox(available=True, container_id=container_id)
+
+    @pytest.mark.asyncio
+    async def test_reproduce_uses_docker_exec_when_container_id_set(self, tmp_path):
+        """When container_id is set, command is wrapped with docker exec."""
+        proc = _make_proc(returncode=1, stdout=b"ruff: 1 error", stderr=b"")
+
+        captured_args = []
+
+        async def fake_exec(*args, **kwargs):
+            captured_args.extend(args)
+            return proc
+
+        with patch("asyncio.create_subprocess_exec", side_effect=fake_exec):
+            result = await ReproducerAgent().reproduce(
+                reproducer_cmd="ruff check .",
+                workspace_path=tmp_path,
+                sandbox_result=self._make_sandbox_with_container("ctr-abc123"),
+                structured_failure=_make_sf(tool="ruff"),
+                timeout_seconds=30,
+            )
+
+        assert result.verdict == "confirmed"
+        assert "docker" in captured_args
+        assert "ctr-abc123" in captured_args
+
+    @pytest.mark.asyncio
+    async def test_reproduce_local_subprocess_when_no_container_id(self, tmp_path):
+        """When container_id is empty, uses local subprocess shell."""
+        proc = _make_proc(returncode=0, stdout=b"clean", stderr=b"")
+
+        with patch("asyncio.create_subprocess_shell", return_value=proc):
+            result = await ReproducerAgent().reproduce(
+                reproducer_cmd="ruff check .",
+                workspace_path=tmp_path,
+                sandbox_result=_make_sandbox(available=True),  # no container_id
+                structured_failure=_make_sf(tool="ruff"),
+                timeout_seconds=30,
+            )
+
+        assert result.verdict == "flaky"
+
+    @pytest.mark.asyncio
+    async def test_run_subprocess_with_container_id(self, tmp_path):
+        """_run_subprocess with container_id uses create_subprocess_exec."""
+        proc = _make_proc(returncode=0, stdout=b"ok", stderr=b"")
+
+        captured = []
+
+        async def fake_exec(*args, **kwargs):
+            captured.extend(args)
+            return proc
+
+        with patch("asyncio.create_subprocess_exec", side_effect=fake_exec):
+            step = await ReproducerAgent()._run_subprocess(
+                cmd="ruff check .",
+                cwd=tmp_path,
+                timeout_seconds=30,
+                container_id="ctr-xyz",
+            )
+
+        assert step.exit_code == 0
+        assert "ctr-xyz" in captured
+        assert "sh" in captured
+
+    @pytest.mark.asyncio
+    async def test_run_subprocess_without_container_id(self, tmp_path):
+        """_run_subprocess without container_id uses create_subprocess_shell."""
+        proc = _make_proc(returncode=0, stdout=b"clean", stderr=b"")
+
+        with patch("asyncio.create_subprocess_shell", return_value=proc):
+            step = await ReproducerAgent()._run_subprocess(
+                cmd="ruff check .",
+                cwd=tmp_path,
+                timeout_seconds=30,
+                container_id="",
+            )
+
+        assert step.exit_code == 0
+
+
+# ── ReproductionAttempt dataclass ─────────────────────────────────────────────
+
+
+class TestReproductionAttempt:
+    def test_defaults(self):
+        a = ReproductionAttempt(
+            cmd="ruff check .",
+            exit_code=1,
+            stdout="out",
+            stderr="err",
+            elapsed_seconds=0.5,
+        )
+        assert a.timed_out is False
+
+    def test_timed_out_flag(self):
+        a = ReproductionAttempt(
+            cmd="ruff check .",
+            exit_code=-1,
+            stdout="",
+            stderr="",
+            elapsed_seconds=30.0,
+            timed_out=True,
+        )
+        assert a.timed_out is True
diff --git a/tests/unit/test_ci_fixer_sandbox.py b/tests/unit/test_ci_fixer_sandbox.py
new file mode 100644
index 00000000..a6aa84d7
--- /dev/null
+++ b/tests/unit/test_ci_fixer_sandbox.py
@@ -0,0 +1,350 @@
+"""
+Tests for phalanx.ci_fixer.sandbox — SandboxProvisioner + SandboxResult.
+
+Coverage targets:
+  - detect_stack: all 5 stacks (python/node/go/rust/unknown) + priority order
+  - provision: happy path with pool checkout, disabled, unique IDs, stack_hint
+  - provision: pool checkout timeout → available=False fallback
+  - provision: Docker error → available=False fallback
+  - release: container_id empty (no-op), container_id set → pool.checkin
+  - SandboxResult: field defaults including new container_id + mount_path
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from phalanx.ci_fixer.sandbox import SandboxProvisioner, SandboxResult
+from phalanx.ci_fixer.sandbox_pool import SandboxUnavailableError
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+
+def _mock_pool(container_id: str = "ctr-abc123") -> MagicMock:
+    """Return a mock SandboxPool that returns a container on checkout."""
+    from phalanx.ci_fixer.sandbox_pool import PooledContainer
+
+    pool = MagicMock()
+    container = PooledContainer(
+        container_id=container_id,
+        stack="python",
+        image="phalanx-sandbox-python:latest",
+    )
+    pool.checkout = AsyncMock(return_value=container)
+    pool.checkin = AsyncMock()
+    return pool
+
+
+# ── Helpers ───────────────────────────────────────────────────────────────────
+
+
+def _make_workspace(tmp_path: Path, *filenames: str) -> Path:
+    """Create a temp directory with the given marker files."""
+    for name in filenames:
+        (tmp_path / name).touch()
+    return tmp_path
+
+
+# ── detect_stack ──────────────────────────────────────────────────────────────
+
+
+class TestDetectStack:
+    def test_detect_stack_python_pyproject(self, tmp_path: Path):
+        ws = _make_workspace(tmp_path, "pyproject.toml")
+        assert SandboxProvisioner().detect_stack(ws) == "python"
+
+    def test_detect_stack_python_requirements(self, tmp_path: Path):
+        """requirements.txt alone should also detect python."""
+        ws = _make_workspace(tmp_path, "requirements.txt")
+        assert SandboxProvisioner().detect_stack(ws) == "python"
+
+    def test_detect_stack_python_setup_py(self, tmp_path: Path):
+        ws = _make_workspace(tmp_path, "setup.py")
+        assert SandboxProvisioner().detect_stack(ws) == "python"
+
+    def test_detect_stack_node(self, tmp_path: Path):
+        ws = _make_workspace(tmp_path, "package.json")
+        assert SandboxProvisioner().detect_stack(ws) == "node"
+
+    def test_detect_stack_go(self, tmp_path: Path):
+        ws = _make_workspace(tmp_path, "go.mod")
+        assert SandboxProvisioner().detect_stack(ws) == "go"
+
+    def test_detect_stack_rust(self, tmp_path: Path):
+        ws = _make_workspace(tmp_path, "Cargo.toml")
+        assert SandboxProvisioner().detect_stack(ws) == "rust"
+
+    def test_detect_stack_unknown(self, tmp_path: Path):
+        """Empty workspace has no markers → unknown."""
+        assert SandboxProvisioner().detect_stack(tmp_path) == "unknown"
+
+    def test_detect_stack_python_wins_over_node(self, tmp_path: Path):
+        """Python is checked first — monorepo with both pyproject + package.json resolves to python."""
+        ws = _make_workspace(tmp_path, "pyproject.toml", "package.json")
+        assert SandboxProvisioner().detect_stack(ws) == "python"
+
+    def test_detect_stack_nonexistent_path(self, tmp_path: Path):
+        """Path that doesn't exist returns unknown without raising."""
+        missing = tmp_path / "nonexistent"
+        result = SandboxProvisioner().detect_stack(missing)
+        assert result == "unknown"
+
+
+# ── SandboxProvisioner.provision ──────────────────────────────────────────────
+
+
+class TestSandboxProvision:
+    def _mock_settings(self, enabled: bool = True) -> MagicMock:
+        s = MagicMock()
+        s.sandbox_enabled = enabled
+        s.sandbox_checkout_timeout_seconds = 30
+        return s
+
+    @pytest.mark.asyncio
+    async def test_provision_returns_sandbox_result_with_container_id(self, tmp_path: Path):
+        """Happy path: pool checkout succeeds → SandboxResult has container_id set."""
+        ws = _make_workspace(tmp_path, "pyproject.toml")
+        pool = _mock_pool(container_id="ctr-abc123")
+
+        with patch("phalanx.ci_fixer.sandbox.settings", self._mock_settings()):
+            with patch("phalanx.ci_fixer.sandbox.get_sandbox_pool", AsyncMock(return_value=pool)):
+                provisioner = SandboxProvisioner()
+                with patch.object(provisioner, "_bind_workspace", new_callable=AsyncMock):
+                    result = await provisioner.provision(ws)
+
+        assert result is not None
+        assert result.stack == "python"
+        assert result.image == "python:3.12-slim"
+        assert result.workspace_path == str(ws)
+        assert result.sandbox_id.startswith("phalanx-sandbox-")
+        assert result.container_id == "ctr-abc123"
+        assert result.available is True
+
+    @pytest.mark.asyncio
+    async def test_provision_disabled_returns_none(self, tmp_path: Path):
+        """sandbox_enabled=False → provision returns None immediately."""
+        with patch("phalanx.ci_fixer.sandbox.settings", self._mock_settings(enabled=False)):
+            result = await SandboxProvisioner().provision(tmp_path)
+
+        assert result is None
+
+    @pytest.mark.asyncio
+    async def test_provision_generates_unique_ids(self, tmp_path: Path):
+        """Each provision call generates a different sandbox_id."""
+        pool = _mock_pool()
+
+        with patch("phalanx.ci_fixer.sandbox.settings", self._mock_settings()):
+            with patch("phalanx.ci_fixer.sandbox.get_sandbox_pool", AsyncMock(return_value=pool)):
+                p = SandboxProvisioner()
+                with patch.object(p, "_bind_workspace", new_callable=AsyncMock):
+                    r1 = await p.provision(tmp_path)
+                    r2 = await p.provision(tmp_path)
+
+        assert r1 is not None and r2 is not None
+        assert r1.sandbox_id != r2.sandbox_id
+
+    @pytest.mark.asyncio
+    async def test_provision_pool_timeout_returns_available_false(self, tmp_path: Path):
+        """Pool checkout times out → SandboxResult with available=False, no exception."""
+        pool = MagicMock()
+        pool.checkout = AsyncMock(side_effect=SandboxUnavailableError("timeout"))
+
+        with patch("phalanx.ci_fixer.sandbox.settings", self._mock_settings()):
+            with patch("phalanx.ci_fixer.sandbox.get_sandbox_pool", AsyncMock(return_value=pool)):
+                result = await SandboxProvisioner().provision(tmp_path)
+
+        assert result is not None
+        assert result.available is False
+        assert result.container_id == ""
+
+    @pytest.mark.asyncio
+    async def test_provision_docker_error_returns_available_false(self, tmp_path: Path):
+        """Any unexpected exception → SandboxResult with available=False."""
+        pool = MagicMock()
+        pool.checkout = AsyncMock(side_effect=RuntimeError("docker daemon not found"))
+
+        with patch("phalanx.ci_fixer.sandbox.settings", self._mock_settings()):
+            with patch("phalanx.ci_fixer.sandbox.get_sandbox_pool", AsyncMock(return_value=pool)):
+                result = await SandboxProvisioner().provision(tmp_path)
+
+        assert result is not None
+        assert result.available is False
+
+    @pytest.mark.asyncio
+    async def test_provision_stack_hint_overrides_detection(self, tmp_path: Path):
+        """stack_hint bypasses file-existence detection."""
+        pool = _mock_pool()
+
+        with patch("phalanx.ci_fixer.sandbox.settings", self._mock_settings()):
+            with patch("phalanx.ci_fixer.sandbox.get_sandbox_pool", AsyncMock(return_value=pool)):
+                provisioner = SandboxProvisioner()
+                with patch.object(provisioner, "_bind_workspace", new_callable=AsyncMock):
+                    result = await provisioner.provision(tmp_path, stack_hint="node")
+
+        assert result is not None
+        assert result.stack == "node"
+        assert result.image == "node:20-slim"
+
+    @pytest.mark.asyncio
+    async def test_provision_unknown_stack_uses_ubuntu(self, tmp_path: Path):
+        """Empty workspace → unknown stack → ubuntu:22.04 image."""
+        pool = _mock_pool()
+
+        with patch("phalanx.ci_fixer.sandbox.settings", self._mock_settings()):
+            with patch("phalanx.ci_fixer.sandbox.get_sandbox_pool", AsyncMock(return_value=pool)):
+                provisioner = SandboxProvisioner()
+                with patch.object(provisioner, "_bind_workspace", new_callable=AsyncMock):
+                    result = await provisioner.provision(tmp_path)
+
+        assert result is not None
+        assert result.stack == "unknown"
+        assert result.image == "ubuntu:22.04"
+
+    @pytest.mark.asyncio
+    async def test_provision_go_workspace(self, tmp_path: Path):
+        ws = _make_workspace(tmp_path, "go.mod")
+        pool = _mock_pool()
+
+        with patch("phalanx.ci_fixer.sandbox.settings", self._mock_settings()):
+            with patch("phalanx.ci_fixer.sandbox.get_sandbox_pool", AsyncMock(return_value=pool)):
+                provisioner = SandboxProvisioner()
+                with patch.object(provisioner, "_bind_workspace", new_callable=AsyncMock):
+                    result = await provisioner.provision(ws)
+
+        assert result is not None
+        assert result.stack == "go"
+        assert result.image == "golang:1.22-alpine"
+
+    @pytest.mark.asyncio
+    async def test_provision_rust_workspace(self, tmp_path: Path):
+        ws = _make_workspace(tmp_path, "Cargo.toml")
+        pool = _mock_pool()
+
+        with patch("phalanx.ci_fixer.sandbox.settings", self._mock_settings()):
+            with patch("phalanx.ci_fixer.sandbox.get_sandbox_pool", AsyncMock(return_value=pool)):
+                provisioner = SandboxProvisioner()
+                with patch.object(provisioner, "_bind_workspace", new_callable=AsyncMock):
+                    result = await provisioner.provision(ws)
+
+        assert result is not None
+        assert result.stack == "rust"
+        assert result.image == "rust:1.77-slim"
+
+
+class TestSandboxProvisionerRelease:
+    @pytest.mark.asyncio
+    async def test_release_no_op_when_no_container_id(self, tmp_path: Path):
+        """release() with empty container_id is a no-op — no pool call."""
+        result = SandboxResult(
+            sandbox_id="phalanx-sandbox-abc12345",
+            stack="python",
+            image="python:3.12-slim",
+            workspace_path=str(tmp_path),
+            container_id="",
+        )
+        pool = MagicMock()
+        pool.checkin = AsyncMock()
+
+        with patch("phalanx.ci_fixer.sandbox.get_sandbox_pool", AsyncMock(return_value=pool)):
+            await SandboxProvisioner().release(result)
+
+        pool.checkin.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_release_calls_pool_checkin(self, tmp_path: Path):
+        """release() with container_id → pool.checkin called."""
+        result = SandboxResult(
+            sandbox_id="phalanx-sandbox-abc12345",
+            stack="python",
+            image="phalanx-sandbox-python:latest",
+            workspace_path=str(tmp_path),
+            container_id="ctr-abc123",
+        )
+        pool = MagicMock()
+        pool.checkin = AsyncMock()
+
+        with patch("phalanx.ci_fixer.sandbox.get_sandbox_pool", AsyncMock(return_value=pool)):
+            await SandboxProvisioner().release(result)
+
+        pool.checkin.assert_awaited_once()
+
+    @pytest.mark.asyncio
+    async def test_release_swallows_pool_error(self, tmp_path: Path):
+        """pool.checkin raises → release() swallows the error."""
+        result = SandboxResult(
+            sandbox_id="phalanx-sandbox-abc12345",
+            stack="python",
+            image="img",
+            workspace_path=str(tmp_path),
+            container_id="ctr-abc123",
+        )
+        pool = MagicMock()
+        pool.checkin = AsyncMock(side_effect=RuntimeError("pool gone"))
+
+        with patch("phalanx.ci_fixer.sandbox.get_sandbox_pool", AsyncMock(return_value=pool)):
+            await SandboxProvisioner().release(result)  # must not raise
+
+
+# ── SandboxResult dataclass ───────────────────────────────────────────────────
+
+
+class TestSandboxResult:
+    def test_sandbox_result_extra_defaults_empty(self):
+        r = SandboxResult(
+            sandbox_id="phalanx-sandbox-abc12345",
+            stack="python",
+            image="python:3.12-slim",
+            workspace_path="/tmp/ws",
+        )
+        assert r.extra == {}
+
+    def test_sandbox_result_available_default(self):
+        r = SandboxResult(
+            sandbox_id="phalanx-sandbox-abc12345",
+            stack="python",
+            image="python:3.12-slim",
+            workspace_path="/tmp/ws",
+        )
+        assert r.available is True
+
+    def test_sandbox_result_available_can_be_false(self):
+        r = SandboxResult(
+            sandbox_id="phalanx-sandbox-abc12345",
+            stack="python",
+            image="python:3.12-slim",
+            workspace_path="/tmp/ws",
+            available=False,
+        )
+        assert r.available is False
+
+    def test_sandbox_result_container_id_default_empty(self):
+        r = SandboxResult(
+            sandbox_id="phalanx-sandbox-abc12345",
+            stack="python",
+            image="python:3.12-slim",
+            workspace_path="/tmp/ws",
+        )
+        assert r.container_id == ""
+
+    def test_sandbox_result_mount_path_default(self):
+        r = SandboxResult(
+            sandbox_id="phalanx-sandbox-abc12345",
+            stack="python",
+            image="python:3.12-slim",
+            workspace_path="/tmp/ws",
+        )
+        assert r.mount_path == "/workspace"
+
+    def test_sandbox_result_container_id_set(self):
+        r = SandboxResult(
+            sandbox_id="phalanx-sandbox-abc12345",
+            stack="python",
+            image="python:3.12-slim",
+            workspace_path="/tmp/ws",
+            container_id="abc123def456",
+        )
+        assert r.container_id == "abc123def456"
diff --git a/tests/unit/test_ci_fixer_success_path.py b/tests/unit/test_ci_fixer_success_path.py
index b9998625..9c495a12 100644
--- a/tests/unit/test_ci_fixer_success_path.py
+++ b/tests/unit/test_ci_fixer_success_path.py
@@ -17,14 +17,12 @@
 from __future__ import annotations
 
 import json
-from pathlib import Path
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
 from phalanx.agents.ci_fixer import CIFixerAgent
 
-
 # ── helpers ────────────────────────────────────────────────────────────────────
 
 
@@ -112,13 +110,17 @@ async def mock_execute(_stmt):
 
     mock_flaky = MagicMock()
 
-    with patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx), \
-         patch.object(agent, "_fetch_logs", new_callable=AsyncMock, return_value="some log"), \
-         patch("phalanx.agents.ci_fixer.parse_log", return_value=parsed_with_errors), \
-         patch.object(agent, "_persist_fingerprint", new_callable=AsyncMock), \
-         patch.object(agent, "_load_flaky_patterns", new_callable=AsyncMock, return_value=[mock_flaky]), \
-         patch("phalanx.agents.ci_fixer.is_flaky_suppressed", return_value=True), \
-         patch.object(agent, "_mark_failed", new_callable=AsyncMock) as mock_mark:
+    with (
+        patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx),
+        patch.object(agent, "_fetch_logs", new_callable=AsyncMock, return_value="some log"),
+        patch("phalanx.agents.ci_fixer.parse_log", return_value=parsed_with_errors),
+        patch.object(agent, "_persist_fingerprint", new_callable=AsyncMock),
+        patch.object(
+            agent, "_load_flaky_patterns", new_callable=AsyncMock, return_value=[mock_flaky]
+        ),
+        patch("phalanx.agents.ci_fixer.is_flaky_suppressed", return_value=True),
+        patch.object(agent, "_mark_failed", new_callable=AsyncMock) as mock_mark,
+    ):
         result = await agent._execute_inner()
 
     assert result.success is False
@@ -165,16 +167,18 @@ async def mock_execute(_stmt):
     )
     low_conf_plan = FixPlan(confidence="low", root_cause="can't fix this")
 
-    with patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx), \
-         patch.object(agent, "_fetch_logs", new_callable=AsyncMock, return_value="some log"), \
-         patch("phalanx.agents.ci_fixer.parse_log", return_value=parsed_with_errors), \
-         patch.object(agent, "_persist_fingerprint", new_callable=AsyncMock), \
-         patch.object(agent, "_load_flaky_patterns", new_callable=AsyncMock, return_value=[]), \
-         patch("phalanx.agents.ci_fixer.is_flaky_suppressed", return_value=False), \
-         patch.object(agent, "_clone_repo", new_callable=AsyncMock, return_value=True), \
-         patch("phalanx.agents.ci_fixer.RootCauseAnalyst") as MockAnalyst, \
-         patch.object(agent, "_mark_failed_with_fields", new_callable=AsyncMock) as mock_mark, \
-         patch("phalanx.ci_fixer.analyst.FixPlan"):
+    with (
+        patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx),
+        patch.object(agent, "_fetch_logs", new_callable=AsyncMock, return_value="some log"),
+        patch("phalanx.agents.ci_fixer.parse_log", return_value=parsed_with_errors),
+        patch.object(agent, "_persist_fingerprint", new_callable=AsyncMock),
+        patch.object(agent, "_load_flaky_patterns", new_callable=AsyncMock, return_value=[]),
+        patch("phalanx.agents.ci_fixer.is_flaky_suppressed", return_value=False),
+        patch.object(agent, "_clone_repo", new_callable=AsyncMock, return_value=True),
+        patch("phalanx.agents.ci_fixer.RootCauseAnalyst") as MockAnalyst,
+        patch.object(agent, "_mark_failed_with_fields", new_callable=AsyncMock),
+        patch("phalanx.ci_fixer.analyst.FixPlan"),
+    ):
         mock_analyst_inst = MagicMock()
         mock_analyst_inst.analyze.return_value = low_conf_plan
         MockAnalyst.return_value = mock_analyst_inst
@@ -221,14 +225,16 @@ async def mock_execute(_stmt):
         lint_errors=[LintError(file="src/foo.py", line=1, col=1, code="F401", message="unused")],
     )
 
-    with patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx), \
-         patch.object(agent, "_fetch_logs", new_callable=AsyncMock, return_value="some log"), \
-         patch("phalanx.agents.ci_fixer.parse_log", return_value=parsed_with_errors), \
-         patch.object(agent, "_persist_fingerprint", new_callable=AsyncMock), \
-         patch.object(agent, "_load_flaky_patterns", new_callable=AsyncMock, return_value=[]), \
-         patch("phalanx.agents.ci_fixer.is_flaky_suppressed", return_value=False), \
-         patch.object(agent, "_clone_repo", new_callable=AsyncMock, return_value=False), \
-         patch.object(agent, "_mark_failed", new_callable=AsyncMock) as mock_mark:
+    with (
+        patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx),
+        patch.object(agent, "_fetch_logs", new_callable=AsyncMock, return_value="some log"),
+        patch("phalanx.agents.ci_fixer.parse_log", return_value=parsed_with_errors),
+        patch.object(agent, "_persist_fingerprint", new_callable=AsyncMock),
+        patch.object(agent, "_load_flaky_patterns", new_callable=AsyncMock, return_value=[]),
+        patch("phalanx.agents.ci_fixer.is_flaky_suppressed", return_value=False),
+        patch.object(agent, "_clone_repo", new_callable=AsyncMock, return_value=False),
+        patch.object(agent, "_mark_failed", new_callable=AsyncMock) as mock_mark,
+    ):
         result = await agent._execute_inner()
 
     assert result.success is False
@@ -350,8 +356,10 @@ async def test_open_draft_pr_auto_merge_calls_enable():
     mock_client.__aexit__ = AsyncMock(return_value=None)
     mock_client.post = AsyncMock(return_value=mock_resp)
 
-    with patch("httpx.AsyncClient", return_value=mock_client), \
-         patch.object(agent, "_enable_github_auto_merge", new_callable=AsyncMock) as mock_auto:
+    with (
+        patch("httpx.AsyncClient", return_value=mock_client),
+        patch.object(agent, "_enable_github_auto_merge", new_callable=AsyncMock) as mock_auto,
+    ):
         result = await agent._open_draft_pr(
             integration=integration,
             ci_run=ci_run,
@@ -411,8 +419,6 @@ async def test_enable_auto_merge_gql_error():
     gql_resp.json.return_value = {"errors": [{"message": "auto-merge not enabled"}]}
     gql_resp.text = '{"errors": [...]}'
 
-    call_count = {"n": 0}
-
     async def side_effect_client():
         pass
 
@@ -645,8 +651,10 @@ async def test_async_lookup_history_unreliable_returns_none():
     mock_ctx.__aenter__ = AsyncMock(return_value=mock_session)
     mock_ctx.__aexit__ = AsyncMock(return_value=None)
 
-    with patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx), \
-         patch("phalanx.agents.ci_fixer.should_use_history", return_value=False):
+    with (
+        patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx),
+        patch("phalanx.agents.ci_fixer.should_use_history", return_value=False),
+    ):
         result = await agent._async_lookup_fix_history("fp_hash_abc")
 
     assert result is None
@@ -670,8 +678,10 @@ async def test_async_lookup_history_corrupt_json():
     mock_ctx.__aenter__ = AsyncMock(return_value=mock_session)
     mock_ctx.__aexit__ = AsyncMock(return_value=None)
 
-    with patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx), \
-         patch("phalanx.agents.ci_fixer.should_use_history", return_value=True):
+    with (
+        patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx),
+        patch("phalanx.agents.ci_fixer.should_use_history", return_value=True),
+    ):
         result = await agent._async_lookup_fix_history("fp_hash_abc")
 
     assert result is None
@@ -682,7 +692,15 @@ async def test_async_lookup_history_hit():
     """Valid history → returns patch list."""
     agent = _make_agent()
 
-    patches = [{"path": "src/foo.py", "start_line": 1, "end_line": 2, "corrected_lines": ["x\n"], "reason": ""}]
+    patches = [
+        {
+            "path": "src/foo.py",
+            "start_line": 1,
+            "end_line": 2,
+            "corrected_lines": ["x\n"],
+            "reason": "",
+        }
+    ]
     mock_fp = MagicMock()
     mock_fp.success_count = 5
     mock_fp.failure_count = 1
@@ -696,8 +714,10 @@ async def test_async_lookup_history_hit():
     mock_ctx.__aenter__ = AsyncMock(return_value=mock_session)
     mock_ctx.__aexit__ = AsyncMock(return_value=None)
 
-    with patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx), \
-         patch("phalanx.agents.ci_fixer.should_use_history", return_value=True):
+    with (
+        patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx),
+        patch("phalanx.agents.ci_fixer.should_use_history", return_value=True),
+    ):
         result = await agent._async_lookup_fix_history("fp_hash_abc")
 
     assert result is not None
@@ -727,7 +747,11 @@ async def test_update_fingerprint_run_missing():
     with patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx):
         await agent._update_fingerprint_on_success(
             fingerprint_hash="fp_abc",
-            patches=[FilePatch(path="src/foo.py", start_line=1, end_line=2, corrected_lines=["x\n"], reason="")],
+            patches=[
+                FilePatch(
+                    path="src/foo.py", start_line=1, end_line=2, corrected_lines=["x\n"], reason=""
+                )
+            ],
             tool_version="ruff 0.4.0",
             parsed_log=ParsedLog(tool="ruff"),
         )
@@ -745,7 +769,11 @@ async def test_update_fingerprint_exception_logged():
 
         await agent._update_fingerprint_on_success(
             fingerprint_hash="fp_abc",
-            patches=[FilePatch(path="src/foo.py", start_line=1, end_line=2, corrected_lines=["x\n"], reason="")],
+            patches=[
+                FilePatch(
+                    path="src/foo.py", start_line=1, end_line=2, corrected_lines=["x\n"], reason=""
+                )
+            ],
             tool_version="ruff 0.4.0",
             parsed_log=ParsedLog(tool="ruff"),
         )
@@ -790,7 +818,11 @@ async def mock_execute(_stmt):
     with patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx):
         await agent._update_fingerprint_on_success(
             fingerprint_hash="fp_abc",
-            patches=[FilePatch(path="src/foo.py", start_line=1, end_line=2, corrected_lines=["x\n"], reason="")],
+            patches=[
+                FilePatch(
+                    path="src/foo.py", start_line=1, end_line=2, corrected_lines=["x\n"], reason=""
+                )
+            ],
             tool_version="ruff 0.4.0",
             parsed_log=ParsedLog(tool="ruff"),
         )
@@ -839,9 +871,11 @@ async def test_commit_to_safe_branch_no_changes(tmp_path):
     mock_repo.untracked_files = []
     mock_repo.remotes = []
 
-    with patch("phalanx.agents.ci_fixer.CIFixerAgent._commit_to_safe_branch",
-               new_callable=AsyncMock,
-               return_value={"sha": None, "message": "no_changes"}):
+    with patch(
+        "phalanx.agents.ci_fixer.CIFixerAgent._commit_to_safe_branch",
+        new_callable=AsyncMock,
+        return_value={"sha": None, "message": "no_changes"},
+    ):
         result = await agent._commit_to_safe_branch(
             workspace=tmp_path,
             source_branch="main",
diff --git a/tests/unit/test_ci_fixer_unit.py b/tests/unit/test_ci_fixer_unit.py
index 576e0b16..b002d786 100644
--- a/tests/unit/test_ci_fixer_unit.py
+++ b/tests/unit/test_ci_fixer_unit.py
@@ -5,8 +5,6 @@
 
 from __future__ import annotations
 
-import pytest
-
 from phalanx.ci_fixer.classifier import classify_failure, extract_failing_files
 from phalanx.ci_fixer.events import CIFailureEvent
 from phalanx.ci_fixer.log_fetcher import _extract_failure_section, _truncate
@@ -322,8 +320,6 @@ def test_agent_role_is_ci_fixer(self):
 
 # ── RootCauseAnalyst — JSON parsing (mocked _call_llm) ────────────────────────
 
-from unittest.mock import patch  # noqa: E402
-
 
 class TestRootCauseAnalyst:
     """Tests for the RootCauseAnalyst LLM confirmation step (windowed API)."""
@@ -403,15 +399,17 @@ def test_patch_delta_stored(self, tmp_path):
         self._write_file(tmp_path, "src/foo.py")
         analyst = self._make_analyst(self._patch_response("src/foo.py"))
         plan = analyst.analyze(self._make_parsed_log(), tmp_path)
-        assert plan.patches[0].delta == -1   # removed 1 line (import os)
+        assert plan.patches[0].delta == -1  # removed 1 line (import os)
 
     # ── Low confidence / no patches ───────────────────────────────────────────
 
     def test_low_confidence_returns_empty_patches(self, tmp_path):
         self._write_file(tmp_path, "src/foo.py")
         import json as _j
-        response = _j.dumps({"confidence": "low", "root_cause": "unclear",
-                              "patches": [], "needs_new_test": False})
+
+        response = _j.dumps(
+            {"confidence": "low", "root_cause": "unclear", "patches": [], "needs_new_test": False}
+        )
         analyst = self._make_analyst(response)
         plan = analyst.analyze(self._make_parsed_log(), tmp_path)
         assert plan.confidence == "low"
@@ -437,17 +435,23 @@ def test_patch_for_unknown_file_rejected(self, tmp_path):
         """LLM returns a patch for a file we never sent → rejected → no actionable patches."""
         self._write_file(tmp_path, "src/foo.py")
         import json as _j
-        response = _j.dumps({
-            "confidence": "high",
-            "root_cause": "x",
-            "patches": [{
-                "path": "src/invented_file.py",
-                "start_line": 1, "end_line": 3,
-                "corrected_lines": ["x = 1\n"],
-                "reason": "invented",
-            }],
-            "needs_new_test": False,
-        })
+
+        response = _j.dumps(
+            {
+                "confidence": "high",
+                "root_cause": "x",
+                "patches": [
+                    {
+                        "path": "src/invented_file.py",
+                        "start_line": 1,
+                        "end_line": 3,
+                        "corrected_lines": ["x = 1\n"],
+                        "reason": "invented",
+                    }
+                ],
+                "needs_new_test": False,
+            }
+        )
         analyst = self._make_analyst(response)
         plan = analyst.analyze(self._make_parsed_log(), tmp_path)
         # All patches rejected → downgraded to low
@@ -458,17 +462,23 @@ def test_patch_for_test_file_rejected(self, tmp_path):
         """Patches targeting test files are always rejected."""
         self._write_file(tmp_path, "tests/test_foo.py")
         import json as _j
-        response = _j.dumps({
-            "confidence": "high",
-            "root_cause": "x",
-            "patches": [{
-                "path": "tests/test_foo.py",
-                "start_line": 1, "end_line": 3,
-                "corrected_lines": ["x = 1\n"],
-                "reason": "bad",
-            }],
-            "needs_new_test": False,
-        })
+
+        response = _j.dumps(
+            {
+                "confidence": "high",
+                "root_cause": "x",
+                "patches": [
+                    {
+                        "path": "tests/test_foo.py",
+                        "start_line": 1,
+                        "end_line": 3,
+                        "corrected_lines": ["x = 1\n"],
+                        "reason": "bad",
+                    }
+                ],
+                "needs_new_test": False,
+            }
+        )
         parsed = self._make_parsed_log(file="tests/test_foo.py")
         analyst = self._make_analyst(response)
         plan = analyst.analyze(parsed, tmp_path)
@@ -478,19 +488,25 @@ def test_patch_delta_too_large_rejected(self, tmp_path):
         """corrected_lines that differ by > MAX_LINE_DELTA from the window → rejected."""
         self._write_file(tmp_path, "src/foo.py")
         import json as _j
+
         # Window is 5 lines; returning 50 lines → delta = 45 → rejected
         big_lines = [f"line {i}\n" for i in range(50)]
-        response = _j.dumps({
-            "confidence": "high",
-            "root_cause": "x",
-            "patches": [{
-                "path": "src/foo.py",
-                "start_line": 1, "end_line": len(self._FILE_LINES),
-                "corrected_lines": big_lines,
-                "reason": "too big",
-            }],
-            "needs_new_test": False,
-        })
+        response = _j.dumps(
+            {
+                "confidence": "high",
+                "root_cause": "x",
+                "patches": [
+                    {
+                        "path": "src/foo.py",
+                        "start_line": 1,
+                        "end_line": len(self._FILE_LINES),
+                        "corrected_lines": big_lines,
+                        "reason": "too big",
+                    }
+                ],
+                "needs_new_test": False,
+            }
+        )
         analyst = self._make_analyst(response)
         plan = analyst.analyze(self._make_parsed_log(), tmp_path)
         assert len(plan.patches) == 0
diff --git a/tests/unit/test_ci_fixer_verifier.py b/tests/unit/test_ci_fixer_verifier.py
new file mode 100644
index 00000000..df7920e6
--- /dev/null
+++ b/tests/unit/test_ci_fixer_verifier.py
@@ -0,0 +1,453 @@
+"""
+Tests for phalanx.ci_fixer.verifier — VerifierAgent.
+
+Coverage targets:
+  - verify(): all 4 verdicts (passed, failed, skipped, timeout)
+  - verify(): unknown stack → skipped (no profile)
+  - verify(): python with pytest infrastructure → prepends pytest step
+  - verify(): python without pytest → ruff only
+  - verify(): first failing step short-circuits remaining steps
+  - verify(): all steps timeout → verdict=timeout
+  - _get_profile(): known and unknown stacks
+  - _has_pytest(): detects pyproject.toml, pytest.ini, setup.cfg, absent
+  - _run_cmd(): FileNotFoundError → VerificationStep with tool-not-found output
+  - VerificationStep dataclass defaults
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from phalanx.ci_fixer.context import VerificationResult
+from phalanx.ci_fixer.sandbox import SandboxResult
+from phalanx.ci_fixer.verifier import VerificationStep, VerifierAgent
+
+if TYPE_CHECKING:
+    from pathlib import Path
+
+# ── Helpers ───────────────────────────────────────────────────────────────────
+
+
+def _make_proc(
+    returncode: int = 0,
+    stdout: bytes = b"",
+    stderr: bytes = b"",
+    timeout: bool = False,
+    not_found: bool = False,
+) -> MagicMock:
+    """Return a mock asyncio.Process."""
+    proc = MagicMock()
+    proc.returncode = returncode
+    proc.kill = MagicMock()
+    proc.wait = AsyncMock()
+    if timeout:
+        proc.communicate = AsyncMock(side_effect=TimeoutError())
+    elif not_found:
+        proc.communicate = AsyncMock(side_effect=FileNotFoundError())
+    else:
+        proc.communicate = AsyncMock(return_value=(stdout, stderr))
+    return proc
+
+
+def _make_workspace(tmp_path: Path, *filenames: str) -> Path:
+    for name in filenames:
+        (tmp_path / name).touch()
+    return tmp_path
+
+
+# ── _has_pytest ───────────────────────────────────────────────────────────────
+
+
+class TestHasPytest:
+    def test_detects_pyproject_toml(self, tmp_path: Path):
+        _make_workspace(tmp_path, "pyproject.toml")
+        assert VerifierAgent()._has_pytest(tmp_path) is True
+
+    def test_detects_pytest_ini(self, tmp_path: Path):
+        _make_workspace(tmp_path, "pytest.ini")
+        assert VerifierAgent()._has_pytest(tmp_path) is True
+
+    def test_detects_setup_cfg(self, tmp_path: Path):
+        _make_workspace(tmp_path, "setup.cfg")
+        assert VerifierAgent()._has_pytest(tmp_path) is True
+
+    def test_absent(self, tmp_path: Path):
+        assert VerifierAgent()._has_pytest(tmp_path) is False
+
+
+# ── _get_profile ──────────────────────────────────────────────────────────────
+
+
+class TestGetProfile:
+    def test_python_profile(self):
+        profile = VerifierAgent()._get_profile("python")
+        assert len(profile) >= 1
+        labels = [label for label, _ in profile]
+        assert "ruff_full" in labels
+
+    def test_node_profile(self):
+        profile = VerifierAgent()._get_profile("node")
+        assert any("npm" in " ".join(cmd) for _, cmd in profile)
+
+    def test_go_profile(self):
+        profile = VerifierAgent()._get_profile("go")
+        assert any("go" in cmd[0] for _, cmd in profile)
+
+    def test_rust_profile(self):
+        profile = VerifierAgent()._get_profile("rust")
+        assert any("cargo" in cmd[0] for _, cmd in profile)
+
+    def test_unknown_stack_empty_profile(self):
+        assert VerifierAgent()._get_profile("unknown") == []
+
+
+# ── verify() — core verdicts ──────────────────────────────────────────────────
+
+
+class TestVerifyVerdicts:
+    @pytest.mark.asyncio
+    async def test_verify_skipped_unknown_stack(self, tmp_path: Path):
+        """Unknown stack → no profile → verdict=skipped immediately."""
+        result = await VerifierAgent().verify(
+            workspace_path=tmp_path,
+            stack="unknown",
+            sandbox_result=None,
+            timeout_seconds=30,
+        )
+        assert result.verdict == "skipped"
+        assert isinstance(result, VerificationResult)
+
+    @pytest.mark.asyncio
+    async def test_verify_passed_python_no_pytest(self, tmp_path: Path):
+        """Python workspace without pytest infra → ruff only → exit 0 → passed."""
+        proc = _make_proc(returncode=0, stdout=b"All checks passed", stderr=b"")
+
+        with patch("asyncio.create_subprocess_exec", return_value=proc):
+            result = await VerifierAgent().verify(
+                workspace_path=tmp_path,
+                stack="python",
+                sandbox_result=None,
+                timeout_seconds=30,
+            )
+
+        assert result.verdict == "passed"
+
+    @pytest.mark.asyncio
+    async def test_verify_passed_python_with_pytest(self, tmp_path: Path):
+        """Python workspace with pyproject.toml → pytest + ruff → both pass."""
+        _make_workspace(tmp_path, "pyproject.toml")
+        proc = _make_proc(returncode=0, stdout=b"passed", stderr=b"")
+
+        with patch("asyncio.create_subprocess_exec", return_value=proc):
+            result = await VerifierAgent().verify(
+                workspace_path=tmp_path,
+                stack="python",
+                sandbox_result=None,
+                timeout_seconds=30,
+            )
+
+        assert result.verdict == "passed"
+
+    @pytest.mark.asyncio
+    async def test_verify_failed_on_first_step(self, tmp_path: Path):
+        """First step fails → verdict=failed, short-circuit."""
+        proc = _make_proc(returncode=1, stdout=b"", stderr=b"FAILED test_foo.py")
+
+        with patch("asyncio.create_subprocess_exec", return_value=proc):
+            result = await VerifierAgent().verify(
+                workspace_path=tmp_path,
+                stack="python",
+                sandbox_result=None,
+                timeout_seconds=30,
+            )
+
+        assert result.verdict == "failed"
+        assert "FAILED" in result.output
+
+    @pytest.mark.asyncio
+    async def test_verify_timeout_single_step(self, tmp_path: Path):
+        """Single step times out → all_timed_out → verdict=timeout."""
+        proc = _make_proc(timeout=True)
+
+        with patch("asyncio.create_subprocess_exec", return_value=proc):
+            result = await VerifierAgent().verify(
+                workspace_path=tmp_path,
+                stack="go",
+                sandbox_result=None,
+                timeout_seconds=1,
+            )
+
+        assert result.verdict == "timeout"
+
+    @pytest.mark.asyncio
+    async def test_verify_timeout_step_does_not_block_other_steps(self, tmp_path: Path):
+        """Timeout on one step is skipped; if remaining steps pass → passed."""
+        _make_workspace(tmp_path, "pyproject.toml")
+
+        call_count = 0
+
+        async def fake_exec(*args, **kwargs):
+            nonlocal call_count
+            call_count += 1
+            if call_count == 1:
+                # First call (pytest) times out
+                return _make_proc(timeout=True)
+            # Subsequent calls (ruff) pass
+            return _make_proc(returncode=0, stdout=b"clean", stderr=b"")
+
+        with patch("asyncio.create_subprocess_exec", side_effect=fake_exec):
+            result = await VerifierAgent().verify(
+                workspace_path=tmp_path,
+                stack="python",
+                sandbox_result=None,
+                timeout_seconds=1,
+            )
+
+        # ruff passed even though pytest timed out → overall passed
+        assert result.verdict == "passed"
+
+    @pytest.mark.asyncio
+    async def test_verify_go_passed(self, tmp_path: Path):
+        proc = _make_proc(returncode=0, stdout=b"ok  example.com/pkg", stderr=b"")
+
+        with patch("asyncio.create_subprocess_exec", return_value=proc):
+            result = await VerifierAgent().verify(
+                workspace_path=tmp_path,
+                stack="go",
+                sandbox_result=None,
+                timeout_seconds=30,
+            )
+
+        assert result.verdict == "passed"
+
+    @pytest.mark.asyncio
+    async def test_verify_rust_failed(self, tmp_path: Path):
+        proc = _make_proc(returncode=1, stdout=b"", stderr=b"error[E0308]: mismatched types")
+
+        with patch("asyncio.create_subprocess_exec", return_value=proc):
+            result = await VerifierAgent().verify(
+                workspace_path=tmp_path,
+                stack="rust",
+                sandbox_result=None,
+                timeout_seconds=30,
+            )
+
+        assert result.verdict == "failed"
+
+    @pytest.mark.asyncio
+    async def test_verify_cmd_run_populated(self, tmp_path: Path):
+        """cmd_run contains the command that was executed."""
+        proc = _make_proc(returncode=0)
+
+        with patch("asyncio.create_subprocess_exec", return_value=proc):
+            result = await VerifierAgent().verify(
+                workspace_path=tmp_path,
+                stack="go",
+                sandbox_result=None,
+                timeout_seconds=30,
+            )
+
+        assert result.cmd_run != ""
+        assert "go" in result.cmd_run
+
+
+# ── _run_cmd ──────────────────────────────────────────────────────────────────
+
+
+class TestRunCmd:
+    @pytest.mark.asyncio
+    async def test_run_cmd_tool_not_found(self, tmp_path: Path):
+        """FileNotFoundError → VerificationStep with tool-not-found message, no raise."""
+        with patch(
+            "asyncio.create_subprocess_exec",
+            side_effect=FileNotFoundError("notool"),
+        ):
+            step = await VerifierAgent()._run_cmd(
+                label="test_label",
+                cmd_args=["notool", "--check"],
+                cwd=tmp_path,
+                timeout_seconds=30,
+            )
+
+        assert step.exit_code == -1
+        assert "not found" in step.output
+        assert step.timed_out is False
+
+    @pytest.mark.asyncio
+    async def test_run_cmd_success(self, tmp_path: Path):
+        proc = _make_proc(returncode=0, stdout=b"clean", stderr=b"")
+
+        with patch("asyncio.create_subprocess_exec", return_value=proc):
+            step = await VerifierAgent()._run_cmd(
+                label="ruff_full",
+                cmd_args=["ruff", "check", "."],
+                cwd=tmp_path,
+                timeout_seconds=30,
+            )
+
+        assert step.exit_code == 0
+        assert step.timed_out is False
+        assert "clean" in step.output
+
+    @pytest.mark.asyncio
+    async def test_run_cmd_timeout(self, tmp_path: Path):
+        proc = _make_proc(timeout=True)
+
+        with patch("asyncio.create_subprocess_exec", return_value=proc):
+            step = await VerifierAgent()._run_cmd(
+                label="slow_check",
+                cmd_args=["slow", "cmd"],
+                cwd=tmp_path,
+                timeout_seconds=1,
+            )
+
+        assert step.timed_out is True
+        assert step.exit_code == -1
+        proc.kill.assert_called_once()
+
+
+# ── Container exec path ───────────────────────────────────────────────────────
+
+
+def _make_sandbox_result(container_id: str = "") -> SandboxResult:
+    return SandboxResult(
+        sandbox_id="phalanx-sandbox-test1234",
+        stack="python",
+        image="python:3.12-slim",
+        workspace_path="/tmp/ws",
+        available=True,
+        container_id=container_id,
+    )
+
+
+class TestVerifierContainerExec:
+    @pytest.mark.asyncio
+    async def test_run_cmd_with_container_id_uses_docker_exec(self, tmp_path):
+        """When container_id is set, command is wrapped with docker exec."""
+        proc = _make_proc(returncode=0, stdout=b"clean", stderr=b"")
+
+        captured_args = []
+
+        async def fake_exec(*args, **kwargs):
+            captured_args.extend(args)
+            return proc
+
+        with patch("asyncio.create_subprocess_exec", side_effect=fake_exec):
+            step = await VerifierAgent()._run_cmd(
+                label="ruff_full",
+                cmd_args=["ruff", "check", "."],
+                cwd=tmp_path,
+                timeout_seconds=30,
+                container_id="ctr-abc123",
+            )
+
+        assert step.exit_code == 0
+        assert "docker" in captured_args
+        assert "ctr-abc123" in captured_args
+        assert "ruff" in captured_args
+
+    @pytest.mark.asyncio
+    async def test_run_cmd_without_container_id_runs_locally(self, tmp_path):
+        """When container_id is empty, runs locally (original behaviour)."""
+        proc = _make_proc(returncode=0, stdout=b"ok", stderr=b"")
+
+        with patch("asyncio.create_subprocess_exec", return_value=proc):
+            step = await VerifierAgent()._run_cmd(
+                label="go_test",
+                cmd_args=["go", "test", "./..."],
+                cwd=tmp_path,
+                timeout_seconds=30,
+                container_id="",
+            )
+
+        assert step.exit_code == 0
+
+    @pytest.mark.asyncio
+    async def test_verify_passes_container_id_to_run_cmd(self, tmp_path):
+        """verify() extracts container_id from sandbox_result and threads it through."""
+        proc = _make_proc(returncode=0, stdout=b"ok", stderr=b"")
+        captured_container_ids = []
+
+        original_run_cmd = VerifierAgent._run_cmd
+
+        async def recording_run_cmd(self, label, cmd_args, cwd, timeout_seconds, container_id=""):
+            captured_container_ids.append(container_id)
+            return await original_run_cmd(
+                self, label, cmd_args, cwd, timeout_seconds, container_id=container_id
+            )
+
+        with patch("asyncio.create_subprocess_exec", return_value=proc):
+            with patch.object(VerifierAgent, "_run_cmd", recording_run_cmd):
+                await VerifierAgent().verify(
+                    workspace_path=tmp_path,
+                    stack="go",
+                    sandbox_result=_make_sandbox_result(container_id="ctr-xyz"),
+                    timeout_seconds=30,
+                )
+
+        assert all(cid == "ctr-xyz" for cid in captured_container_ids)
+
+    @pytest.mark.asyncio
+    async def test_verify_no_container_id_when_sandbox_none(self, tmp_path):
+        """sandbox_result=None → container_id="" → local subprocess path."""
+        proc = _make_proc(returncode=0, stdout=b"ok", stderr=b"")
+        captured_container_ids = []
+
+        original_run_cmd = VerifierAgent._run_cmd
+
+        async def recording_run_cmd(self, label, cmd_args, cwd, timeout_seconds, container_id=""):
+            captured_container_ids.append(container_id)
+            return await original_run_cmd(
+                self, label, cmd_args, cwd, timeout_seconds, container_id=container_id
+            )
+
+        with patch("asyncio.create_subprocess_exec", return_value=proc):
+            with patch.object(VerifierAgent, "_run_cmd", recording_run_cmd):
+                await VerifierAgent().verify(
+                    workspace_path=tmp_path,
+                    stack="go",
+                    sandbox_result=None,
+                    timeout_seconds=30,
+                )
+
+        assert all(cid == "" for cid in captured_container_ids)
+
+    def test_container_id_helper_no_sandbox(self):
+        assert VerifierAgent()._container_id(None) == ""
+
+    def test_container_id_helper_with_container(self):
+        sr = _make_sandbox_result(container_id="ctr-123")
+        assert VerifierAgent()._container_id(sr) == "ctr-123"
+
+    def test_container_id_helper_empty_container(self):
+        sr = _make_sandbox_result(container_id="")
+        assert VerifierAgent()._container_id(sr) == ""
+
+
+# ── VerificationStep dataclass ────────────────────────────────────────────────
+
+
+class TestVerificationStep:
+    def test_defaults(self):
+        step = VerificationStep(
+            label="ruff",
+            cmd="ruff check .",
+            exit_code=0,
+            output="clean",
+            elapsed_seconds=1.2,
+        )
+        assert step.timed_out is False
+
+    def test_timed_out_flag(self):
+        step = VerificationStep(
+            label="pytest",
+            cmd="pytest",
+            exit_code=-1,
+            output="",
+            elapsed_seconds=120.0,
+            timed_out=True,
+        )
+        assert step.timed_out is True
diff --git a/tests/unit/test_ci_validator_unit.py b/tests/unit/test_ci_validator_unit.py
index bbd9b4c7..f22871fe 100644
--- a/tests/unit/test_ci_validator_unit.py
+++ b/tests/unit/test_ci_validator_unit.py
@@ -8,10 +8,8 @@
 
 from unittest.mock import MagicMock, patch
 
-import pytest
-
 from phalanx.ci_fixer.log_parser import LintError, ParsedLog, TestFailure, TypeError
-from phalanx.ci_fixer.validator import ValidationResult, validate_fix
+from phalanx.ci_fixer.validator import validate_fix
 
 
 def _parsed(tool: str, **kwargs) -> ParsedLog:
@@ -27,60 +25,87 @@ def _mock_run(self, returncode: int, stdout: str = "", stderr: str = ""):
         return result
 
     def test_ruff_pass(self, tmp_path):
-        parsed = _parsed("ruff", lint_errors=[
-            LintError(file="phalanx/foo.py", line=1, col=1, code="F401", message="unused")
-        ])
+        parsed = _parsed(
+            "ruff",
+            lint_errors=[
+                LintError(file="phalanx/foo.py", line=1, col=1, code="F401", message="unused")
+            ],
+        )
         with patch("subprocess.run", return_value=self._mock_run(0, "All good")):
             result = validate_fix(parsed, tmp_path)
         assert result.passed is True
         assert result.tool == "ruff"
 
     def test_ruff_fail(self, tmp_path):
-        parsed = _parsed("ruff", lint_errors=[
-            LintError(file="phalanx/foo.py", line=1, col=1, code="F401", message="unused")
-        ])
-        with patch("subprocess.run", return_value=self._mock_run(1, "", "phalanx/foo.py:1:1: F401")):
+        parsed = _parsed(
+            "ruff",
+            lint_errors=[
+                LintError(file="phalanx/foo.py", line=1, col=1, code="F401", message="unused")
+            ],
+        )
+        with patch(
+            "subprocess.run", return_value=self._mock_run(1, "", "phalanx/foo.py:1:1: F401")
+        ):
             result = validate_fix(parsed, tmp_path)
         assert result.passed is False
 
     def test_mypy_pass(self, tmp_path):
-        parsed = _parsed("mypy", type_errors=[
-            TypeError(file="phalanx/foo.py", line=5, col=0, message="type error")
-        ])
+        parsed = _parsed(
+            "mypy",
+            type_errors=[TypeError(file="phalanx/foo.py", line=5, col=0, message="type error")],
+        )
         with patch("subprocess.run", return_value=self._mock_run(0)):
             result = validate_fix(parsed, tmp_path)
         assert result.passed is True
         assert result.tool == "mypy"
 
     def test_pytest_pass(self, tmp_path):
-        parsed = _parsed("pytest", test_failures=[
-            TestFailure(test_id="tests/unit/test_foo.py::test_bar", file="tests/unit/test_foo.py", message="")
-        ])
+        parsed = _parsed(
+            "pytest",
+            test_failures=[
+                TestFailure(
+                    test_id="tests/unit/test_foo.py::test_bar",
+                    file="tests/unit/test_foo.py",
+                    message="",
+                )
+            ],
+        )
         with patch("subprocess.run", return_value=self._mock_run(0)):
             result = validate_fix(parsed, tmp_path)
         assert result.passed is True
         assert result.tool == "pytest"
 
     def test_pytest_fail(self, tmp_path):
-        parsed = _parsed("pytest", test_failures=[
-            TestFailure(test_id="tests/unit/test_foo.py::test_bar", file="tests/unit/test_foo.py", message="")
-        ])
+        parsed = _parsed(
+            "pytest",
+            test_failures=[
+                TestFailure(
+                    test_id="tests/unit/test_foo.py::test_bar",
+                    file="tests/unit/test_foo.py",
+                    message="",
+                )
+            ],
+        )
         with patch("subprocess.run", return_value=self._mock_run(1, "", "FAILED")):
             result = validate_fix(parsed, tmp_path)
         assert result.passed is False
 
     def test_tsc_pass(self, tmp_path):
-        parsed = _parsed("tsc", type_errors=[
-            TypeError(file="src/foo.ts", line=1, col=1, message="TS2345: error")
-        ])
+        parsed = _parsed(
+            "tsc",
+            type_errors=[TypeError(file="src/foo.ts", line=1, col=1, message="TS2345: error")],
+        )
         with patch("subprocess.run", return_value=self._mock_run(0)):
             result = validate_fix(parsed, tmp_path)
         assert result.passed is True
 
     def test_eslint_pass(self, tmp_path):
-        parsed = _parsed("eslint", lint_errors=[
-            LintError(file="src/foo.js", line=1, col=1, code="eslint", message="no-unused-vars")
-        ])
+        parsed = _parsed(
+            "eslint",
+            lint_errors=[
+                LintError(file="src/foo.js", line=1, col=1, code="eslint", message="no-unused-vars")
+            ],
+        )
         with patch("subprocess.run", return_value=self._mock_run(0)):
             result = validate_fix(parsed, tmp_path)
         assert result.passed is True
@@ -92,9 +117,12 @@ def test_unknown_tool_skips_validation(self, tmp_path):
         assert "skipped" in result.output
 
     def test_tool_not_found_returns_fail(self, tmp_path):
-        parsed = _parsed("ruff", lint_errors=[
-            LintError(file="phalanx/foo.py", line=1, col=1, code="F401", message="unused")
-        ])
+        parsed = _parsed(
+            "ruff",
+            lint_errors=[
+                LintError(file="phalanx/foo.py", line=1, col=1, code="F401", message="unused")
+            ],
+        )
         with patch("subprocess.run", side_effect=FileNotFoundError):
             result = validate_fix(parsed, tmp_path)
         assert result.passed is False
@@ -102,53 +130,79 @@ def test_tool_not_found_returns_fail(self, tmp_path):
 
     def test_timeout_returns_fail(self, tmp_path):
         import subprocess
-        parsed = _parsed("ruff", lint_errors=[
-            LintError(file="phalanx/foo.py", line=1, col=1, code="F401", message="unused")
-        ])
-        with patch("subprocess.run", side_effect=subprocess.TimeoutExpired(cmd="ruff", timeout=120)):
+
+        parsed = _parsed(
+            "ruff",
+            lint_errors=[
+                LintError(file="phalanx/foo.py", line=1, col=1, code="F401", message="unused")
+            ],
+        )
+        with patch(
+            "subprocess.run", side_effect=subprocess.TimeoutExpired(cmd="ruff", timeout=120)
+        ):
             result = validate_fix(parsed, tmp_path)
         assert result.passed is False
         assert "timed out" in result.output
 
     def test_mypy_pass(self, tmp_path):
-        parsed = _parsed("mypy", type_errors=[
-            TypeError(file="phalanx/foo.py", line=5, col=0, message="type error")
-        ])
+        parsed = _parsed(
+            "mypy",
+            type_errors=[TypeError(file="phalanx/foo.py", line=5, col=0, message="type error")],
+        )
         with patch("subprocess.run", return_value=self._mock_run(0)):
             result = validate_fix(parsed, tmp_path)
         assert result.passed is True
         assert result.tool == "mypy"
 
     def test_mypy_fail(self, tmp_path):
-        parsed = _parsed("mypy", type_errors=[
-            TypeError(file="phalanx/foo.py", line=5, col=0, message="type error")
-        ])
+        parsed = _parsed(
+            "mypy",
+            type_errors=[TypeError(file="phalanx/foo.py", line=5, col=0, message="type error")],
+        )
         with patch("subprocess.run", return_value=self._mock_run(1, "", "phalanx/foo.py:5: error")):
             result = validate_fix(parsed, tmp_path)
         assert result.passed is False
 
     def test_pytest_pass(self, tmp_path):
-        parsed = _parsed("pytest", test_failures=[
-            TestFailure(test_id="tests/unit/test_foo.py::test_bar", file="tests/unit/test_foo.py", message="")
-        ])
+        parsed = _parsed(
+            "pytest",
+            test_failures=[
+                TestFailure(
+                    test_id="tests/unit/test_foo.py::test_bar",
+                    file="tests/unit/test_foo.py",
+                    message="",
+                )
+            ],
+        )
         with patch("subprocess.run", return_value=self._mock_run(0)):
             result = validate_fix(parsed, tmp_path)
         assert result.passed is True
         assert result.tool == "pytest"
 
     def test_pytest_fail(self, tmp_path):
-        parsed = _parsed("pytest", test_failures=[
-            TestFailure(test_id="tests/unit/test_foo.py::test_bar", file="tests/unit/test_foo.py", message="")
-        ])
+        parsed = _parsed(
+            "pytest",
+            test_failures=[
+                TestFailure(
+                    test_id="tests/unit/test_foo.py::test_bar",
+                    file="tests/unit/test_foo.py",
+                    message="",
+                )
+            ],
+        )
         with patch("subprocess.run", return_value=self._mock_run(1, "", "FAILED")):
             result = validate_fix(parsed, tmp_path)
         assert result.passed is False
 
     def test_tool_version_captured(self, tmp_path):
         """tool_version is populated from --version output."""
-        parsed = _parsed("ruff", lint_errors=[
-            LintError(file="phalanx/foo.py", line=1, col=1, code="F401", message="unused")
-        ])
+        parsed = _parsed(
+            "ruff",
+            lint_errors=[
+                LintError(file="phalanx/foo.py", line=1, col=1, code="F401", message="unused")
+            ],
+        )
+
         # First call = --version, subsequent = ruff check
         def side_effect(cmd, **kwargs):
             if "--version" in cmd:
@@ -163,14 +217,19 @@ def side_effect(cmd, **kwargs):
 
     def test_regression_check_fires_on_new_error(self, tmp_path):
         """Regression check catches errors introduced into other files."""
-        from phalanx.ci_fixer.log_parser import parse_log
-
-        original = _parsed("ruff", lint_errors=[
-            LintError(file="phalanx/foo.py", line=1, col=1, code="F401", message="unused")
-        ])
-        fixed_parsed = _parsed("ruff", lint_errors=[
-            LintError(file="phalanx/foo.py", line=1, col=1, code="F401", message="unused")
-        ])
+
+        original = _parsed(
+            "ruff",
+            lint_errors=[
+                LintError(file="phalanx/foo.py", line=1, col=1, code="F401", message="unused")
+            ],
+        )
+        fixed_parsed = _parsed(
+            "ruff",
+            lint_errors=[
+                LintError(file="phalanx/foo.py", line=1, col=1, code="F401", message="unused")
+            ],
+        )
         # Primary check passes (foo.py is clean), but broad check finds a NEW error in bar.py
         call_count = {"n": 0}
 
@@ -192,12 +251,20 @@ def side_effect(cmd, **kwargs):
 
     def test_regression_check_skips_pre_existing_errors(self, tmp_path):
         """Pre-existing errors in original_parsed are not counted as regressions."""
-        original = _parsed("ruff", lint_errors=[
-            LintError(file="phalanx/bar.py", line=5, col=1, code="E501", message="line too long")
-        ])
-        fixed_parsed = _parsed("ruff", lint_errors=[
-            LintError(file="phalanx/foo.py", line=1, col=1, code="F401", message="unused")
-        ])
+        original = _parsed(
+            "ruff",
+            lint_errors=[
+                LintError(
+                    file="phalanx/bar.py", line=5, col=1, code="E501", message="line too long"
+                )
+            ],
+        )
+        fixed_parsed = _parsed(
+            "ruff",
+            lint_errors=[
+                LintError(file="phalanx/foo.py", line=1, col=1, code="F401", message="unused")
+            ],
+        )
 
         def side_effect(cmd, **kwargs):
             if "--version" in cmd:
diff --git a/tests/unit/test_ci_webhooks_unit.py b/tests/unit/test_ci_webhooks_unit.py
index b714690d..60ba6dc7 100644
--- a/tests/unit/test_ci_webhooks_unit.py
+++ b/tests/unit/test_ci_webhooks_unit.py
@@ -15,6 +15,7 @@
 from phalanx.api.routes.ci_webhooks import (
     _parse_repo_name,
     _verify_buildkite_signature,
+    _verify_circleci_signature,
     _verify_github_signature,
 )
 from phalanx.ci_fixer.log_fetcher import (
@@ -222,36 +223,236 @@ def test_head_preserved(self):
         assert "START_MARKER" in result
 
 
-# ── Stub fetchers (CircleCI, Jenkins) ─────────────────────────────────────────
+# ── CircleCI / Jenkins fetcher helpers ─────────────────────────────────────────
 
 from phalanx.ci_fixer.events import CIFailureEvent  # noqa: E402
 
 
-def _make_event():
+def _make_circleci_event(build_id: str = "wf-uuid-1234") -> CIFailureEvent:
     return CIFailureEvent(
         provider="circleci",
         repo_full_name="acme/api",
-        branch="main",
-        commit_sha="abc",
-        build_id="1",
-        build_url="https://ci.example.com/1",
+        branch="fix/my-branch",
+        commit_sha="deadbeef",
+        build_id=build_id,
+        build_url="https://app.circleci.com/pipelines/github/acme/api/1/workflows/wf-uuid-1234",
     )
 
 
-class TestStubFetchers:
+def _make_circleci_client(
+    jobs_payload: dict | None = None,
+    steps_payload: dict | None = None,
+    log_content: str = "",
+    log_is_json: bool = False,
+    workflow_jobs_fail: bool = False,
+    steps_fail: bool = False,
+    log_fetch_fail: bool = False,
+):
+    """Build a mock httpx.AsyncClient for CircleCI API calls."""
+    from unittest.mock import AsyncMock, MagicMock
+
+    client = MagicMock()
+    client.__aenter__ = AsyncMock(return_value=client)
+    client.__aexit__ = AsyncMock(return_value=False)
+
+    responses: list = []
+
+    # Call 1: GET /workflow/{id}/job
+    if workflow_jobs_fail:
+        job_resp = MagicMock()
+        job_resp.raise_for_status.side_effect = Exception("403 Forbidden")
+    else:
+        job_resp = MagicMock()
+        job_resp.raise_for_status = MagicMock()
+        job_resp.json.return_value = jobs_payload or {
+            "items": [
+                {"job_number": 42, "name": "test-job", "status": "failed"},
+            ]
+        }
+    responses.append(job_resp)
+
+    if not workflow_jobs_fail:
+        # Call 2: GET /project/{slug}/job/{number}/steps
+        if steps_fail:
+            steps_resp = MagicMock()
+            steps_resp.raise_for_status.side_effect = Exception("404 Not Found")
+        else:
+            steps_resp = MagicMock()
+            steps_resp.raise_for_status = MagicMock()
+            steps_resp.json.return_value = steps_payload or {
+                "items": [
+                    {
+                        "name": "Run tests",
+                        "actions": [
+                            {
+                                "exit_code": 1,
+                                "failed": True,
+                                "output_url": "https://circle-output.s3.amazonaws.com/out",
+                            }
+                        ],
+                    }
+                ]
+            }
+        responses.append(steps_resp)
+
+        if not steps_fail:
+            # Call 3: GET output_url
+            if log_fetch_fail:
+                log_resp = MagicMock()
+                log_resp.status_code = 500
+            else:
+                log_resp = MagicMock()
+                log_resp.status_code = 200
+                if log_is_json:
+                    log_resp.headers = {"content-type": "application/json"}
+                    log_resp.json.return_value = [{"message": log_content, "type": "out"}]
+                else:
+                    log_resp.headers = {"content-type": "text/plain"}
+                    log_resp.text = log_content
+            responses.append(log_resp)
+
+    client.get = AsyncMock(side_effect=responses)
+    return client
+
+
+class TestCircleCILogFetcher:
+    @pytest.mark.asyncio
+    async def test_fetch_failed_job_plain_text_log(self):
+        """Happy path: failed job with plain-text output → failure section returned."""
+        log_text = "Step 1\nStep 2\nError: assert failed\nStep 4"
+        client = _make_circleci_client(log_content=log_text)
+
+        with patch("phalanx.ci_fixer.log_fetcher.httpx.AsyncClient", return_value=client):
+            result = await CircleCILogFetcher().fetch(_make_circleci_event(), "tok")
+
+        assert "Error: assert failed" in result
+
+    @pytest.mark.asyncio
+    async def test_fetch_failed_job_json_log(self):
+        """CircleCI JSON log format: array of {message, type} objects."""
+        client = _make_circleci_client(
+            log_content="ruff: F401 unused import",
+            log_is_json=True,
+        )
+
+        with patch("phalanx.ci_fixer.log_fetcher.httpx.AsyncClient", return_value=client):
+            result = await CircleCILogFetcher().fetch(_make_circleci_event(), "tok")
+
+        assert "F401" in result
+
+    @pytest.mark.asyncio
+    async def test_fetch_no_failed_jobs(self):
+        """Workflow with no failed jobs → informative message."""
+        client = _make_circleci_client(jobs_payload={"items": []})
+
+        with patch("phalanx.ci_fixer.log_fetcher.httpx.AsyncClient", return_value=client):
+            result = await CircleCILogFetcher().fetch(_make_circleci_event(), "tok")
+
+        assert "no failed jobs" in result
+
+    @pytest.mark.asyncio
+    async def test_fetch_workflow_jobs_api_fails(self):
+        """GET /workflow/jobs raises → no logs retrieved."""
+        client = _make_circleci_client(workflow_jobs_fail=True)
+
+        with patch("phalanx.ci_fixer.log_fetcher.httpx.AsyncClient", return_value=client):
+            result = await CircleCILogFetcher().fetch(_make_circleci_event(), "tok")
+
+        assert "no failed jobs" in result or isinstance(result, str)
+
     @pytest.mark.asyncio
-    async def test_circleci_returns_string(self):
-        fetcher = CircleCILogFetcher()
-        result = await fetcher.fetch(_make_event(), "key")
+    async def test_fetch_steps_api_fails_gracefully(self):
+        """GET /job/{n}/steps raises → no logs retrieved for that job."""
+        client = _make_circleci_client(steps_fail=True)
+
+        with patch("phalanx.ci_fixer.log_fetcher.httpx.AsyncClient", return_value=client):
+            result = await CircleCILogFetcher().fetch(_make_circleci_event(), "tok")
+
+        assert isinstance(result, str)
+
+    @pytest.mark.asyncio
+    async def test_fetch_log_url_fails_gracefully(self):
+        """Output URL fetch returns 500 → no output for that step."""
+        client = _make_circleci_client(log_content="", log_fetch_fail=True)
+
+        with patch("phalanx.ci_fixer.log_fetcher.httpx.AsyncClient", return_value=client):
+            result = await CircleCILogFetcher().fetch(_make_circleci_event(), "tok")
+
+        assert isinstance(result, str)
+
+    @pytest.mark.asyncio
+    async def test_fetch_timedout_job_included(self):
+        """Jobs with status=timedout are treated as failed."""
+        client = _make_circleci_client(
+            jobs_payload={"items": [{"job_number": 7, "name": "slow-build", "status": "timedout"}]},
+            log_content="Timeout: job exceeded 10 minutes",
+        )
+
+        with patch("phalanx.ci_fixer.log_fetcher.httpx.AsyncClient", return_value=client):
+            result = await CircleCILogFetcher().fetch(_make_circleci_event(), "tok")
+
         assert isinstance(result, str)
-        assert len(result) > 0
 
+    @pytest.mark.asyncio
+    async def test_fetch_multiple_failed_jobs_limited_to_three(self):
+        """Up to 3 failed jobs are fetched; extras are silently dropped."""
+        jobs = [{"job_number": i, "name": f"job-{i}", "status": "failed"} for i in range(1, 6)]
+        # Build a client that returns jobs list, then step + log for each of the first 3
+        from unittest.mock import AsyncMock, MagicMock
+
+        client = MagicMock()
+        client.__aenter__ = AsyncMock(return_value=client)
+        client.__aexit__ = AsyncMock(return_value=False)
+
+        jobs_resp = MagicMock()
+        jobs_resp.raise_for_status = MagicMock()
+        jobs_resp.json.return_value = {"items": jobs}
+
+        def _make_steps_resp():
+            r = MagicMock()
+            r.raise_for_status = MagicMock()
+            r.json.return_value = {
+                "items": [
+                    {"name": "run", "actions": [{"exit_code": 1, "output_url": "https://s3/out"}]}
+                ]
+            }
+            return r
+
+        def _make_log_resp():
+            r = MagicMock()
+            r.status_code = 200
+            r.headers = {"content-type": "text/plain"}
+            r.text = "Error: something failed"
+            return r
+
+        # jobs + (steps + log) * 3 = 7 calls
+        responses = [jobs_resp]
+        for _ in range(3):
+            responses.append(_make_steps_resp())
+            responses.append(_make_log_resp())
+
+        client.get = AsyncMock(side_effect=responses)
+
+        with patch("phalanx.ci_fixer.log_fetcher.httpx.AsyncClient", return_value=client):
+            result = await CircleCILogFetcher().fetch(_make_circleci_event(), "tok")
+
+        assert isinstance(result, str)
+        # Exactly 3 job log sections (or fewer, combined into one string)
+        assert result.count("JOB:") <= 3
+
+
+class TestJenkinsLogFetcher:
     @pytest.mark.asyncio
     async def test_jenkins_returns_string(self):
-        fetcher = JenkinsLogFetcher()
-        e = _make_event()
-        e.provider = "jenkins"
-        result = await fetcher.fetch(e, "key")
+        event = CIFailureEvent(
+            provider="jenkins",
+            repo_full_name="acme/api",
+            branch="main",
+            commit_sha="abc",
+            build_id="1",
+            build_url="https://jenkins.example.com/job/1",
+        )
+        result = await JenkinsLogFetcher().fetch(event, "key")
         assert isinstance(result, str)
         assert len(result) > 0
 
@@ -551,7 +752,7 @@ def _make_app():
     from phalanx.api.routes.ci_webhooks import router
 
     app = FastAPI()
-    app.include_router(router)
+    app.include_router(router, prefix="/webhook")
     return app
 
 
@@ -668,16 +869,181 @@ def test_build_finished_dispatches(self):
         assert r.json()["status"] == "skipped"
 
 
-class TestStubWebhookRoutes:
+class TestJenkinsWebhookRoute:
     def setup_method(self):
         self.client = TestClient(_make_app())
 
-    def test_circleci_stub(self):
-        r = self.client.post("/webhook/circleci", content=b"{}")
-        assert r.status_code == 200
-        assert r.json()["status"] == "coming_soon"
-
     def test_jenkins_stub(self):
         r = self.client.post("/webhook/jenkins", content=b"{}")
         assert r.status_code == 200
         assert r.json()["status"] == "coming_soon"
+
+
+# ── _verify_circleci_signature ─────────────────────────────────────────────────
+
+
+class TestVerifyCircleCISignature:
+    def _make_sig(self, body: bytes, secret: str) -> str:
+        digest = hmac.new(secret.encode(), body, hashlib.sha256).hexdigest()
+        return f"v1={digest}"
+
+    def test_valid_signature(self):
+        body = b'{"type": "workflow-completed"}'
+        secret = "circle-secret"
+        sig = self._make_sig(body, secret)
+        assert _verify_circleci_signature(body, sig, secret) is True
+
+    def test_invalid_signature(self):
+        body = b'{"type": "workflow-completed"}'
+        assert _verify_circleci_signature(body, "v1=invalidsig", "secret") is False
+
+    def test_no_secret_always_passes(self):
+        assert _verify_circleci_signature(b"anything", "", "") is True
+        assert _verify_circleci_signature(b"anything", "v1=bad", "") is True
+
+    def test_tampered_body_fails(self):
+        body = b'{"type": "workflow-completed"}'
+        secret = "my-secret"
+        sig = self._make_sig(body, secret)
+        tampered = b'{"type": "job-completed"}'
+        assert _verify_circleci_signature(tampered, sig, secret) is False
+
+    def test_empty_signature_with_secret_fails(self):
+        assert _verify_circleci_signature(b"data", "", "some-secret") is False
+
+
+# ── CircleCI webhook route ─────────────────────────────────────────────────────
+
+
+def _circleci_payload(
+    event_type: str = "workflow-completed",
+    status: str = "failed",
+    branch: str = "fix/my-branch",
+    repo_url: str = "https://github.com/acme/api",
+    commit_sha: str = "abc123",
+    workflow_id: str = "wf-uuid-001",
+    pipeline_number: int = 10,
+    pr_author: str | None = "dev-user",
+) -> dict:
+    return {
+        "type": event_type,
+        "workflow": {
+            "id": workflow_id,
+            "name": "build-and-test",
+            "status": status,
+        },
+        "pipeline": {
+            "id": "pipe-uuid",
+            "number": pipeline_number,
+            "vcs": {
+                "origin_repository_url": repo_url,
+                "branch": branch,
+                "revision": commit_sha,
+                "commit": {
+                    "subject": "fix: update deps",
+                    "author": {"login": pr_author} if pr_author else {},
+                },
+            },
+        },
+        "project": {"id": "proj-uuid", "name": "api", "slug": "github/acme/api"},
+        "organization": {"name": "acme"},
+    }
+
+
+class TestCircleCIWebhookRoutes:
+    def setup_method(self):
+        self.client = TestClient(_make_app())
+
+    def _post(self, payload: dict, sig: str = "") -> object:
+        return self.client.post(
+            "/webhook/circleci",
+            content=_json.dumps(payload).encode(),
+            headers={
+                "circleci-signature": sig,
+                "content-type": "application/json",
+            },
+        )
+
+    def test_non_workflow_event_is_ignored(self):
+        r = self._post(_circleci_payload(event_type="job-completed"))
+        assert r.status_code == 200
+        assert r.json()["status"] == "ignored"
+
+    def test_successful_workflow_is_ignored(self):
+        r = self._post(_circleci_payload(status="success"))
+        assert r.status_code == 200
+        assert r.json()["status"] == "ignored"
+
+    def test_workflow_on_hold_is_ignored(self):
+        r = self._post(_circleci_payload(status="on_hold"))
+        assert r.status_code == 200
+        assert r.json()["status"] == "ignored"
+
+    def test_failed_workflow_dispatches(self):
+        with _patch("phalanx.api.routes.ci_webhooks._dispatch_ci_fix", return_value=None):
+            r = self._post(_circleci_payload(status="failed"))
+        assert r.status_code == 200
+        assert r.json()["status"] == "skipped"  # _dispatch_ci_fix returned None
+
+    def test_error_workflow_dispatches(self):
+        """'error' is also a failed state."""
+        with _patch("phalanx.api.routes.ci_webhooks._dispatch_ci_fix", return_value=None):
+            r = self._post(_circleci_payload(status="error"))
+        assert r.status_code == 200
+        assert r.json()["status"] == "skipped"
+
+    def test_unparseable_repo_is_skipped(self):
+        payload = _circleci_payload(
+            status="failed",
+            repo_url="https://gitlab.com/acme/api",  # not github
+        )
+        # Remove project slug so fallback also fails
+        payload["project"] = {"id": "x", "name": "api", "slug": "gitlab/acme/api"}
+        r = self._post(payload)
+        assert r.status_code == 200
+        assert r.json()["status"] == "skipped"
+        assert "cannot_parse_repo" in r.json()["reason"]
+
+    def test_repo_from_project_slug_fallback(self):
+        """When VCS URL is missing, repo is parsed from project.slug."""
+        payload = _circleci_payload(status="failed", repo_url="")
+        payload["project"] = {"slug": "github/acme/api"}
+        with _patch("phalanx.api.routes.ci_webhooks._dispatch_ci_fix", return_value=None):
+            r = self._post(payload)
+        assert r.status_code == 200
+        assert r.json()["status"] == "skipped"
+
+    def test_pr_number_parsed_from_branch(self):
+        """Branch 'pull/42' → pr_number=42."""
+        from unittest.mock import AsyncMock, MagicMock
+
+        captured = {}
+
+        async def capture_dispatch(event):
+            captured["event"] = event
+            return None
+
+        with _patch(
+            "phalanx.api.routes.ci_webhooks._dispatch_ci_fix",
+            side_effect=capture_dispatch,
+        ):
+            self._post(_circleci_payload(status="failed", branch="pull/42"))
+
+        assert captured.get("event") is not None
+        assert captured["event"].pr_number == 42
+
+    def test_invalid_signature_returns_401(self):
+        body = _json.dumps(_circleci_payload(status="failed")).encode()
+        with _patch("phalanx.api.routes.ci_webhooks.settings") as mock_settings:
+            mock_settings.circleci_webhook_secret = "real-secret"
+            mock_settings.buildkite_webhook_token = ""
+            mock_settings.github_webhook_secret = ""
+            r = self.client.post(
+                "/webhook/circleci",
+                content=body,
+                headers={
+                    "circleci-signature": "v1=invalidsignature",
+                    "content-type": "application/json",
+                },
+            )
+        assert r.status_code == 401
diff --git a/tests/unit/test_coverage_boost.py b/tests/unit/test_coverage_boost.py
index 2756992c..23d54dd8 100644
--- a/tests/unit/test_coverage_boost.py
+++ b/tests/unit/test_coverage_boost.py
@@ -13,13 +13,12 @@
 from __future__ import annotations
 
 import json
-from datetime import UTC, datetime, timedelta
+from datetime import UTC, datetime
 from pathlib import Path
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
-
 # ══════════════════════════════════════════════════════════════════════════════
 # product_manager.py
 # ══════════════════════════════════════════════════════════════════════════════
@@ -76,16 +75,30 @@ async def test_product_manager_execute_for_work_order_success():
     work_order.title = "Build a blog"
     work_order.description = "A simple blogging platform"
 
-    llm_response = json.dumps({
-        "app_type": "web",
-        "tech_stack": "nextjs",
-        "epics": [
-            {"title": "Infrastructure", "description": "DB + auth", "sequence_num": 1, "estimated_complexity": 3},
-            {"title": "Frontend", "description": "React pages", "sequence_num": 2, "estimated_complexity": 2},
-        ],
-        "user_stories": ["As a user I can write posts"],
-        "acceptance_criteria": ["Given I am logged in, When I click New Post, Then I see the editor"],
-    })
+    llm_response = json.dumps(
+        {
+            "app_type": "web",
+            "tech_stack": "nextjs",
+            "epics": [
+                {
+                    "title": "Infrastructure",
+                    "description": "DB + auth",
+                    "sequence_num": 1,
+                    "estimated_complexity": 3,
+                },
+                {
+                    "title": "Frontend",
+                    "description": "React pages",
+                    "sequence_num": 2,
+                    "estimated_complexity": 2,
+                },
+            ],
+            "user_stories": ["As a user I can write posts"],
+            "acceptance_criteria": [
+                "Given I am logged in, When I click New Post, Then I see the editor"
+            ],
+        }
+    )
 
     mock_session = AsyncMock()
     mock_session.add = MagicMock()
@@ -199,9 +212,12 @@ async def test_verifier_execute_task_success():
     """execute_task creates agent and runs it."""
     from phalanx.agents.verifier import execute_task
 
-    with patch("phalanx.agents.verifier.VerifierAgent") as MockAgent, \
-         patch("phalanx.agents.verifier.asyncio.run") as mock_run:
+    with (
+        patch("phalanx.agents.verifier.VerifierAgent") as MockAgent,
+        patch("phalanx.agents.verifier.asyncio.run") as mock_run,
+    ):
         from phalanx.agents.base import AgentResult
+
         mock_instance = MagicMock()
         mock_instance.execute.return_value = AgentResult(success=True, output={})
         MockAgent.return_value = mock_instance
@@ -283,12 +299,17 @@ async def test_verifier_execute_build_errors():
     mock_profile = MagicMock()
     mock_profile.build_cmd = "npm run build"
 
-    with patch("phalanx.agents.verifier.get_db", return_value=mock_ctx), \
-         patch("phalanx.agents.verifier.settings") as mock_settings, \
-         patch("phalanx.agents.verifier.detect_tech_stack", return_value="nextjs"), \
-         patch("phalanx.agents.verifier.get_profile", return_value=mock_profile), \
-         patch("phalanx.agents.verifier.run_profile_checks", return_value=["build failed: missing file"]), \
-         patch("phalanx.agents.verifier.merge_workspace", return_value=mock_merged_dir):
+    with (
+        patch("phalanx.agents.verifier.get_db", return_value=mock_ctx),
+        patch("phalanx.agents.verifier.settings") as mock_settings,
+        patch("phalanx.agents.verifier.detect_tech_stack", return_value="nextjs"),
+        patch("phalanx.agents.verifier.get_profile", return_value=mock_profile),
+        patch(
+            "phalanx.agents.verifier.run_profile_checks",
+            return_value=["build failed: missing file"],
+        ),
+        patch("phalanx.agents.verifier.merge_workspace", return_value=mock_merged_dir),
+    ):
         mock_settings.git_workspace = "/tmp/forge"
         result = await agent.execute()
 
@@ -437,8 +458,12 @@ async def test_poll_all_pending_no_runs():
     mock_ctx.__aenter__ = AsyncMock(return_value=mock_session)
     mock_ctx.__aexit__ = AsyncMock(return_value=None)
 
-    with patch("phalanx.ci_fixer.outcome_tracker.get_db", return_value=mock_ctx), \
-         patch("phalanx.ci_fixer.outcome_tracker._process_run", new_callable=AsyncMock) as mock_process:
+    with (
+        patch("phalanx.ci_fixer.outcome_tracker.get_db", return_value=mock_ctx),
+        patch(
+            "phalanx.ci_fixer.outcome_tracker._process_run", new_callable=AsyncMock
+        ) as mock_process,
+    ):
         await _poll_all_pending()
 
     mock_process.assert_not_called()
@@ -457,10 +482,11 @@ def test_poll_fix_outcomes_reraises():
     """poll_fix_outcomes re-raises on exception."""
     from phalanx.ci_fixer.outcome_tracker import poll_fix_outcomes
 
-    with patch("phalanx.ci_fixer.outcome_tracker.asyncio.run",
-               side_effect=RuntimeError("boom")):
-        with pytest.raises(RuntimeError, match="boom"):
-            poll_fix_outcomes()
+    with (
+        patch("phalanx.ci_fixer.outcome_tracker.asyncio.run", side_effect=RuntimeError("boom")),
+        pytest.raises(RuntimeError, match="boom"),
+    ):
+        poll_fix_outcomes()
 
 
 # ══════════════════════════════════════════════════════════════════════════════
@@ -509,8 +535,10 @@ def test_validator_subprocess_error(tmp_path):
     (tmp_path / "src").mkdir()
     (tmp_path / "src" / "foo.py").write_text("import os\n")
 
-    with patch("shutil.which", return_value="/usr/bin/ruff"), \
-         patch("subprocess.run", side_effect=FileNotFoundError("ruff: not found")):
+    with (
+        patch("shutil.which", return_value="/usr/bin/ruff"),
+        patch("subprocess.run", side_effect=FileNotFoundError("ruff: not found")),
+    ):
         result = validate_fix(parsed, tmp_path)
 
     assert result.passed is False
@@ -593,12 +621,17 @@ async def test_run_scan_empty_findings_no_comment():
     """_run_scan with empty findings → no comment posted."""
     from phalanx.ci_fixer.proactive_scanner import _run_scan
 
-    with patch("phalanx.ci_fixer.proactive_scanner.scan_pr_for_patterns",
-               new_callable=AsyncMock, return_value=[]), \
-         patch("phalanx.ci_fixer.proactive_scanner._post_comment",
-               new_callable=AsyncMock) as mock_post, \
-         patch("phalanx.ci_fixer.proactive_scanner._record_scan",
-               new_callable=AsyncMock):
+    with (
+        patch(
+            "phalanx.ci_fixer.proactive_scanner.scan_pr_for_patterns",
+            new_callable=AsyncMock,
+            return_value=[],
+        ),
+        patch(
+            "phalanx.ci_fixer.proactive_scanner._post_comment", new_callable=AsyncMock
+        ) as mock_post,
+        patch("phalanx.ci_fixer.proactive_scanner._record_scan", new_callable=AsyncMock),
+    ):
         await _run_scan("acme/backend", 1, "abc", "token")
 
     mock_post.assert_not_called()
@@ -632,8 +665,10 @@ async def test_scan_pr_mypy_patterns():
     mock_ctx.__aenter__ = AsyncMock(return_value=mock_session)
     mock_ctx.__aexit__ = AsyncMock(return_value=None)
 
-    with patch("httpx.AsyncClient", return_value=mock_client), \
-         patch("phalanx.ci_fixer.proactive_scanner.get_db", return_value=mock_ctx):
+    with (
+        patch("httpx.AsyncClient", return_value=mock_client),
+        patch("phalanx.ci_fixer.proactive_scanner.get_db", return_value=mock_ctx),
+    ):
         findings = await scan_pr_for_patterns("acme/backend", 1, "abc", "token")
 
     assert len(findings) >= 0  # At minimum doesn't crash
@@ -657,10 +692,11 @@ def test_promote_patterns_reraises():
     """promote_patterns re-raises on exception."""
     from phalanx.ci_fixer.pattern_promoter import promote_patterns
 
-    with patch("phalanx.ci_fixer.pattern_promoter.asyncio.run",
-               side_effect=RuntimeError("boom")):
-        with pytest.raises(RuntimeError, match="boom"):
-            promote_patterns()
+    with (
+        patch("phalanx.ci_fixer.pattern_promoter.asyncio.run", side_effect=RuntimeError("boom")),
+        pytest.raises(RuntimeError, match="boom"),
+    ):
+        promote_patterns()
 
 
 # ══════════════════════════════════════════════════════════════════════════════
diff --git a/tests/unit/test_coverage_boost2.py b/tests/unit/test_coverage_boost2.py
index 5bbc638a..22612a76 100644
--- a/tests/unit/test_coverage_boost2.py
+++ b/tests/unit/test_coverage_boost2.py
@@ -9,13 +9,10 @@
 
 from __future__ import annotations
 
-import json
-from pathlib import Path
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
-
 # ══════════════════════════════════════════════════════════════════════════════
 # release.py
 # ══════════════════════════════════════════════════════════════════════════════
@@ -75,8 +72,14 @@ async def test_release_execute_github_skipped():
     agent._load_task_summaries = AsyncMock(return_value=[])
     agent._audit = AsyncMock()
 
-    mock_notes = {"title": "Release X", "summary": "X was built", "changes": [], "testing": "passed",
-                  "rollback": "revert", "breaking_changes": []}
+    mock_notes = {
+        "title": "Release X",
+        "summary": "X was built",
+        "changes": [],
+        "testing": "passed",
+        "rollback": "revert",
+        "breaking_changes": [],
+    }
 
     mock_session = AsyncMock()
     mock_session.execute = AsyncMock(return_value=MagicMock())
@@ -85,10 +88,14 @@ async def test_release_execute_github_skipped():
     mock_ctx.__aenter__ = AsyncMock(return_value=mock_session)
     mock_ctx.__aexit__ = AsyncMock(return_value=None)
 
-    with patch("phalanx.agents.release.get_db", return_value=mock_ctx), \
-         patch("phalanx.agents.release.settings") as mock_settings, \
-         patch.object(agent, "_generate_release_notes", new_callable=AsyncMock, return_value=mock_notes), \
-         patch.object(agent, "_persist_artifact", new_callable=AsyncMock):
+    with (
+        patch("phalanx.agents.release.get_db", return_value=mock_ctx),
+        patch("phalanx.agents.release.settings") as mock_settings,
+        patch.object(
+            agent, "_generate_release_notes", new_callable=AsyncMock, return_value=mock_notes
+        ),
+        patch.object(agent, "_persist_artifact", new_callable=AsyncMock),
+    ):
         mock_settings.github_token = ""  # no token → skip
         result = await agent.execute()
 
@@ -137,12 +144,16 @@ async def test_release_create_github_pr_import_error():
     mock_run.active_branch = "feature/x"
     mock_run.project_id = "proj-1"
 
-    with patch("phalanx.agents.release.settings") as mock_settings, \
-         patch("phalanx.agents.release.get_db"):
+    with (
+        patch("phalanx.agents.release.settings") as mock_settings,
+        patch("phalanx.agents.release.get_db"),
+    ):
         mock_settings.github_token = "ghp_test"
         # Simulate import error
         with patch.dict("sys.modules", {"github": None}):
-            result = await agent._create_github_pr(mock_run, None, {"changes": [], "breaking_changes": []})
+            result = await agent._create_github_pr(
+                mock_run, None, {"changes": [], "breaking_changes": []}
+            )
 
     # ImportError → returns {}
     assert result == {} or "error" in result
@@ -171,15 +182,23 @@ async def test_release_create_github_pr_exception():
     mock_github = MagicMock()
     mock_github.Github.side_effect = Exception("API error")
 
-    with patch("phalanx.agents.release.settings") as mock_settings, \
-         patch("phalanx.agents.release.get_db", return_value=mock_ctx), \
-         patch.dict("sys.modules", {"github": mock_github}):
+    with (
+        patch("phalanx.agents.release.settings") as mock_settings,
+        patch("phalanx.agents.release.get_db", return_value=mock_ctx),
+        patch.dict("sys.modules", {"github": mock_github}),
+    ):
         mock_settings.github_token = "ghp_test"
         result = await agent._create_github_pr(
             mock_run,
             None,
-            {"summary": "x", "changes": [], "testing": "y", "rollback": "z",
-             "breaking_changes": [], "title": "Release X"},
+            {
+                "summary": "x",
+                "changes": [],
+                "testing": "y",
+                "rollback": "z",
+                "breaking_changes": [],
+                "title": "Release X",
+            },
         )
 
     assert "error" in result
@@ -282,13 +301,19 @@ async def test_integration_wiring_execute_with_builder_tasks(tmp_path):
     mock_profile = MagicMock()
     mock_profile.integration_pattern = "fastapi-router"
 
-    with patch("phalanx.agents.integration_wiring.get_db", return_value=mock_ctx), \
-         patch("phalanx.agents.integration_wiring.settings") as s, \
-         patch("phalanx.agents.integration_wiring.merge_workspace", return_value=tmp_path), \
-         patch("phalanx.agents.integration_wiring.detect_tech_stack", return_value="fastapi"), \
-         patch("phalanx.agents.integration_wiring.get_profile", return_value=mock_profile), \
-         patch.object(agent, "_wire", new_callable=AsyncMock,
-                      return_value={"status": "ok", "files_wired": ["main.py"], "notes": []}):
+    with (
+        patch("phalanx.agents.integration_wiring.get_db", return_value=mock_ctx),
+        patch("phalanx.agents.integration_wiring.settings") as s,
+        patch("phalanx.agents.integration_wiring.merge_workspace", return_value=tmp_path),
+        patch("phalanx.agents.integration_wiring.detect_tech_stack", return_value="fastapi"),
+        patch("phalanx.agents.integration_wiring.get_profile", return_value=mock_profile),
+        patch.object(
+            agent,
+            "_wire",
+            new_callable=AsyncMock,
+            return_value={"status": "ok", "files_wired": ["main.py"], "notes": []},
+        ),
+    ):
         s.git_workspace = str(tmp_path)
         result = await agent.execute()
 
@@ -421,8 +446,10 @@ async def test_commit_to_safe_branch_push_success(tmp_path):
         mock_remote = MagicMock()
         mock_repo.remotes = [mock_remote]
 
-        with patch("git.Repo", return_value=mock_repo), \
-             patch("phalanx.agents.ci_fixer.settings") as mock_settings:
+        with (
+            patch("git.Repo", return_value=mock_repo),
+            patch("phalanx.agents.ci_fixer.settings") as mock_settings,
+        ):
             mock_settings.git_author_name = "FORGE"
             mock_settings.git_author_email = "forge@phalanx.dev"
             result = await agent._commit_to_safe_branch(
diff --git a/tests/unit/test_coverage_boost3.py b/tests/unit/test_coverage_boost3.py
index 7f759eb7..a7a7f545 100644
--- a/tests/unit/test_coverage_boost3.py
+++ b/tests/unit/test_coverage_boost3.py
@@ -14,7 +14,6 @@
 
 import pytest
 
-
 # ══════════════════════════════════════════════════════════════════════════════
 # verification_profiles.py
 # ══════════════════════════════════════════════════════════════════════════════
@@ -133,7 +132,11 @@ def test_build_cmd_error_extracted(self, tmp_path):
         from phalanx.agents.verification_profiles import run_profile_checks
 
         profile = MagicMock()
-        profile.build_cmd = ["python", "-c", "import sys; print('error: build failed', file=sys.stderr); sys.exit(1)"]
+        profile.build_cmd = [
+            "python",
+            "-c",
+            "import sys; print('error: build failed', file=sys.stderr); sys.exit(1)",
+        ]
         profile.typecheck_cmd = None
         profile.lint_cmd = None
         profile.test_cmd = None
@@ -270,10 +273,10 @@ def test_run_helper_file_not_found(self, tmp_path):
 
     def test_run_helper_timeout(self, tmp_path):
         """_run catches TimeoutExpired."""
-        from phalanx.agents.verification_profiles import _run
-
         import subprocess
 
+        from phalanx.agents.verification_profiles import _run
+
         with patch("subprocess.run", side_effect=subprocess.TimeoutExpired("cmd", 1)):
             success, stdout, stderr = _run(["sleep", "999"], tmp_path, timeout=1)
 
@@ -367,14 +370,23 @@ async def test_ux_execute_success():
     mock_ctx.__aenter__ = AsyncMock(return_value=mock_session)
     mock_ctx.__aexit__ = AsyncMock(return_value=None)
 
-    with patch("phalanx.agents.ux_designer.get_db", return_value=mock_ctx), \
-         patch.object(agent, "_load_planner_context", new_callable=AsyncMock, return_value=""), \
-         patch.object(agent, "_generate_design", new_callable=AsyncMock, return_value=mock_design_str), \
-         patch.object(agent, "_self_check_design", return_value="self-check passed"), \
-         patch.object(agent, "_write_design_handoff", new_callable=AsyncMock, return_value="build with modern style"), \
-         patch.object(agent, "_persist_design_artifact", new_callable=AsyncMock), \
-         patch.object(agent, "_trace", new_callable=AsyncMock), \
-         patch("pathlib.Path.write_text"):
+    with (
+        patch("phalanx.agents.ux_designer.get_db", return_value=mock_ctx),
+        patch.object(agent, "_load_planner_context", new_callable=AsyncMock, return_value=""),
+        patch.object(
+            agent, "_generate_design", new_callable=AsyncMock, return_value=mock_design_str
+        ),
+        patch.object(agent, "_self_check_design", return_value="self-check passed"),
+        patch.object(
+            agent,
+            "_write_design_handoff",
+            new_callable=AsyncMock,
+            return_value="build with modern style",
+        ),
+        patch.object(agent, "_persist_design_artifact", new_callable=AsyncMock),
+        patch.object(agent, "_trace", new_callable=AsyncMock),
+        patch("pathlib.Path.write_text"),
+    ):
         result = await agent.execute()
 
     assert result.success is True
@@ -402,25 +414,29 @@ async def test_ux_generate_design():
     mock_wo.title = "My App"
     mock_wo.description = "An app"
 
-    design_response = json.dumps({
-        "design_spec": {
-            "brand": {"personality": "modern"},
-            "color": {"primary": "#000"},
-            "typography": {},
-            "spacing": {},
-            "components": {},
-            "logo": "",
-            "ux_patterns": {},
-            "accessibility": {},
-        },
-        "handoff_summary": "Modern design.",
-    })
+    json.dumps(
+        {
+            "design_spec": {
+                "brand": {"personality": "modern"},
+                "color": {"primary": "#000"},
+                "typography": {},
+                "spacing": {},
+                "components": {},
+                "logo": "",
+                "ux_patterns": {},
+                "accessibility": {},
+            },
+            "handoff_summary": "Modern design.",
+        }
+    )
 
     if hasattr(agent, "_generate_design"):
         mock_task = MagicMock()
         mock_task.title = "My App"
         mock_task.description = "An app"
-        with patch.object(agent, "_call_claude", new_callable=AsyncMock, return_value="# Design\n\nModern."):
+        with patch.object(
+            agent, "_call_claude", new_callable=AsyncMock, return_value="# Design\n\nModern."
+        ):
             result = await agent._generate_design(
                 task=mock_task,
                 app_type="web",
@@ -502,14 +518,16 @@ async def test_release_generate_notes_valid_json():
     mock_wo.title = "Feature X"
     mock_wo.description = "Build X"
 
-    llm_response = json.dumps({
-        "title": "Release Notes: Feature X",
-        "summary": "X was built",
-        "changes": [{"type": "feat", "description": "Added X"}],
-        "testing": "Tests passed",
-        "rollback": "Revert PR",
-        "breaking_changes": [],
-    })
+    llm_response = json.dumps(
+        {
+            "title": "Release Notes: Feature X",
+            "summary": "X was built",
+            "changes": [{"type": "feat", "description": "Added X"}],
+            "testing": "Tests passed",
+            "rollback": "Revert PR",
+            "breaking_changes": [],
+        }
+    )
 
     with patch.object(agent, "_call_claude", return_value=llm_response):
         result = await agent._generate_release_notes(mock_run, mock_wo, [])
diff --git a/tests/unit/test_coverage_boost4.py b/tests/unit/test_coverage_boost4.py
index 03404aaa..3ab3b07a 100644
--- a/tests/unit/test_coverage_boost4.py
+++ b/tests/unit/test_coverage_boost4.py
@@ -15,15 +15,14 @@
 - _remove_root_conftest (lines 821-830)
 - _derive_coverage_source (lines 900-947)
 """
+
 from __future__ import annotations
 
-import asyncio
 from pathlib import Path
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
-
 # ── helpers ──────────────────────────────────────────────────────────────────
 
 
@@ -191,9 +190,7 @@ class TestDeriveCoverageSource:
     def test_shared_top_level_dir(self, tmp_path):
         agent = _make_qa_agent(tmp_path)
         (tmp_path / "app").mkdir()  # must exist as dir
-        context = {
-            "changed_files": ["app/routes.py", "app/models.py", "app/utils.py"]
-        }
+        context = {"changed_files": ["app/routes.py", "app/models.py", "app/utils.py"]}
         result = agent._derive_coverage_source(context)
         assert result == "app"
 
@@ -518,7 +515,9 @@ def test_basic_evidence(self, tmp_path):
         from phalanx.agents.qa import LintResult, QAOutcome, TestSuiteResult
 
         agent = _make_qa_agent(tmp_path)
-        suite = TestSuiteResult(name="test", total=5, passed=5, failed=0, errored=0, skipped=0, duration_seconds=1.0)
+        suite = TestSuiteResult(
+            name="test", total=5, passed=5, failed=0, errored=0, skipped=0, duration_seconds=1.0
+        )
         lint = LintResult(tool="ruff", passed=True, violation_count=0, output="")
         evidence = agent._build_evidence([suite], None, [lint], QAOutcome.PASSED)
         assert evidence["gate"] == "qa"
@@ -529,9 +528,15 @@ def test_evidence_with_coverage(self, tmp_path):
         from phalanx.agents.qa import CoverageResult, QAOutcome, TestSuiteResult
 
         agent = _make_qa_agent(tmp_path)
-        suite = TestSuiteResult(name="test", total=3, passed=3, failed=0, errored=0, skipped=0, duration_seconds=0.5)
+        suite = TestSuiteResult(
+            name="test", total=3, passed=3, failed=0, errored=0, skipped=0, duration_seconds=0.5
+        )
         cov = CoverageResult(
-            line_coverage_pct=80.0, branch_coverage_pct=None, threshold=70.0, threshold_met=True, modules_below_threshold=[]
+            line_coverage_pct=80.0,
+            branch_coverage_pct=None,
+            threshold=70.0,
+            threshold_met=True,
+            modules_below_threshold=[],
         )
         evidence = agent._build_evidence([suite], cov, [], QAOutcome.PASSED)
         assert evidence["summary"]["coverage_pct"] == 80.0
@@ -572,7 +577,9 @@ async def test_persist_artifact_success(tmp_path):
     )
 
     mock_session = AsyncMock()
-    mock_session.execute = AsyncMock(return_value=MagicMock(scalar_one=MagicMock(return_value="proj-1")))
+    mock_session.execute = AsyncMock(
+        return_value=MagicMock(scalar_one=MagicMock(return_value="proj-1"))
+    )
     mock_session.add = MagicMock()
     mock_session.commit = AsyncMock()
     mock_ctx = AsyncMock()
diff --git a/tests/unit/test_coverage_boost5.py b/tests/unit/test_coverage_boost5.py
index b19886dc..9eea8708 100644
--- a/tests/unit/test_coverage_boost5.py
+++ b/tests/unit/test_coverage_boost5.py
@@ -9,15 +9,16 @@
 - phalanx/memory/assembler.py — MemoryAssembler.build()
 - phalanx/memory/reader.py — MemoryReader methods
 """
+
 from __future__ import annotations
 
+import contextlib
 from datetime import UTC, datetime
 from unittest.mock import AsyncMock, MagicMock, patch
 from uuid import uuid4
 
 import pytest
 
-
 # ══════════════════════════════════════════════════════════════════════════════
 # memory/assembler.py
 # ══════════════════════════════════════════════════════════════════════════════
@@ -32,7 +33,15 @@ def _make_decision(self, title="Decision", decision="Do X", rationale="Because Y
         d.rejected_alternatives = alts or []
         return d
 
-    def _make_fact(self, fact_type="tech", title="Fact", body="body", confidence=1.0, relevance=0.9, is_standing=True):
+    def _make_fact(
+        self,
+        fact_type="tech",
+        title="Fact",
+        body="body",
+        confidence=1.0,
+        relevance=0.9,
+        is_standing=True,
+    ):
         f = MagicMock()
         f.fact_type = fact_type
         f.title = title
@@ -52,7 +61,9 @@ def test_build_with_decisions(self):
         from phalanx.memory.assembler import MemoryAssembler
 
         a = MemoryAssembler(max_tokens=4000)
-        d = self._make_decision("Use Postgres", "PostgreSQL as primary DB", "Proven at scale", ["MySQL", "SQLite"])
+        d = self._make_decision(
+            "Use Postgres", "PostgreSQL as primary DB", "Proven at scale", ["MySQL", "SQLite"]
+        )
         result = a.build(decisions=[d])
         assert "Use Postgres" in result
         assert "Project Memory" in result
@@ -226,7 +237,7 @@ def _make_ci_integration_obj():
 
 @pytest.mark.asyncio
 async def test_register_integration_create():
-    from phalanx.api.routes.ci_integrations import register_integration, CIIntegrationCreate
+    from phalanx.api.routes.ci_integrations import CIIntegrationCreate, register_integration
 
     body = CIIntegrationCreate(
         repo_full_name="acme/backend",
@@ -236,7 +247,9 @@ async def test_register_integration_create():
     obj = _make_ci_integration_obj()
 
     mock_session = AsyncMock()
-    mock_session.execute = AsyncMock(return_value=MagicMock(scalar_one_or_none=MagicMock(return_value=None)))
+    mock_session.execute = AsyncMock(
+        return_value=MagicMock(scalar_one_or_none=MagicMock(return_value=None))
+    )
     mock_session.add = MagicMock()
     mock_session.commit = AsyncMock()
     mock_session.refresh = AsyncMock(side_effect=lambda x: None)
@@ -245,7 +258,6 @@ async def test_register_integration_create():
     mock_ctx.__aexit__ = AsyncMock(return_value=None)
 
     # refresh won't return an obj with attributes — so we mock the return value
-    refreshed = obj
     mock_session.refresh = AsyncMock(return_value=None)
     # patch get_db AND capture the integration that was added
     captured = {}
@@ -265,17 +277,15 @@ async def fake_add_and_refresh(x=None):
     with patch("phalanx.api.routes.ci_integrations.get_db", return_value=mock_ctx):
         # This will fail at refresh since the session is mocked
         # Use a simpler approach: just call the route function and catch the error
-        try:
+        with contextlib.suppress(Exception):
             await register_integration(body)
-        except Exception:
-            pass
 
     mock_session.commit.assert_awaited()
 
 
 @pytest.mark.asyncio
 async def test_register_integration_update_existing():
-    from phalanx.api.routes.ci_integrations import register_integration, CIIntegrationCreate
+    from phalanx.api.routes.ci_integrations import CIIntegrationCreate, register_integration
 
     body = CIIntegrationCreate(repo_full_name="acme/backend", github_token="new_token")
     existing = _make_ci_integration_obj()
@@ -291,10 +301,8 @@ async def test_register_integration_update_existing():
     mock_ctx.__aexit__ = AsyncMock(return_value=None)
 
     with patch("phalanx.api.routes.ci_integrations.get_db", return_value=mock_ctx):
-        try:
+        with contextlib.suppress(Exception):
             await register_integration(body)
-        except Exception:
-            pass
 
     assert existing.github_token == "new_token"
 
@@ -337,6 +345,7 @@ async def test_get_integration_found():
 @pytest.mark.asyncio
 async def test_get_integration_not_found():
     from fastapi import HTTPException
+
     from phalanx.api.routes.ci_integrations import get_integration
 
     mock_session = AsyncMock()
@@ -355,7 +364,8 @@ async def test_get_integration_not_found():
 @pytest.mark.asyncio
 async def test_update_integration_not_found():
     from fastapi import HTTPException
-    from phalanx.api.routes.ci_integrations import update_integration, CIIntegrationUpdate
+
+    from phalanx.api.routes.ci_integrations import CIIntegrationUpdate, update_integration
 
     mock_session = AsyncMock()
     mock_session.get = AsyncMock(return_value=None)
@@ -372,7 +382,7 @@ async def test_update_integration_not_found():
 
 @pytest.mark.asyncio
 async def test_update_integration_success():
-    from phalanx.api.routes.ci_integrations import update_integration, CIIntegrationUpdate
+    from phalanx.api.routes.ci_integrations import CIIntegrationUpdate, update_integration
 
     obj = _make_ci_integration_obj()
     mock_session = AsyncMock()
@@ -385,10 +395,8 @@ async def test_update_integration_success():
 
     update = CIIntegrationUpdate(enabled=False, max_attempts=3, auto_commit=False)
     with patch("phalanx.api.routes.ci_integrations.get_db", return_value=mock_ctx):
-        try:
+        with contextlib.suppress(Exception):
             await update_integration(obj.id, update)
-        except Exception:
-            pass
 
     assert obj.enabled is False
     assert obj.max_attempts == 3
@@ -397,6 +405,7 @@ async def test_update_integration_success():
 @pytest.mark.asyncio
 async def test_delete_integration_not_found():
     from fastapi import HTTPException
+
     from phalanx.api.routes.ci_integrations import delete_integration
 
     mock_session = AsyncMock()
diff --git a/tests/unit/test_coverage_push.py b/tests/unit/test_coverage_push.py
index a71fc11e..6b77cf20 100644
--- a/tests/unit/test_coverage_push.py
+++ b/tests/unit/test_coverage_push.py
@@ -14,7 +14,6 @@
 from __future__ import annotations
 
 import json
-from pathlib import Path
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
@@ -42,12 +41,12 @@ def test_plain_json(self):
         assert result == {"key": "value"}
 
     def test_strips_code_fences(self):
-        text = "```json\n{\"key\": \"value\"}\n```"
+        text = '```json\n{"key": "value"}\n```'
         result = _extract_json(text)
         assert result == {"key": "value"}
 
     def test_strips_plain_backtick_fences(self):
-        text = "```\n{\"key\": \"value\"}\n```"
+        text = '```\n{"key": "value"}\n```'
         result = _extract_json(text)
         assert result == {"key": "value"}
 
@@ -91,6 +90,7 @@ def test_out_of_range_defaults_30(self):
 
 def _make_ci_agent():
     from phalanx.agents.ci_fixer import CIFixerAgent
+
     with patch("phalanx.agents.base.BaseAgent.__init__", return_value=None):
         agent = CIFixerAgent.__new__(CIFixerAgent)
         agent.ci_fix_run_id = "run-cov-001"
@@ -123,6 +123,7 @@ async def test_comment_on_pr_success():
     ci_run.branch = "main"
 
     from phalanx.ci_fixer.log_parser import ParsedLog
+
     parsed = ParsedLog(tool="ruff")
 
     mock_client = _mock_http_client(201, {"id": 99})
@@ -152,6 +153,7 @@ async def test_comment_on_pr_failure_does_not_raise():
     ci_run.branch = "main"
 
     from phalanx.ci_fixer.log_parser import ParsedLog
+
     parsed = ParsedLog(tool="ruff")
 
     mock_client = _mock_http_client(403)
@@ -295,8 +297,10 @@ async def test_fetch_logs_calls_fetcher():
     mock_ctx.__aenter__ = AsyncMock(return_value=mock_session)
     mock_ctx.__aexit__ = AsyncMock(return_value=None)
 
-    with patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx), \
-         patch("phalanx.agents.ci_fixer.get_log_fetcher", return_value=mock_fetcher):
+    with (
+        patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx),
+        patch("phalanx.agents.ci_fixer.get_log_fetcher", return_value=mock_fetcher),
+    ):
         result = await agent._fetch_logs(event, integration)
 
     assert result == "raw log content"
@@ -332,8 +336,10 @@ async def test_fetch_logs_returns_fallback_on_error():
     mock_ctx.__aenter__ = AsyncMock(return_value=mock_session)
     mock_ctx.__aexit__ = AsyncMock(return_value=None)
 
-    with patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx), \
-         patch("phalanx.agents.ci_fixer.get_log_fetcher", return_value=mock_fetcher):
+    with (
+        patch("phalanx.agents.ci_fixer.get_db", return_value=mock_ctx),
+        patch("phalanx.agents.ci_fixer.get_log_fetcher", return_value=mock_fetcher),
+    ):
         result = await agent._fetch_logs(event, integration)
 
     # Falls back to cached failure_summary
@@ -347,14 +353,14 @@ async def test_fetch_logs_returns_fallback_on_error():
 async def test_clone_repo_gitpython_missing(tmp_path):
     agent = _make_ci_agent()
 
-    with patch("phalanx.agents.ci_fixer.CIFixerAgent._clone_repo",
-               new_callable=AsyncMock) as mock_clone:
+    with patch("phalanx.agents.ci_fixer.CIFixerAgent._clone_repo", new_callable=AsyncMock):
         # Simulate ImportError path (gitpython not available)
         # Test directly by patching the import inside the method
         pass
 
     # Test directly without patching the method itself
     import builtins
+
     real_import = builtins.__import__
 
     def mock_import(name, *args, **kwargs):
@@ -372,8 +378,11 @@ def mock_import(name, *args, **kwargs):
 async def test_clone_repo_exception_returns_false(tmp_path):
     agent = _make_ci_agent()
 
-    with patch("phalanx.agents.ci_fixer.CIFixerAgent._clone_repo",
-               new_callable=AsyncMock, return_value=False) as mock_clone:
+    with patch(
+        "phalanx.agents.ci_fixer.CIFixerAgent._clone_repo",
+        new_callable=AsyncMock,
+        return_value=False,
+    ):
         result = await agent._clone_repo(tmp_path, "acme/backend", "main", "abc123", "token")
 
     assert result is False
@@ -397,7 +406,7 @@ def test_ruff_with_no_line_number(self):
         assert not result.lint_errors
 
     def test_mypy_error_format(self):
-        log = "src/foo.py:10: error: Argument 1 to \"foo\" has incompatible type\n"
+        log = 'src/foo.py:10: error: Argument 1 to "foo" has incompatible type\n'
         result = parse_log(log)
         # mypy errors should be parsed
         assert result.tool in ("mypy", "unknown") or len(result.type_errors) >= 0
@@ -428,14 +437,17 @@ def test_parsed_log_as_text(self):
 
     def test_has_errors_false_when_empty(self):
         from phalanx.ci_fixer.log_parser import ParsedLog
+
         p = ParsedLog(tool="unknown")
         assert not p.has_errors
 
     def test_has_errors_true_with_lint_error(self):
         from phalanx.ci_fixer.log_parser import LintError, ParsedLog
-        p = ParsedLog(tool="ruff", lint_errors=[
-            LintError(file="f.py", line=1, col=1, code="F401", message="x")
-        ])
+
+        p = ParsedLog(
+            tool="ruff",
+            lint_errors=[LintError(file="f.py", line=1, col=1, code="F401", message="x")],
+        )
         assert p.has_errors
 
 
@@ -445,12 +457,14 @@ def test_has_errors_true_with_lint_error(self):
 class TestAnalystEdgeCases:
     def test_read_files_shim_no_files(self, tmp_path):
         from phalanx.ci_fixer.analyst import RootCauseAnalyst
+
         analyst = RootCauseAnalyst(call_llm=lambda **_: "")
         result = analyst._read_files(tmp_path, [])
         assert "no files found" in result.lower() or isinstance(result, str)
 
     def test_read_files_shim_missing_file(self, tmp_path):
         from phalanx.ci_fixer.analyst import RootCauseAnalyst
+
         analyst = RootCauseAnalyst(call_llm=lambda **_: "")
         result = analyst._read_files(tmp_path, ["nonexistent.py"])
         assert isinstance(result, str)
@@ -458,6 +472,7 @@ def test_read_files_shim_missing_file(self, tmp_path):
     def test_analyze_with_no_errors_returns_low_confidence(self, tmp_path):
         from phalanx.ci_fixer.analyst import RootCauseAnalyst
         from phalanx.ci_fixer.log_parser import ParsedLog
+
         analyst = RootCauseAnalyst(call_llm=lambda **_: "{}")
         plan = analyst.analyze(ParsedLog(tool="unknown"), tmp_path)
         assert plan.confidence == "low"
@@ -475,7 +490,7 @@ def bad_llm(**_):
         analyst = RootCauseAnalyst(call_llm=bad_llm)
         parsed = ParsedLog(
             tool="ruff",
-            lint_errors=[LintError(file="src/foo.py", line=1, col=1, code="F401", message="x")]
+            lint_errors=[LintError(file="src/foo.py", line=1, col=1, code="F401", message="x")],
         )
         plan = analyst.analyze(parsed, tmp_path)
         assert plan.confidence == "low"
@@ -490,7 +505,7 @@ def test_analyze_malformed_json_returns_low_confidence(self, tmp_path):
         analyst = RootCauseAnalyst(call_llm=lambda **_: "not json at all")
         parsed = ParsedLog(
             tool="ruff",
-            lint_errors=[LintError(file="src/foo.py", line=1, col=1, code="F401", message="x")]
+            lint_errors=[LintError(file="src/foo.py", line=1, col=1, code="F401", message="x")],
         )
         plan = analyst.analyze(parsed, tmp_path)
         assert plan.confidence == "low"
@@ -503,6 +518,7 @@ class TestValidatorEdgeCases:
     def test_validate_unknown_tool(self, tmp_path):
         from phalanx.ci_fixer.log_parser import ParsedLog
         from phalanx.ci_fixer.validator import validate_fix
+
         parsed = ParsedLog(tool="unknown_tool")
         result = validate_fix(parsed, tmp_path)
         # Unknown tool → should pass or return a graceful result
@@ -511,9 +527,10 @@ def test_validate_unknown_tool(self, tmp_path):
     def test_validate_ruff_with_empty_workspace(self, tmp_path):
         from phalanx.ci_fixer.log_parser import LintError, ParsedLog
         from phalanx.ci_fixer.validator import validate_fix
+
         parsed = ParsedLog(
             tool="ruff",
-            lint_errors=[LintError(file="src/foo.py", line=1, col=1, code="F401", message="x")]
+            lint_errors=[LintError(file="src/foo.py", line=1, col=1, code="F401", message="x")],
         )
         # Run ruff against empty workspace — ruff not installed in test env → graceful
         result = validate_fix(parsed, tmp_path)
@@ -523,9 +540,10 @@ def test_validate_ruff_with_empty_workspace(self, tmp_path):
     def test_validate_mypy_with_empty_workspace(self, tmp_path):
         from phalanx.ci_fixer.log_parser import ParsedLog, TypeError
         from phalanx.ci_fixer.validator import validate_fix
+
         parsed = ParsedLog(
             tool="mypy",
-            type_errors=[TypeError(file="src/foo.py", line=1, col=0, message="type error")]
+            type_errors=[TypeError(file="src/foo.py", line=1, col=0, message="type error")],
         )
         result = validate_fix(parsed, tmp_path)
         assert hasattr(result, "passed")
@@ -533,13 +551,16 @@ def test_validate_mypy_with_empty_workspace(self, tmp_path):
     def test_validate_pytest_with_empty_workspace(self, tmp_path):
         from phalanx.ci_fixer.log_parser import ParsedLog, TestFailure
         from phalanx.ci_fixer.validator import validate_fix
+
         parsed = ParsedLog(
             tool="pytest",
-            test_failures=[TestFailure(
-                test_id="tests/test_foo.py::test_bar",
-                file="tests/test_foo.py",
-                message="AssertionError"
-            )]
+            test_failures=[
+                TestFailure(
+                    test_id="tests/test_foo.py::test_bar",
+                    file="tests/test_foo.py",
+                    message="AssertionError",
+                )
+            ],
         )
         result = validate_fix(parsed, tmp_path)
         assert hasattr(result, "passed")
diff --git a/tests/unit/test_log_parser_unit.py b/tests/unit/test_log_parser_unit.py
index 59cf33ac..3cfa856c 100644
--- a/tests/unit/test_log_parser_unit.py
+++ b/tests/unit/test_log_parser_unit.py
@@ -7,8 +7,6 @@
 
 from __future__ import annotations
 
-import pytest
-
 from phalanx.ci_fixer.log_parser import (
     ParsedLog,
     clean_log,
@@ -107,8 +105,7 @@ def test_parses_mypy_error(self):
 
     def test_multiple_mypy_errors(self):
         log = (
-            "src/foo.py:10: error: Item has no attribute\n"
-            "src/bar.py:20: error: Argument of type\n"
+            "src/foo.py:10: error: Item has no attribute\nsrc/bar.py:20: error: Argument of type\n"
         )
         parsed = parse_log(log)
         assert len(parsed.type_errors) == 2
diff --git a/tests/unit/test_outcome_tracker_unit.py b/tests/unit/test_outcome_tracker_unit.py
index 58d5ecf1..debd0be1 100644
--- a/tests/unit/test_outcome_tracker_unit.py
+++ b/tests/unit/test_outcome_tracker_unit.py
@@ -6,7 +6,6 @@
 
 from __future__ import annotations
 
-import json
 import uuid
 from datetime import UTC, datetime, timedelta
 from unittest.mock import AsyncMock, MagicMock, patch
@@ -24,7 +23,6 @@
 )
 from phalanx.db.models import CIFailureFingerprint, CIFixOutcome, CIFixRun
 
-
 # ── Helpers ────────────────────────────────────────────────────────────────────
 
 
@@ -87,8 +85,14 @@ async def test_check_pr_outcome_merged():
     mock_client.__aexit__ = AsyncMock(return_value=None)
     mock_client.get = AsyncMock(return_value=mock_response)
 
-    with patch("phalanx.ci_fixer.outcome_tracker._get_github_token", new_callable=AsyncMock, return_value="ghp_token"), \
-         patch("httpx.AsyncClient", return_value=mock_client):
+    with (
+        patch(
+            "phalanx.ci_fixer.outcome_tracker._get_github_token",
+            new_callable=AsyncMock,
+            return_value="ghp_token",
+        ),
+        patch("httpx.AsyncClient", return_value=mock_client),
+    ):
         result = await _check_pr_outcome(run)
 
     assert result["outcome"] == "merged"
@@ -116,8 +120,14 @@ async def test_check_pr_outcome_closed_unmerged():
     mock_client.__aexit__ = AsyncMock(return_value=None)
     mock_client.get = AsyncMock(return_value=mock_response)
 
-    with patch("phalanx.ci_fixer.outcome_tracker._get_github_token", new_callable=AsyncMock, return_value="ghp_token"), \
-         patch("httpx.AsyncClient", return_value=mock_client):
+    with (
+        patch(
+            "phalanx.ci_fixer.outcome_tracker._get_github_token",
+            new_callable=AsyncMock,
+            return_value="ghp_token",
+        ),
+        patch("httpx.AsyncClient", return_value=mock_client),
+    ):
         result = await _check_pr_outcome(run)
 
     assert result["outcome"] == "closed_unmerged"
@@ -144,8 +154,14 @@ async def test_check_pr_outcome_open():
     mock_client.__aexit__ = AsyncMock(return_value=None)
     mock_client.get = AsyncMock(return_value=mock_response)
 
-    with patch("phalanx.ci_fixer.outcome_tracker._get_github_token", new_callable=AsyncMock, return_value="ghp_token"), \
-         patch("httpx.AsyncClient", return_value=mock_client):
+    with (
+        patch(
+            "phalanx.ci_fixer.outcome_tracker._get_github_token",
+            new_callable=AsyncMock,
+            return_value="ghp_token",
+        ),
+        patch("httpx.AsyncClient", return_value=mock_client),
+    ):
         result = await _check_pr_outcome(run)
 
     assert result["outcome"] == "open"
@@ -164,8 +180,14 @@ async def test_check_pr_outcome_not_found():
     mock_client.__aexit__ = AsyncMock(return_value=None)
     mock_client.get = AsyncMock(return_value=mock_response)
 
-    with patch("phalanx.ci_fixer.outcome_tracker._get_github_token", new_callable=AsyncMock, return_value="ghp_token"), \
-         patch("httpx.AsyncClient", return_value=mock_client):
+    with (
+        patch(
+            "phalanx.ci_fixer.outcome_tracker._get_github_token",
+            new_callable=AsyncMock,
+            return_value="ghp_token",
+        ),
+        patch("httpx.AsyncClient", return_value=mock_client),
+    ):
         result = await _check_pr_outcome(run)
 
     assert result["outcome"] == "not_found"
@@ -176,7 +198,11 @@ async def test_check_pr_outcome_no_token():
     """No GitHub token → returns 'open' without calling GitHub."""
     run = _make_run()
 
-    with patch("phalanx.ci_fixer.outcome_tracker._get_github_token", new_callable=AsyncMock, return_value=None):
+    with patch(
+        "phalanx.ci_fixer.outcome_tracker._get_github_token",
+        new_callable=AsyncMock,
+        return_value=None,
+    ):
         result = await _check_pr_outcome(run)
 
     assert result["outcome"] == "open"
@@ -187,8 +213,14 @@ async def test_check_pr_outcome_network_error():
     """Network error → returns 'open' without raising."""
     run = _make_run()
 
-    with patch("phalanx.ci_fixer.outcome_tracker._get_github_token", new_callable=AsyncMock, return_value="ghp_token"), \
-         patch("httpx.AsyncClient", side_effect=Exception("connection refused")):
+    with (
+        patch(
+            "phalanx.ci_fixer.outcome_tracker._get_github_token",
+            new_callable=AsyncMock,
+            return_value="ghp_token",
+        ),
+        patch("httpx.AsyncClient", side_effect=Exception("connection refused")),
+    ):
         result = await _check_pr_outcome(run)
 
     assert result["outcome"] == "open"
@@ -214,8 +246,12 @@ async def test_record_outcome_writes_row():
         await _record_outcome(
             run,
             poll_number=1,
-            outcome={"outcome": "merged", "pr_state": "closed",
-                     "merged_at": datetime.now(UTC), "closed_at": None},
+            outcome={
+                "outcome": "merged",
+                "pr_state": "closed",
+                "merged_at": datetime.now(UTC),
+                "closed_at": None,
+            },
         )
 
     mock_session.add.assert_called_once()
@@ -333,12 +369,21 @@ async def test_process_run_poll1_due():
     run = _make_run(created_hours_ago=5.0)
     now = datetime.now(UTC)
 
-    with patch("phalanx.ci_fixer.outcome_tracker._check_pr_outcome", new_callable=AsyncMock) as mock_check, \
-         patch("phalanx.ci_fixer.outcome_tracker._record_outcome", new_callable=AsyncMock) as mock_record, \
-         patch("phalanx.ci_fixer.outcome_tracker._update_fingerprint", new_callable=AsyncMock) as mock_update, \
-         patch("phalanx.ci_fixer.outcome_tracker._mark_outcome_checked", new_callable=AsyncMock) as mock_mark, \
-         patch("phalanx.ci_fixer.outcome_tracker.get_db") as mock_db:
-
+    with (
+        patch(
+            "phalanx.ci_fixer.outcome_tracker._check_pr_outcome", new_callable=AsyncMock
+        ) as mock_check,
+        patch(
+            "phalanx.ci_fixer.outcome_tracker._record_outcome", new_callable=AsyncMock
+        ) as mock_record,
+        patch(
+            "phalanx.ci_fixer.outcome_tracker._update_fingerprint", new_callable=AsyncMock
+        ) as mock_update,
+        patch(
+            "phalanx.ci_fixer.outcome_tracker._mark_outcome_checked", new_callable=AsyncMock
+        ) as mock_mark,
+        patch("phalanx.ci_fixer.outcome_tracker.get_db") as mock_db,
+    ):
         # No polls done yet
         mock_result = MagicMock()
         mock_result.all.return_value = []
@@ -350,8 +395,10 @@ async def test_process_run_poll1_due():
         mock_db.return_value = mock_ctx
 
         mock_check.return_value = {
-            "outcome": "merged", "pr_state": "closed",
-            "merged_at": datetime.now(UTC), "closed_at": None
+            "outcome": "merged",
+            "pr_state": "closed",
+            "merged_at": datetime.now(UTC),
+            "closed_at": None,
         }
 
         await _process_run(run, now)
@@ -379,7 +426,9 @@ async def test_process_run_all_polls_done():
         mock_ctx.__aexit__ = AsyncMock(return_value=None)
         mock_db.return_value = mock_ctx
 
-        with patch("phalanx.ci_fixer.outcome_tracker._check_pr_outcome", new_callable=AsyncMock) as mock_check:
+        with patch(
+            "phalanx.ci_fixer.outcome_tracker._check_pr_outcome", new_callable=AsyncMock
+        ) as mock_check:
             await _process_run(run, now)
 
     # Nothing new to check
@@ -393,7 +442,9 @@ async def test_process_run_no_created_at():
     run.created_at = None
     now = datetime.now(UTC)
 
-    with patch("phalanx.ci_fixer.outcome_tracker._check_pr_outcome", new_callable=AsyncMock) as mock_check:
+    with patch(
+        "phalanx.ci_fixer.outcome_tracker._check_pr_outcome", new_callable=AsyncMock
+    ) as mock_check:
         await _process_run(run, now)
 
     mock_check.assert_not_called()
@@ -441,8 +492,10 @@ async def broken_process(run, now):
         call_count["n"] += 1
         raise RuntimeError("simulated DB error")
 
-    with patch("phalanx.ci_fixer.outcome_tracker.get_db", return_value=mock_ctx), \
-         patch("phalanx.ci_fixer.outcome_tracker._process_run", side_effect=broken_process):
+    with (
+        patch("phalanx.ci_fixer.outcome_tracker.get_db", return_value=mock_ctx),
+        patch("phalanx.ci_fixer.outcome_tracker._process_run", side_effect=broken_process),
+    ):
         await _poll_all_pending()
 
     # Both runs were attempted despite the first one failing
diff --git a/tests/unit/test_sandbox_pool.py b/tests/unit/test_sandbox_pool.py
new file mode 100644
index 00000000..10bf3dca
--- /dev/null
+++ b/tests/unit/test_sandbox_pool.py
@@ -0,0 +1,887 @@
+"""
+Tests for phalanx.ci_fixer.sandbox_pool — SandboxPool, PooledContainer,
+get_sandbox_pool, wrap_cmd_for_container, wrap_shell_cmd_for_container.
+
+Coverage targets:
+  - SandboxPool._warmup(): min_size=0 (skip), min_size>0 (starts containers)
+  - SandboxPool.checkout(): happy path, timeout, health check fail + retry
+  - SandboxPool.checkin(): reset ok → re-enqueue; reset fail → replace; unhealthy after reset → replace
+  - SandboxPool.borrow(): context manager guarantees checkin on raise
+  - SandboxPool.shutdown(): drains queues, kills checked-out containers
+  - SandboxPool._reaper_loop(): kills stale checked-out containers
+  - SandboxPool._resolve_image(): preferred present → preferred; preferred absent → fallback
+  - SandboxPool._start_and_enqueue(): pool full → kills extra container
+  - SandboxUnavailableError raised when pool for unknown stack
+  - get_sandbox_pool(): lazy singleton, returns same instance on repeat calls
+  - reset_pool_for_testing(): clears singleton
+  - wrap_cmd_for_container(): correct docker exec prefix
+  - wrap_shell_cmd_for_container(): correct sh -c wrapping
+"""
+
+from __future__ import annotations
+
+import asyncio
+import time
+from typing import TYPE_CHECKING
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from phalanx.ci_fixer.sandbox_pool import (
+    PooledContainer,
+    SandboxPool,
+    SandboxUnavailableError,
+    get_sandbox_pool,
+    reset_pool_for_testing,
+    wrap_cmd_for_container,
+    wrap_shell_cmd_for_container,
+)
+
+# ── Helpers ───────────────────────────────────────────────────────────────────
+
+
+def _make_container(
+    container_id: str = "abc123",
+    stack: str = "python",
+    image: str = "phalanx-sandbox-python:latest",
+    checked_out_seconds_ago: float = 0,
+) -> PooledContainer:
+    c = PooledContainer(container_id=container_id, stack=stack, image=image)
+    c.checked_out_at = time.monotonic() - checked_out_seconds_ago
+    return c
+
+
+def _make_proc(returncode: int = 0, stdout: bytes = b"ok", stderr: bytes = b"") -> MagicMock:
+    proc = MagicMock()
+    proc.returncode = returncode
+    proc.communicate = AsyncMock(return_value=(stdout, stderr))
+    return proc
+
+
+def _mock_settings(
+    min_size: int = 1,
+    max_size: int = 2,
+    checkout_timeout: int = 5,
+    max_hold: int = 300,
+    reaper_interval: int = 60,
+    docker_cmd: str = "docker",
+):
+    s = MagicMock()
+    s.sandbox_pool_min_size = min_size
+    s.sandbox_pool_max_size = max_size
+    s.sandbox_checkout_timeout_seconds = checkout_timeout
+    s.sandbox_max_hold_seconds = max_hold
+    s.sandbox_reaper_interval_seconds = reaper_interval
+    s.sandbox_docker_cmd = docker_cmd
+    return s
+
+
+# ── wrap helpers ──────────────────────────────────────────────────────────────
+
+
+class TestWrapHelpers:
+    def test_wrap_cmd_for_container(self):
+        result = wrap_cmd_for_container("ctr123", ["ruff", "check", "."], "/workspace")
+        assert result == ["docker", "exec", "-w", "/workspace", "ctr123", "ruff", "check", "."]
+
+    def test_wrap_cmd_custom_docker_cmd(self):
+        result = wrap_cmd_for_container(
+            "ctr123", ["go", "test", "./..."], "/ws", docker_cmd="podman"
+        )
+        assert result[0] == "podman"
+        assert "ctr123" in result
+
+    def test_wrap_shell_cmd_for_container(self):
+        result = wrap_shell_cmd_for_container("ctr123", "ruff check .")
+        assert result == [
+            "docker",
+            "exec",
+            "-w",
+            "/workspace",
+            "ctr123",
+            "sh",
+            "-c",
+            "ruff check .",
+        ]
+
+    def test_wrap_shell_cmd_custom_docker(self):
+        result = wrap_shell_cmd_for_container("ctr456", "npm test", docker_cmd="podman")
+        assert result[0] == "podman"
+        assert "sh" in result
+        assert "npm test" in result
+
+
+# ── PooledContainer ───────────────────────────────────────────────────────────
+
+
+class TestPooledContainer:
+    def test_defaults(self):
+        c = PooledContainer(container_id="abc", stack="python", image="img:latest")
+        assert c.healthy is True
+        assert c.container_id == "abc"
+        assert isinstance(c.checked_out_at, float)
+
+    def test_fields(self):
+        c = _make_container(container_id="xyz", stack="go", image="golang:1.22-alpine")
+        assert c.stack == "go"
+        assert c.image == "golang:1.22-alpine"
+
+
+# ── SandboxPool._warmup ───────────────────────────────────────────────────────
+
+
+class TestSandboxPoolWarmup:
+    @pytest.mark.asyncio
+    async def test_warmup_min_size_zero_skips(self):
+        """min_size=0 → no containers started, queues initialised empty."""
+        pool = SandboxPool()
+        mock_settings = _mock_settings(min_size=0)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            await pool._warmup()
+
+        assert "python" in pool._queues
+        assert pool._queues["python"].qsize() == 0
+        assert pool._reaper_task is None
+
+    @pytest.mark.asyncio
+    async def test_warmup_starts_containers(self):
+        """min_size=1 → _start_and_enqueue called for each stack."""
+        pool = SandboxPool()
+        mock_settings = _mock_settings(min_size=1, max_size=2)
+
+        start_calls = []
+
+        async def fake_start_and_enqueue(stack):
+            container = _make_container(container_id=f"ctr-{stack}", stack=stack)
+            await pool._queues[stack].put(container)
+            start_calls.append(stack)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch.object(pool, "_start_and_enqueue", side_effect=fake_start_and_enqueue):
+                with patch.object(pool, "_reaper_loop", new_callable=AsyncMock):
+                    await pool._warmup()
+
+        assert len(start_calls) >= 1
+        # Reaper task should have been created
+        assert pool._reaper_task is not None
+        pool._reaper_task.cancel()
+
+    @pytest.mark.asyncio
+    async def test_warmup_errors_swallowed(self):
+        """Errors during warmup don't raise — pool starts empty."""
+        pool = SandboxPool()
+        mock_settings = _mock_settings(min_size=1)
+
+        async def failing_start(stack):
+            raise RuntimeError("docker not found")
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch.object(pool, "_start_and_enqueue", side_effect=failing_start):
+                with patch.object(pool, "_reaper_loop", new_callable=AsyncMock):
+                    await pool._warmup()  # should not raise
+
+        # Queues exist but are empty
+        assert pool._queues["python"].qsize() == 0
+
+
+# ── SandboxPool.checkout ──────────────────────────────────────────────────────
+
+
+class TestSandboxPoolCheckout:
+    @pytest.mark.asyncio
+    async def test_checkout_happy_path(self):
+        """Container in queue → returned immediately, removed from queue."""
+        pool = SandboxPool()
+        mock_settings = _mock_settings(min_size=0, checkout_timeout=5)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            await pool._warmup()
+
+        container = _make_container(container_id="ctr1", stack="python")
+        await pool._queues["python"].put(container)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch.object(pool, "_health_check", return_value=True):
+                with patch.object(pool, "_refill", new_callable=AsyncMock):
+                    result = await pool.checkout("python", timeout=5)
+
+        assert result.container_id == "ctr1"
+        assert "ctr1" in pool._checked_out
+
+    @pytest.mark.asyncio
+    async def test_checkout_timeout_raises(self):
+        """Empty queue + short timeout → SandboxUnavailableError."""
+        pool = SandboxPool()
+        mock_settings = _mock_settings(min_size=0)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            await pool._warmup()
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with pytest.raises(SandboxUnavailableError):
+                await pool.checkout("python", timeout=1)
+
+    @pytest.mark.asyncio
+    async def test_checkout_unknown_stack_raises(self):
+        """Stack not in pool → SandboxUnavailableError immediately."""
+        pool = SandboxPool()
+        mock_settings = _mock_settings(min_size=0)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            await pool._warmup()
+            with pytest.raises(SandboxUnavailableError, match="no pool"):
+                await pool.checkout("cobol", timeout=1)
+
+    @pytest.mark.asyncio
+    async def test_checkout_unhealthy_container_triggers_retry(self):
+        """Unhealthy container is killed, fresh one started, retry succeeds."""
+        pool = SandboxPool()
+        mock_settings = _mock_settings(min_size=0, checkout_timeout=5)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            await pool._warmup()
+
+        bad = _make_container("bad-ctr", "python")
+        good = _make_container("good-ctr", "python")
+        await pool._queues["python"].put(bad)
+
+        health_calls = []
+
+        async def fake_health(c):
+            health_calls.append(c.container_id)
+            return c.container_id == "good-ctr"
+
+        async def fake_kill(cid):
+            pass
+
+        async def fake_start_enqueue(stack):
+            await pool._queues[stack].put(good)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch.object(pool, "_health_check", side_effect=fake_health):
+                with patch.object(pool, "_kill_container", side_effect=fake_kill):
+                    with patch.object(pool, "_start_and_enqueue", side_effect=fake_start_enqueue):
+                        with patch.object(pool, "_refill", new_callable=AsyncMock):
+                            result = await pool.checkout("python", timeout=5)
+
+        assert result.container_id == "good-ctr"
+        assert "bad-ctr" in health_calls
+
+    @pytest.mark.asyncio
+    async def test_checkout_refill_triggered(self):
+        """checkout() triggers _refill as a background task that eventually runs."""
+        pool = SandboxPool()
+        mock_settings = _mock_settings(min_size=0)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            await pool._warmup()
+
+        container = _make_container("ctr1", "go")
+        await pool._queues["go"].put(container)
+
+        refill_calls = []
+        refill_event = asyncio.Event()
+
+        async def fake_refill(stack):
+            refill_calls.append(stack)
+            refill_event.set()
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch.object(pool, "_health_check", return_value=True):
+                with patch.object(pool, "_refill", side_effect=fake_refill):
+                    await pool.checkout("go", timeout=5)
+                    # Give the background task a chance to run
+                    await asyncio.wait_for(refill_event.wait(), timeout=2)
+
+        assert "go" in refill_calls
+
+
+# ── SandboxPool.checkin ───────────────────────────────────────────────────────
+
+
+class TestSandboxPoolCheckin:
+    @pytest.mark.asyncio
+    async def test_checkin_re_enqueues_after_reset(self):
+        """Reset succeeds + health ok → container back in queue."""
+        pool = SandboxPool()
+        mock_settings = _mock_settings(min_size=0)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            await pool._warmup()
+
+        container = _make_container("ctr1", "python")
+        pool._checked_out["ctr1"] = container
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch.object(pool, "_reset_container", return_value=True):
+                with patch.object(pool, "_health_check", return_value=True):
+                    await pool.checkin(container)
+
+        assert pool._queues["python"].qsize() == 1
+        assert "ctr1" not in pool._checked_out
+
+    @pytest.mark.asyncio
+    async def test_checkin_reset_fails_replaces_container(self):
+        """Reset fails → container killed, new one started asynchronously."""
+        pool = SandboxPool()
+        mock_settings = _mock_settings(min_size=0)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            await pool._warmup()
+
+        container = _make_container("bad-ctr", "python")
+        pool._checked_out["bad-ctr"] = container
+
+        kill_calls = []
+        start_calls = []
+        start_event = asyncio.Event()
+
+        async def fake_kill(cid):
+            kill_calls.append(cid)
+
+        async def fake_start(stack):
+            start_calls.append(stack)
+            start_event.set()
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch.object(pool, "_reset_container", return_value=False):
+                with patch.object(pool, "_kill_container", side_effect=fake_kill):
+                    with patch.object(pool, "_start_and_enqueue", side_effect=fake_start):
+                        await pool.checkin(container)
+                        await asyncio.wait_for(start_event.wait(), timeout=2)
+
+        assert "bad-ctr" in kill_calls
+        assert "python" in start_calls
+        assert pool._queues["python"].qsize() == 0  # no re-enqueue
+
+    @pytest.mark.asyncio
+    async def test_checkin_unhealthy_after_reset_replaces(self):
+        """Reset ok but health check fails → kill and replace."""
+        pool = SandboxPool()
+        mock_settings = _mock_settings(min_size=0)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            await pool._warmup()
+
+        container = _make_container("sick-ctr", "go")
+        pool._checked_out["sick-ctr"] = container
+
+        kill_calls = []
+        start_calls = []
+        start_event = asyncio.Event()
+
+        async def fake_kill(cid):
+            kill_calls.append(cid)
+
+        async def fake_start(stack):
+            start_calls.append(stack)
+            start_event.set()
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch.object(pool, "_reset_container", return_value=True):
+                with patch.object(pool, "_health_check", return_value=False):
+                    with patch.object(pool, "_kill_container", side_effect=fake_kill):
+                        with patch.object(pool, "_start_and_enqueue", side_effect=fake_start):
+                            await pool.checkin(container)
+                            await asyncio.wait_for(start_event.wait(), timeout=2)
+
+        assert "sick-ctr" in kill_calls
+        assert pool._queues["go"].qsize() == 0
+
+    @pytest.mark.asyncio
+    async def test_checkin_during_shutdown_kills_container(self):
+        """When pool is shutting down, checked-in container is killed not re-queued."""
+        pool = SandboxPool()
+        mock_settings = _mock_settings(min_size=0)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            await pool._warmup()
+
+        pool._shutdown = True
+        container = _make_container("ctr1", "python")
+
+        kill_calls = []
+
+        async def fake_kill(cid):
+            kill_calls.append(cid)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch.object(pool, "_kill_container", side_effect=fake_kill):
+                await pool.checkin(container)
+
+        assert "ctr1" in kill_calls
+        assert pool._queues["python"].qsize() == 0
+
+
+# ── SandboxPool.borrow ────────────────────────────────────────────────────────
+
+
+class TestSandboxPoolBorrow:
+    @pytest.mark.asyncio
+    async def test_borrow_checks_in_on_success(self):
+        """borrow() context manager checks container back in after normal exit."""
+        pool = SandboxPool()
+        mock_settings = _mock_settings(min_size=0)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            await pool._warmup()
+
+        container = _make_container("ctr1", "python")
+        await pool._queues["python"].put(container)
+
+        checkin_calls = []
+
+        async def fake_checkin(c):
+            checkin_calls.append(c.container_id)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch.object(pool, "_health_check", return_value=True):
+                with patch.object(pool, "_refill", new_callable=AsyncMock):
+                    with patch.object(pool, "checkin", side_effect=fake_checkin):
+                        async with pool.borrow("python", timeout=5) as borrowed:
+                            assert borrowed.container_id == "ctr1"
+
+        assert "ctr1" in checkin_calls
+
+    @pytest.mark.asyncio
+    async def test_borrow_checks_in_on_exception(self):
+        """borrow() guarantees checkin even when the body raises."""
+        pool = SandboxPool()
+        mock_settings = _mock_settings(min_size=0)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            await pool._warmup()
+
+        container = _make_container("ctr1", "python")
+        await pool._queues["python"].put(container)
+
+        checkin_calls = []
+
+        async def fake_checkin(c):
+            checkin_calls.append(c.container_id)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch.object(pool, "_health_check", return_value=True):
+                with patch.object(pool, "_refill", new_callable=AsyncMock):
+                    with patch.object(pool, "checkin", side_effect=fake_checkin):
+                        with pytest.raises(ValueError):
+                            async with pool.borrow("python", timeout=5):
+                                raise ValueError("fix run crashed")
+
+        assert "ctr1" in checkin_calls
+
+
+# ── SandboxPool.shutdown ──────────────────────────────────────────────────────
+
+
+class TestSandboxPoolShutdown:
+    @pytest.mark.asyncio
+    async def test_shutdown_kills_queued_containers(self):
+        """shutdown() kills all containers in queues."""
+        pool = SandboxPool()
+        mock_settings = _mock_settings(min_size=0)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            await pool._warmup()
+
+        c1 = _make_container("ctr1", "python")
+        c2 = _make_container("ctr2", "go")
+        await pool._queues["python"].put(c1)
+        await pool._queues["go"].put(c2)
+
+        kill_calls = []
+
+        async def fake_kill(cid):
+            kill_calls.append(cid)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch.object(pool, "_kill_container", side_effect=fake_kill):
+                await pool.shutdown()
+
+        assert "ctr1" in kill_calls
+        assert "ctr2" in kill_calls
+
+    @pytest.mark.asyncio
+    async def test_shutdown_kills_checked_out_containers(self):
+        """shutdown() also kills containers currently checked out."""
+        pool = SandboxPool()
+        mock_settings = _mock_settings(min_size=0)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            await pool._warmup()
+
+        container = _make_container("live-ctr", "rust")
+        pool._checked_out["live-ctr"] = container
+
+        kill_calls = []
+
+        async def fake_kill(cid):
+            kill_calls.append(cid)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch.object(pool, "_kill_container", side_effect=fake_kill):
+                await pool.shutdown()
+
+        assert "live-ctr" in kill_calls
+
+
+# ── SandboxPool._reaper_loop ──────────────────────────────────────────────────
+
+
+class TestSandboxPoolReaper:
+    @pytest.mark.asyncio
+    async def test_reaper_kills_stale_container(self):
+        """Container checked out > max_hold_seconds → reaped and replaced."""
+        pool = SandboxPool()
+        mock_settings = _mock_settings(min_size=0, max_hold=10, reaper_interval=1)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            await pool._warmup()
+
+        stale = _make_container("stale-ctr", "python", checked_out_seconds_ago=20)
+        pool._checked_out["stale-ctr"] = stale
+
+        kill_calls = []
+        start_calls = []
+        done_event = asyncio.Event()
+
+        async def fake_sleep(secs):
+            pass  # instant
+
+        async def fake_kill(cid):
+            kill_calls.append(cid)
+
+        async def fake_start(stack):
+            start_calls.append(stack)
+            pool._shutdown = True  # stop loop after this iteration
+            done_event.set()
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch.object(pool, "_kill_container", side_effect=fake_kill):
+                with patch.object(pool, "_start_and_enqueue", side_effect=fake_start):
+                    with patch("asyncio.sleep", side_effect=fake_sleep):
+                        task = asyncio.create_task(pool._reaper_loop())
+                        await asyncio.wait_for(done_event.wait(), timeout=5)
+                        await task
+
+        assert "stale-ctr" in kill_calls
+        assert "python" in start_calls
+
+    @pytest.mark.asyncio
+    async def test_reaper_leaves_fresh_container_alone(self):
+        """Container checked out recently → not reaped."""
+        pool = SandboxPool()
+        mock_settings = _mock_settings(min_size=0, max_hold=300, reaper_interval=1)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            await pool._warmup()
+
+        fresh = _make_container("fresh-ctr", "python", checked_out_seconds_ago=5)
+        pool._checked_out["fresh-ctr"] = fresh
+
+        kill_calls = []
+        slept = asyncio.Event()
+
+        async def fake_sleep(secs):
+            slept.set()
+            pool._shutdown = True  # stop after first iteration
+
+        async def fake_kill(cid):
+            kill_calls.append(cid)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch.object(pool, "_kill_container", side_effect=fake_kill):
+                with patch("asyncio.sleep", side_effect=fake_sleep):
+                    task = asyncio.create_task(pool._reaper_loop())
+                    await asyncio.wait_for(slept.wait(), timeout=5)
+                    await task
+
+        assert "fresh-ctr" not in kill_calls
+
+    @pytest.mark.asyncio
+    async def test_reaper_stops_on_cancelled(self):
+        """CancelledError exits the loop cleanly."""
+        pool = SandboxPool()
+        mock_settings = _mock_settings(min_size=0, reaper_interval=1)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            await pool._warmup()
+
+        async def raise_cancel(secs):
+            raise asyncio.CancelledError()
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch("asyncio.sleep", side_effect=raise_cancel):
+                await pool._reaper_loop()  # should return cleanly, not propagate
+
+
+# ── SandboxPool._resolve_image ────────────────────────────────────────────────
+
+
+class TestResolveImage:
+    @pytest.mark.asyncio
+    async def test_preferred_image_present(self):
+        """docker image inspect returns 0 → preferred image used."""
+        pool = SandboxPool()
+        mock_settings = _mock_settings()
+        proc = _make_proc(returncode=0)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch("asyncio.create_subprocess_exec", return_value=proc):
+                result = await pool._resolve_image("python")
+
+        assert result == "phalanx-sandbox-python:latest"
+
+    @pytest.mark.asyncio
+    async def test_preferred_image_absent_uses_fallback(self):
+        """docker image inspect returns non-zero → fallback image used."""
+        pool = SandboxPool()
+        mock_settings = _mock_settings()
+        proc = _make_proc(returncode=1)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch("asyncio.create_subprocess_exec", return_value=proc):
+                result = await pool._resolve_image("python")
+
+        assert result == "python:3.12-slim"
+
+    @pytest.mark.asyncio
+    async def test_unknown_stack_returns_ubuntu(self):
+        """Unknown stack → ubuntu:22.04 fallback."""
+        pool = SandboxPool()
+        mock_settings = _mock_settings()
+        proc = _make_proc(returncode=1)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch("asyncio.create_subprocess_exec", return_value=proc):
+                result = await pool._resolve_image("unknown")
+
+        assert result == "ubuntu:22.04"
+
+
+# ── SandboxPool._start_and_enqueue ───────────────────────────────────────────
+
+
+class TestStartAndEnqueue:
+    @pytest.mark.asyncio
+    async def test_enqueues_when_pool_not_full(self):
+        """Container started + pool has room → added to queue."""
+        pool = SandboxPool()
+        mock_settings = _mock_settings(min_size=0, max_size=2)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            await pool._warmup()
+
+        async def fake_start(stack):
+            return "new-ctr"
+
+        async def fake_resolve(stack):
+            return "phalanx-sandbox-python:latest"
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch.object(pool, "_start_container", side_effect=fake_start):
+                with patch.object(pool, "_resolve_image", side_effect=fake_resolve):
+                    await pool._start_and_enqueue("python")
+
+        assert pool._queues["python"].qsize() == 1
+
+    @pytest.mark.asyncio
+    async def test_kills_extra_when_pool_full(self):
+        """Container started but pool already at max_size → kill the extra."""
+        pool = SandboxPool()
+        mock_settings = _mock_settings(min_size=0, max_size=1)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            await pool._warmup()
+
+        # Pre-fill the queue to max_size
+        existing = _make_container("existing", "python")
+        await pool._queues["python"].put(existing)
+
+        kill_calls = []
+
+        async def fake_start(stack):
+            return "overflow-ctr"
+
+        async def fake_resolve(stack):
+            return "img:latest"
+
+        async def fake_kill(cid):
+            kill_calls.append(cid)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch.object(pool, "_start_container", side_effect=fake_start):
+                with patch.object(pool, "_resolve_image", side_effect=fake_resolve):
+                    with patch.object(pool, "_kill_container", side_effect=fake_kill):
+                        await pool._start_and_enqueue("python")
+
+        assert "overflow-ctr" in kill_calls
+        assert pool._queues["python"].qsize() == 1  # still just the existing one
+
+    @pytest.mark.asyncio
+    async def test_start_failure_is_swallowed(self):
+        """_start_container raises → error logged, no exception propagated."""
+        pool = SandboxPool()
+        mock_settings = _mock_settings(min_size=0)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            await pool._warmup()
+
+        async def fake_start(stack):
+            raise RuntimeError("docker daemon not found")
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch.object(pool, "_start_container", side_effect=fake_start):
+                await pool._start_and_enqueue("python")  # must not raise
+
+        assert pool._queues["python"].qsize() == 0
+
+
+# ── get_sandbox_pool singleton ────────────────────────────────────────────────
+
+
+class TestGetSandboxPool:
+    def setup_method(self):
+        reset_pool_for_testing()
+
+    def teardown_method(self):
+        reset_pool_for_testing()
+
+    @pytest.mark.asyncio
+    async def test_returns_pool_instance(self):
+        """get_sandbox_pool() returns a SandboxPool."""
+        mock_settings = _mock_settings(min_size=0)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch(
+                "phalanx.ci_fixer.sandbox_pool.SandboxPool._warmup",
+                new_callable=AsyncMock,
+            ):
+                pool = await get_sandbox_pool()
+
+        assert isinstance(pool, SandboxPool)
+
+    @pytest.mark.asyncio
+    async def test_returns_same_instance_on_repeat_calls(self):
+        """Second call returns the same singleton."""
+        mock_settings = _mock_settings(min_size=0)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch(
+                "phalanx.ci_fixer.sandbox_pool.SandboxPool._warmup",
+                new_callable=AsyncMock,
+            ):
+                p1 = await get_sandbox_pool()
+                p2 = await get_sandbox_pool()
+
+        assert p1 is p2
+
+    @pytest.mark.asyncio
+    async def test_reset_allows_new_instance(self):
+        """reset_pool_for_testing() clears singleton → next call creates fresh pool."""
+        mock_settings = _mock_settings(min_size=0)
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch(
+                "phalanx.ci_fixer.sandbox_pool.SandboxPool._warmup",
+                new_callable=AsyncMock,
+            ):
+                p1 = await get_sandbox_pool()
+
+        reset_pool_for_testing()
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch(
+                "phalanx.ci_fixer.sandbox_pool.SandboxPool._warmup",
+                new_callable=AsyncMock,
+            ):
+                p2 = await get_sandbox_pool()
+
+        assert p1 is not p2
+
+
+# ── SandboxPool._health_check ─────────────────────────────────────────────────
+
+
+class TestHealthCheck:
+    @pytest.mark.asyncio
+    async def test_healthy_container(self):
+        pool = SandboxPool()
+        mock_settings = _mock_settings()
+        container = _make_container("ctr1")
+        proc = _make_proc(returncode=0, stdout=b"ok")
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch("asyncio.create_subprocess_exec", return_value=proc):
+                result = await pool._health_check(container)
+
+        assert result is True
+
+    @pytest.mark.asyncio
+    async def test_unhealthy_container_nonzero_exit(self):
+        pool = SandboxPool()
+        mock_settings = _mock_settings()
+        container = _make_container("ctr1")
+        proc = _make_proc(returncode=1, stdout=b"")
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch("asyncio.create_subprocess_exec", return_value=proc):
+                result = await pool._health_check(container)
+
+        assert result is False
+
+    @pytest.mark.asyncio
+    async def test_health_check_exception_returns_false(self):
+        pool = SandboxPool()
+        mock_settings = _mock_settings()
+        container = _make_container("ctr1")
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch("asyncio.create_subprocess_exec", side_effect=FileNotFoundError("docker")):
+                result = await pool._health_check(container)
+
+        assert result is False
+
+
+# ── SandboxPool._reset_container ─────────────────────────────────────────────
+
+
+class TestResetContainer:
+    @pytest.mark.asyncio
+    async def test_reset_success(self):
+        pool = SandboxPool()
+        mock_settings = _mock_settings()
+        container = _make_container("ctr1")
+        proc = _make_proc(returncode=0, stdout=b"done")
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch("asyncio.create_subprocess_exec", return_value=proc):
+                result = await pool._reset_container(container)
+
+        assert result is True
+
+    @pytest.mark.asyncio
+    async def test_reset_failure_nonzero(self):
+        pool = SandboxPool()
+        mock_settings = _mock_settings()
+        container = _make_container("ctr1")
+        proc = _make_proc(returncode=1, stdout=b"")
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch("asyncio.create_subprocess_exec", return_value=proc):
+                result = await pool._reset_container(container)
+
+        assert result is False
+
+    @pytest.mark.asyncio
+    async def test_reset_exception_returns_false(self):
+        pool = SandboxPool()
+        mock_settings = _mock_settings()
+        container = _make_container("ctr1")
+
+        with patch("phalanx.ci_fixer.sandbox_pool.settings", mock_settings):
+            with patch("asyncio.create_subprocess_exec", side_effect=Exception("timeout")):
+                result = await pool._reset_container(container)
+
+        assert result is False