diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c7b936da..ba39998f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -314,7 +314,7 @@ jobs: run: pip install -e ".[dev]" - name: Run migrations - run: alembic upgrade head + run: alembic -c alembic/alembic.ini upgrade head - name: Run unit tests run: | @@ -419,7 +419,7 @@ jobs: run: pip install -e ".[dev]" - name: Run migrations - run: alembic upgrade head + run: alembic -c alembic/alembic.ini upgrade head - name: Run integration tests env: @@ -491,6 +491,7 @@ jobs: context: . target: production push: false + load: true cache-from: type=gha cache-to: type=gha,mode=max tags: forge:prod-${{ github.sha }} diff --git a/Dockerfile b/Dockerfile index 62c66670..0d93d305 100644 --- a/Dockerfile +++ b/Dockerfile @@ -66,6 +66,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ git \ nodejs \ npm \ + ca-certificates \ + gnupg \ + && install -m 0755 -d /etc/apt/keyrings \ + && curl -fsSL https://download.docker.com/linux/debian/gpg | gpg --dearmor -o /etc/apt/keyrings/docker.gpg \ + && chmod a+r /etc/apt/keyrings/docker.gpg \ + && echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/debian $(. 
/etc/os-release && echo $VERSION_CODENAME) stable" > /etc/apt/sources.list.d/docker.list \ + && apt-get update && apt-get install -y --no-install-recommends docker-ce-cli \ && rm -rf /var/lib/apt/lists/* RUN groupadd --gid 1001 forge && \ diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index d5344b8a..daa339e0 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -113,6 +113,7 @@ services: --queues=default,commander,planner,reviewer,qa,release,security,builder,ingestion,skill_drills,ci_fixer --concurrency=4 --max-tasks-per-child=100 + user: root environment: PHALANX_WORKER: "1" # forces NullPool — prevents "Future attached to different loop" in forked workers OPENAI_MODEL_REASONING: "gpt-4.1" # reasoning agents: Commander, Planner, QA, Reviewer, Release @@ -120,6 +121,7 @@ services: - forge-repos:/tmp/forge-repos - ./configs:/app/configs:ro - ./skill-registry:/app/skill-registry:ro + - /var/run/docker.sock:/var/run/docker.sock # required: CI fixer spawns sandbox containers depends_on: postgres: condition: service_healthy diff --git a/docker/sandbox/go/Dockerfile b/docker/sandbox/go/Dockerfile index b93a6565..521bf9e2 100644 --- a/docker/sandbox/go/Dockerfile +++ b/docker/sandbox/go/Dockerfile @@ -1,16 +1,15 @@ FROM golang:1.22-alpine -# Install staticcheck and golangci-lint for broader Go lint coverage -RUN go install honnef.co/go/tools/cmd/staticcheck@2024.1.0 && \ - go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.59.1 +RUN apk add --no-cache git -# Create non-root user -RUN adduser -D -u 1000 phalanx +RUN go install honnef.co/go/tools/cmd/staticcheck@v0.5.1 -COPY ../reset.sh /phalanx/reset.sh -RUN chmod +x /phalanx/reset.sh +RUN adduser -D -u 1000 phalanx -RUN mkdir -p /workspace && chown phalanx:phalanx /workspace +COPY reset.sh /phalanx/reset.sh +RUN chmod +x /phalanx/reset.sh && \ + mkdir -p /workspace && \ + chown phalanx /workspace WORKDIR /workspace USER phalanx diff --git a/docker/sandbox/node/Dockerfile 
b/docker/sandbox/node/Dockerfile index 07a2e6ed..c2ff3ab4 100644 --- a/docker/sandbox/node/Dockerfile +++ b/docker/sandbox/node/Dockerfile @@ -1,19 +1,18 @@ FROM node:20-slim -# Create non-root user -RUN useradd -m -u 1000 -s /bin/bash phalanx 2>/dev/null || true +RUN usermod -l phalanx -d /home/phalanx -m node && \ + groupmod -n phalanx node 2>/dev/null || true -# Install common Node tooling used by the CI fixer. RUN npm install -g \ eslint@8.57.0 \ typescript@5.4.5 \ jest@29.7.0 \ --no-fund --no-audit -COPY ../reset.sh /phalanx/reset.sh -RUN chmod +x /phalanx/reset.sh - -RUN mkdir -p /workspace && chown phalanx:phalanx /workspace +COPY reset.sh /phalanx/reset.sh +RUN chmod +x /phalanx/reset.sh && \ + mkdir -p /workspace && \ + chown phalanx:phalanx /workspace 2>/dev/null || chown phalanx /workspace WORKDIR /workspace USER phalanx diff --git a/phalanx/agents/ci_fixer.py b/phalanx/agents/ci_fixer.py index aa6a5475..2e224392 100644 --- a/phalanx/agents/ci_fixer.py +++ b/phalanx/agents/ci_fixer.py @@ -1460,7 +1460,18 @@ async def _update_fingerprint_on_success( # ── Auth helpers ──────────────────────────────────────────────────────────── def _decrypt_key(self, encrypted_key: str) -> str: - return encrypted_key # Phase 2: KMS decrypt + if not encrypted_key: + return "" + enc_key = getattr(settings, "encryption_key", None) + if not enc_key: + return encrypted_key + try: + from cryptography.fernet import Fernet # noqa: PLC0415 + + f = Fernet(enc_key.encode()) + return f.decrypt(encrypted_key.encode()).decode() + except Exception: + return encrypted_key def _get_github_token(self, integration: CIIntegration) -> str: return integration.github_token or settings.github_token diff --git a/phalanx/ci_fixer/analyst.py b/phalanx/ci_fixer/analyst.py index 68f72c51..c568455a 100644 --- a/phalanx/ci_fixer/analyst.py +++ b/phalanx/ci_fixer/analyst.py @@ -123,8 +123,9 @@ def is_actionable(self) -> bool: "corrected_lines" (a JSON array of strings, one 
per line, each ending with \\n). 3. "corrected_lines" may differ from the original window by at most \ {max_line_delta} lines (adding or removing). Do NOT rewrite the whole file. -4. NEVER modify test files (paths containing /test or test_). -5. For unused imports (F401): delete the import line only. +4. NEVER rewrite test logic — only mechanical lint fixes (unused imports, line length) + in test files are allowed. Do NOT change assertions, test structure, or test data. +5. For unused imports (F401): delete the import line only. Set corrected_lines to []. 6. For line-too-long (E501): wrap or shorten the line only. 7. For future-import order (F404): move the __future__ import to line 1 only. 8. If you cannot produce a high or medium confidence fix, set \ @@ -205,11 +206,18 @@ def analyze( root_cause="Could not read any of the failing files from workspace", ) + lint_only = bool( + parsed_log.lint_errors + and not parsed_log.type_errors + and not parsed_log.test_failures + and not parsed_log.build_errors + ) + # ── Phase 2: history check ───────────────────────────────────────────── if fingerprint_hash and self._history_lookup is not None: cached = self._history_lookup(fingerprint_hash) if cached: - patches = self._parse_and_validate_patches(cached, windows) + patches = self._parse_and_validate_patches(cached, windows, lint_only=lint_only) if patches: log.info( "ci_analyst.history_hit", @@ -258,7 +266,9 @@ def analyze( log.warning("ci_analyst.json_parse_failed", error=str(exc), raw=raw[:500]) return FixPlan(confidence="low", root_cause="LLM returned non-JSON response") - patches = self._parse_and_validate_patches(data.get("patches", []), windows) + patches = self._parse_and_validate_patches( + data.get("patches", []), windows, lint_only=lint_only + ) confidence = data.get("confidence", "low") # If patch validation rejected everything, downgrade to low @@ -283,10 +293,10 @@ def _read_windows(self, workspace: Path, parsed_log: ParsedLog) -> list[FileWind """ # Build 
map: file_path → list of error line numbers error_lines_by_file: dict[str, list[int]] = {} - for e in parsed_log.lint_errors: - error_lines_by_file.setdefault(e.file, []).append(e.line) - for e in parsed_log.type_errors: - error_lines_by_file.setdefault(e.file, []).append(e.line) + for le in parsed_log.lint_errors: + error_lines_by_file.setdefault(le.file, []).append(le.line) + for te in parsed_log.type_errors: + error_lines_by_file.setdefault(te.file, []).append(te.line) # For test failures we have no line number — read top of file for f in parsed_log.test_failures: error_lines_by_file.setdefault(f.file, []).append(1) @@ -333,7 +343,7 @@ def _read_windows(self, workspace: Path, parsed_log: ParsedLog) -> list[FileWind # ── Patch validation ─────────────────────────────────────────────────────── def _parse_and_validate_patches( - self, raw_patches: list, windows: list[FileWindow] + self, raw_patches: list, windows: list[FileWindow], lint_only: bool = False ) -> list[FilePatch]: """ Parse LLM patch dicts, apply guard rails, return only safe patches. @@ -342,7 +352,8 @@ def _parse_and_validate_patches( - path not in the windows we sent (LLM invented a file) - start_line / end_line don't match the window we sent (off-by-more-than-2) - |delta| > _MAX_LINE_DELTA (LLM rewrote too much) - - path looks like a test file + - path looks like a test file (unless lint_only=True — lint fixes in test + files are valid, e.g. 
removing unused imports) """ window_by_path = {w.path: w for w in windows} safe: list[FilePatch] = [] @@ -358,14 +369,14 @@ def _parse_and_validate_patches( log.warning("ci_analyst.patch_unknown_file", path=path) continue - # Guard: never touch test files - if _is_test_file(path): + # Guard: never touch test files unless it's a lint-only fix + if _is_test_file(path) and not lint_only: log.warning("ci_analyst.patch_test_file_rejected", path=path) continue - # Guard: corrected_lines must be a non-empty list of strings - if not isinstance(corrected, list) or not corrected: - log.warning("ci_analyst.patch_empty_corrected_lines", path=path) + # Guard: corrected_lines must be a list (empty = delete the lines) + if not isinstance(corrected, list): + log.warning("ci_analyst.patch_invalid_corrected_lines", path=path) continue # Ensure every line ends with \n @@ -379,18 +390,23 @@ def _parse_and_validate_patches( log.warning("ci_analyst.patch_missing_line_range", path=path) continue - if abs(start - window.start_line) > 2 or abs(end - window.end_line) > 2: + # Accept sub-ranges (LLM targeting a specific line within the window is correct). + # Clamp to window bounds if the patch extends outside. 
+ if start < window.start_line or end > window.end_line: log.warning( - "ci_analyst.patch_line_range_mismatch", + "ci_analyst.patch_line_range_outside_window", path=path, - expected_start=window.start_line, - expected_end=window.end_line, + window_start=window.start_line, + window_end=window.end_line, got_start=start, got_end=end, ) - # Clamp to the window we actually sent — safer than rejecting - start = window.start_line - end = window.end_line + start = window.start_line if start < window.start_line else start + end = window.end_line if end > window.end_line else end + # If clamping made start > end, fall back to the full window + if start > end: + start = window.start_line + end = window.end_line original_size = end - start + 1 delta = len(corrected) - original_size diff --git a/phalanx/ci_fixer/log_parser.py b/phalanx/ci_fixer/log_parser.py index ac22e73a..87535fde 100644 --- a/phalanx/ci_fixer/log_parser.py +++ b/phalanx/ci_fixer/log_parser.py @@ -84,22 +84,22 @@ def all_files(self) -> list[str]: """All unique files mentioned across all error types.""" seen: set[str] = set() files: list[str] = [] - for e in self.lint_errors: - if e.file not in seen: - seen.add(e.file) - files.append(e.file) - for e in self.type_errors: - if e.file not in seen: - seen.add(e.file) - files.append(e.file) - for e in self.test_failures: - if e.file not in seen: - seen.add(e.file) - files.append(e.file) - for e in self.build_errors: - if e.file and e.file not in seen: - seen.add(e.file) - files.append(e.file) + for le in self.lint_errors: + if le.file not in seen: + seen.add(le.file) + files.append(le.file) + for te in self.type_errors: + if te.file not in seen: + seen.add(te.file) + files.append(te.file) + for tf in self.test_failures: + if tf.file not in seen: + seen.add(tf.file) + files.append(tf.file) + for be in self.build_errors: + if be.file and be.file not in seen: + seen.add(be.file) + files.append(be.file) return files def summary(self) -> str: @@ -128,24 +128,24 @@ def 
as_text(self) -> str: if self.type_errors: lines.append("TYPE ERRORS:") - for e in self.type_errors[:10]: - lines.append(f" {e.file}:{e.line}: {e.message}") + for te in self.type_errors[:10]: + lines.append(f" {te.file}:{te.line}: {te.message}") lines.append("") if self.test_failures: lines.append("TEST FAILURES:") - for f in self.test_failures[:10]: - lines.append(f" {f.test_id}") - if f.message: - for msg_line in f.message.splitlines()[:5]: + for tf in self.test_failures[:10]: + lines.append(f" {tf.test_id}") + if tf.message: + for msg_line in tf.message.splitlines()[:5]: lines.append(f" {msg_line}") lines.append("") if self.build_errors: lines.append("BUILD ERRORS:") - for e in self.build_errors[:5]: - prefix = f" {e.file}: " if e.file else " " - lines.append(f"{prefix}{e.message}") + for be in self.build_errors[:5]: + prefix = f" {be.file}: " if be.file else " " + lines.append(f"{prefix}{be.message}") lines.append("") return "\n".join(lines) @@ -153,13 +153,21 @@ def as_text(self) -> str: # ── Regex patterns ───────────────────────────────────────────────────────────── -# ruff: phalanx/agents/foo.py:1:10: F401 'os' imported but unused +# ruff standard format: phalanx/agents/foo.py:1:10: F401 'os' imported but unused _RUFF_RE = re.compile( r"^([\w./\-]+\.py):(\d+):(\d+):\s+([A-Z]\d+)\s+(.+)$", re.MULTILINE, ) -# mypy: phalanx/agents/foo.py:42: error: Incompatible return value +# ruff rich/diagnostic format (--output-format=full or terminal default): +# F401 [*] `sys` imported but unused +# --> tests/test_eval_outcome.py:259:8 +_RUFF_RICH_RE = re.compile( + r"^([A-Z]\d+)\s+(?:\[\*\]\s+)?(.+?)\n\s+-->\s+([\w./\-]+\.py):(\d+):(\d+)", + re.MULTILINE, +) + +# mypy output format: phalanx/agents/foo.py:42: error: Incompatible return value _MYPY_RE = re.compile( r"^([\w./\-]+\.py):(\d+):\s+error:\s+(.+)$", re.MULTILINE, @@ -248,7 +256,7 @@ def parse_log(raw: str) -> ParsedLog: # Determine primary tool if lint_errors: - tool = "ruff" if _RUFF_RE.search(text) else 
"eslint" + tool = "ruff" if (_RUFF_RE.search(text) or _RUFF_RICH_RE.search(text)) else "eslint" elif type_errors: tool = "mypy" if _MYPY_RE.search(text) else "tsc" elif test_failures: @@ -272,16 +280,39 @@ def parse_log(raw: str) -> ParsedLog: def _parse_ruff(text: str) -> list[LintError]: errors: list[LintError] = [] + seen: set[tuple] = set() + for m in _RUFF_RE.finditer(text): - errors.append( - LintError( - file=m.group(1), - line=int(m.group(2)), - col=int(m.group(3)), - code=m.group(4), - message=m.group(5).strip(), + key = (m.group(1), int(m.group(2)), m.group(4)) + if key not in seen: + seen.add(key) + errors.append( + LintError( + file=m.group(1), + line=int(m.group(2)), + col=int(m.group(3)), + code=m.group(4), + message=m.group(5).strip(), + ) ) - ) + + # Also parse rich/diagnostic format (--output-format=full or terminal default): + # F401 [*] `sys` imported but unused + # --> tests/test_eval_outcome.py:259:8 + for m in _RUFF_RICH_RE.finditer(text): + key = (m.group(3), int(m.group(4)), m.group(1)) + if key not in seen: + seen.add(key) + errors.append( + LintError( + file=m.group(3), + line=int(m.group(4)), + col=int(m.group(5)), + code=m.group(1), + message=m.group(2).strip(), + ) + ) + return errors diff --git a/phalanx/ci_fixer/outcome_tracker.py b/phalanx/ci_fixer/outcome_tracker.py index 022f63c1..a4b6a1fb 100644 --- a/phalanx/ci_fixer/outcome_tracker.py +++ b/phalanx/ci_fixer/outcome_tracker.py @@ -198,11 +198,12 @@ async def _get_github_token(run: CIFixRun) -> str | None: from phalanx.config.settings import get_settings # noqa: PLC0415 settings = get_settings() - if settings.encryption_key: + enc_key = getattr(settings, "encryption_key", None) + if enc_key: try: from cryptography.fernet import Fernet # noqa: PLC0415 - f = Fernet(settings.encryption_key.encode()) + f = Fernet(enc_key.encode()) return f.decrypt(integration.ci_api_key_enc.encode()).decode() except Exception: pass diff --git a/phalanx/ci_fixer/validator.py 
b/phalanx/ci_fixer/validator.py index b47f767d..1e9ad0c1 100644 --- a/phalanx/ci_fixer/validator.py +++ b/phalanx/ci_fixer/validator.py @@ -7,6 +7,10 @@ 2. Regression check — after the primary per-file check passes, the broader codebase is scanned for NEW errors introduced by the patch. A fix that breaks other files is treated as failed. + 3. CI-parity discovery — reads .github/workflows/*.yml in the workspace to + discover the exact commands the CI runs (e.g. ruff format --check, mypy flags, + pytest --cov-fail-under). Falls back to sensible defaults when no CI config + is found, so it works for any GitHub Actions repo generically. Supports: ruff, mypy, pytest, tsc, eslint. Unknown tools → skipped (passed=True, explicit log). @@ -14,16 +18,115 @@ from __future__ import annotations +import re import subprocess from dataclasses import dataclass, field +from pathlib import Path # noqa: TC003 from typing import TYPE_CHECKING import structlog if TYPE_CHECKING: - from pathlib import Path + from phalanx.ci_fixer.log_parser import LintError, ParsedLog + from phalanx.ci_fixer.log_parser import TypeError as TypeErr + +# ── CI config discovery ──────────────────────────────────────────────────────── + +# Regexes to extract tool commands from CI YAML step `run:` blocks +_YAML_RUN_RE = re.compile(r"^\s*run:\s*[|>]?\s*(.+)$", re.MULTILINE) +# Multi-line run blocks (|- or |) — capture everything indented under `run:` +_YAML_RUN_BLOCK_RE = re.compile(r"run:\s*\|[-]?\n((?:[ \t]+.+\n?)*)", re.MULTILINE) + + +def _discover_ci_commands(tool: str, workspace: Path) -> dict: + """ + Read .github/workflows/*.yml in the workspace and extract commands relevant + to the given tool. + + Returns a dict with tool-specific flags discovered from CI, e.g.: + ruff → {"run_format_check": True} + mypy → {"extra_flags": ["--ignore-missing-imports"]} + pytest → {"cov_fail_under": 70, "extra_flags": ["-x"]} + + Falls back to empty/False defaults when CI YAML is absent or tool not found. 
This ensures the validator stays generic across any GitHub Actions repo. + """ + workflows_dir = workspace / ".github" / "workflows" + if not workflows_dir.is_dir(): + return {} + + all_run_lines: list[str] = [] + for yml_file in workflows_dir.glob("*.yml"): + try: + text = yml_file.read_text(errors="replace") + # Extract inline run: value lines + for m in _YAML_RUN_RE.finditer(text): + all_run_lines.append(m.group(1).strip()) + # Extract multi-line run block lines + for m in _YAML_RUN_BLOCK_RE.finditer(text): + for line in m.group(1).splitlines(): + stripped = line.strip() + if stripped: + all_run_lines.append(stripped) + except Exception: + continue + + if tool == "ruff": + return _discover_ruff_config(all_run_lines) + if tool == "mypy": + return _discover_mypy_config(all_run_lines) + if tool == "pytest": + return _discover_pytest_config(all_run_lines) + return {} + + + def _discover_ruff_config(run_lines: list[str]) -> dict: + """Detect whether CI runs `ruff format --check` in addition to `ruff check`.""" + run_format_check = any( + "ruff" in line + and "format" in line + and "--check" in line + for line in run_lines + ) + return {"run_format_check": run_format_check} + + + def _discover_mypy_config(run_lines: list[str]) -> dict: + """Extract extra mypy flags used in CI (e.g. 
--ignore-missing-imports).""" + extra_flags: list[str] = [] + known_flags = [ + "--ignore-missing-imports", + "--strict", + "--disallow-untyped-defs", + "--no-implicit-optional", + "--warn-return-any", + "--warn-unused-ignores", + "--check-untyped-defs", + ] + for line in run_lines: + if "mypy" not in line: + continue + for flag in known_flags: + if flag in line and flag not in extra_flags: + extra_flags.append(flag) + return {"extra_flags": extra_flags} + + +def _discover_pytest_config(run_lines: list[str]) -> dict: + """Extract pytest --cov-fail-under threshold and common flags from CI.""" + cov_fail_under: int | None = None + extra_flags: list[str] = [] + for line in run_lines: + if "pytest" not in line: + continue + m = re.search(r"--cov-fail-under[=\s]+(\d+)", line) + if m: + cov_fail_under = int(m.group(1)) + for flag in ("-x", "--tb=short", "--tb=long", "--tb=no", "-q", "-v"): + if flag in line.split() and flag not in extra_flags: + extra_flags.append(flag) + return {"cov_fail_under": cov_fail_under, "extra_flags": extra_flags} - from phalanx.ci_fixer.log_parser import ParsedLog log = structlog.get_logger(__name__) @@ -63,12 +166,14 @@ def validate_fix( files = parsed_log.all_files[:6] tool_version = _get_tool_version(tool) + ci_config = _discover_ci_commands(tool, workspace) + if tool == "ruff": - result = _run_ruff(workspace, files, tool_version) + result = _run_ruff(workspace, files, tool_version, ci_config) elif tool == "mypy": - result = _run_mypy(workspace, files, tool_version) + result = _run_mypy(workspace, files, tool_version, ci_config) elif tool == "pytest": - result = _run_pytest(workspace, parsed_log, tool_version) + result = _run_pytest(workspace, parsed_log, tool_version, ci_config) elif tool in ("tsc", "eslint"): result = _run_node_linter(workspace, tool, files, tool_version) else: @@ -111,28 +216,75 @@ def validate_fix( # ── Tool runners ─────────────────────────────────────────────────────────────── -def _run_ruff(workspace: Path, files: 
list[str], tool_version: str) -> ValidationResult: +def _run_ruff( + workspace: Path, files: list[str], tool_version: str, ci_config: dict | None = None +) -> ValidationResult: targets = files if files else ["."] + ci_config = ci_config or {} + + # Step 1: ruff check (lint) code, output = _run(["ruff", "check"] + targets, workspace) - passed = code == 0 - log.info("ci_validator.ruff", passed=passed, files=files, version=tool_version) - return ValidationResult(passed=passed, tool="ruff", output=output, tool_version=tool_version) + if code != 0: + log.info("ci_validator.ruff_check", passed=False, files=files, version=tool_version) + return ValidationResult(passed=False, tool="ruff", output=output, tool_version=tool_version) + + # Step 2: ruff format --check — only if CI actually runs it + if ci_config.get("run_format_check"): + fmt_code, fmt_output = _run(["ruff", "format", "--check"] + targets, workspace) + combined = (output + "\n" + fmt_output).strip() + passed = fmt_code == 0 + log.info("ci_validator.ruff_format_check", passed=passed, files=files, version=tool_version) + return ValidationResult( + passed=passed, tool="ruff", output=combined, tool_version=tool_version + ) + + log.info("ci_validator.ruff", passed=True, files=files, version=tool_version) + return ValidationResult(passed=True, tool="ruff", output=output, tool_version=tool_version) -def _run_mypy(workspace: Path, files: list[str], tool_version: str) -> ValidationResult: +def _run_mypy( + workspace: Path, files: list[str], tool_version: str, ci_config: dict | None = None +) -> ValidationResult: targets = files if files else ["."] - code, output = _run(["mypy"] + targets, workspace) + ci_config = ci_config or {} + extra_flags: list[str] = ci_config.get("extra_flags", []) + code, output = _run(["mypy"] + extra_flags + targets, workspace) passed = code == 0 - log.info("ci_validator.mypy", passed=passed, files=files, version=tool_version) + log.info( + "ci_validator.mypy", passed=passed, files=files, 
flags=extra_flags, version=tool_version + ) return ValidationResult(passed=passed, tool="mypy", output=output, tool_version=tool_version) -def _run_pytest(workspace: Path, parsed_log: ParsedLog, tool_version: str) -> ValidationResult: +def _run_pytest( + workspace: Path, parsed_log: ParsedLog, tool_version: str, ci_config: dict | None = None +) -> ValidationResult: + ci_config = ci_config or {} test_files = list({f.file for f in parsed_log.test_failures}) targets = test_files if test_files else ["tests/"] - code, output = _run(["python", "-m", "pytest", "-x", "-q"] + targets, workspace) + + base_flags = ["-x", "-q"] + # Apply extra CI flags discovered (e.g. --tb=short), avoiding duplicates + for flag in ci_config.get("extra_flags", []): + if flag not in base_flags: + base_flags.append(flag) + + # Apply coverage threshold if CI enforces one and we're running the full suite + cov_fail_under: int | None = ci_config.get("cov_fail_under") + cov_flags: list[str] = [] + if cov_fail_under is not None and not test_files: + cov_flags = [f"--cov-fail-under={cov_fail_under}"] + + cmd = ["python", "-m", "pytest"] + base_flags + cov_flags + targets + code, output = _run(cmd, workspace) passed = code == 0 - log.info("ci_validator.pytest", passed=passed, files=targets, version=tool_version) + log.info( + "ci_validator.pytest", + passed=passed, + files=targets, + cov_threshold=cov_fail_under, + version=tool_version, + ) return ValidationResult(passed=passed, tool="pytest", output=output, tool_version=tool_version) @@ -180,19 +332,19 @@ def _regression_check( # Build set of pre-existing (file, code) pairs pre_existing: set[tuple[str, str]] = set() - for e in original_parsed.lint_errors: - pre_existing.add((e.file, e.code)) - for e in original_parsed.type_errors: - pre_existing.add((e.file, getattr(e, "code", e.message[:30]))) - - regressions = [] - for e in new_parsed.lint_errors: - if (e.file, e.code) not in pre_existing: - regressions.append(e) - for e in new_parsed.type_errors: - 
key = (e.file, getattr(e, "code", e.message[:30])) + for le in original_parsed.lint_errors: + pre_existing.add((le.file, le.code)) + for te in original_parsed.type_errors: + pre_existing.add((te.file, getattr(te, "code", te.message[:30]))) + + regressions: list[LintError | TypeErr] = [] + for le in new_parsed.lint_errors: + if (le.file, le.code) not in pre_existing: + regressions.append(le) + for te in new_parsed.type_errors: + key = (te.file, getattr(te, "code", te.message[:30])) if key not in pre_existing: - regressions.append(e) + regressions.append(te) return regressions diff --git a/phalanx/db/models.py b/phalanx/db/models.py index 218ce5ff..474065c3 100644 --- a/phalanx/db/models.py +++ b/phalanx/db/models.py @@ -454,7 +454,7 @@ class AgentTrace(Base): id: Mapped[str] = mapped_column(UUID(as_uuid=False), primary_key=True, default=_uuid) run_id: Mapped[str] = mapped_column(ForeignKey("runs.id", ondelete="CASCADE"), nullable=False) - task_id: Mapped[str | None] = mapped_column(String(36)) + task_id: Mapped[str | None] = mapped_column(UUID(as_uuid=False)) agent_role: Mapped[str] = mapped_column(String(100), nullable=False) agent_id: Mapped[str] = mapped_column(String(100), nullable=False) trace_type: Mapped[str] = mapped_column(String(50), nullable=False) diff --git a/phalanx/sim_test_scratch.py b/phalanx/sim_test_scratch.py new file mode 100644 index 00000000..1d23a35a --- /dev/null +++ b/phalanx/sim_test_scratch.py @@ -0,0 +1,3 @@ +"""Temporary scratch file for CI fixer simulation test — safe to delete.""" + +x = 1 diff --git a/skill-registry/index.yaml b/skill-registry/index.yaml index 06f6dac7..876d2bf3 100644 --- a/skill-registry/index.yaml +++ b/skill-registry/index.yaml @@ -11,3 +11,4 @@ skills: task-decomposition: "skills/task-decomposition.yaml" system-design: "skills/system-design.yaml" orchestration: "skills/orchestration.yaml" + risk-assessment: "skills/risk-assessment.yaml" diff --git a/skill-registry/skills/risk-assessment.yaml 
b/skill-registry/skills/risk-assessment.yaml new file mode 100644 index 00000000..18fca369 --- /dev/null +++ b/skill-registry/skills/risk-assessment.yaml @@ -0,0 +1,38 @@ +id: risk-assessment +name: "Risk Assessment" +version: "1.0.0" +domain: "engineering" +category: "quality" +stability: "stable" +applicable_roles: + - commander + - tech_lead +min_level: "ic5" + +prerequisites: + - system-design + - task-decomposition + +quality_criteria: + - "Risks are identified before work begins, not after failure" + - "Each risk has a severity (low/medium/high/critical) and a mitigation" + - "Blocking risks are escalated before task dispatch" + - "Risk assessment is recorded in the run's audit_log" + +principles: + - "Identify unknowns before committing to a plan." + - "High-severity risks require human acknowledgement before proceeding." + - "Mitigations are concrete actions, not vague reassurances." + +procedures: + proficient: + - "Review the work order for scope, dependencies, and external services." + - "Enumerate risks across: correctness, security, rollback-ability, and blast radius." + - "Assign severity and likelihood to each risk." + - "Define a concrete mitigation or fallback for each high/critical risk." + - "Surface critical risks to the human approver before plan approval." + +anti_patterns: + - "Treating risk assessment as a post-mortem step." + - "Marking all risks as 'low' to unblock dispatch." + - "Proceeding past a critical risk without explicit human sign-off."