From 018bf1c8c9bb8d8d588afe509342d5f0a7ff584b Mon Sep 17 00:00:00 2001
From: Michael Kuehl
Date: Wed, 22 Apr 2026 09:48:21 +0200
Subject: [PATCH] feat(engine): add pre-flight check to skip coder sessions
 when work is done (fixes #511)

---
 agent_fox/engine/engine.py          |  68 +++++++++
 agent_fox/engine/preflight.py       | 191 +++++++++++++++++++++
 agent_fox/knowledge/audit.py        |   1 +
 tests/unit/engine/test_preflight.py | 213 ++++++++++++++++++++++
 4 files changed, 473 insertions(+)
 create mode 100644 agent_fox/engine/preflight.py
 create mode 100644 tests/unit/engine/test_preflight.py

diff --git a/agent_fox/engine/engine.py b/agent_fox/engine/engine.py
index e4041d37..adfb24d6 100644
--- a/agent_fox/engine/engine.py
+++ b/agent_fox/engine/engine.py
@@ -359,6 +359,13 @@ async def _prepare_launch(
         if verdict != "allowed":
             return None
 
+        # Pre-flight: skip coder sessions when the task group is already
+        # complete, tests pass, and no unresolved critical/major findings.
+        if archetype == "coder" and attempt == 1:
+            skip = self._run_preflight(node_id)
+            if skip:
+                return None
+
         attempt_tracker[node_id] = attempt
         previous_error = error_tracker.get(node_id)
         instances = self._get_node_instances(node_id)
@@ -1239,6 +1246,67 @@ def _process_completed_parallel(
 
         return barrier_needed
 
+    def _run_preflight(self, node_id: str) -> bool:
+        """Run pre-flight check and skip the session if work is done.
+
+        Returns True if the session should be skipped, False otherwise.
+        Marks the node as completed and emits an audit event on skip.
+        """
+        from agent_fox.core.config import resolve_spec_root
+        from agent_fox.core.node_id import parse_node_id
+        from agent_fox.engine.preflight import PreflightVerdict, run_preflight
+
+        parsed = parse_node_id(node_id)
+        specs_dir = self._specs_dir
+        if specs_dir is None and self._full_config is not None:
+            specs_dir = resolve_spec_root(self._full_config, Path.cwd())
+        if specs_dir is None:
+            return False
+
+        verdict = run_preflight(
+            spec_name=parsed.spec_name,
+            group_number=parsed.group_number,
+            conn=self._knowledge_db_conn,
+            specs_dir=specs_dir,
+            cwd=Path.cwd(),
+        )
+        if verdict != PreflightVerdict.SKIP:
+            return False
+
+        if self._graph_sync is not None:
+            prev_status = self._graph_sync.node_states.get(node_id, "pending")
+            self._graph_sync.mark_completed(node_id)
+            emit_audit_event(
+                self._sink,
+                self._run_id,
+                AuditEventType.PREFLIGHT_SKIP,
+                node_id=node_id,
+                payload={
+                    "from_status": prev_status,
+                    "reason": "checkboxes done, no active findings, tests pass",
+                },
+            )
+
+        if self._knowledge_db_conn is not None:
+            try:
+                from agent_fox.engine.state import persist_node_status
+
+                persist_node_status(self._knowledge_db_conn, node_id, "completed")
+            except Exception:
+                logger.debug("Failed to persist preflight skip status", exc_info=True)
+
+        if self._task_callback is not None:
+            self._task_callback(
+                TaskEvent(
+                    node_id=node_id,
+                    status="completed",
+                    duration_s=0.0,
+                    archetype="coder",
+                )
+            )
+
+        logger.info("Preflight skip: %s", node_id)
+        return True
+
     def _get_node(self, node_id: str) -> Any | None:
         """Look up a TaskNode by ID, returning None if graph is unset."""
         if self._graph is not None:
diff --git a/agent_fox/engine/preflight.py b/agent_fox/engine/preflight.py
new file mode 100644
index 00000000..98b3e9df
--- /dev/null
+++ b/agent_fox/engine/preflight.py
@@ -0,0 +1,191 @@
+"""Pre-flight check: skip coder sessions when work is already complete.
+
+Before launching an expensive coder session, checks three gates:
+1. Task group completion — DB first, tasks.md fallback
+2. No active critical/major review findings
+3. Tests pass (only when gates 1 & 2 pass)
+
+If all gates pass the node is marked completed and the session is skipped.
+"""
+
+from __future__ import annotations
+
+import logging
+import subprocess
+from enum import StrEnum
+from pathlib import Path
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+_TEST_TIMEOUT_SECONDS = 300
+
+
+class PreflightVerdict(StrEnum):
+    LAUNCH = "launch"
+    SKIP = "skip"
+
+
+def is_task_group_done_db(
+    conn: Any,
+    spec_name: str,
+    group_number: int,
+) -> bool | None:
+    """Check plan_nodes DB for task group completion.
+
+    Returns True if the node status is 'completed', False if it exists
+    but is not completed, or None if the node is not found in the DB
+    (indicating the caller should fall back to tasks.md).
+    """
+    try:
+        row = conn.execute(
+            """
+            SELECT status
+            FROM plan_nodes
+            WHERE spec_name = ? AND group_number = ?
+            LIMIT 1
+            """,
+            [spec_name, group_number],
+        ).fetchone()
+    except Exception:
+        logger.debug(
+            "Failed to query plan_nodes for %s:%d",
+            spec_name,
+            group_number,
+            exc_info=True,
+        )
+        return None
+
+    if row is None:
+        return None
+    return row[0] == "completed"
+
+
+def is_task_group_done_file(
+    specs_dir: Path,
+    spec_name: str,
+    group_number: int,
+) -> bool:
+    """Check tasks.md checkbox state for a specific task group.
+
+    Returns True only when the task group exists and has completed=True.
+    """
+    from agent_fox.spec.parser import parse_tasks
+
+    tasks_path = specs_dir / spec_name / "tasks.md"
+    if not tasks_path.is_file():
+        return False
+    try:
+        groups = parse_tasks(tasks_path)
+    except Exception:
+        logger.debug(
+            "Failed to parse tasks.md for %s",
+            spec_name,
+            exc_info=True,
+        )
+        return False
+
+    for group in groups:
+        if group.number == group_number:
+            return group.completed
+    return False
+
+
+def is_task_group_done(
+    conn: Any | None,
+    spec_name: str,
+    group_number: int,
+    specs_dir: Path,
+) -> bool:
+    """Check whether a task group is already complete.
+
+    Uses the DB as the source of truth, falling back to tasks.md
+    when DB state is unavailable.
+    """
+    if conn is not None:
+        db_result = is_task_group_done_db(conn, spec_name, group_number)
+        if db_result is not None:
+            return db_result
+
+    return is_task_group_done_file(specs_dir, spec_name, group_number)
+
+
+def has_active_critical_findings(
+    conn: Any | None,
+    spec_name: str,
+    task_group: int,
+) -> bool:
+    """Return True if unresolved critical/major findings exist."""
+    if conn is None:
+        return False
+    try:
+        from agent_fox.knowledge.review_store import query_active_findings
+
+        findings = query_active_findings(conn, spec_name, str(task_group))
+        return any(f.severity in ("critical", "major") for f in findings)
+    except Exception:
+        logger.debug(
+            "Failed to query review findings for %s:%d",
+            spec_name,
+            task_group,
+            exc_info=True,
+        )
+        return False
+
+
+def do_tests_pass(cwd: Path) -> bool:
+    """Run ``make test`` and return True if exit code is 0."""
+    try:
+        result = subprocess.run(
+            ["make", "test"],
+            cwd=cwd,
+            capture_output=True,
+            timeout=_TEST_TIMEOUT_SECONDS,
+        )
+        return result.returncode == 0
+    except subprocess.TimeoutExpired:
+        logger.warning("Pre-flight test run timed out after %ds", _TEST_TIMEOUT_SECONDS)
+        return False
+    except Exception:
+        logger.debug("Pre-flight test run failed", exc_info=True)
+        return False
+
+
+def run_preflight(
+    spec_name: str,
+    group_number: int,
+    conn: Any | None,
+    specs_dir: Path,
+    cwd: Path,
+) -> PreflightVerdict:
+    """Run the pre-flight check for a coder session.
+
+    Gates are evaluated in order with short-circuit: if any gate
+    fails, the check returns LAUNCH immediately to avoid running
+    later (more expensive) gates.
+    """
+    if not is_task_group_done(conn, spec_name, group_number, specs_dir):
+        return PreflightVerdict.LAUNCH
+
+    if has_active_critical_findings(conn, spec_name, group_number):
+        logger.info(
+            "Preflight: %s:%d has done checkboxes but active findings, launching coder",
+            spec_name,
+            group_number,
+        )
+        return PreflightVerdict.LAUNCH
+
+    if not do_tests_pass(cwd):
+        logger.info(
+            "Preflight: %s:%d has done checkboxes but tests fail, launching coder",
+            spec_name,
+            group_number,
+        )
+        return PreflightVerdict.LAUNCH
+
+    logger.info(
+        "Preflight: %s:%d is complete — checkboxes done, no findings, tests pass. Skipping coder session.",
+        spec_name,
+        group_number,
+    )
+    return PreflightVerdict.SKIP
diff --git a/agent_fox/knowledge/audit.py b/agent_fox/knowledge/audit.py
index d8f9c755..18549065 100644
--- a/agent_fox/knowledge/audit.py
+++ b/agent_fox/knowledge/audit.py
@@ -89,6 +89,7 @@ class AuditEventType(StrEnum):
     CONSOLIDATION_COST = "consolidation.cost"
     SLEEP_COMPUTE_COMPLETE = "SLEEP_COMPUTE_COMPLETE"
     KNOWLEDGE_RETRIEVAL = "knowledge.retrieval"  # 113-REQ-7.1
+    PREFLIGHT_SKIP = "preflight.skip"
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/unit/engine/test_preflight.py b/tests/unit/engine/test_preflight.py
new file mode 100644
index 00000000..aa72507d
--- /dev/null
+++ b/tests/unit/engine/test_preflight.py
@@ -0,0 +1,213 @@
+"""Tests for the coder session pre-flight check.
+
+Verifies that the preflight module correctly identifies task groups
+that are already complete (checkboxes done, no findings, tests pass)
+and recommends skipping the coder session.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import duckdb
+import pytest
+
+from agent_fox.engine.preflight import (
+    PreflightVerdict,
+    has_active_critical_findings,
+    is_task_group_done,
+    is_task_group_done_db,
+    is_task_group_done_file,
+    run_preflight,
+)
+from agent_fox.knowledge.migrations import run_migrations
+
+
+def _make_conn() -> duckdb.DuckDBPyConnection:
+    conn = duckdb.connect(":memory:")
+    run_migrations(conn)
+    return conn
+
+
+def _insert_plan_node(
+    conn: duckdb.DuckDBPyConnection,
+    node_id: str,
+    spec_name: str,
+    group_number: int,
+    status: str = "pending",
+) -> None:
+    conn.execute(
+        """
+        INSERT INTO plan_nodes (id, spec_name, group_number, title, body, status)
+        VALUES (?, ?, ?, ?, '', ?)
+        """,
+        [node_id, spec_name, group_number, f"Task {group_number}", status],
+    )
+
+
+class TestIsTaskGroupDoneDb:
+    def test_completed_node_returns_true(self) -> None:
+        conn = _make_conn()
+        _insert_plan_node(conn, "spec:1", "spec", 1, "completed")
+        assert is_task_group_done_db(conn, "spec", 1) is True
+
+    def test_pending_node_returns_false(self) -> None:
+        conn = _make_conn()
+        _insert_plan_node(conn, "spec:1", "spec", 1, "pending")
+        assert is_task_group_done_db(conn, "spec", 1) is False
+
+    def test_missing_node_returns_none(self) -> None:
+        conn = _make_conn()
+        assert is_task_group_done_db(conn, "spec", 1) is None
+
+    def test_none_conn_returns_none(self) -> None:
+        assert is_task_group_done_db(None, "spec", 1) is None
+
+
+class TestIsTaskGroupDoneFile:
+    def test_completed_group_returns_true(self, tmp_path: Path) -> None:
+        spec_dir = tmp_path / "my_spec"
+        spec_dir.mkdir()
+        (spec_dir / "tasks.md").write_text(
+            "- [x] 1. First task\n  - Some details\n"
+        )
+        assert is_task_group_done_file(tmp_path, "my_spec", 1) is True
+
+    def test_incomplete_group_returns_false(self, tmp_path: Path) -> None:
+        spec_dir = tmp_path / "my_spec"
+        spec_dir.mkdir()
+        (spec_dir / "tasks.md").write_text(
+            "- [ ] 1. First task\n  - Some details\n"
+        )
+        assert is_task_group_done_file(tmp_path, "my_spec", 1) is False
+
+    def test_missing_tasks_file_returns_false(self, tmp_path: Path) -> None:
+        assert is_task_group_done_file(tmp_path, "my_spec", 1) is False
+
+    def test_wrong_group_number_returns_false(self, tmp_path: Path) -> None:
+        spec_dir = tmp_path / "my_spec"
+        spec_dir.mkdir()
+        (spec_dir / "tasks.md").write_text(
+            "- [x] 1. First task\n  - Some details\n"
+        )
+        assert is_task_group_done_file(tmp_path, "my_spec", 99) is False
+
+
+class TestIsTaskGroupDone:
+    def test_db_takes_precedence_over_file(self, tmp_path: Path) -> None:
+        conn = _make_conn()
+        _insert_plan_node(conn, "spec:1", "spec", 1, "pending")
+        spec_dir = tmp_path / "spec"
+        spec_dir.mkdir()
+        (spec_dir / "tasks.md").write_text("- [x] 1. Done task\n")
+        assert is_task_group_done(conn, "spec", 1, tmp_path) is False
+
+    def test_falls_back_to_file_when_db_missing(self, tmp_path: Path) -> None:
+        conn = _make_conn()
+        spec_dir = tmp_path / "spec"
+        spec_dir.mkdir()
+        (spec_dir / "tasks.md").write_text("- [x] 1. Done task\n")
+        assert is_task_group_done(conn, "spec", 1, tmp_path) is True
+
+    def test_falls_back_to_file_when_conn_none(self, tmp_path: Path) -> None:
+        spec_dir = tmp_path / "spec"
+        spec_dir.mkdir()
+        (spec_dir / "tasks.md").write_text("- [x] 1. Done task\n")
+        assert is_task_group_done(None, "spec", 1, tmp_path) is True
+
+
+class TestHasActiveCriticalFindings:
+    def test_no_findings_returns_false(self) -> None:
+        conn = _make_conn()
+        assert has_active_critical_findings(conn, "spec", 1) is False
+
+    def test_critical_finding_returns_true(self) -> None:
+        conn = _make_conn()
+        conn.execute(
+            """
+            INSERT INTO review_findings
+                (id, severity, description, spec_name, task_group, session_id)
+            VALUES
+                (gen_random_uuid(), 'critical', 'broken', 'spec', '1', 'sess-1')
+            """
+        )
+        assert has_active_critical_findings(conn, "spec", 1) is True
+
+    def test_major_finding_returns_true(self) -> None:
+        conn = _make_conn()
+        conn.execute(
+            """
+            INSERT INTO review_findings
+                (id, severity, description, spec_name, task_group, session_id)
+            VALUES
+                (gen_random_uuid(), 'major', 'needs work', 'spec', '1', 'sess-1')
+            """
+        )
+        assert has_active_critical_findings(conn, "spec", 1) is True
+
+    def test_minor_finding_returns_false(self) -> None:
+        conn = _make_conn()
+        conn.execute(
+            """
+            INSERT INTO review_findings
+                (id, severity, description, spec_name, task_group, session_id)
+            VALUES
+                (gen_random_uuid(), 'minor', 'nit', 'spec', '1', 'sess-1')
+            """
+        )
+        assert has_active_critical_findings(conn, "spec", 1) is False
+
+    def test_none_conn_returns_false(self) -> None:
+        assert has_active_critical_findings(None, "spec", 1) is False
+
+
+class TestRunPreflight:
+    def test_incomplete_task_returns_launch(self, tmp_path: Path) -> None:
+        conn = _make_conn()
+        _insert_plan_node(conn, "spec:1", "spec", 1, "pending")
+        verdict = run_preflight("spec", 1, conn, tmp_path, tmp_path)
+        assert verdict == PreflightVerdict.LAUNCH
+
+    def test_done_with_findings_returns_launch(self, tmp_path: Path) -> None:
+        conn = _make_conn()
+        _insert_plan_node(conn, "spec:1", "spec", 1, "completed")
+        conn.execute(
+            """
+            INSERT INTO review_findings
+                (id, severity, description, spec_name, task_group, session_id)
+            VALUES
+                (gen_random_uuid(), 'critical', 'broken', 'spec', '1', 'sess-1')
+            """
+        )
+        verdict = run_preflight("spec", 1, conn, tmp_path, tmp_path)
+        assert verdict == PreflightVerdict.LAUNCH
+
+    @patch("agent_fox.engine.preflight.do_tests_pass", return_value=False)
+    def test_done_no_findings_tests_fail_returns_launch(
+        self, _mock_tests: MagicMock, tmp_path: Path
+    ) -> None:
+        conn = _make_conn()
+        _insert_plan_node(conn, "spec:1", "spec", 1, "completed")
+        verdict = run_preflight("spec", 1, conn, tmp_path, tmp_path)
+        assert verdict == PreflightVerdict.LAUNCH
+
+    @patch("agent_fox.engine.preflight.do_tests_pass", return_value=True)
+    def test_all_gates_pass_returns_skip(
+        self, _mock_tests: MagicMock, tmp_path: Path
+    ) -> None:
+        conn = _make_conn()
+        _insert_plan_node(conn, "spec:1", "spec", 1, "completed")
+        verdict = run_preflight("spec", 1, conn, tmp_path, tmp_path)
+        assert verdict == PreflightVerdict.SKIP
+
+    @patch("agent_fox.engine.preflight.do_tests_pass", return_value=True)
+    def test_short_circuits_on_first_failure(
+        self, mock_tests: MagicMock, tmp_path: Path
+    ) -> None:
+        """Tests are not run when task group is incomplete."""
+        conn = _make_conn()
+        _insert_plan_node(conn, "spec:1", "spec", 1, "pending")
+        verdict = run_preflight("spec", 1, conn, tmp_path, tmp_path)
+        assert verdict == PreflightVerdict.LAUNCH
+        mock_tests.assert_not_called()