From 018bf1c8c9bb8d8d588afe509342d5f0a7ff584b Mon Sep 17 00:00:00 2001
From: Michael Kuehl
Date: Wed, 22 Apr 2026 09:48:21 +0200
Subject: [PATCH] feat(engine): add pre-flight check to skip coder sessions
 when work is done (fixes #511)

---
 agent_fox/engine/engine.py          |  68 +++++++++
 agent_fox/engine/preflight.py       | 191 +++++++++++++++++++++
 agent_fox/knowledge/audit.py        |   1 +
 tests/unit/engine/test_preflight.py | 213 ++++++++++++++++++++++
 4 files changed, 473 insertions(+)
 create mode 100644 agent_fox/engine/preflight.py
 create mode 100644 tests/unit/engine/test_preflight.py

diff --git a/agent_fox/engine/engine.py b/agent_fox/engine/engine.py
index e4041d37..adfb24d6 100644
--- a/agent_fox/engine/engine.py
+++ b/agent_fox/engine/engine.py
@@ -359,6 +359,13 @@ async def _prepare_launch(
         if verdict != "allowed":
             return None
 
+        # Pre-flight: skip coder sessions when the task group is already
+        # complete, tests pass, and no unresolved critical/major findings.
+        if archetype == "coder" and attempt == 1:
+            skip = self._run_preflight(node_id)
+            if skip:
+                return None
+
         attempt_tracker[node_id] = attempt
         previous_error = error_tracker.get(node_id)
         instances = self._get_node_instances(node_id)
@@ -1239,6 +1246,67 @@ def _process_completed_parallel(
 
         return barrier_needed
 
+    def _run_preflight(self, node_id: str) -> bool:
+        """Run pre-flight check and skip the session if work is done.
+
+        Returns True if the session should be skipped, False otherwise.
+        Marks the node as completed and emits an audit event on skip.
+        """
+        from agent_fox.core.config import resolve_spec_root
+        from agent_fox.core.node_id import parse_node_id
+        from agent_fox.engine.preflight import PreflightVerdict, run_preflight
+
+        parsed = parse_node_id(node_id)
+        specs_dir = self._specs_dir
+        if specs_dir is None and self._full_config is not None:
+            specs_dir = resolve_spec_root(self._full_config, Path.cwd())
+        if specs_dir is None:
+            return False
+
+        verdict = run_preflight(
+            spec_name=parsed.spec_name,
+            group_number=parsed.group_number,
+            conn=self._knowledge_db_conn,
+            specs_dir=specs_dir,
+            cwd=Path.cwd(),
+        )
+        if verdict != PreflightVerdict.SKIP:
+            return False
+
+        if self._graph_sync is not None:
+            prev_status = self._graph_sync.node_states.get(node_id, "pending")
+            self._graph_sync.mark_completed(node_id)
+            emit_audit_event(
+                self._sink,
+                self._run_id,
+                AuditEventType.PREFLIGHT_SKIP,
+                node_id=node_id,
+                payload={
+                    "from_status": prev_status,
+                    "reason": "checkboxes done, no active findings, tests pass",
+                },
+            )
+
+        if self._knowledge_db_conn is not None:
+            try:
+                from agent_fox.engine.state import persist_node_status
+
+                persist_node_status(self._knowledge_db_conn, node_id, "completed")
+            except Exception:
+                logger.debug("Failed to persist preflight skip status", exc_info=True)
+
+        if self._task_callback is not None:
+            self._task_callback(
+                TaskEvent(
+                    node_id=node_id,
+                    status="completed",
+                    duration_s=0.0,
+                    archetype="coder",
+                )
+            )
+
+        logger.info("Preflight skip: %s", node_id)
+        return True
+
     def _get_node(self, node_id: str) -> Any | None:
         """Look up a TaskNode by ID, returning None if graph is unset."""
         if self._graph is not None:
diff --git a/agent_fox/engine/preflight.py b/agent_fox/engine/preflight.py
new file mode 100644
index 00000000..98b3e9df
--- /dev/null
+++ b/agent_fox/engine/preflight.py
@@ -0,0 +1,191 @@
+"""Pre-flight check: skip coder sessions when work is already complete.
+
+Before launching an expensive coder session, checks three gates:
+1. Task group completion — DB first, tasks.md fallback
+2. No active critical/major review findings
+3. Tests pass (only when gates 1 & 2 pass)
+
+If all gates pass the node is marked completed and the session is skipped.
+"""
+
+from __future__ import annotations
+
+import logging
+import subprocess
+from enum import StrEnum
+from pathlib import Path
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+_TEST_TIMEOUT_SECONDS = 300
+
+
+class PreflightVerdict(StrEnum):
+    LAUNCH = "launch"
+    SKIP = "skip"
+
+
+def is_task_group_done_db(
+    conn: Any,
+    spec_name: str,
+    group_number: int,
+) -> bool | None:
+    """Check plan_nodes DB for task group completion.
+
+    Returns True if the node status is 'completed', False if it exists
+    but is not completed, or None if the node is not found in the DB
+    (indicating the caller should fall back to tasks.md).
+    """
+    try:
+        row = conn.execute(
+            """
+            SELECT status
+            FROM plan_nodes
+            WHERE spec_name = ? AND group_number = ?
+            LIMIT 1
+            """,
+            [spec_name, group_number],
+        ).fetchone()
+    except Exception:
+        logger.debug(
+            "Failed to query plan_nodes for %s:%d",
+            spec_name,
+            group_number,
+            exc_info=True,
+        )
+        return None
+
+    if row is None:
+        return None
+    return row[0] == "completed"
+
+
+def is_task_group_done_file(
+    specs_dir: Path,
+    spec_name: str,
+    group_number: int,
+) -> bool:
+    """Check tasks.md checkbox state for a specific task group.
+
+    Returns True only when the task group exists and has completed=True.
+    """
+    from agent_fox.spec.parser import parse_tasks
+
+    tasks_path = specs_dir / spec_name / "tasks.md"
+    if not tasks_path.is_file():
+        return False
+    try:
+        groups = parse_tasks(tasks_path)
+    except Exception:
+        logger.debug(
+            "Failed to parse tasks.md for %s",
+            spec_name,
+            exc_info=True,
+        )
+        return False
+
+    for group in groups:
+        if group.number == group_number:
+            return group.completed
+    return False
+
+
+def is_task_group_done(
+    conn: Any | None,
+    spec_name: str,
+    group_number: int,
+    specs_dir: Path,
+) -> bool:
+    """Check whether a task group is already complete.
+
+    Uses the DB as the source of truth, falling back to tasks.md
+    when DB state is unavailable.
+    """
+    if conn is not None:
+        db_result = is_task_group_done_db(conn, spec_name, group_number)
+        if db_result is not None:
+            return db_result
+
+    return is_task_group_done_file(specs_dir, spec_name, group_number)
+
+
+def has_active_critical_findings(
+    conn: Any | None,
+    spec_name: str,
+    task_group: int,
+) -> bool:
+    """Return True if unresolved critical/major findings exist."""
+    if conn is None:
+        return False
+    try:
+        from agent_fox.knowledge.review_store import query_active_findings
+
+        findings = query_active_findings(conn, spec_name, str(task_group))
+        return any(f.severity in ("critical", "major") for f in findings)
+    except Exception:
+        logger.debug(
+            "Failed to query review findings for %s:%d",
+            spec_name,
+            task_group,
+            exc_info=True,
+        )
+        return False
+
+
+def do_tests_pass(cwd: Path) -> bool:
+    """Run ``make test`` and return True if exit code is 0."""
+    try:
+        result = subprocess.run(
+            ["make", "test"],
+            cwd=cwd,
+            capture_output=True,
+            timeout=_TEST_TIMEOUT_SECONDS,
+        )
+        return result.returncode == 0
+    except subprocess.TimeoutExpired:
+        logger.warning("Pre-flight test run timed out after %ds", _TEST_TIMEOUT_SECONDS)
+        return False
+    except Exception:
+        logger.debug("Pre-flight test run failed", exc_info=True)
+        return False
+
+
+def run_preflight(
+    spec_name: str,
+    group_number: int,
+    conn: Any | None,
+    specs_dir: Path,
+    cwd: Path,
+) -> PreflightVerdict:
+    """Run the pre-flight check for a coder session.
+
+    Gates are evaluated in order with short-circuit: if any gate
+    fails, the check returns LAUNCH immediately to avoid running
+    later (more expensive) gates.
+    """
+    if not is_task_group_done(conn, spec_name, group_number, specs_dir):
+        return PreflightVerdict.LAUNCH
+
+    if has_active_critical_findings(conn, spec_name, group_number):
+        logger.info(
+            "Preflight: %s:%d has done checkboxes but active findings, launching coder",
+            spec_name,
+            group_number,
+        )
+        return PreflightVerdict.LAUNCH
+
+    if not do_tests_pass(cwd):
+        logger.info(
+            "Preflight: %s:%d has done checkboxes but tests fail, launching coder",
+            spec_name,
+            group_number,
+        )
+        return PreflightVerdict.LAUNCH
+
+    logger.info(
+        "Preflight: %s:%d is complete — checkboxes done, no findings, tests pass. Skipping coder session.",
+        spec_name,
+        group_number,
+    )
+    return PreflightVerdict.SKIP
diff --git a/agent_fox/knowledge/audit.py b/agent_fox/knowledge/audit.py
index d8f9c755..18549065 100644
--- a/agent_fox/knowledge/audit.py
+++ b/agent_fox/knowledge/audit.py
@@ -89,6 +89,7 @@ class AuditEventType(StrEnum):
     CONSOLIDATION_COST = "consolidation.cost"
     SLEEP_COMPUTE_COMPLETE = "SLEEP_COMPUTE_COMPLETE"
     KNOWLEDGE_RETRIEVAL = "knowledge.retrieval"  # 113-REQ-7.1
+    PREFLIGHT_SKIP = "preflight.skip"
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/unit/engine/test_preflight.py b/tests/unit/engine/test_preflight.py
new file mode 100644
index 00000000..aa72507d
--- /dev/null
+++ b/tests/unit/engine/test_preflight.py
@@ -0,0 +1,213 @@
+"""Tests for the coder session pre-flight check.
+
+Verifies that the preflight module correctly identifies task groups
+that are already complete (checkboxes done, no findings, tests pass)
+and recommends skipping the coder session.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import duckdb
+import pytest
+
+from agent_fox.engine.preflight import (
+    PreflightVerdict,
+    has_active_critical_findings,
+    is_task_group_done,
+    is_task_group_done_db,
+    is_task_group_done_file,
+    run_preflight,
+)
+from agent_fox.knowledge.migrations import run_migrations
+
+
+def _make_conn() -> duckdb.DuckDBPyConnection:
+    conn = duckdb.connect(":memory:")
+    run_migrations(conn)
+    return conn
+
+
+def _insert_plan_node(
+    conn: duckdb.DuckDBPyConnection,
+    node_id: str,
+    spec_name: str,
+    group_number: int,
+    status: str = "pending",
+) -> None:
+    conn.execute(
+        """
+        INSERT INTO plan_nodes (id, spec_name, group_number, title, body, status)
+        VALUES (?, ?, ?, ?, '', ?)
+        """,
+        [node_id, spec_name, group_number, f"Task {group_number}", status],
+    )
+
+
+class TestIsTaskGroupDoneDb:
+    def test_completed_node_returns_true(self) -> None:
+        conn = _make_conn()
+        _insert_plan_node(conn, "spec:1", "spec", 1, "completed")
+        assert is_task_group_done_db(conn, "spec", 1) is True
+
+    def test_pending_node_returns_false(self) -> None:
+        conn = _make_conn()
+        _insert_plan_node(conn, "spec:1", "spec", 1, "pending")
+        assert is_task_group_done_db(conn, "spec", 1) is False
+
+    def test_missing_node_returns_none(self) -> None:
+        conn = _make_conn()
+        assert is_task_group_done_db(conn, "spec", 1) is None
+
+    def test_none_conn_returns_none(self) -> None:
+        assert is_task_group_done_db(None, "spec", 1) is None
+
+
+class TestIsTaskGroupDoneFile:
+    def test_completed_group_returns_true(self, tmp_path: Path) -> None:
+        spec_dir = tmp_path / "my_spec"
+        spec_dir.mkdir()
+        (spec_dir / "tasks.md").write_text(
+            "- [x] 1. First task\n  - Some details\n"
+        )
+        assert is_task_group_done_file(tmp_path, "my_spec", 1) is True
+
+    def test_incomplete_group_returns_false(self, tmp_path: Path) -> None:
+        spec_dir = tmp_path / "my_spec"
+        spec_dir.mkdir()
+        (spec_dir / "tasks.md").write_text(
+            "- [ ] 1. First task\n  - Some details\n"
+        )
+        assert is_task_group_done_file(tmp_path, "my_spec", 1) is False
+
+    def test_missing_tasks_file_returns_false(self, tmp_path: Path) -> None:
+        assert is_task_group_done_file(tmp_path, "my_spec", 1) is False
+
+    def test_wrong_group_number_returns_false(self, tmp_path: Path) -> None:
+        spec_dir = tmp_path / "my_spec"
+        spec_dir.mkdir()
+        (spec_dir / "tasks.md").write_text(
+            "- [x] 1. First task\n  - Some details\n"
+        )
+        assert is_task_group_done_file(tmp_path, "my_spec", 99) is False
+
+
+class TestIsTaskGroupDone:
+    def test_db_takes_precedence_over_file(self, tmp_path: Path) -> None:
+        conn = _make_conn()
+        _insert_plan_node(conn, "spec:1", "spec", 1, "pending")
+        spec_dir = tmp_path / "spec"
+        spec_dir.mkdir()
+        (spec_dir / "tasks.md").write_text("- [x] 1. Done task\n")
+        assert is_task_group_done(conn, "spec", 1, tmp_path) is False
+
+    def test_falls_back_to_file_when_db_missing(self, tmp_path: Path) -> None:
+        conn = _make_conn()
+        spec_dir = tmp_path / "spec"
+        spec_dir.mkdir()
+        (spec_dir / "tasks.md").write_text("- [x] 1. Done task\n")
+        assert is_task_group_done(conn, "spec", 1, tmp_path) is True
+
+    def test_falls_back_to_file_when_conn_none(self, tmp_path: Path) -> None:
+        spec_dir = tmp_path / "spec"
+        spec_dir.mkdir()
+        (spec_dir / "tasks.md").write_text("- [x] 1. Done task\n")
+        assert is_task_group_done(None, "spec", 1, tmp_path) is True
+
+
+class TestHasActiveCriticalFindings:
+    def test_no_findings_returns_false(self) -> None:
+        conn = _make_conn()
+        assert has_active_critical_findings(conn, "spec", 1) is False
+
+    def test_critical_finding_returns_true(self) -> None:
+        conn = _make_conn()
+        conn.execute(
+            """
+            INSERT INTO review_findings
+                (id, severity, description, spec_name, task_group, session_id)
+            VALUES
+                (gen_random_uuid(), 'critical', 'broken', 'spec', '1', 'sess-1')
+            """
+        )
+        assert has_active_critical_findings(conn, "spec", 1) is True
+
+    def test_major_finding_returns_true(self) -> None:
+        conn = _make_conn()
+        conn.execute(
+            """
+            INSERT INTO review_findings
+                (id, severity, description, spec_name, task_group, session_id)
+            VALUES
+                (gen_random_uuid(), 'major', 'needs work', 'spec', '1', 'sess-1')
+            """
+        )
+        assert has_active_critical_findings(conn, "spec", 1) is True
+
+    def test_minor_finding_returns_false(self) -> None:
+        conn = _make_conn()
+        conn.execute(
+            """
+            INSERT INTO review_findings
+                (id, severity, description, spec_name, task_group, session_id)
+            VALUES
+                (gen_random_uuid(), 'minor', 'nit', 'spec', '1', 'sess-1')
+            """
+        )
+        assert has_active_critical_findings(conn, "spec", 1) is False
+
+    def test_none_conn_returns_false(self) -> None:
+        assert has_active_critical_findings(None, "spec", 1) is False
+
+
+class TestRunPreflight:
+    def test_incomplete_task_returns_launch(self, tmp_path: Path) -> None:
+        conn = _make_conn()
+        _insert_plan_node(conn, "spec:1", "spec", 1, "pending")
+        verdict = run_preflight("spec", 1, conn, tmp_path, tmp_path)
+        assert verdict == PreflightVerdict.LAUNCH
+
+    def test_done_with_findings_returns_launch(self, tmp_path: Path) -> None:
+        conn = _make_conn()
+        _insert_plan_node(conn, "spec:1", "spec", 1, "completed")
+        conn.execute(
+            """
+            INSERT INTO review_findings
+                (id, severity, description, spec_name, task_group, session_id)
+            VALUES
+                (gen_random_uuid(), 'critical', 'broken', 'spec', '1', 'sess-1')
+            """
+        )
+        verdict = run_preflight("spec", 1, conn, tmp_path, tmp_path)
+        assert verdict == PreflightVerdict.LAUNCH
+
+    @patch("agent_fox.engine.preflight.do_tests_pass", return_value=False)
+    def test_done_no_findings_tests_fail_returns_launch(
+        self, _mock_tests: MagicMock, tmp_path: Path
+    ) -> None:
+        conn = _make_conn()
+        _insert_plan_node(conn, "spec:1", "spec", 1, "completed")
+        verdict = run_preflight("spec", 1, conn, tmp_path, tmp_path)
+        assert verdict == PreflightVerdict.LAUNCH
+
+    @patch("agent_fox.engine.preflight.do_tests_pass", return_value=True)
+    def test_all_gates_pass_returns_skip(
+        self, _mock_tests: MagicMock, tmp_path: Path
+    ) -> None:
+        conn = _make_conn()
+        _insert_plan_node(conn, "spec:1", "spec", 1, "completed")
+        verdict = run_preflight("spec", 1, conn, tmp_path, tmp_path)
+        assert verdict == PreflightVerdict.SKIP
+
+    @patch("agent_fox.engine.preflight.do_tests_pass", return_value=True)
+    def test_short_circuits_on_first_failure(
+        self, mock_tests: MagicMock, tmp_path: Path
+    ) -> None:
+        """Tests are not run when task group is incomplete."""
+        conn = _make_conn()
+        _insert_plan_node(conn, "spec:1", "spec", 1, "pending")
+        verdict = run_preflight("spec", 1, conn, tmp_path, tmp_path)
+        assert verdict == PreflightVerdict.LAUNCH
+        mock_tests.assert_not_called()