agent-fox-dev · mickume · Apr 20, 2026
diff --git a/agent_fox/engine/reset.py b/agent_fox/engine/reset.py
@@ -36,6 +36,51 @@
 _RESETTABLE_STATUSES = frozenset({"failed", "blocked", "in_progress"})
 
 
+_SESSION_TABLES_ALL = (
+    "runs",
+    "session_outcomes",
+    "review_findings",
+    "verification_results",
+    "drift_findings",
+    "blocking_history",
+)
+
+
+def _clear_session_tables(
+    db_conn: duckdb.DuckDBPyConnection | None,
+    *,
+    spec_names: set[str] | None = None,
+) -> None:
+    """Delete stale session-scoped data so the next run starts clean.
+
+    When *spec_names* is ``None``, all rows are deleted (used by full resets).
+    When *spec_names* is provided, only rows matching those specs are removed
+    from tables that have a ``spec_name`` column; tables without one (``runs``)
+    are always fully cleared because a stale terminal ``run_status`` causes a
+    death-loop regardless of which spec triggered it.
+    """
+    if db_conn is None:
+        return
+    try:
+        db_conn.execute("DELETE FROM runs")
+        if spec_names is None:
+            for table in _SESSION_TABLES_ALL:
+                if table == "runs":
+                    continue
+                db_conn.execute(f"DELETE FROM {table}")  # noqa: S608
+        else:
+            specs = list(spec_names)
+            for table in _SESSION_TABLES_ALL:
+                if table == "runs":
+                    continue
+                db_conn.execute(
+                    f"DELETE FROM {table} WHERE spec_name = ANY(?)",  # noqa: S608
+                    [specs],
+                )
+    except Exception:
+        logger.debug("Failed to clear session tables", exc_info=True)
+
+
 def _persist_resets(
     db_conn: duckdb.DuckDBPyConnection | None,
     task_ids: list[str],
@@ -217,6 +262,8 @@ def reset_all(
             state.blocked_reasons.pop(task_id, None)
         _persist_resets(db_conn, reset_tasks)
 
+    _clear_session_tables(db_conn)
+
     return ResetResult(
         reset_tasks=reset_tasks,
         unblocked_tasks=[],  # Full reset has no cascade concept
@@ -310,6 +357,10 @@ def reset_task(
     # Persist updated state to DB
     _persist_resets(db_conn, reset_tasks + unblocked_tasks)
 
+    all_ids = reset_tasks + unblocked_tasks
+    spec_names = {parse_node_id(tid).spec_name for tid in all_ids}
+    _clear_session_tables(db_conn, spec_names=spec_names)
+
     return ResetResult(
         reset_tasks=reset_tasks,
         unblocked_tasks=unblocked_tasks,
@@ -386,6 +437,8 @@ def reset_spec(
     if non_pending:
         _persist_resets(db_conn, spec_node_ids)
 
+    _clear_session_tables(db_conn, spec_names={spec_name})
+
     return ResetResult(
         reset_tasks=non_pending,
         unblocked_tasks=[],
@@ -432,6 +485,9 @@ def _perform_hard_reset(
     # Persist resets to DB
     _persist_resets(db_conn, affected_ids)
 
+    # Clear session-scoped tables so the next run starts clean (issue #501)
+    _clear_session_tables(db_conn)
+
     return HardResetResult(
         reset_tasks=affected_ids,
         cleaned_worktrees=cleaned_worktrees,

diff --git a/tests/unit/engine/test_reset.py b/tests/unit/engine/test_reset.py
@@ -7,6 +7,8 @@
 
 from __future__ import annotations
 
+import uuid
+from datetime import UTC, datetime
 from pathlib import Path
 from unittest.mock import patch
 
@@ -16,7 +18,9 @@
 from agent_fox.engine.reset import (
     _task_id_to_branch_name,
     _task_id_to_worktree_path,
+    hard_reset_all,
     reset_all,
+    reset_spec,
     reset_task,
 )
 from agent_fox.engine.state import ExecutionState
@@ -347,3 +351,199 @@ def test_completed_task_populates_skipped_completed(self, tmp_path: Path) -> Non
             result = reset_task("s:1", worktrees_dir, repo_path, db_conn=db_conn)
 
         assert result.skipped_completed == ["s:1"]
+
+
+# ---------------------------------------------------------------------------
+# Session table cleanup (issue #501)
+# ---------------------------------------------------------------------------
+
+
+def _seed_session_tables(conn, spec_name: str = "s", node_id: str = "s:1") -> None:
+    """Populate session-scoped tables with stale data for cleanup tests."""
+    now = datetime.now(UTC).isoformat()
+    run_id = f"run_stale_{uuid.uuid4().hex[:8]}"
+    session_id = f"{node_id}:1"
+
+    conn.execute(
+        "INSERT INTO runs (id, plan_content_hash, status, started_at) VALUES (?, ?, 'block_limit', ?)",
+        [run_id, "hash123", now],
+    )
+    conn.execute(
+        "INSERT INTO session_outcomes (id, spec_name, task_group, node_id, status, created_at, run_id, archetype) "
+        "VALUES (?, ?, '1', ?, 'completed', ?, ?, 'reviewer')",
+        [str(uuid.uuid4()), spec_name, node_id, now, run_id],
+    )
+    conn.execute(
+        "INSERT INTO review_findings (id, severity, description, requirement_ref, spec_name, task_group, session_id) "
+        "VALUES (?, 'critical', 'stale finding', 'REQ-1', ?, '1', ?)",
+        [str(uuid.uuid4()), spec_name, session_id],
+    )
+    conn.execute(
+        "INSERT INTO blocking_history (id, spec_name, archetype, critical_count, threshold, blocked) "
+        "VALUES (?, ?, 'reviewer', 2, 3, true)",
+        [str(uuid.uuid4()), spec_name],
+    )
+    conn.execute(
+        "INSERT INTO verification_results (id, requirement_id, verdict, spec_name, task_group, session_id) "
+        "VALUES (?, 'REQ-1', 'fail', ?, '1', ?)",
+        [str(uuid.uuid4()), spec_name, session_id],
+    )
+    conn.execute(
+        "INSERT INTO drift_findings (id, severity, description, spec_name, task_group, session_id) "
+        "VALUES (?, 'major', 'stale drift', ?, '1', ?)",
+        [str(uuid.uuid4()), spec_name, session_id],
+    )
+
+
+def _count(conn, table: str) -> int:
+    return conn.execute(f"SELECT count(*) FROM {table}").fetchone()[0]
+
+
+class TestHardResetClearsSessionTables:
+    """Issue #501: hard_reset_all must clear session-scoped tables."""
+
+    def test_clears_runs(self, tmp_path: Path) -> None:
+        nodes = {"s:1": {"title": "T1"}}
+        db_conn = write_plan_to_db(nodes, [])
+        _seed_session_tables(db_conn)
+        state = _make_state({"s:1": "failed"})
+        state.session_history = []
+
+        with patch("agent_fox.engine.reset._load_state_or_raise", return_value=state):
+            hard_reset_all(tmp_path / "wt", tmp_path, tmp_path / "mem.jsonl", db_conn=db_conn)
+
+        assert _count(db_conn, "runs") == 0
+
+    def test_clears_session_outcomes(self, tmp_path: Path) -> None:
+        nodes = {"s:1": {"title": "T1"}}
+        db_conn = write_plan_to_db(nodes, [])
+        _seed_session_tables(db_conn)
+        state = _make_state({"s:1": "failed"})
+        state.session_history = []
+
+        with patch("agent_fox.engine.reset._load_state_or_raise", return_value=state):
+            hard_reset_all(tmp_path / "wt", tmp_path, tmp_path / "mem.jsonl", db_conn=db_conn)
+
+        assert _count(db_conn, "session_outcomes") == 0
+
+    def test_clears_review_findings(self, tmp_path: Path) -> None:
+        nodes = {"s:1": {"title": "T1"}}
+        db_conn = write_plan_to_db(nodes, [])
+        _seed_session_tables(db_conn)
+        state = _make_state({"s:1": "failed"})
+        state.session_history = []
+
+        with patch("agent_fox.engine.reset._load_state_or_raise", return_value=state):
+            hard_reset_all(tmp_path / "wt", tmp_path, tmp_path / "mem.jsonl", db_conn=db_conn)
+
+        assert _count(db_conn, "review_findings") == 0
+
+    def test_clears_blocking_history(self, tmp_path: Path) -> None:
+        nodes = {"s:1": {"title": "T1"}}
+        db_conn = write_plan_to_db(nodes, [])
+        _seed_session_tables(db_conn)
+        state = _make_state({"s:1": "failed"})
+        state.session_history = []
+
+        with patch("agent_fox.engine.reset._load_state_or_raise", return_value=state):
+            hard_reset_all(tmp_path / "wt", tmp_path, tmp_path / "mem.jsonl", db_conn=db_conn)
+
+        assert _count(db_conn, "blocking_history") == 0
+
+    def test_clears_verification_results(self, tmp_path: Path) -> None:
+        nodes = {"s:1": {"title": "T1"}}
+        db_conn = write_plan_to_db(nodes, [])
+        _seed_session_tables(db_conn)
+        state = _make_state({"s:1": "failed"})
+        state.session_history = []
+
+        with patch("agent_fox.engine.reset._load_state_or_raise", return_value=state):
+            hard_reset_all(tmp_path / "wt", tmp_path, tmp_path / "mem.jsonl", db_conn=db_conn)
+
+        assert _count(db_conn, "verification_results") == 0
+
+    def test_clears_drift_findings(self, tmp_path: Path) -> None:
+        nodes = {"s:1": {"title": "T1"}}
+        db_conn = write_plan_to_db(nodes, [])
+        _seed_session_tables(db_conn)
+        state = _make_state({"s:1": "failed"})
+        state.session_history = []
+
+        with patch("agent_fox.engine.reset._load_state_or_raise", return_value=state):
+            hard_reset_all(tmp_path / "wt", tmp_path, tmp_path / "mem.jsonl", db_conn=db_conn)
+
+        assert _count(db_conn, "drift_findings") == 0
+
+
+class TestSoftResetClearsSessionTables:
+    """Issue #501: soft reset must also clear blocking session state."""
+
+    def test_reset_all_clears_runs(self, tmp_path: Path) -> None:
+        nodes = {"s:1": {"title": "T1"}}
+        db_conn = write_plan_to_db(nodes, [])
+        _seed_session_tables(db_conn)
+        state = _make_state({"s:1": "blocked"})
+
+        with patch("agent_fox.engine.reset._load_state_or_raise", return_value=state):
+            reset_all(tmp_path / "wt", tmp_path, db_conn=db_conn)
+
+        assert _count(db_conn, "runs") == 0
+
+    def test_reset_all_clears_review_findings(self, tmp_path: Path) -> None:
+        nodes = {"s:1": {"title": "T1"}}
+        db_conn = write_plan_to_db(nodes, [])
+        _seed_session_tables(db_conn)
+        state = _make_state({"s:1": "blocked"})
+
+        with patch("agent_fox.engine.reset._load_state_or_raise", return_value=state):
+            reset_all(tmp_path / "wt", tmp_path, db_conn=db_conn)
+
+        assert _count(db_conn, "review_findings") == 0
+
+    def test_reset_all_clears_session_outcomes(self, tmp_path: Path) -> None:
+        nodes = {"s:1": {"title": "T1"}}
+        db_conn = write_plan_to_db(nodes, [])
+        _seed_session_tables(db_conn)
+        state = _make_state({"s:1": "blocked"})
+
+        with patch("agent_fox.engine.reset._load_state_or_raise", return_value=state):
+            reset_all(tmp_path / "wt", tmp_path, db_conn=db_conn)
+
+        assert _count(db_conn, "session_outcomes") == 0
+
+    def test_reset_task_clears_session_tables(self, tmp_path: Path) -> None:
+        nodes = {"s:1": {"title": "T1"}, "s:2": {"title": "T2"}}
+        db_conn = write_plan_to_db(nodes, [])
+        _seed_session_tables(db_conn, node_id="s:1")
+        state = _make_state({"s:1": "failed", "s:2": "pending"})
+
+        with patch("agent_fox.engine.reset._load_state_or_raise", return_value=state):
+            reset_task("s:1", tmp_path / "wt", tmp_path, db_conn=db_conn)
+
+        assert _count(db_conn, "runs") == 0
+        assert _count(db_conn, "review_findings") == 0
+
+
+class TestResetSpecClearsSessionTables:
+    """Issue #501: reset_spec must clear session tables for the spec."""
+
+    def test_clears_spec_session_data(self, tmp_path: Path) -> None:
+        nodes = {
+            "a:1": {"title": "T1", "spec_name": "a"},
+            "b:1": {"title": "T2", "spec_name": "b"},
+        }
+        db_conn = write_plan_to_db(nodes, [])
+        _seed_session_tables(db_conn, spec_name="a", node_id="a:1")
+        _seed_session_tables(db_conn, spec_name="b", node_id="b:1")
+        state = _make_state({"a:1": "blocked", "b:1": "pending"})
+
+        with patch("agent_fox.engine.reset._load_state_or_raise", return_value=state):
+            reset_spec("a", tmp_path / "wt", tmp_path, db_conn=db_conn)
+
+        # Spec "a" data cleared, spec "b" data preserved
+        a_findings = db_conn.execute("SELECT count(*) FROM review_findings WHERE spec_name = 'a'").fetchone()[0]
+        b_findings = db_conn.execute("SELECT count(*) FROM review_findings WHERE spec_name = 'b'").fetchone()[0]
+        assert a_findings == 0
+        assert b_findings == 1
+        # Runs always fully cleared
+        assert _count(db_conn, "runs") == 0