From c7f7f445da9c0e4ff0957385cc80ab6824881a16 Mon Sep 17 00:00:00 2001 From: Michael Kuehl Date: Thu, 23 Apr 2026 12:23:18 +0200 Subject: [PATCH] feat(knowledge): reinstate lightweight errata generation from blocking (fixes #522) --- agent_fox/engine/result_handler.py | 48 +++ agent_fox/knowledge/audit.py | 1 + agent_fox/knowledge/errata.py | 229 ++++++++++++++ agent_fox/knowledge/fox_provider.py | 34 +- agent_fox/knowledge/migrations.py | 27 ++ docs/memory.md | 7 + tests/property/knowledge/test_db_props.py | 4 +- .../knowledge/test_review_store_props.py | 2 +- tests/test_knowledge_pruning.py | 2 +- tests/unit/engine/test_errata_on_blocking.py | 246 +++++++++++++++ tests/unit/knowledge/test_db.py | 8 +- tests/unit/knowledge/test_errata.py | 290 ++++++++++++++++++ tests/unit/knowledge/test_fox_provider.py | 1 + tests/unit/knowledge/test_review_store.py | 2 +- 14 files changed, 890 insertions(+), 11 deletions(-) create mode 100644 agent_fox/knowledge/errata.py create mode 100644 tests/unit/engine/test_errata_on_blocking.py create mode 100644 tests/unit/knowledge/test_errata.py diff --git a/agent_fox/engine/result_handler.py b/agent_fox/engine/result_handler.py index 920883b8..ed45b9a7 100644 --- a/agent_fox/engine/result_handler.py +++ b/agent_fox/engine/result_handler.py @@ -116,9 +116,57 @@ def check_skeptic_blocking( ) if decision.should_block: self._block_task(decision.coder_node_id, state, decision.reason) + self._generate_errata(record) return True return False + def _generate_errata(self, record: SessionRecord) -> None: + """Generate errata from critical/major findings that caused blocking.""" + if self._knowledge_db_conn is None: + return + try: + from agent_fox.core.node_id import parse_node_id + from agent_fox.knowledge.errata import ( + generate_errata_from_findings, + persist_erratum_markdown, + store_errata, + ) + from agent_fox.knowledge.review_store import query_findings_by_session + + parsed = parse_node_id(record.node_id) + spec_name = parsed.spec_name + task_group = str(parsed.group_number) if parsed.group_number else "1" + session_id = f"{record.node_id}:{record.attempt}" + + findings = query_findings_by_session(self._knowledge_db_conn, session_id) + errata = generate_errata_from_findings(findings, spec_name, task_group) + if not errata: + return + + stored = store_errata(self._knowledge_db_conn, errata) + + from pathlib import Path + + persist_erratum_markdown(errata, Path.cwd()) + + emit_audit_event( + self._sink, + self._run_id, + AuditEventType.ERRATA_GENERATED, + node_id=record.node_id, + payload={ + "spec_name": spec_name, + "task_group": task_group, + "count": stored, + }, + ) + except Exception: + logger.warning( + "Failed to generate errata for %s", + record.node_id, + exc_info=True, + ) + def process( self, record: SessionRecord, diff --git a/agent_fox/knowledge/audit.py b/agent_fox/knowledge/audit.py index 18549065..2179c9a7 100644 --- a/agent_fox/knowledge/audit.py +++ b/agent_fox/knowledge/audit.py @@ -90,6 +90,7 @@ class AuditEventType(StrEnum): SLEEP_COMPUTE_COMPLETE = "SLEEP_COMPUTE_COMPLETE" KNOWLEDGE_RETRIEVAL = "knowledge.retrieval" # 113-REQ-7.1 PREFLIGHT_SKIP = "preflight.skip" + ERRATA_GENERATED = "errata.generated" # --------------------------------------------------------------------------- diff --git a/agent_fox/knowledge/errata.py b/agent_fox/knowledge/errata.py new file mode 100644 index 00000000..3f735730 --- /dev/null +++ b/agent_fox/knowledge/errata.py @@ -0,0 +1,229 @@ +"""Lightweight errata generation, storage, and retrieval. + +Auto-generates errata when reviewer blocking occurs, stores them in +DuckDB for retrieval during future coder sessions, and persists them +to ``docs/errata/`` for human visibility. + +Errata capture institutional knowledge: what went wrong, which +requirement was violated, and context about the failure. This enables +the "don't repeat mistakes" feedback loop. +""" + +from __future__ import annotations + +import logging +import uuid +from dataclasses import dataclass +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +import duckdb + +logger = logging.getLogger(__name__) + + +@dataclass(frozen=True) +class Erratum: + """A single erratum generated from a blocking review finding.""" + + id: str + spec_name: str + task_group: str + finding_summary: str + requirement_ref: str | None = None + fix_summary: str | None = None + created_at: datetime | None = None + + +def generate_errata_from_findings( + findings: list[Any], + spec_name: str, + task_group: str, +) -> list[Erratum]: + """Create errata from critical/major review findings. + + Each critical or major finding produces one erratum. Minor and + observation findings are excluded — they don't carry enough signal + to justify persisting as institutional knowledge. + + Args: + findings: List of ReviewFinding (or compatible) objects with + ``severity``, ``description``, and ``requirement_ref`` attrs. + spec_name: Spec being reviewed. + task_group: Task group within the spec. + + Returns: + List of Erratum objects ready for storage. + """ + errata: list[Erratum] = [] + for f in findings: + severity = getattr(f, "severity", "").lower() + if severity not in ("critical", "major"): + continue + errata.append( + Erratum( + id=str(uuid.uuid4()), + spec_name=spec_name, + task_group=task_group, + finding_summary=f"[{severity}] {f.description}", + requirement_ref=getattr(f, "requirement_ref", None), + ) + ) + return errata + + +def store_errata( + conn: duckdb.DuckDBPyConnection, + errata: list[Erratum], +) -> int: + """Insert errata into the DuckDB ``errata`` table. + + Returns the number of rows inserted. Silently returns 0 if the + errata table does not exist (e.g. migration not yet applied). + """ + if not errata: + return 0 + try: + for e in errata: + conn.execute( + "INSERT INTO errata " + "(id, spec_name, task_group, finding_summary, requirement_ref, fix_summary, created_at) " + "VALUES (?, ?, ?, ?, ?, ?, ?)", + [ + e.id, + e.spec_name, + e.task_group, + e.finding_summary, + e.requirement_ref, + e.fix_summary, + e.created_at or datetime.now(UTC), + ], + ) + except duckdb.CatalogException: + logger.debug("errata table does not exist, skipping store") + return 0 + except Exception: + logger.warning("Failed to store errata", exc_info=True) + return 0 + logger.info( + "Stored %d errata for %s/%s", + len(errata), + errata[0].spec_name, + errata[0].task_group, + ) + return len(errata) + + +def query_errata( + conn: duckdb.DuckDBPyConnection, + spec_name: str, + *, + limit: int = 10, +) -> list[Erratum]: + """Retrieve errata for a spec, most recent first. + + Returns an empty list if the table does not exist or the query + fails for any reason. + """ + try: + rows = conn.execute( + "SELECT id, spec_name, task_group, finding_summary, " + "requirement_ref, fix_summary, created_at " + "FROM errata WHERE spec_name = ? " + "ORDER BY created_at DESC LIMIT ?", + [spec_name, limit], + ).fetchall() + except Exception: + logger.debug("Could not query errata for %s", spec_name) + return [] + return [ + Erratum( + id=row[0], + spec_name=row[1], + task_group=row[2], + finding_summary=row[3], + requirement_ref=row[4], + fix_summary=row[5], + created_at=row[6], + ) + for row in rows + ] + + +def format_errata_for_prompt(errata: list[Erratum]) -> list[str]: + """Format errata as ``[ERRATA]``-prefixed strings for prompt injection. + + Returns one string per erratum, suitable for inclusion in coder + session context alongside ``[REVIEW]`` findings. + """ + result: list[str] = [] + for e in errata: + parts = [f"[ERRATA] {e.finding_summary}"] + if e.requirement_ref: + parts.append(f"(ref: {e.requirement_ref})") + if e.fix_summary: + parts.append(f"Fix: {e.fix_summary}") + result.append(" ".join(parts)) + return result + + +def persist_erratum_markdown( + errata: list[Erratum], + project_root: Path, +) -> Path | None: + """Write errata to a markdown file in ``docs/errata/``. + + Groups errata by spec_name and writes a single file per + invocation. Returns the path of the written file, or None if + no errata were provided or the write failed. + + The filename uses the spec_name to match the existing convention + in ``docs/errata/``. + """ + if not errata: + return None + + spec_name = errata[0].spec_name + errata_dir = project_root / "docs" / "errata" + + try: + errata_dir.mkdir(parents=True, exist_ok=True) + except OSError: + logger.warning("Failed to create errata directory: %s", errata_dir) + return None + + filename = f"{spec_name}_auto_errata.md" + filepath = errata_dir / filename + + lines = [ + f"# Errata: {spec_name} (auto-generated)", + "", + f"**Spec:** {spec_name}", + f"**Date:** {datetime.now(UTC).strftime('%Y-%m-%d')}", + "**Status:** Active", + "**Source:** Auto-generated from reviewer blocking findings", + "", + "## Findings", + "", + ] + + for i, e in enumerate(errata, 1): + lines.append(f"### Finding {i}") + lines.append("") + lines.append(f"**Summary:** {e.finding_summary}") + if e.requirement_ref: + lines.append(f"**Requirement:** {e.requirement_ref}") + lines.append(f"**Task Group:** {e.task_group}") + if e.fix_summary: + lines.append(f"**Fix:** {e.fix_summary}") + lines.append("") + + try: + filepath.write_text("\n".join(lines), encoding="utf-8") + except OSError: + logger.warning("Failed to write erratum markdown: %s", filepath) + return None + + logger.info("Wrote erratum markdown to %s", filepath) + return filepath diff --git a/agent_fox/knowledge/fox_provider.py b/agent_fox/knowledge/fox_provider.py index bf7d78fc..d3eea458 100644 --- a/agent_fox/knowledge/fox_provider.py +++ b/agent_fox/knowledge/fox_provider.py @@ -48,8 +48,8 @@ def retrieve( ) -> list[str]: """Retrieve knowledge context for an upcoming session. - Queries active critical/major review findings for the given spec - and returns them as ``[REVIEW]``-prefixed strings, capped at + Queries active critical/major review findings and errata for the + given spec and returns them as prefixed strings, capped at ``max_items``. Args: @@ -69,14 +69,18 @@ def retrieve( raise reviews = self._query_reviews(conn, spec_name) + errata = self._query_errata(conn, spec_name) + + combined = reviews + errata logger.debug( - "Retrieved %d review items for %s", + "Retrieved %d review + %d errata items for %s", len(reviews), + len(errata), spec_name, ) - return reviews[: self._config.max_items] + return combined[: self._config.max_items] def ingest( self, @@ -133,3 +137,25 @@ def _query_reviews( parts.append(f.description) result.append(f"[REVIEW] {' '.join(parts)}") return result + + def _query_errata( + self, + conn: Any, + spec_name: str, + ) -> list[str]: + """Query errata for the spec and format as prompt-ready strings. + + Handles missing ``errata`` table gracefully by returning an + empty list. + """ + try: + from agent_fox.knowledge.errata import format_errata_for_prompt, query_errata + + errata = query_errata(conn, spec_name) + return format_errata_for_prompt(errata) + except Exception: + logger.debug( + "Could not query errata for %s", + spec_name, + ) + return [] diff --git a/agent_fox/knowledge/migrations.py b/agent_fox/knowledge/migrations.py index 135ee245..b472bf6a 100644 --- a/agent_fox/knowledge/migrations.py +++ b/agent_fox/knowledge/migrations.py @@ -714,6 +714,28 @@ def _migrate_v18(conn: duckdb.DuckDBPyConnection) -> None: """) +def _migrate_v19(conn: duckdb.DuckDBPyConnection) -> None: + """Add errata table for lightweight errata generation from blocking findings. + + Stores errata auto-generated when reviewer blocking occurs: the finding + summary, optional requirement reference, and optional fix summary. + Scoped by spec_name for retrieval during future coder sessions. + + Uses CREATE TABLE IF NOT EXISTS for idempotency. + """ + conn.execute(""" + CREATE TABLE IF NOT EXISTS errata ( + id VARCHAR PRIMARY KEY, + spec_name VARCHAR NOT NULL, + task_group VARCHAR NOT NULL, + finding_summary TEXT NOT NULL, + requirement_ref VARCHAR, + fix_summary TEXT, + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP + ) + """) + + # Registry of all migrations, ordered by version. MIGRATIONS: list[Migration] = [ Migration( @@ -801,6 +823,11 @@ def _migrate_v18(conn: duckdb.DuckDBPyConnection) -> None: description="drop unused knowledge tables", apply=_migrate_v18, ), + Migration( + version=19, + description="add errata table for lightweight errata generation", + apply=_migrate_v19, + ), ] diff --git a/docs/memory.md b/docs/memory.md index f047f66f..b18389a2 100644 --- a/docs/memory.md +++ b/docs/memory.md @@ -2,6 +2,13 @@ _3175 facts | last updated: 2026-04-23_ +**2026-04-23 errata generation (issue #522):** Added lightweight errata +auto-generation from reviewer blocking. When a reviewer blocks a coder task +with critical/major findings, errata are stored in DuckDB (migration v19: +`errata` table) and written to `docs/errata/`. FoxKnowledgeProvider.retrieve() +now includes `[ERRATA]`-prefixed strings in coder context alongside `[REVIEW]` +findings. +1 new module (knowledge/errata.py), +27 tests. All 4319 tests pass. + **2026-04-23 strategy extraction (issue #518):** Extracted collaborator classes from the three largest files: blocking logic from result_handler.py to engine/blocking.py, dispatch strategies from engine.py to engine/dispatch.py diff --git a/tests/property/knowledge/test_db_props.py b/tests/property/knowledge/test_db_props.py index 91551fef..64be9716 100644 --- a/tests/property/knowledge/test_db_props.py +++ b/tests/property/knowledge/test_db_props.py @@ -31,6 +31,8 @@ "plan_edges", "plan_meta", "runs", + # Added by migration v19 (issue #522: errata generation) + "errata", } @@ -65,7 +67,7 @@ def test_n_open_close_cycles_produce_same_state(self, n: int, tmp_path_factory: version_count = db.connection.execute("SELECT COUNT(*) FROM schema_version").fetchone() assert version_count is not None - assert version_count[0] == 18 + assert version_count[0] == 19 tables = {r[0] for r in db.connection.execute("SHOW TABLES").fetchall()} assert tables == EXPECTED_TABLES diff --git a/tests/property/knowledge/test_review_store_props.py b/tests/property/knowledge/test_review_store_props.py index 13b4af4d..db8eab41 100644 --- a/tests/property/knowledge/test_review_store_props.py +++ b/tests/property/knowledge/test_review_store_props.py @@ -115,7 +115,7 @@ def test_migration_idempotency(self, n_runs: int) -> None: # Version should be 18 (latest migration: v18 drop unused knowledge tables) version = conn.execute("SELECT MAX(version) FROM schema_version").fetchone() assert version is not None - assert version[0] == 18 + assert version[0] == 19 # Tables should exist (v2 + v4 migrations; v3 tables dropped by v14) tables = conn.execute( diff --git a/tests/test_knowledge_pruning.py b/tests/test_knowledge_pruning.py index 86f710a9..d55843ce 100644 --- a/tests/test_knowledge_pruning.py +++ b/tests/test_knowledge_pruning.py @@ -475,7 +475,7 @@ def test_migration_v18_fresh_db(self) -> None: run_migrations(conn) version = conn.execute("SELECT MAX(version) FROM schema_version").fetchone()[0] - assert version == 18 + assert version == 19 conn.close() diff --git a/tests/unit/engine/test_errata_on_blocking.py b/tests/unit/engine/test_errata_on_blocking.py new file mode 100644 index 00000000..9881522c --- /dev/null +++ b/tests/unit/engine/test_errata_on_blocking.py @@ -0,0 +1,246 @@ +"""Tests for errata generation when reviewer blocking occurs. + +Verifies that SessionResultHandler._generate_errata is called when +check_skeptic_blocking returns True, and that errata are stored in +DuckDB and written to markdown. +""" + +from __future__ import annotations + +import uuid +from dataclasses import dataclass +from typing import Any +from unittest.mock import MagicMock, patch + +import duckdb +import pytest + +from agent_fox.engine.result_handler import SessionResultHandler +from agent_fox.engine.state import ExecutionState, SessionRecord + + +@dataclass(frozen=True) +class FakeBlockDecision: + should_block: bool + coder_node_id: str = "" + reason: str = "" + + +def _make_state() -> ExecutionState: + return ExecutionState(plan_hash="test-hash", node_states={}) + + +def _make_handler(*, knowledge_db_conn: Any = None) -> SessionResultHandler: + """Create a minimal SessionResultHandler for testing.""" + graph_sync = MagicMock() + graph_sync.node_states = {} + graph_sync.predecessors.return_value = [] + + return SessionResultHandler( + graph_sync=graph_sync, + routing_ladders={}, + retries_before_escalation=2, + max_retries=3, + task_callback=None, + sink=MagicMock(), + run_id="test-run-1", + graph=None, + archetypes_config=None, + knowledge_db_conn=knowledge_db_conn, + block_task_fn=MagicMock(), + check_block_budget_fn=MagicMock(), + ) + + +def _make_record( + node_id: str = "spec_42:1:reviewer:pre-review", + status: str = "completed", + archetype: str = "reviewer", + attempt: int = 1, +) -> SessionRecord: + return SessionRecord( + node_id=node_id, + attempt=attempt, + status=status, + input_tokens=100, + output_tokens=200, + cost=0.1, + duration_ms=5000, + error_message=None, + timestamp="2026-04-23T12:00:00", + archetype=archetype, + ) + + +def _setup_errata_db() -> duckdb.DuckDBPyConnection: + """Create in-memory DB with review_findings and errata tables.""" + from agent_fox.knowledge.migrations import run_migrations + + conn = duckdb.connect(":memory:") + run_migrations(conn) + return conn + + +class TestGenerateErrataOnBlocking: + def test_errata_generated_when_blocking_occurs(self) -> None: + conn = _setup_errata_db() + + finding_id = str(uuid.uuid4()) + conn.execute( + "INSERT INTO review_findings " + "(id, severity, description, requirement_ref, spec_name, task_group, session_id) " + "VALUES (?, 'critical', 'Auth bypass', 'REQ-1.1', 'spec_42', '1', 'spec_42:1:reviewer:pre-review:1')", + [finding_id], + ) + + handler = _make_handler(knowledge_db_conn=conn) + + record = _make_record() + with ( + patch( + "agent_fox.engine.result_handler.evaluate_review_blocking", + return_value=FakeBlockDecision( + should_block=True, + coder_node_id="spec_42:1", + reason="blocking", + ), + ), + patch("agent_fox.knowledge.errata.persist_erratum_markdown", return_value=None), + ): + state = _make_state() + handler.check_skeptic_blocking(record, state) + + rows = conn.execute("SELECT COUNT(*) FROM errata WHERE spec_name = 'spec_42'").fetchone() + assert rows is not None and rows[0] >= 1 + + conn.close() + + def test_no_errata_when_not_blocking(self) -> None: + conn = _setup_errata_db() + + handler = _make_handler(knowledge_db_conn=conn) + record = _make_record() + + with patch( + "agent_fox.engine.result_handler.evaluate_review_blocking", + return_value=FakeBlockDecision(should_block=False), + ): + state = _make_state() + result = handler.check_skeptic_blocking(record, state) + + assert result is False + rows = conn.execute("SELECT COUNT(*) FROM errata").fetchone() + assert rows is not None and rows[0] == 0 + + conn.close() + + def test_errata_generation_failure_does_not_propagate(self) -> None: + handler = _make_handler(knowledge_db_conn=None) + record = _make_record() + + with patch( + "agent_fox.engine.result_handler.evaluate_review_blocking", + return_value=FakeBlockDecision( + should_block=True, + coder_node_id="spec_42:1", + reason="blocking", + ), + ): + state = _make_state() + result = handler.check_skeptic_blocking(record, state) + + assert result is True + + def test_errata_only_for_critical_major(self) -> None: + conn = _setup_errata_db() + + f1_id = str(uuid.uuid4()) + f2_id = str(uuid.uuid4()) + conn.execute( + "INSERT INTO review_findings " + "(id, severity, description, spec_name, task_group, session_id) " + "VALUES (?, 'critical', 'Problem', 'spec_42', '1', 'spec_42:1:reviewer:pre-review:1'), " + " (?, 'minor', 'Style', 'spec_42', '1', 'spec_42:1:reviewer:pre-review:1')", + [f1_id, f2_id], + ) + + handler = _make_handler(knowledge_db_conn=conn) + record = _make_record() + + with ( + patch( + "agent_fox.engine.result_handler.evaluate_review_blocking", + return_value=FakeBlockDecision( + should_block=True, + coder_node_id="spec_42:1", + reason="blocking", + ), + ), + patch("agent_fox.knowledge.errata.persist_erratum_markdown", return_value=None), + ): + state = _make_state() + handler.check_skeptic_blocking(record, state) + + rows = conn.execute("SELECT COUNT(*) FROM errata WHERE spec_name = 'spec_42'").fetchone() + assert rows is not None and rows[0] == 1 + + row = conn.execute("SELECT finding_summary FROM errata WHERE spec_name = 'spec_42'").fetchone() + assert row is not None + assert "[critical]" in row[0] + + conn.close() + + +class TestErrataInFoxProvider: + def test_retrieve_includes_errata(self) -> None: + conn = _setup_errata_db() + + conn.execute( + "INSERT INTO errata (id, spec_name, task_group, finding_summary) " + "VALUES ('e1-uuid-placeholder', 'spec_42', '1', '[critical] Auth bypass')" + ) + + from agent_fox.core.config import KnowledgeProviderConfig + from agent_fox.knowledge.fox_provider import FoxKnowledgeProvider + + knowledge_db = MagicMock() + knowledge_db.connection = conn + config = KnowledgeProviderConfig() + + provider = FoxKnowledgeProvider(knowledge_db, config) + result = provider.retrieve("spec_42", "task description") + + assert any("[ERRATA]" in item for item in result) + assert any("Auth bypass" in item for item in result) + + conn.close() + + def test_retrieve_handles_missing_errata_table(self) -> None: + conn = duckdb.connect(":memory:") + conn.execute(""" + CREATE TABLE review_findings ( + id UUID PRIMARY KEY, + severity TEXT NOT NULL, + description TEXT NOT NULL, + requirement_ref TEXT, + spec_name TEXT NOT NULL, + task_group TEXT NOT NULL, + session_id TEXT NOT NULL, + superseded_by TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + category TEXT + ) + """) + + from agent_fox.core.config import KnowledgeProviderConfig + from agent_fox.knowledge.fox_provider import FoxKnowledgeProvider + + knowledge_db = MagicMock() + knowledge_db.connection = conn + config = KnowledgeProviderConfig() + + provider = FoxKnowledgeProvider(knowledge_db, config) + result = provider.retrieve("spec_42", "task description") + assert isinstance(result, list) + + conn.close() diff --git a/tests/unit/knowledge/test_db.py b/tests/unit/knowledge/test_db.py index 86be171e..401a6d26 100644 --- a/tests/unit/knowledge/test_db.py +++ b/tests/unit/knowledge/test_db.py @@ -34,6 +34,8 @@ "plan_edges", "plan_meta", "runs", + # Added by migration v19 (issue #522: errata generation) + "errata", } @@ -73,11 +75,11 @@ def test_version_1_recorded(self, knowledge_config: KnowledgeConfig) -> None: rows = db.connection.execute( "SELECT version, applied_at, description FROM schema_version ORDER BY version" ).fetchall() - assert len(rows) == 18 + assert len(rows) == 19 assert rows[0][0] == 1 assert rows[0][1] is not None # applied_at is a valid timestamp assert len(rows[0][2]) > 0 # description is non-empty - for i, expected_version in enumerate(range(1, 19)): + for i, expected_version in enumerate(range(1, 20)): assert rows[i][0] == expected_version db.close() @@ -134,7 +136,7 @@ def test_double_open_does_not_duplicate(self, knowledge_config: KnowledgeConfig) db2.open() count = db2.connection.execute("SELECT COUNT(*) FROM schema_version").fetchone() assert count is not None - assert count[0] == 18 + assert count[0] == 19 db2.close() diff --git a/tests/unit/knowledge/test_errata.py b/tests/unit/knowledge/test_errata.py new file mode 100644 index 00000000..3bfc0223 --- /dev/null +++ b/tests/unit/knowledge/test_errata.py @@ -0,0 +1,290 @@ +"""Tests for agent_fox.knowledge.errata — errata generation, storage, retrieval.""" + +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path + +import duckdb +import pytest + +from agent_fox.knowledge.errata import ( + Erratum, + format_errata_for_prompt, + generate_errata_from_findings, + persist_erratum_markdown, + query_errata, + store_errata, +) + + +# -- Helpers / stubs ----------------------------------------------------------- + + +@dataclass(frozen=True) +class FakeFinding: + """Minimal stub matching ReviewFinding's relevant attrs.""" + + severity: str + description: str + requirement_ref: str | None = None + + +def _create_errata_table(conn: duckdb.DuckDBPyConnection) -> None: + """Create the errata table for tests.""" + conn.execute(""" + CREATE TABLE IF NOT EXISTS errata ( + id VARCHAR PRIMARY KEY, + spec_name VARCHAR NOT NULL, + task_group VARCHAR NOT NULL, + finding_summary TEXT NOT NULL, + requirement_ref VARCHAR, + fix_summary TEXT, + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP + ) + """) + + +@pytest.fixture +def errata_conn() -> duckdb.DuckDBPyConnection: + """In-memory DuckDB with the errata table.""" + conn = duckdb.connect(":memory:") + _create_errata_table(conn) + yield conn # type: ignore[misc] + conn.close() + + +# -- generate_errata_from_findings --------------------------------------------- + + +class TestGenerateErrataFromFindings: + def test_critical_findings_produce_errata(self) -> None: + findings = [ + FakeFinding("critical", "Auth bypass in login flow", "REQ-1.1"), + FakeFinding("major", "Missing input validation", "REQ-2.3"), + ] + errata = generate_errata_from_findings(findings, "spec_42", "1") + + assert len(errata) == 2 + assert all(isinstance(e, Erratum) for e in errata) + assert "[critical]" in errata[0].finding_summary + assert "[major]" in errata[1].finding_summary + assert errata[0].requirement_ref == "REQ-1.1" + assert errata[0].spec_name == "spec_42" + assert errata[0].task_group == "1" + + def test_minor_and_observation_excluded(self) -> None: + findings = [ + FakeFinding("minor", "Style issue"), + FakeFinding("observation", "Could be better"), + FakeFinding("critical", "Real problem"), + ] + errata = generate_errata_from_findings(findings, "spec_1", "2") + + assert len(errata) == 1 + assert "[critical]" in errata[0].finding_summary + + def test_empty_findings_produce_empty_errata(self) -> None: + errata = generate_errata_from_findings([], "spec_1", "1") + assert errata == [] + + def test_all_minor_produce_empty_errata(self) -> None: + findings = [FakeFinding("minor", "Trivial")] + errata = generate_errata_from_findings(findings, "spec_1", "1") + assert errata == [] + + def test_errata_have_unique_ids(self) -> None: + findings = [ + FakeFinding("critical", "First"), + FakeFinding("critical", "Second"), + ] + errata = generate_errata_from_findings(findings, "spec_1", "1") + ids = [e.id for e in errata] + assert len(set(ids)) == 2 + + +# -- store_errata -------------------------------------------------------------- + + +class TestStoreErrata: + def test_stores_errata_and_returns_count(self, errata_conn: duckdb.DuckDBPyConnection) -> None: + errata = [ + Erratum(id="e1", spec_name="spec_1", task_group="1", finding_summary="[critical] Problem"), + Erratum(id="e2", spec_name="spec_1", task_group="1", finding_summary="[major] Issue"), + ] + count = store_errata(errata_conn, errata) + + assert count == 2 + rows = errata_conn.execute("SELECT COUNT(*) FROM errata").fetchone() + assert rows is not None and rows[0] == 2 + + def test_empty_list_returns_zero(self, errata_conn: duckdb.DuckDBPyConnection) -> None: + count = store_errata(errata_conn, []) + assert count == 0 + + def test_missing_table_returns_zero(self) -> None: + conn = duckdb.connect(":memory:") + errata = [Erratum(id="e1", spec_name="s", task_group="1", finding_summary="x")] + count = store_errata(conn, errata) + assert count == 0 + conn.close() + + +# -- query_errata -------------------------------------------------------------- + + +class TestQueryErrata: + def test_queries_by_spec_name(self, errata_conn: duckdb.DuckDBPyConnection) -> None: + errata_conn.execute( + "INSERT INTO errata (id, spec_name, task_group, finding_summary) " + "VALUES ('e1', 'spec_A', '1', 'Finding A'), " + " ('e2', 'spec_B', '1', 'Finding B'), " + " ('e3', 'spec_A', '2', 'Finding A2')" + ) + results = query_errata(errata_conn, "spec_A") + + assert len(results) == 2 + assert all(e.spec_name == "spec_A" for e in results) + + def test_respects_limit(self, errata_conn: duckdb.DuckDBPyConnection) -> None: + for i in range(5): + errata_conn.execute( + "INSERT INTO errata (id, spec_name, task_group, finding_summary) " + "VALUES (?, 'spec_X', '1', ?)", + [f"e{i}", f"Finding {i}"], + ) + results = query_errata(errata_conn, "spec_X", limit=3) + assert len(results) == 3 + + def test_missing_table_returns_empty(self) -> None: + conn = duckdb.connect(":memory:") + results = query_errata(conn, "spec_1") + assert results == [] + conn.close() + + def test_no_matching_spec_returns_empty(self, errata_conn: duckdb.DuckDBPyConnection) -> None: + errata_conn.execute( + "INSERT INTO errata (id, spec_name, task_group, finding_summary) " + "VALUES ('e1', 'other_spec', '1', 'Finding')" + ) + results = query_errata(errata_conn, "spec_1") + assert results == [] + + +# -- format_errata_for_prompt -------------------------------------------------- + + +class TestFormatErrataForPrompt: + def test_formats_basic_errata(self) -> None: + errata = [Erratum(id="e1", spec_name="s", task_group="1", finding_summary="[critical] Bad thing")] + result = format_errata_for_prompt(errata) + + assert len(result) == 1 + assert result[0] == "[ERRATA] [critical] Bad thing" + + def test_includes_requirement_ref(self) -> None: + errata = [ + Erratum( + id="e1", + spec_name="s", + task_group="1", + finding_summary="[critical] Bad thing", + requirement_ref="REQ-1.1", + ) + ] + result = format_errata_for_prompt(errata) + + assert "(ref: REQ-1.1)" in result[0] + + def test_includes_fix_summary(self) -> None: + errata = [ + Erratum( + id="e1", + spec_name="s", + task_group="1", + finding_summary="[critical] Bad thing", + fix_summary="Added validation", + ) + ] + result = format_errata_for_prompt(errata) + + assert "Fix: Added validation" in result[0] + + def test_empty_errata_returns_empty(self) -> None: + assert format_errata_for_prompt([]) == [] + + +# -- persist_erratum_markdown -------------------------------------------------- + + +class TestPersistErratumMarkdown: + def test_writes_markdown_file(self, tmp_path: Path) -> None: + errata = [ + Erratum( + id="e1", + spec_name="42_auth", + task_group="3", + finding_summary="[critical] Auth bypass", + requirement_ref="REQ-1.1", + ), + ] + path = persist_erratum_markdown(errata, tmp_path) + + assert path is not None + assert path.exists() + content = path.read_text() + assert "42_auth" in content + assert "[critical] Auth bypass" in content + assert "REQ-1.1" in content + + def test_empty_errata_returns_none(self, tmp_path: Path) -> None: + result = persist_erratum_markdown([], tmp_path) + assert result is None + + def test_creates_errata_directory(self, tmp_path: Path) -> None: + errata = [Erratum(id="e1", spec_name="s", task_group="1", finding_summary="x")] + path = persist_erratum_markdown(errata, tmp_path) + + assert path is not None + assert (tmp_path / "docs" / "errata").is_dir() + + +# -- Migration v19 ------------------------------------------------------------ + + +class TestMigrationV19: + def test_migration_creates_errata_table(self) -> None: + from agent_fox.knowledge.migrations import run_migrations + + conn = duckdb.connect(":memory:") + run_migrations(conn) + + tables = { + r[0] + for r in conn.execute( + "SELECT table_name FROM information_schema.tables WHERE table_schema = 'main'" + ).fetchall() + } + assert "errata" in tables + + # Verify columns + cols = { + r[0] + for r in conn.execute( + "SELECT column_name FROM information_schema.columns WHERE table_name = 'errata'" + ).fetchall() + } + assert {"id", "spec_name", "task_group", "finding_summary", "requirement_ref", "fix_summary", "created_at"} == cols + + conn.close() + + def test_migration_is_idempotent(self) -> None: + from agent_fox.knowledge.migrations import run_migrations + + conn = duckdb.connect(":memory:") + run_migrations(conn) + run_migrations(conn) + + rows = conn.execute("SELECT COUNT(*) FROM schema_version WHERE version = 19").fetchone() + assert rows is not None and rows[0] == 1 + conn.close() diff --git a/tests/unit/knowledge/test_fox_provider.py b/tests/unit/knowledge/test_fox_provider.py index cd97d9a8..57d28ecc 100644 --- a/tests/unit/knowledge/test_fox_provider.py +++ b/tests/unit/knowledge/test_fox_provider.py @@ -446,6 +446,7 @@ def test_engine_import_boundary(self) -> None: "agent_trace", "migrations", "fox_provider", + "errata", } # knowledge_harvest.py is the knowledge-engine integration pipeline diff --git a/tests/unit/knowledge/test_review_store.py b/tests/unit/knowledge/test_review_store.py index 4b280863..203de2f6 100644 --- a/tests/unit/knowledge/test_review_store.py +++ b/tests/unit/knowledge/test_review_store.py @@ -234,7 +234,7 @@ def test_migration_already_applied_skips(self) -> None: # Verify version is recorded version = conn.execute("SELECT MAX(version) FROM schema_version").fetchone() assert version is not None - assert version[0] == 18 + assert version[0] == 19 conn.close()