diff --git a/src/hexgraph/agent/mcp_tools.py b/src/hexgraph/agent/mcp_tools.py index f602f62..dd0ee40 100644 --- a/src/hexgraph/agent/mcp_tools.py +++ b/src/hexgraph/agent/mcp_tools.py @@ -17,7 +17,7 @@ from hexgraph.db.models import Finding, Node, Project, Target from hexgraph.db.session import session_scope -from hexgraph.engine.findings.findings import is_verified +from hexgraph.engine.findings.findings import coerce_evidence, is_verified from hexgraph.models.finding import Finding as FModel @@ -1054,7 +1054,7 @@ def list_findings(project_id: str, limit: int = 100, offset: int = 0, limit, offset = 100, 0 def _row(f): - ev = f.evidence_json or {} + ev = coerce_evidence(f.evidence_json) extra = ev.get("extra") or {} row = {"id": f.id, "title": f.title, "severity": f.severity, "category": f.category, "status": f.status, "finding_type": f.finding_type, "cwe": f.cwe, @@ -1122,7 +1122,7 @@ def get_finding(finding_id: str) -> dict: f = s.get(Finding, finding_id) if f is None: return {"error": "finding not found"} - ev = f.evidence_json or {} + ev = coerce_evidence(f.evidence_json) verified = is_verified(ev) return {"id": f.id, "title": f.title, "severity": f.severity, "confidence": f.confidence, "category": f.category, "status": f.status, "finding_type": f.finding_type, diff --git a/src/hexgraph/engine/findings/findings.py b/src/hexgraph/engine/findings/findings.py index 60d0a36..87dac3b 100644 --- a/src/hexgraph/engine/findings/findings.py +++ b/src/hexgraph/engine/findings/findings.py @@ -2,6 +2,7 @@ from __future__ import annotations +import json import re from sqlalchemy.orm import Session @@ -25,11 +26,32 @@ FINDING_TYPES = ("vulnerability", "recon", "harness", "fuzz_crash", "poc", "annotation", "other") -def is_verified(evidence: dict | None) -> bool: +def coerce_evidence(evidence: object) -> dict: + """Normalize a finding's stored evidence to a dict for reading. + + `Finding.evidence_json` is a JSON column, so a well-formed row deserializes to a + dict. But a legacy/hand-edited/double-encoded row can come back as a JSON-encoded + *string* (or some other scalar), and the many `(evidence or {}).get(...)` read sites + would then raise `AttributeError: 'str' object has no attribute 'get'` — one bad row + 500ing an entire findings listing. Coerce defensively: pass dicts through, best-effort + parse a JSON string when it decodes to an object, and treat anything else as empty.""" + if isinstance(evidence, dict): + return evidence + if isinstance(evidence, str): + try: + parsed = json.loads(evidence) + except (ValueError, TypeError): + return {} + return parsed if isinstance(parsed, dict) else {} + return {} + + +def is_verified(evidence: dict | str | None) -> bool: """True if a PoC verification was attached to this finding's evidence and it passed (evidence.extra.verification.verified). The single source for the `verified` flag surfaced by the API and MCP read tools.""" - return bool((((evidence or {}).get("extra") or {}).get("verification") or {}).get("verified")) + ev = coerce_evidence(evidence) + return bool(((ev.get("extra") or {}).get("verification") or {}).get("verified")) def classify_finding(task_type: str | None, category: str | None) -> str: @@ -146,7 +168,7 @@ def row_to_payload(row: FindingRow) -> dict: "category": row.category, "summary": row.summary, "reasoning": row.reasoning, - "evidence": row.evidence_json or {}, + "evidence": coerce_evidence(row.evidence_json), # always an object, even if a row stored a string } if row.suggested_followups_json: payload["suggested_followups"] = row.suggested_followups_json diff --git a/tests/test_evidence_coerce.py b/tests/test_evidence_coerce.py new file mode 100644 index 0000000..e211502 --- /dev/null +++ b/tests/test_evidence_coerce.py @@ -0,0 +1,40 @@ +"""`coerce_evidence` / `is_verified` must tolerate a non-dict `evidence_json`. + +`Finding.evidence_json` is a JSON column, so a healthy row reads back as a dict. But a +legacy/hand-edited/double-encoded row can deserialize to a *string* (or other scalar), and +the many `(evidence or {}).get(...)` read sites would then raise `AttributeError: 'str' +object has no attribute 'get'` — one bad row 500ing a whole findings listing. These guard +that the read path coerces defensively (and recovers a double-encoded dict where it can). +""" + +import json + +from hexgraph.engine.findings.findings import coerce_evidence, is_verified + +_VERIFIED = {"extra": {"verification": {"verified": True}}} + + +def test_coerce_passes_dicts_through_unchanged(): + ev = {"extra": {"assurance": {"standard": "static"}}} + assert coerce_evidence(ev) is ev + + +def test_coerce_recovers_double_encoded_dict(): + # A JSON column handed a string stores it double-encoded; it reads back as that string. + assert coerce_evidence(json.dumps(_VERIFIED)) == _VERIFIED + + +def test_coerce_degrades_non_object_to_empty_dict(): + assert coerce_evidence("not json at all") == {} # unparseable string + assert coerce_evidence("[1, 2, 3]") == {} # valid JSON but not an object + assert coerce_evidence(None) == {} + assert coerce_evidence(42) == {} + + +def test_is_verified_handles_string_evidence_without_raising(): + # The exact crash: a string evidence used to blow up the project endpoint. + assert is_verified("not json at all") is False + assert is_verified(json.dumps(_VERIFIED)) is True + assert is_verified(json.dumps({"extra": {"verification": {"verified": False}}})) is False + assert is_verified(None) is False + assert is_verified(_VERIFIED) is True diff --git a/tests/test_hidden_targets.py b/tests/test_hidden_targets.py index 439ee8b..b4e0445 100644 --- a/tests/test_hidden_targets.py +++ b/tests/test_hidden_targets.py @@ -10,6 +10,8 @@ exercised directly so it runs even without it. """ +import json + from fastapi.testclient import TestClient from hexgraph.api.app import create_app @@ -173,6 +175,40 @@ def test_project_payload_splits_hidden_target_findings(hg_home): assert full["hidden_findings"] == [] and full["hidden_targets"] == [] +def test_project_payload_tolerates_string_evidence_json(hg_home): + """Regression: a finding whose `evidence_json` reads back as a *string* (a legacy or + hand-edited double-encoded row) must NOT 500 the project endpoint. The read path coerces + it to an object — recovering a double-encoded dict where possible — so one malformed + finding can't take down the whole listing (the `is_verified` 'str has no .get' crash).""" + with session_scope() as s: + p = create_project(s, name="strev") + fw = ingest_file(s, p, fixture_path("synthetic_fw.bin"), name="fw") + fw.kind = TargetKind.firmware_image + child = ingest_file(s, p, fixture_path("vuln_httpd"), name="lib/x.so", parent=fw, visible=False) + s.flush() + # Assigning a str to a JSON column double-encodes it, so it reads back as a str. + # A double-encoded dict is recoverable; a non-JSON string degrades to {}. + s.add(Finding(project_id=p.id, target_id=child.id, task_id="t", title="dbl-encoded", + severity="high", confidence="high", category="auth", summary="s", reasoning="r", + evidence_json=json.dumps({"extra": {"verification": {"verified": True}}}), + finding_type="poc", status="new")) + s.add(Finding(project_id=p.id, target_id=fw.id, task_id="t", title="garbage-ev", + severity="low", confidence="low", category="auth", summary="s", reasoning="r", + evidence_json="not json at all", finding_type="vulnerability", status="new")) + pid = p.id + + resp = TestClient(create_app()).get(f"/api/projects/{pid}") + assert resp.status_code == 200 # was 500 before the fix + body = resp.json() + # Visible finding with garbage evidence: coerced to an object, not verified. + g = next(f for f in body["findings"] if f["title"] == "garbage-ev") + assert g["evidence"] == {} and g["verified"] is False + # Hidden double-encoded finding: parsed back, its verification recovered. + d = next(f for f in body["hidden_findings"] if f["title"] == "dbl-encoded") + assert d["verified"] is True + assert d["evidence"]["extra"]["verification"]["verified"] is True + + def test_project_payload_excludes_archived_target_findings(hg_home): """A finding on an ARCHIVED (soft-removed) target appears in NEITHER bucket — archive is a deliberate removal, distinct from a merely-hidden child."""