Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/hexgraph/agent/mcp_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

from hexgraph.db.models import Finding, Node, Project, Target
from hexgraph.db.session import session_scope
from hexgraph.engine.findings.findings import is_verified
from hexgraph.engine.findings.findings import coerce_evidence, is_verified
from hexgraph.models.finding import Finding as FModel


Expand Down Expand Up @@ -1054,7 +1054,7 @@ def list_findings(project_id: str, limit: int = 100, offset: int = 0,
limit, offset = 100, 0

def _row(f):
ev = f.evidence_json or {}
ev = coerce_evidence(f.evidence_json)
extra = ev.get("extra") or {}
row = {"id": f.id, "title": f.title, "severity": f.severity, "category": f.category,
"status": f.status, "finding_type": f.finding_type, "cwe": f.cwe,
Expand Down Expand Up @@ -1122,7 +1122,7 @@ def get_finding(finding_id: str) -> dict:
f = s.get(Finding, finding_id)
if f is None:
return {"error": "finding not found"}
ev = f.evidence_json or {}
ev = coerce_evidence(f.evidence_json)
verified = is_verified(ev)
return {"id": f.id, "title": f.title, "severity": f.severity, "confidence": f.confidence,
"category": f.category, "status": f.status, "finding_type": f.finding_type,
Expand Down
28 changes: 25 additions & 3 deletions src/hexgraph/engine/findings/findings.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import json
import re

from sqlalchemy.orm import Session
Expand All @@ -25,11 +26,32 @@
FINDING_TYPES = ("vulnerability", "recon", "harness", "fuzz_crash", "poc", "annotation", "other")


def is_verified(evidence: dict | None) -> bool:
def coerce_evidence(evidence: object) -> dict:
"""Normalize a finding's stored evidence to a dict for reading.

`Finding.evidence_json` is a JSON column, so a well-formed row deserializes to a
dict. But a legacy/hand-edited/double-encoded row can come back as a JSON-encoded
*string* (or some other scalar), and the many `(evidence or {}).get(...)` read sites
would then raise `AttributeError: 'str' object has no attribute 'get'` — one bad row
500ing an entire findings listing. Coerce defensively: pass dicts through, best-effort
parse a JSON string when it decodes to an object, and treat anything else as empty."""
if isinstance(evidence, dict):
return evidence
if isinstance(evidence, str):
try:
parsed = json.loads(evidence)
except (ValueError, TypeError):
return {}
return parsed if isinstance(parsed, dict) else {}
return {}
Comment thread
branover marked this conversation as resolved.


def is_verified(evidence: dict | str | None) -> bool:
"""True if a PoC verification was attached to this finding's evidence and it
passed (evidence.extra.verification.verified). The single source for the
`verified` flag surfaced by the API and MCP read tools."""
return bool((((evidence or {}).get("extra") or {}).get("verification") or {}).get("verified"))
ev = coerce_evidence(evidence)
return bool(((ev.get("extra") or {}).get("verification") or {}).get("verified"))


def classify_finding(task_type: str | None, category: str | None) -> str:
Expand Down Expand Up @@ -146,7 +168,7 @@ def row_to_payload(row: FindingRow) -> dict:
"category": row.category,
"summary": row.summary,
"reasoning": row.reasoning,
"evidence": row.evidence_json or {},
"evidence": coerce_evidence(row.evidence_json), # always an object, even if a row stored a string
}
if row.suggested_followups_json:
payload["suggested_followups"] = row.suggested_followups_json
Expand Down
40 changes: 40 additions & 0 deletions tests/test_evidence_coerce.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""`coerce_evidence` / `is_verified` must tolerate a non-dict `evidence_json`.

`Finding.evidence_json` is a JSON column, so a healthy row reads back as a dict. But a
legacy/hand-edited/double-encoded row can deserialize to a *string* (or other scalar), and
the many `(evidence or {}).get(...)` read sites would then raise `AttributeError: 'str'
object has no attribute 'get'` — one bad row 500ing a whole findings listing. These guard
that the read path coerces defensively (and recovers a double-encoded dict where it can).
"""

import json

from hexgraph.engine.findings.findings import coerce_evidence, is_verified

_VERIFIED = {"extra": {"verification": {"verified": True}}}


def test_coerce_passes_dicts_through_unchanged():
ev = {"extra": {"assurance": {"standard": "static"}}}
assert coerce_evidence(ev) is ev


def test_coerce_recovers_double_encoded_dict():
# A JSON column handed a string stores it double-encoded; it reads back as that string.
assert coerce_evidence(json.dumps(_VERIFIED)) == _VERIFIED


def test_coerce_degrades_non_object_to_empty_dict():
assert coerce_evidence("not json at all") == {} # unparseable string
assert coerce_evidence("[1, 2, 3]") == {} # valid JSON but not an object
assert coerce_evidence(None) == {}
assert coerce_evidence(42) == {}


def test_is_verified_handles_string_evidence_without_raising():
# The exact crash: a string evidence used to blow up the project endpoint.
assert is_verified("not json at all") is False
assert is_verified(json.dumps(_VERIFIED)) is True
assert is_verified(json.dumps({"extra": {"verification": {"verified": False}}})) is False
assert is_verified(None) is False
assert is_verified(_VERIFIED) is True
36 changes: 36 additions & 0 deletions tests/test_hidden_targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
exercised directly so it runs even without it.
"""

import json

from fastapi.testclient import TestClient

from hexgraph.api.app import create_app
Expand Down Expand Up @@ -173,6 +175,40 @@ def test_project_payload_splits_hidden_target_findings(hg_home):
assert full["hidden_findings"] == [] and full["hidden_targets"] == []


def test_project_payload_tolerates_string_evidence_json(hg_home):
"""Regression: a finding whose `evidence_json` reads back as a *string* (a legacy or
hand-edited double-encoded row) must NOT 500 the project endpoint. The read path coerces
it to an object — recovering a double-encoded dict where possible — so one malformed
finding can't take down the whole listing (the `is_verified` 'str has no .get' crash)."""
with session_scope() as s:
p = create_project(s, name="strev")
fw = ingest_file(s, p, fixture_path("synthetic_fw.bin"), name="fw")
fw.kind = TargetKind.firmware_image
child = ingest_file(s, p, fixture_path("vuln_httpd"), name="lib/x.so", parent=fw, visible=False)
s.flush()
# Assigning a str to a JSON column double-encodes it, so it reads back as a str.
# A double-encoded dict is recoverable; a non-JSON string degrades to {}.
s.add(Finding(project_id=p.id, target_id=child.id, task_id="t", title="dbl-encoded",
severity="high", confidence="high", category="auth", summary="s", reasoning="r",
evidence_json=json.dumps({"extra": {"verification": {"verified": True}}}),
finding_type="poc", status="new"))
s.add(Finding(project_id=p.id, target_id=fw.id, task_id="t", title="garbage-ev",
severity="low", confidence="low", category="auth", summary="s", reasoning="r",
evidence_json="not json at all", finding_type="vulnerability", status="new"))
pid = p.id

resp = TestClient(create_app()).get(f"/api/projects/{pid}")
assert resp.status_code == 200 # was 500 before the fix
body = resp.json()
# Visible finding with garbage evidence: coerced to an object, not verified.
g = next(f for f in body["findings"] if f["title"] == "garbage-ev")
assert g["evidence"] == {} and g["verified"] is False
# Hidden double-encoded finding: parsed back, its verification recovered.
d = next(f for f in body["hidden_findings"] if f["title"] == "dbl-encoded")
assert d["verified"] is True
assert d["evidence"]["extra"]["verification"]["verified"] is True


def test_project_payload_excludes_archived_target_findings(hg_home):
"""A finding on an ARCHIVED (soft-removed) target appears in NEITHER bucket — archive is
a deliberate removal, distinct from a merely-hidden child."""
Expand Down
Loading