Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions src/hexgraph/agent/mcp_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

from hexgraph.db.models import Finding, Node, Project, Target
from hexgraph.db.session import session_scope
from hexgraph.engine.findings.findings import coerce_evidence, is_verified
from hexgraph.engine.findings.findings import coerce_evidence, dig_dict, is_verified
from hexgraph.models.finding import Finding as FModel


Expand Down Expand Up @@ -1055,20 +1055,23 @@ def list_findings(project_id: str, limit: int = 100, offset: int = 0,

def _row(f):
ev = coerce_evidence(f.evidence_json)
extra = ev.get("extra") or {}
# extra (and its children) are agent-authored free-form — any can be a non-dict, which
# the `(x or {}).get` idiom doesn't guard. Treat a non-dict at any level as absent.
extra = ev.get("extra")
extra = extra if isinstance(extra, dict) else {}
row = {"id": f.id, "title": f.title, "severity": f.severity, "category": f.category,
"status": f.status, "finding_type": f.finding_type, "cwe": f.cwe,
"verified": is_verified(ev), "target_id": f.target_id,
"function": ev.get("function"),
"assurance": compact_assurance(assurance_of(ev))}
ver = extra.get("verification")
if ver:
if isinstance(ver, dict):
row["verification"] = {"verified": bool(ver.get("verified")), "detail": ver.get("detail")}
fz = extra.get("fuzz")
if fz:
if isinstance(fz, dict):
# coverage_instrumented=False => a black-box run; don't over-trust dedup.
row["fuzz"] = {
"exploitability": (fz.get("exploitability") or {}).get("rating"),
"exploitability": dig_dict(fz, "exploitability", "rating"),
"coverage_instrumented": fz.get("coverage_instrumented"),
"dupe_count": fz.get("dupe_count"),
}
Expand Down
18 changes: 18 additions & 0 deletions src/hexgraph/db/jsontypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,24 @@ def coerce_evidence(evidence: object) -> dict:
return {}


def dig_dict(obj: object, *keys: str):
    """Follow ``keys`` through nested dicts, giving up with ``None`` at the first non-dict.

    Starting from ``obj``, each key is looked up with ``dict.get`` on the value reached
    so far. If that value is not a ``dict`` the walk stops and ``None`` is returned; a
    missing key also produces ``None``. Called with no keys, ``obj`` is returned as-is.

    Why this exists: the common ``(d.get(k) or {}).get(k2)`` pattern only protects
    against a *falsy* intermediate value. A *truthy non-dict* — for instance an agent
    that stored free-text prose in ``evidence.extra.verification`` where an object was
    expected — still fails with ``'str' object has no attribute 'get'``. Because the
    Finding schema's ``extra`` is deliberately free-form, its children may be any JSON
    type, so nested reads into agent-authored evidence should go through this helper.
    """
    node = obj
    for key in keys:
        if isinstance(node, dict):
            # Safe to descend; an absent key simply becomes None.
            node = node.get(key)
        else:
            # Wrong type at this level (str, list, None, …): treat as absent.
            return None
    return node


class JSONDict(TypeDecorator):
"""A `JSON` column whose Python value is ALWAYS a dict (or `None`) on read.

Expand Down
12 changes: 10 additions & 2 deletions src/hexgraph/engine/findings/assurance.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

from __future__ import annotations

from hexgraph.db.jsontypes import dig_dict

# Standards (what is claimed)
CODE_PRESENT = "code_present" # Standard A — the flaw exists in code
INPUT_REACHABLE = "input_reachable" # Standard B — reachable/triggerable via user input
Expand Down Expand Up @@ -62,8 +64,14 @@ def assurance_of(evidence: dict | None) -> dict | None:
"""The finding's assurance triple, if any. Canonical location is `evidence.extra.assurance`;
a PoC also nests it under `evidence.extra.verification.assurance` (verify_poc) — read both so
callers have one accessor regardless of which path produced the finding."""
extra = (evidence or {}).get("extra") or {}
return extra.get("assurance") or ((extra.get("verification") or {}).get("assurance"))
# Navigate defensively: evidence.extra (and its children) are agent-authored free-form, so
# any level can be a non-dict — return the first non-empty assurance dict, else None, so a
# malformed `extra`/`verification` (e.g. a string) yields None instead of crashing a caller.
extra = dig_dict(evidence, "extra")
for candidate in (dig_dict(extra, "assurance"), dig_dict(extra, "verification", "assurance")):
if isinstance(candidate, dict) and candidate:
return candidate
return None


def default_for(finding_type: str | None) -> dict | None:
Expand Down
7 changes: 4 additions & 3 deletions src/hexgraph/engine/findings/findings.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from sqlalchemy.orm import Session

from hexgraph.db.jsontypes import coerce_evidence # re-exported: the canonical evidence read coercion
from hexgraph.db.jsontypes import coerce_evidence, dig_dict # re-exported: canonical evidence read helpers
from hexgraph.db.models import EdgeType
from hexgraph.db.models import Finding as FindingRow
from hexgraph.db.models import FindingStatus, Task
Expand All @@ -30,8 +30,9 @@ def is_verified(evidence: dict | str | None) -> bool:
"""True if a PoC verification was attached to this finding's evidence and it
passed (evidence.extra.verification.verified). The single source for the
`verified` flag surfaced by the API and MCP read tools."""
ev = coerce_evidence(evidence)
return bool(((ev.get("extra") or {}).get("verification") or {}).get("verified"))
# dig_dict (not `(x or {}).get`): evidence.extra.verification can be a non-dict (an agent
# wrote prose where the {verified,…} object belongs), which the `or {}` idiom doesn't guard.
return bool(dig_dict(coerce_evidence(evidence), "extra", "verification", "verified"))


def classify_finding(task_type: str | None, category: str | None) -> str:
Expand Down
39 changes: 39 additions & 0 deletions tests/test_evidence_coerce.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

import json

from hexgraph.db.jsontypes import dig_dict
from hexgraph.engine.findings.assurance import assurance_of
from hexgraph.engine.findings.findings import coerce_evidence, is_verified

_VERIFIED = {"extra": {"verification": {"verified": True}}}
Expand Down Expand Up @@ -38,3 +40,40 @@ def test_is_verified_handles_string_evidence_without_raising():
assert is_verified(json.dumps({"extra": {"verification": {"verified": False}}})) is False
assert is_verified(None) is False
assert is_verified(_VERIFIED) is True


# --- nested non-dict: extra / verification / assurance can themselves be the wrong type ---
# (e.g. an agent wrote evidence.extra.verification as free-text prose). The `(x or {}).get`
# idiom only guards a falsy intermediate; a truthy non-dict still crashed. dig_dict guards it.

def test_dig_dict_walks_nested_dicts():
    """dig_dict returns the leaf for a full path and None when a key is absent."""
    three_levels = {"a": {"b": {"c": 1}}}
    assert dig_dict(three_levels, "a", "b", "c") == 1

    # missing leaf
    leafless = {"a": {"b": {}}}
    assert dig_dict(leafless, "a", "b", "c") is None

    assert dig_dict({}, "a") is None


def test_dig_dict_yields_none_on_a_truthy_non_dict_level():
    """Every (object, path) pair below hits a non-dict level, so dig_dict must give None."""
    cases = [
        # the real shape that crashed: extra.verification is a string, not a {verified:…} object
        ({"extra": {"verification": "prose"}}, ("extra", "verification", "verified")),
        # extra itself non-dict
        ({"extra": "a string"}, ("extra", "verification")),
        # the root object is not a dict at all
        ("not even a dict", ("extra",)),
        (None, ("extra",)),
    ]
    for root, path in cases:
        assert dig_dict(root, *path) is None


def test_is_verified_handles_nondict_extra_and_verification():
    """is_verified reports False for malformed nesting and True for a well-formed object."""
    # the field report: verification is prose rather than an object
    prose_verification = {"extra": {"verification": "PowerPC disasm prose…"}}
    assert is_verified(prose_verification) is False

    prose_extra = {"extra": "a string instead of a dict"}
    assert is_verified(prose_extra) is False

    # still works for the expected shape
    well_formed = {"extra": {"verification": {"verified": True}}}
    assert is_verified(well_formed) is True


def test_assurance_of_handles_nondict_nesting():
    """assurance_of recovers a valid assurance dict where one exists and otherwise gives None."""
    # verification is a string but a valid extra.assurance is still recovered (not crashed/lost)
    mixed = {"extra": {"verification": "prose", "assurance": {"standard": "input_reachable"}}}
    assert assurance_of(mixed) == {"standard": "input_reachable"}

    # fully malformed nesting → None, no raise
    for malformed in ({"extra": "a string"}, {"extra": {"assurance": "not a dict"}}, None):
        assert assurance_of(malformed) is None

    # falls back to verification.assurance when extra.assurance is absent
    nested_only = {"extra": {"verification": {"assurance": {"standard": "code_present"}}}}
    assert assurance_of(nested_only) == {"standard": "code_present"}
34 changes: 33 additions & 1 deletion tests/test_evidence_json_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from fastapi.testclient import TestClient

from hexgraph.api.app import create_app
from hexgraph.db.models import Finding
from hexgraph.db.models import Finding, TargetKind
from hexgraph.db.session import session_scope
from hexgraph.engine.graph.dedup import dedupe_findings
from hexgraph.engine.findings.report import build_report_md
Expand Down Expand Up @@ -55,6 +55,38 @@ def test_column_coerces_non_dict_evidence_to_dict_on_read(hg_home):
assert rows["normal"] == {"a": 1} # untouched


def test_project_endpoint_tolerates_nondict_nested_evidence(hg_home):
    """Regression test for the reported field crash on GET /api/projects/{id}.

    The evidence is a proper dict, but a nested value (`extra.verification`) is a STRING —
    an agent wrote prose where a {verified,…} object belongs. The column boundary can't
    help (extra is intentionally free-form), so the read accessors must navigate
    defensively. The endpoint must answer 200, not 500.
    """
    # extra.verification is prose, not a {verified,…} object (real shape from the field).
    prose_verification_evidence = {
        "function": "f",
        "extra": {
            "verification": "PowerPC disasm prose, not an object",
            "assurance": {"standard": "input_reachable"},
        },
    }
    # extra itself a string (more degenerate).
    prose_extra_evidence = {"function": "g", "extra": "a string instead of a dict"}

    with session_scope() as session:
        project = create_project(session, name="nested")
        firmware = ingest_file(session, project, fixture_path("synthetic_fw.bin"), name="fw")
        firmware.kind = TargetKind.firmware_image
        hidden_child = ingest_file(
            session,
            project,
            fixture_path("vuln_httpd"),
            name="lib/x.so",
            parent=firmware,
            visible=False,
        )
        session.flush()
        # Prose verification goes on the hidden child target.
        _add_finding(
            session,
            project_id=project.id,
            target_id=hidden_child.id,
            title="prose-verification",
            finding_type="poc",
            evidence_json=prose_verification_evidence,
        )
        # String `extra` goes on the visible firmware target.
        _add_finding(
            session,
            project_id=project.id,
            target_id=firmware.id,
            title="prose-extra",
            evidence_json=prose_extra_evidence,
        )
        project_id = project.id

    client = TestClient(create_app())
    response = client.get(f"/api/projects/{project_id}")
    # was 500 (is_verified: 'str' object has no attribute 'get')
    assert response.status_code == 200

    payload = response.json()
    # Neither finding is verified; both serialize without error.
    hidden_row = next(
        row for row in payload["hidden_findings"] if row["title"] == "prose-verification"
    )
    assert hidden_row["verified"] is False
    visible_row = next(row for row in payload["findings"] if row["title"] == "prose-extra")
    assert visible_row["verified"] is False


def test_report_build_tolerates_string_evidence(hg_home):
"""build_report_md reads `f.evidence_json or {}` then `.get(...)` — would crash on a
string before; the finding's section renders now."""
Expand Down
Loading