Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion strix/report/sarif.py
Original file line number Diff line number Diff line change
Expand Up @@ -716,8 +716,12 @@ def _normalise_cwe(value: str) -> str | None:
"insecure random",
"tls verification",
"certificate verification",
"denial of service",
# "regex denial of service" MUST precede "denial of service": it is the
# more specific class and "denial of service" is a substring of it, so
# (first-match-wins) the generic entry would otherwise shadow it and
# collapse ReDoS findings into the generic DoS class hash.
"regex denial of service",
"denial of service",
"redos",
"supply chain",
)
Expand Down
39 changes: 38 additions & 1 deletion tests/test_sarif.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import json
from typing import TYPE_CHECKING, Any

from strix.report.sarif import write_sarif
from strix.report.sarif import _class_keyword, write_sarif


if TYPE_CHECKING:
Expand Down Expand Up @@ -178,6 +178,43 @@ def test_write_sarif_adds_logical_location_for_endpoint(tmp_path: Path) -> None:
assert logical == [{"fullyQualifiedName": "GET /api/users/{id}", "kind": "endpoint"}]


def test_class_keyword_prefers_specific_regex_dos_over_generic_dos() -> None:
# "regex denial of service" is the more specific class and *contains* the
# substring "denial of service"; first-match-wins keyword selection must
# return the specific class rather than let the generic entry shadow it.
assert _class_keyword("Regex Denial of Service in email validator") == "regex denial of service"
# A genuinely generic DoS finding still resolves to the generic class.
assert _class_keyword("Denial of Service via unbounded upload") == "denial of service"


def test_write_sarif_regex_dos_not_merged_into_generic_dos(tmp_path: Path) -> None:
# Two locationless findings on the same CWE are separated only by their
# vuln-class keyword (the synth_class component of the primary fingerprint).
# If ReDoS is misclassified as generic DoS the two collapse to one
# fingerprint and a SARIF consumer silently dedups a real finding away.
write_sarif(
tmp_path,
[
_finding(
id="vuln-redos",
title="Regex Denial of Service in email validator",
cwe="CWE-400",
code_locations=None,
),
_finding(
id="vuln-dos",
title="Denial of Service via unbounded upload",
cwe="CWE-400",
code_locations=None,
),
],
)
results = _read(tmp_path)["runs"][0]["results"]
assert len(results) == 2
fingerprints = {r["partialFingerprints"]["primaryLocationLineHash"] for r in results}
assert len(fingerprints) == 2


def test_write_sarif_synthetic_finding_falls_back_to_resource_logical_location(
tmp_path: Path,
) -> None:
Expand Down