diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 8b5c60e..f89c722 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -41,7 +41,7 @@ Python style - F-strings in exceptions: `raise ValueError(f"Error {var}")` - Google-style docstrings - Single blank line at end of file -- No documentation for `__init__` methods +- No documentation for `__init__` methods and test modules Patterns - Classes with `__init__` cannot throw exceptions diff --git a/src/core/helpers.py b/src/core/helpers.py index af42f6e..ea1d4a6 100644 --- a/src/core/helpers.py +++ b/src/core/helpers.py @@ -14,12 +14,21 @@ # limitations under the License. # -"""Pure utility functions – date helpers, hashing, and path normalisation.""" +"""Pure utility functions""" import hashlib import re from datetime import datetime, timezone +# Matches lines that start with 1-6 '#' followed by a space +_HEADING_RE = re.compile(r"^(#{1,6}\s)", re.MULTILINE) +# Matches '>' at the start of a line (blockquote) +_BLOCKQUOTE_RE = re.compile(r"^(>)", re.MULTILINE) +# Matches horizontal rules: three or more -, *, or _ on a line by themselves +_HR_RE = re.compile(r"^([-*_]{3,})\s*$", re.MULTILINE) +# Matches '|' at the start of a line (table row). +_TABLE_RE = re.compile(r"^(\|)", re.MULTILINE) + def utc_today() -> str: """Return today's date in UTC as an ISO-8601 string (``YYYY-MM-DD``).""" @@ -50,3 +59,16 @@ def normalize_path(path: str | None) -> str: p = p.lstrip("/") p = re.sub(r"/+", "/", p) return p + + +def sanitize_markdown(text: str) -> str: + """Escape block-level Markdown so text renders as plain content in an issue body.""" + if not text: + return text + + sanitized = _HEADING_RE.sub(r"\\\1", text) + sanitized = _BLOCKQUOTE_RE.sub(r"\\\1", sanitized) + sanitized = _HR_RE.sub(r"\\\1", sanitized) + sanitized = _TABLE_RE.sub(r"\\\1", sanitized) + + return sanitized diff --git a/src/security/issues/builder.py b/src/security/issues/builder.py index fed75c9..3e7cbac 100644 --- a/src/security/issues/builder.py +++ b/src/security/issues/builder.py @@ -18,7 +18,7 @@ from typing import Any -from core.helpers import iso_date +from core.helpers import iso_date, sanitize_markdown from core.rendering import render_markdown_template from security.constants import NOT_AVAILABLE, SECMETA_TYPE_PARENT @@ -60,7 +60,7 @@ def alert_extra_data(alert: Alert) -> dict[str, Any]: "impact": alert.rule_details.impact, "likelihood": alert.rule_details.likelihood, "confidence": alert.rule_details.confidence, - "remediation": alert.rule_details.remediation, + "remediation": sanitize_markdown(alert.rule_details.remediation), "references": references, } @@ -87,7 +87,7 @@ def build_parent_template_values(alert: Alert, *, rule_id: str, severity: str) - return { "category": alert.metadata.rule_name or NOT_AVAILABLE, "avd_id": alert.alert_details.vulnerability or rule_id, - "title": alert.metadata.rule_description or rule_id, + "title": sanitize_markdown(alert.metadata.rule_description or rule_id), "severity": severity, "published_date": iso_date(alert.rule_details.published_date or NOT_AVAILABLE), "package_name": alert.rule_details.package_name, @@ -135,7 +135,7 @@ def build_child_issue_body(alert: Alert) -> str: vulnerability = alert.alert_details.vulnerability avd_id = vulnerability if vulnerability.startswith("AVD-") else NOT_AVAILABLE - title = alert.metadata.rule_description or alert.metadata.rule_id + title = sanitize_markdown(alert.metadata.rule_description or alert.metadata.rule_id) scm_file = alert.alert_details.scm_file start_line = alert.metadata.start_line @@ -151,7 +151,7 @@ def build_child_issue_body(alert: Alert) -> str: file_display = file_name or NOT_AVAILABLE alert_hash = alert.alert_details.alert_hash - message = alert.alert_details.message + message = sanitize_markdown(alert.alert_details.message) category = classify_category(alert) diff --git a/tests/core/test_helpers.py b/tests/core/test_helpers.py new file mode 100644 index 0000000..0ae35a7 --- /dev/null +++ b/tests/core/test_helpers.py @@ -0,0 +1,73 @@ +# +# Copyright 2026 ABSA Group Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import pytest + +from core.helpers import sanitize_markdown + + +# sanitize_markdown + + +@pytest.mark.parametrize( + "text", + [None, "", "Normal text without markdown", "text with numbers 123"], + ids=["none", "empty", "plain", "numbers"], +) +def test_passthrough_unchanged(text: str | None) -> None: + assert sanitize_markdown(text) == text + + +@pytest.mark.parametrize( + "raw, expected", + [ + ("# Heading one", r"\# Heading one"), + ("## Heading two", r"\## Heading two"), + ("### Deep heading", r"\### Deep heading"), + ("> quoted text", r"\> quoted text"), + ("| col1 | col2 |", r"\| col1 | col2 |"), + ("---", r"\---"), + ], + ids=["h1", "h2", "h3", "blockquote", "table", "hr"], +) +def test_block_markdown_escaped(raw: str, expected: str) -> None: + assert expected == sanitize_markdown(raw) + + +@pytest.mark.parametrize( + "text", + ["some **bold** text", "some __bold__ text", "use `code` here"], + ids=["bold", "underscore-bold", "backtick"], +) +def test_inline_markdown_preserved(text: str) -> None: + assert text == sanitize_markdown(text) + + +def test_heading_in_multiline() -> None: + text = "First line\n## Second is heading\nThird line" + result = sanitize_markdown(text) + assert r"\## Second is heading" in result + assert result.startswith("First line\n") + + +def test_real_aquasec_message() -> None: + msg = ( + "## Black is the uncompromising Python code formatter. " + "Prior to 26.3.1, Black writes a cache file." + ) + result = sanitize_markdown(msg) + assert not result.startswith("## ") + assert result.startswith(r"\## ") diff --git a/tests/security/issues/test_builder.py b/tests/security/issues/test_builder.py index c350ab9..7bbcd56 100644 --- a/tests/security/issues/test_builder.py +++ b/tests/security/issues/test_builder.py @@ -14,8 +14,6 @@ # limitations under the License. # -"""Unit tests for ``utils.issue_builder``.""" - import pytest from security.issues.builder import ( @@ -31,17 +29,14 @@ # ===================================================================== -# Low-level helpers +# alert_extra_data # ===================================================================== def test_extra_data_from_nested() -> None: """Extra data is synthesised from nested metadata + rule_details.""" alert = Alert.from_dict({ - "metadata": { - "rule_id": "CVE-123", - "rule_name": "sast", - }, + "metadata": {"rule_id": "CVE-123", "rule_name": "sast"}, "rule_details": { "confidence": "error", "owasp": "https://owasp.org/Top10/A07", @@ -57,340 +52,234 @@ def test_extra_data_from_nested() -> None: assert extra["confidence"] == "error" assert extra["category"] == "sast" + def test_extra_data_non_cve() -> None: """Non-CVE rule_id results in N/A for cve field.""" - alert = Alert.from_dict({ - "metadata": {"rule_id": "RULE-1", "rule_name": "sast"}, - "rule_details": {}, - }) - extra = alert_extra_data(alert) - assert extra["cve"] == "N/A" + alert = Alert.from_dict({"metadata": {"rule_id": "RULE-1", "rule_name": "sast"}, "rule_details": {}}) + assert alert_extra_data(alert)["cve"] == "N/A" -def test_sast(sast_alert: Alert) -> None: - assert classify_category(sast_alert) == "sast" +# ===================================================================== +# classify_category +# ===================================================================== -def test_vuln(vuln_alert: Alert) -> None: - assert classify_category(vuln_alert) == "vulnerabilities" -def test_empty() -> None: - assert classify_category(Alert()) == "" +@pytest.mark.parametrize("raw, expected", [ + ({"metadata": {"rule_name": "sast"}}, "sast"), + ({"metadata": {"rule_name": "vulnerabilities"}}, "vulnerabilities"), + ({}, ""), +], ids=["sast", "vulnerabilities", "empty"]) +def test_classify_category(raw: dict, expected: str) -> None: + assert expected == classify_category(Alert.from_dict(raw)) # ===================================================================== -# Parent issue builders +# build_parent_issue_title # ===================================================================== -def test_with_severity() -> None: - assert build_parent_issue_title("CVE-2026-25755", "high") == ( - "[HIGH] Security Alert \u2013 CVE-2026-25755" - ) +@pytest.mark.parametrize("severity, expected", [ + ("high", "[HIGH] Security Alert \u2013 CVE-2026-25755"), + ("", "Security Alert \u2013 CVE-2026-25755"), +], ids=["with_severity", "without_severity"]) +def test_build_parent_issue_title(severity: str, expected: str) -> None: + assert expected == build_parent_issue_title("CVE-2026-25755", severity) + -def test_without_severity() -> None: - assert build_parent_issue_title("CVE-2026-25755") == ( - "Security Alert \u2013 CVE-2026-25755" - ) +# ===================================================================== +# build_parent_template_values +# ===================================================================== def test_sast_category_from_rule_name(sast_alert: Alert) -> None: - vals = build_parent_template_values( - sast_alert, rule_id="req-with-very-false-aquasec-python", severity="high" - ) + vals = build_parent_template_values(sast_alert, rule_id="req-with-very-false-aquasec-python", severity="high") assert vals["category"] == "sast" + def test_vuln_category_from_rule_name(vuln_alert: Alert) -> None: - vals = build_parent_template_values( - vuln_alert, rule_id="CVE-2026-25755", severity="high" - ) + vals = build_parent_template_values(vuln_alert, rule_id="CVE-2026-25755", severity="high") assert vals["category"] == "vulnerabilities" + def test_avd_id_uses_vulnerability(sast_alert: Alert) -> None: - vals = build_parent_template_values( - sast_alert, rule_id="req-with-very-false-aquasec-python", severity="high" - ) + vals = build_parent_template_values(sast_alert, rule_id="req-with-very-false-aquasec-python", severity="high") assert vals["avd_id"] == "req-with-very-false-aquasec-python" -def test_published_date_from_rule_details(sast_alert: Alert) -> None: - """Published date comes from rule_details.published_date.""" - vals = build_parent_template_values( - sast_alert, rule_id="test", severity="high" - ) - # SAST fixture has rule_details.published_date = None; absent dates map to N/A - assert vals["published_date"] == "N/A" - - -def test_published_date_none_in_raw_dict() -> None: - """published_date=None in the raw dict (as returned by _parse_rule_details for a - missing field) must yield N/A, not today's date via iso_date(None). - """ - raw: dict = { - "metadata": {"rule_id": "x", "severity": "low"}, - "rule_details": {"published_date": None}, - } - alert = Alert.from_dict(raw) - vals = build_parent_template_values(alert, rule_id="x", severity="low") - assert vals["published_date"] == "N/A" - - -def test_published_date_absent_from_raw_dict() -> None: - """published_date key absent entirely from rule_details must also yield N/A.""" - raw: dict = { - "metadata": {"rule_id": "x", "severity": "low"}, - "rule_details": {}, - } - alert = Alert.from_dict(raw) - vals = build_parent_template_values(alert, rule_id="x", severity="low") - assert vals["published_date"] == "N/A" + +@pytest.mark.parametrize("rule_details", [ + {"published_date": None}, + {}, +], ids=["published_date_none", "published_date_absent"]) +def test_published_date_yields_na(rule_details: dict) -> None: + alert = Alert.from_dict({"metadata": {"rule_id": "x", "severity": "low"}, "rule_details": rule_details}) + assert "N/A" == build_parent_template_values(alert, rule_id="x", severity="low")["published_date"] def test_extra_data_synthesised(sast_alert: Alert) -> None: - """Fields are synthesised from the nested alert structure.""" - vals = build_parent_template_values( - sast_alert, rule_id="test", severity="high" - ) - extra = vals["extraData"] + extra = build_parent_template_values(sast_alert, rule_id="test", severity="high")["extraData"] assert isinstance(extra, dict) assert extra["confidence"] == "error" assert extra["category"] == "sast" - # OWASP reference derived from rule_details.owasp assert "owasp" in extra["owasp"].lower() + def test_extra_data_references(vuln_alert: Alert) -> None: - vals = build_parent_template_values( - vuln_alert, rule_id="CVE-2026-25755", severity="high" - ) - refs = vals["extraData"]["references"] + refs = build_parent_template_values(vuln_alert, rule_id="CVE-2026-25755", severity="high")["extraData"]["references"] assert "redhat.com" in refs -def test_references_fallback_to_metadata_urls() -> None: - """When rule_details.references is absent, fall back to metadata.help_uri / alert_url.""" +def test_all_template_keys_present(sast_alert: Alert) -> None: + vals = build_parent_template_values(sast_alert, rule_id="test", severity="high") + required = {"category", "avd_id", "title", "severity", "published_date", "package_name", "fixed_version", "extraData"} + assert required.issubset(vals.keys()) + + +def test_extra_data_sub_keys(sast_alert: Alert) -> None: + extra = build_parent_template_values(sast_alert, rule_id="test", severity="high")["extraData"] + required_extra = {"cve", "owasp", "category", "impact", "likelihood", "confidence", "remediation", "references"} + assert required_extra.issubset(extra.keys()) + + +# ===================================================================== +# References fallback (_synthesize_references) +# ===================================================================== + + +def test_references_fallback_to_both_urls() -> None: + """When rule_details.references is absent, both help_uri and alert_url are included.""" alert = Alert.from_dict({ "metadata": { "rule_id": "RULE-1", "help_uri": "https://example.com/rule", "alert_url": "https://github.com/org/repo/security/code-scanning/1", }, - "rule_details": {}, # references will be normalised to N/A - }) - extra = alert_extra_data(alert) - assert "https://example.com/rule" in extra["references"] - assert "https://github.com/org/repo/security/code-scanning/1" in extra["references"] - - -def test_references_fallback_help_uri_only() -> None: - """Fallback uses only help_uri when alert_url is absent.""" - alert = Alert.from_dict({ - "metadata": { - "rule_id": "RULE-2", - "help_uri": "https://docs.example.com/cve", - }, "rule_details": {}, }) - extra = alert_extra_data(alert) - assert "https://docs.example.com/cve" in extra["references"] + refs = alert_extra_data(alert)["references"] + assert "https://example.com/rule" in refs + assert "https://github.com/org/repo/security/code-scanning/1" in refs -def test_references_no_fallback_urls_yields_na() -> None: - """Fallback returns N/A when metadata has no useful URLs either.""" - alert = Alert.from_dict({ - "metadata": {"rule_id": "RULE-3"}, - "rule_details": {}, - }) - extra = alert_extra_data(alert) - assert extra["references"] == "N/A" +@pytest.mark.parametrize("metadata, expected", [ + ({"rule_id": "RULE-2", "help_uri": "https://docs.example.com/cve"}, "https://docs.example.com/cve"), + ({"rule_id": "RULE-3"}, "N/A"), +], ids=["help_uri_only", "no_urls_yields_na"]) +def test_references_fallback(metadata: dict, expected: str) -> None: + alert = Alert.from_dict({"metadata": metadata, "rule_details": {}}) + assert expected in alert_extra_data(alert)["references"] def test_references_not_overridden_when_present() -> None: - """rule_details.references is used as-is when it contains a value.""" + """rule_details.references is used as-is; metadata URLs are ignored.""" alert = Alert.from_dict({ - "metadata": { - "rule_id": "RULE-4", - "help_uri": "https://should-not-appear.example.com", - }, + "metadata": {"rule_id": "RULE-4", "help_uri": "https://should-not-appear.example.com"}, "rule_details": {"references": "- https://explicit-ref.example.com"}, }) - extra = alert_extra_data(alert) - assert "https://explicit-ref.example.com" in extra["references"] - assert "should-not-appear" not in extra["references"] - -def test_all_template_keys_present(sast_alert: Alert) -> None: - """Every placeholder in PARENT_BODY_TEMPLATE has a corresponding value.""" - vals = build_parent_template_values( - sast_alert, rule_id="test", severity="high" - ) - required = { - "category", "avd_id", "title", "severity", - "published_date", "package_name", - "fixed_version", "extraData", - } - assert required.issubset(vals.keys()) - -def test_extra_data_sub_keys(sast_alert: Alert) -> None: - """All extraData keys referenced in the template are present.""" - vals = build_parent_template_values( - sast_alert, rule_id="test", severity="high" - ) - extra = vals["extraData"] - required_extra = { - "cve", "owasp", "category", "impact", - "likelihood", "confidence", "remediation", "references", - } - assert required_extra.issubset(extra.keys()) + refs = alert_extra_data(alert)["references"] + assert "https://explicit-ref.example.com" in refs + assert "should-not-appear" not in refs # ===================================================================== -# Parent issue body (full render) +# build_parent_issue_body # ===================================================================== -def test_contains_secmeta_block(sast_alert: Alert) -> None: - body = build_parent_issue_body(sast_alert) - assert "