From 017a5e3783851da0b8c270aab2fc12493d4d91fd Mon Sep 17 00:00:00 2001 From: Ricardo Accioly Date: Tue, 23 Jun 2026 00:27:39 -0400 Subject: [PATCH] fix(recon): three regex/logic correctness bugs on the always-on run path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - routes (flask fallback): a greedy DOTALL `methods=` group reached across the whole file, mis-assigning the LAST methods=[...] to the FIRST @route and swallowing every route in between (only routes after the final methods=[...] survived). Constrain the optional group to the single route() call. - policy_consistency: under re.I the literal `A-Z`/`a-z` range matched BOTH cases, so a lowercase-only password rule was mis-counted as ALSO requiring uppercase — masking real cross-route drift (a security false-negative). Make the range token case-sensitive via (?-i:...); keyword branches keep re.I. - iac_ci (gha-script-injection): the finding detail double-prefixed `github.` onto an already-complete `${{ github.… }}` context string. Detail-only. Each fix ships a regression test that fails pre-fix and passes post-fix. Full suite 198/198 green; stdlib-only, no new deps. Co-Authored-By: Claude Opus 4.8 --- src/websec_validator/extractors/iac_ci.py | 2 +- .../extractors/policy_consistency.py | 8 ++++++-- src/websec_validator/extractors/routes.py | 6 +++++- tests/test_pentest_regressions.py | 11 +++++++++++ tests/test_recon.py | 15 +++++++++++++++ 5 files changed, 38 insertions(+), 4 deletions(-) diff --git a/src/websec_validator/extractors/iac_ci.py b/src/websec_validator/extractors/iac_ci.py index 279f2a3..ed6807b 100644 --- a/src/websec_validator/extractors/iac_ci.py +++ b/src/websec_validator/extractors/iac_ci.py @@ -102,7 +102,7 @@ def extract(self, ctx: RepoContext, facts: dict) -> dict: "free-text injectable); verify, low exploitability" if sha_only else "") findings.append({"severity": sev, "kind": "gha-script-injection", "file": rel, "detail": "untrusted context interpolated into a run: step — " - + ", ".join("github." + c for c in contexts[:4]) + extra}) + + ", ".join(contexts[:4]) + extra}) unpinned = sorted({f"{a}@{r}" for a, r in USES.findall(text) if not SHA40.match(r) and not a.startswith("./")}) if unpinned: diff --git a/src/websec_validator/extractors/policy_consistency.py b/src/websec_validator/extractors/policy_consistency.py index c248f47..89940fa 100644 --- a/src/websec_validator/extractors/policy_consistency.py +++ b/src/websec_validator/extractors/policy_consistency.py @@ -33,8 +33,12 @@ _LA = r"\(\?=[^)]{0,40}" _RE_MIN = re.compile(r"\.min\(\s*(\d{1,3})|minLength\s*[:=]\s*(\d{1,3})|@MinLength\(\s*(\d{1,3})" r"|\.length\s*>=?\s*(\d{1,3})|\{\s*(\d{1,3})\s*,") -_RE_UPPER = re.compile(_LA + r"\[[^\]]*A-Z|minUppercase\s*:\s*[1-9]|requireUppercase\s*[:=]\s*true", re.I) -_RE_LOWER = re.compile(_LA + r"\[[^\]]*a-z|minLowercase\s*:\s*[1-9]|requireLowercase\s*[:=]\s*true", re.I) +# The char-class branch must be case-SENSITIVE: under re.I a literal `A-Z` also matches `a-z`, so a +# lowercase-only rule `(?=.*[a-z])` was mis-counted as ALSO requiring uppercase — inflating the class +# set and masking real drift (a lower-only sibling looked equal to an upper+lower policy). `(?-i:...)` +# locally disables re.I for just the range token; the keyword branches keep re.I for casing tolerance. +_RE_UPPER = re.compile(_LA + r"\[[^\]]*(?-i:A-Z)|minUppercase\s*:\s*[1-9]|requireUppercase\s*[:=]\s*true", re.I) +_RE_LOWER = re.compile(_LA + r"\[[^\]]*(?-i:a-z)|minLowercase\s*:\s*[1-9]|requireLowercase\s*[:=]\s*true", re.I) _RE_DIGIT = re.compile(_LA + r"(?:\\d|\[[^\]]*0-9)|minNumbers\s*:\s*[1-9]|requireDigit\s*[:=]\s*true", re.I) _RE_SPECIAL = re.compile(_LA + r"(?:\\W|\[\^[A-Za-z0-9\\w]|\[[^\]]*[!@#$%^&*])|minSymbols\s*:\s*[1-9]" r"|require[_]?Symbol", re.I) diff --git a/src/websec_validator/extractors/routes.py b/src/websec_validator/extractors/routes.py index 2c90609..51e23c1 100644 --- a/src/websec_validator/extractors/routes.py +++ b/src/websec_validator/extractors/routes.py @@ -211,7 +211,11 @@ def _fallback_next_app_router(ctx: RepoContext) -> list: def _fallback_regex(ctx: RepoContext) -> list: rows = [] - flask = re.compile(r"@\w+\.route\s*\(\s*['\"]([^'\"]+)['\"](?:.*methods\s*=\s*\[([^\]]*)\])?", re.S) + # `[^)]*?` (not `.*` with re.S) keeps the optional methods= group INSIDE this one route() call: + # a greedy DOTALL `.*` reaches across the file to the LAST methods=[...], mis-assigning it to the + # first route and silently swallowing every route in between (only routes after the final + # methods=[...] survived). Staying within the call parens fixes both the mislabel and the drop. + flask = re.compile(r"@\w+\.route\s*\(\s*['\"]([^'\"]+)['\"](?:[^)]*?methods\s*=\s*\[([^\]]*)\])?") fastapi = re.compile(r"@\w+\.(get|post|put|patch|delete)\s*\(\s*['\"]([^'\"]+)") for _p, rel, text in ctx.iter_code(): for verb, path in fastapi.findall(text): diff --git a/tests/test_pentest_regressions.py b/tests/test_pentest_regressions.py index 2748344..353f1c4 100644 --- a/tests/test_pentest_regressions.py +++ b/tests/test_pentest_regressions.py @@ -167,6 +167,17 @@ def test_consistent_policy_no_drift(self): self.assertEqual(out["drift"], []) self.assertTrue(out["consistent"]) + def test_lowercase_only_subset_is_drift(self): + # Regression: under re.I the `A-Z` range also matched `a-z`, so a lower-ONLY sibling was + # mis-counted as requiring uppercase too — equal to the upper+lower strong policy, hiding the + # drift. A lower-only block is a strict subset and MUST be flagged. + strong = "const s = z.object({ password: z.string().min(8).regex(/(?=.*[A-Z])(?=.*[a-z])/) });\n" + lower_only = "const s = z.object({ password: z.string().min(8).regex(/(?=.*[a-z])/) });\n" + out = PolicyConsistencyExtractor().extract(repo({ + "change-password.ts": strong, "create-user.ts": lower_only}), {}) + self.assertFalse(out["consistent"]) + self.assertTrue(any(d["file"] == "create-user.ts" for d in out["drift"])) + class ClientIntegrityTests(unittest.TestCase): # agent-wallet MITB WALLET = ("'use client'\nexport const R=({walletAddress})=>
{walletAddress}" diff --git a/tests/test_recon.py b/tests/test_recon.py index 50151d3..08ee81f 100644 --- a/tests/test_recon.py +++ b/tests/test_recon.py @@ -381,6 +381,18 @@ def test_fallback_express_no_noir(self): self.assertIn(("GET", "/api/users/{id}"), paths) self.assertIn(("POST", "/api/groups/{groupId}/items"), paths) + def test_fallback_flask_multiroute_no_route_dropped(self): + # Regression: a greedy DOTALL methods= group used to reach across the file, mis-assigning + # the LAST methods=[...] to the FIRST route and swallowing every route in between. Each + # @route must be parsed independently — /b must survive and keep ITS own POST. + d = Path(tempfile.mkdtemp()) + (d / "app.py").write_text( + '@app.route("/a")\ndef a(): ...\n' + '@app.route("/b", methods=["POST"])\ndef b(): ...\n' + '@app.route("/c")\ndef c(): ...\n') + got = {(r["method"], r["path"]) for r in routes._fallback_regex(RepoContext(d))} + self.assertEqual(got, {("GET", "/a"), ("POST", "/b"), ("GET", "/c")}) + def test_router_heuristic_catches_modern_routers_and_guards_fps(self): # P1: generic router-call heuristic — hand-rolled / itty / Hono / Workers, ANY object name. d = Path(tempfile.mkdtemp()) @@ -685,6 +697,9 @@ def test_gha_injection_in_run_flagged(self): k = [f for f in out["findings"] if f["kind"] == "gha-script-injection"] self.assertTrue(k) self.assertEqual(k[0]["severity"], "HIGH") + # the matched context is already a full `${{ github.… }}` string — don't re-prefix `github.` + self.assertNotIn("github.${{", k[0]["detail"]) + self.assertIn("${{ github.event.pull_request.title }}", k[0]["detail"]) def test_gha_context_in_if_not_flagged(self): out = self._gha("jobs:\n b:\n if: ${{ github.event.pull_request.title == 'x' }}\n"