diff --git a/src/websec_validator/extractors/iac_ci.py b/src/websec_validator/extractors/iac_ci.py index 279f2a3..ed6807b 100644 --- a/src/websec_validator/extractors/iac_ci.py +++ b/src/websec_validator/extractors/iac_ci.py @@ -102,7 +102,7 @@ def extract(self, ctx: RepoContext, facts: dict) -> dict: "free-text injectable); verify, low exploitability" if sha_only else "") findings.append({"severity": sev, "kind": "gha-script-injection", "file": rel, "detail": "untrusted context interpolated into a run: step — " - + ", ".join("github." + c for c in contexts[:4]) + extra}) + + ", ".join(contexts[:4]) + extra}) unpinned = sorted({f"{a}@{r}" for a, r in USES.findall(text) if not SHA40.match(r) and not a.startswith("./")}) if unpinned: diff --git a/src/websec_validator/extractors/policy_consistency.py b/src/websec_validator/extractors/policy_consistency.py index c248f47..89940fa 100644 --- a/src/websec_validator/extractors/policy_consistency.py +++ b/src/websec_validator/extractors/policy_consistency.py @@ -33,8 +33,12 @@ _LA = r"\(\?=[^)]{0,40}" _RE_MIN = re.compile(r"\.min\(\s*(\d{1,3})|minLength\s*[:=]\s*(\d{1,3})|@MinLength\(\s*(\d{1,3})" r"|\.length\s*>=?\s*(\d{1,3})|\{\s*(\d{1,3})\s*,") -_RE_UPPER = re.compile(_LA + r"\[[^\]]*A-Z|minUppercase\s*:\s*[1-9]|requireUppercase\s*[:=]\s*true", re.I) -_RE_LOWER = re.compile(_LA + r"\[[^\]]*a-z|minLowercase\s*:\s*[1-9]|requireLowercase\s*[:=]\s*true", re.I) +# The char-class branch must be case-SENSITIVE: under re.I a literal `A-Z` also matches `a-z`, so a +# lowercase-only rule `(?=.*[a-z])` was mis-counted as ALSO requiring uppercase — inflating the class +# set and masking real drift (a lower-only sibling looked equal to an upper+lower policy). `(?-i:...)` +# locally disables re.I for just the range token; the keyword branches keep re.I for casing tolerance. +_RE_UPPER = re.compile(_LA + r"\[[^\]]*(?-i:A-Z)|minUppercase\s*:\s*[1-9]|requireUppercase\s*[:=]\s*true", re.I) +_RE_LOWER = re.compile(_LA + r"\[[^\]]*(?-i:a-z)|minLowercase\s*:\s*[1-9]|requireLowercase\s*[:=]\s*true", re.I) _RE_DIGIT = re.compile(_LA + r"(?:\\d|\[[^\]]*0-9)|minNumbers\s*:\s*[1-9]|requireDigit\s*[:=]\s*true", re.I) _RE_SPECIAL = re.compile(_LA + r"(?:\\W|\[\^[A-Za-z0-9\\w]|\[[^\]]*[!@#$%^&*])|minSymbols\s*:\s*[1-9]" r"|require[_]?Symbol", re.I) diff --git a/src/websec_validator/extractors/routes.py b/src/websec_validator/extractors/routes.py index 2c90609..51e23c1 100644 --- a/src/websec_validator/extractors/routes.py +++ b/src/websec_validator/extractors/routes.py @@ -211,7 +211,11 @@ def _fallback_next_app_router(ctx: RepoContext) -> list: def _fallback_regex(ctx: RepoContext) -> list: rows = [] - flask = re.compile(r"@\w+\.route\s*\(\s*['\"]([^'\"]+)['\"](?:.*methods\s*=\s*\[([^\]]*)\])?", re.S) + # `[^)]*?` (not `.*` with re.S) keeps the optional methods= group INSIDE this one route() call: + # a greedy DOTALL `.*` reaches across the file to the LAST methods=[...], mis-assigning it to the + # first route and silently swallowing every route in between (only routes after the final + # methods=[...] survived). Staying within the call parens fixes both the mislabel and the drop. + flask = re.compile(r"@\w+\.route\s*\(\s*['\"]([^'\"]+)['\"](?:[^)]*?methods\s*=\s*\[([^\]]*)\])?") fastapi = re.compile(r"@\w+\.(get|post|put|patch|delete)\s*\(\s*['\"]([^'\"]+)") for _p, rel, text in ctx.iter_code(): for verb, path in fastapi.findall(text): diff --git a/tests/test_pentest_regressions.py b/tests/test_pentest_regressions.py index 2748344..353f1c4 100644 --- a/tests/test_pentest_regressions.py +++ b/tests/test_pentest_regressions.py @@ -167,6 +167,17 @@ def test_consistent_policy_no_drift(self): self.assertEqual(out["drift"], []) self.assertTrue(out["consistent"]) + def test_lowercase_only_subset_is_drift(self): + # Regression: under re.I the `A-Z` range also matched `a-z`, so a lower-ONLY sibling was + # mis-counted as requiring uppercase too — equal to the upper+lower strong policy, hiding the + # drift. A lower-only block is a strict subset and MUST be flagged. + strong = "const s = z.object({ password: z.string().min(8).regex(/(?=.*[A-Z])(?=.*[a-z])/) });\n" + lower_only = "const s = z.object({ password: z.string().min(8).regex(/(?=.*[a-z])/) });\n" + out = PolicyConsistencyExtractor().extract(repo({ + "change-password.ts": strong, "create-user.ts": lower_only}), {}) + self.assertFalse(out["consistent"]) + self.assertTrue(any(d["file"] == "create-user.ts" for d in out["drift"])) + class ClientIntegrityTests(unittest.TestCase): # agent-wallet MITB WALLET = ("'use client'\nexport const R=({walletAddress})=>