Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/websec_validator/extractors/iac_ci.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def extract(self, ctx: RepoContext, facts: dict) -> dict:
"free-text injectable); verify, low exploitability" if sha_only else "")
findings.append({"severity": sev, "kind": "gha-script-injection", "file": rel,
"detail": "untrusted context interpolated into a run: step — "
+ ", ".join("github." + c for c in contexts[:4]) + extra})
+ ", ".join(contexts[:4]) + extra})
unpinned = sorted({f"{a}@{r}" for a, r in USES.findall(text)
if not SHA40.match(r) and not a.startswith("./")})
if unpinned:
Expand Down
8 changes: 6 additions & 2 deletions src/websec_validator/extractors/policy_consistency.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,12 @@
_LA = r"\(\?=[^)]{0,40}"
_RE_MIN = re.compile(r"\.min\(\s*(\d{1,3})|minLength\s*[:=]\s*(\d{1,3})|@MinLength\(\s*(\d{1,3})"
r"|\.length\s*>=?\s*(\d{1,3})|\{\s*(\d{1,3})\s*,")
_RE_UPPER = re.compile(_LA + r"\[[^\]]*A-Z|minUppercase\s*:\s*[1-9]|requireUppercase\s*[:=]\s*true", re.I)
_RE_LOWER = re.compile(_LA + r"\[[^\]]*a-z|minLowercase\s*:\s*[1-9]|requireLowercase\s*[:=]\s*true", re.I)
# The char-class branch must be case-SENSITIVE: under re.I a literal `A-Z` also matches `a-z`, so a
# lowercase-only rule `(?=.*[a-z])` was mis-counted as ALSO requiring uppercase — inflating the class
# set and masking real drift (a lower-only sibling looked equal to an upper+lower policy). `(?-i:...)`
# locally disables re.I for just the range token; the keyword branches keep re.I for casing tolerance.
_RE_UPPER = re.compile(_LA + r"\[[^\]]*(?-i:A-Z)|minUppercase\s*:\s*[1-9]|requireUppercase\s*[:=]\s*true", re.I)
_RE_LOWER = re.compile(_LA + r"\[[^\]]*(?-i:a-z)|minLowercase\s*:\s*[1-9]|requireLowercase\s*[:=]\s*true", re.I)
_RE_DIGIT = re.compile(_LA + r"(?:\\d|\[[^\]]*0-9)|minNumbers\s*:\s*[1-9]|requireDigit\s*[:=]\s*true", re.I)
_RE_SPECIAL = re.compile(_LA + r"(?:\\W|\[\^[A-Za-z0-9\\w]|\[[^\]]*[!@#$%^&*])|minSymbols\s*:\s*[1-9]"
r"|require[_]?Symbol", re.I)
Expand Down
6 changes: 5 additions & 1 deletion src/websec_validator/extractors/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,11 @@ def _fallback_next_app_router(ctx: RepoContext) -> list:

def _fallback_regex(ctx: RepoContext) -> list:
rows = []
flask = re.compile(r"@\w+\.route\s*\(\s*['\"]([^'\"]+)['\"](?:.*methods\s*=\s*\[([^\]]*)\])?", re.S)
# `[^)]*?` (not `.*` with re.S) keeps the optional methods= group INSIDE this one route() call:
# a greedy DOTALL `.*` reaches across the file to the LAST methods=[...], mis-assigning it to the
# first route and silently swallowing every route in between (only routes after the final
# methods=[...] survived). Staying within the call parens fixes both the mislabel and the drop.
flask = re.compile(r"@\w+\.route\s*\(\s*['\"]([^'\"]+)['\"](?:[^)]*?methods\s*=\s*\[([^\]]*)\])?")
fastapi = re.compile(r"@\w+\.(get|post|put|patch|delete)\s*\(\s*['\"]([^'\"]+)")
for _p, rel, text in ctx.iter_code():
for verb, path in fastapi.findall(text):
Expand Down
11 changes: 11 additions & 0 deletions tests/test_pentest_regressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,17 @@ def test_consistent_policy_no_drift(self):
self.assertEqual(out["drift"], [])
self.assertTrue(out["consistent"])

def test_lowercase_only_subset_is_drift(self):
# Regression: under re.I the `A-Z` range also matched `a-z`, so a lower-ONLY sibling was
# mis-counted as requiring uppercase too — equal to the upper+lower strong policy, hiding the
# drift. A lower-only block is a strict subset and MUST be flagged.
strong = "const s = z.object({ password: z.string().min(8).regex(/(?=.*[A-Z])(?=.*[a-z])/) });\n"
lower_only = "const s = z.object({ password: z.string().min(8).regex(/(?=.*[a-z])/) });\n"
out = PolicyConsistencyExtractor().extract(repo({
"change-password.ts": strong, "create-user.ts": lower_only}), {})
self.assertFalse(out["consistent"])
self.assertTrue(any(d["file"] == "create-user.ts" for d in out["drift"]))


class ClientIntegrityTests(unittest.TestCase): # agent-wallet MITB
WALLET = ("'use client'\nexport const R=({walletAddress})=> <div>{walletAddress}"
Expand Down
15 changes: 15 additions & 0 deletions tests/test_recon.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,18 @@ def test_fallback_express_no_noir(self):
self.assertIn(("GET", "/api/users/{id}"), paths)
self.assertIn(("POST", "/api/groups/{groupId}/items"), paths)

def test_fallback_flask_multiroute_no_route_dropped(self):
# Regression: a greedy DOTALL methods= group used to reach across the file, mis-assigning
# the LAST methods=[...] to the FIRST route and swallowing every route in between. Each
# @route must be parsed independently — /b must survive and keep ITS own POST.
d = Path(tempfile.mkdtemp())
(d / "app.py").write_text(
'@app.route("/a")\ndef a(): ...\n'
'@app.route("/b", methods=["POST"])\ndef b(): ...\n'
'@app.route("/c")\ndef c(): ...\n')
got = {(r["method"], r["path"]) for r in routes._fallback_regex(RepoContext(d))}
self.assertEqual(got, {("GET", "/a"), ("POST", "/b"), ("GET", "/c")})

def test_router_heuristic_catches_modern_routers_and_guards_fps(self):
# P1: generic router-call heuristic — hand-rolled / itty / Hono / Workers, ANY object name.
d = Path(tempfile.mkdtemp())
Expand Down Expand Up @@ -685,6 +697,9 @@ def test_gha_injection_in_run_flagged(self):
k = [f for f in out["findings"] if f["kind"] == "gha-script-injection"]
self.assertTrue(k)
self.assertEqual(k[0]["severity"], "HIGH")
# the matched context is already a full `${{ github.… }}` string — don't re-prefix `github.`
self.assertNotIn("github.${{", k[0]["detail"])
self.assertIn("${{ github.event.pull_request.title }}", k[0]["detail"])

def test_gha_context_in_if_not_flagged(self):
out = self._gha("jobs:\n b:\n if: ${{ github.event.pull_request.title == 'x' }}\n"
Expand Down