From 95a734eb465ad4ee2fd035a56797a3184bd4312b Mon Sep 17 00:00:00 2001
From: Gadi Evron
Date: Thu, 18 Jun 2026 01:21:58 +0300
Subject: [PATCH] fix(reachability): anchor user-input entry-point patterns with word boundary

The FastAPI input pattern (Query|Body|Form|File|Header|Cookie)\s*\( and
the ArgumentParser pattern lacked a leading \b, so any identifier ending
in one of those words (setCookie(, PQsendQuery(, getHeader(,
MyArgumentParser() matched as a user-input source and was seeded as a
false remote-web entry point across C/Go/PHP/Python repos. Anchor both
with \b; qualified forms like fastapi.Query( still match (the '.'
provides the boundary).

Co-Authored-By: Claude Opus 4.8 (1M context)
---
NOTE(review): this patch was restored from a copy whose newlines had been
collapsed. Leading whitespace inside the hunks is reconstructed (4-space
indentation assumed) - confirm the context rows against
entry_point_detector.py, or apply with --ignore-whitespace.
NOTE(review): \b also matches after '.' or '>', so method calls such as
db.Query(sql) still match the FastAPI pattern - confirm this is acceptable.
NOTE(review): the new test exercises only the FastAPI alternation; the
ArgumentParser anchor and the qualified fastapi.Query( form have no coverage.

 ...test_entry_point_input_pattern_boundary.py | 52 +++++++++++++++++++
 .../agentic_enhancer/entry_point_detector.py  |  4 +-
 2 files changed, 54 insertions(+), 2 deletions(-)
 create mode 100644 libs/openant-core/tests/test_entry_point_input_pattern_boundary.py

diff --git a/libs/openant-core/tests/test_entry_point_input_pattern_boundary.py b/libs/openant-core/tests/test_entry_point_input_pattern_boundary.py
new file mode 100644
index 00000000..5de94f8e
--- /dev/null
+++ b/libs/openant-core/tests/test_entry_point_input_pattern_boundary.py
@@ -0,0 +1,52 @@
+"""Regression test for F9: USER_INPUT_PATTERNS FastAPI alternation needs a word boundary.
+
+The pattern ``(Query|Body|Form|File|Header|Cookie)\\s*\\(`` (no leading ``\\b``)
+matches any identifier *ending* in one of those words, so ordinary library
+calls like ``setCookie(``, ``PQsendQuery(`` or ``getHeader(`` were flagged as
+user-input sources and seeded as false remote-web entry points across C/Go/
+PHP/Python repos. The fix anchors the alternation with ``\\b`` so only the
+standalone FastAPI dependency symbols match.
+"""
+
+from __future__ import annotations
+
+import re
+
+from utilities.agentic_enhancer.entry_point_detector import USER_INPUT_PATTERNS
+
+
+def _fastapi_pattern():
+    """The FastAPI Query/Body/.../Cookie alternation from USER_INPUT_PATTERNS."""
+    for p in USER_INPUT_PATTERNS:
+        if "Cookie" in p and "Query" in p:
+            return re.compile(p)
+    raise AssertionError("FastAPI input pattern not found in USER_INPUT_PATTERNS")
+
+
+FALSE_POSITIVES = [
+    "res.setCookie(token)",
+    "PQsendQuery(conn, sql)",
+    "req.getHeader('X')",
+    "parseMultipartFile(x)",
+    "renderBody(html)",
+    "buildForm(fields)",
+]
+
+TRUE_POSITIVES = [
+    "def handler(c: str = Cookie(None)): ...",
+    "def handler(q: str = Query(...)): ...",
+    "def handler(b: Item = Body(...)): ...",
+    "def handler(h: str = Header(None)): ...",
+]
+
+
+def test_input_pattern_rejects_substring_false_positives():
+    pat = _fastapi_pattern()
+    for code in FALSE_POSITIVES:
+        assert pat.search(code) is None, f"false positive: {code!r} matched {pat.pattern!r}"
+
+
+def test_input_pattern_still_matches_standalone_symbols():
+    pat = _fastapi_pattern()
+    for code in TRUE_POSITIVES:
+        assert pat.search(code) is not None, f"regressed: {code!r} no longer matches"
diff --git a/libs/openant-core/utilities/agentic_enhancer/entry_point_detector.py b/libs/openant-core/utilities/agentic_enhancer/entry_point_detector.py
index 16df5b5b..c691f9cb 100644
--- a/libs/openant-core/utilities/agentic_enhancer/entry_point_detector.py
+++ b/libs/openant-core/utilities/agentic_enhancer/entry_point_detector.py
@@ -62,7 +62,7 @@
     r'request\.environ',
     # FastAPI
     r'request\.(query_params|body|json)',
-    r'(Query|Body|Form|File|Header|Cookie)\s*\(',
+    r'\b(Query|Body|Form|File|Header|Cookie)\s*\(',
     # Django
     r'request\.(GET|POST|data|FILES|body)',
     r'self\.request\.(GET|POST|data)',
@@ -72,7 +72,7 @@
     # CLI arguments
     r'sys\.argv',
     r'argparse\.',
-    r'ArgumentParser\s*\(',
+    r'\bArgumentParser\s*\(',
     r'click\.(argument|option)',
     # Standard input
     r'\binput\s*\(',