diff --git a/app/heuristics/__init__.py b/app/heuristics/__init__.py
index 7532b397..6504ac81 100644
--- a/app/heuristics/__init__.py
+++ b/app/heuristics/__init__.py
@@ -704,7 +704,12 @@ def detect_coding_context(text: str) -> bool:
     r"istisna",
 ]
 
-_JOINED_LIVE_DEBUG = re.compile("|".join(LIVE_DEBUG_KEYWORDS))
+# Word boundaries prevent substring false positives: "logs?" must not match
+# inside "login"/"blog"/"catalog", "mre" inside unrelated tokens, etc. The
+# keywords intentionally contain regex (e.g. "logs?"), so we wrap with \b rather
+# than re.escape; one shared (?:...) group keeps both boundaries on every
+# alternative even if a keyword contains "|". Same idea as _TEACHING_RE above.
+_JOINED_LIVE_DEBUG = re.compile(r"\b(?:" + "|".join(LIVE_DEBUG_KEYWORDS) + r")\b")
 
 
 def detect_live_debug(text: str) -> bool:
diff --git a/tests/test_detect_live_debug.py b/tests/test_detect_live_debug.py
index 0dce4b15..8a5ce9a1 100644
--- a/tests/test_detect_live_debug.py
+++ b/tests/test_detect_live_debug.py
@@ -11,3 +11,20 @@ def test_detect_live_debug_false_cases():
     assert detect_live_debug("How do I write a Python function?") is False
     assert detect_live_debug("Explain quantum physics") is False
     assert detect_live_debug("") is False
+
+
+def test_detect_live_debug_ignores_substring_false_positives():
+    # Keywords like "logs?" must not substring-match unrelated words such as
+    # "login", "blog", "catalog", or "dialog" (regression for the missing
+    # word boundaries in _JOINED_LIVE_DEBUG).
+    assert detect_live_debug("Implement secure login sessions") is False
+    assert detect_live_debug("write a blog post") is False
+    assert detect_live_debug("browse the catalog") is False
+    assert detect_live_debug("open the dialog box") is False
+
+
+def test_detect_live_debug_matches_log_and_debug_keywords():
+    # Genuine log/debug cues must still be detected once word boundaries are added.
+    assert detect_live_debug("check the error log") is True
+    assert detect_live_debug("attach the logs") is True
+    assert detect_live_debug("help me debug this stack trace") is True