From de4bf06b5bb7d745a3e5db6f9dfa553c7e0bf809 Mon Sep 17 00:00:00 2001 From: Atharva Kulkarni Date: Thu, 21 May 2026 12:47:47 +0530 Subject: [PATCH 1/4] fix(#21): Replace lookahead regex with explicit def boundary splitting - Split on ^def boundaries using MULTILINE flag instead of lookahead - Avoids conflating methods when LLM wraps output in class body - Handles missing blank lines between methods - Properly isolates trailing text after predict() - Validates both load and predict are found, raising clear error if not This fixes: 1. Class-wrapped output where both methods get captured together 2. Missing blank line handling (no blank line = no \ndef boundary) 3. Trailing text after predict being included in the method body Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- app/cli/core/agent.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/app/cli/core/agent.py b/app/cli/core/agent.py index fa44fc3..bf422ad 100644 --- a/app/cli/core/agent.py +++ b/app/cli/core/agent.py @@ -154,15 +154,23 @@ def _parse_methods(raw: str) -> tuple[str, str]: # Strip markdown fences if the model added them anyway raw = re.sub(r"```(?:python)?", "", raw).replace("```", "").strip() - load_match = re.search(r"(def load\(self\).*?)(?=\ndef |\Z)", raw, re.DOTALL) - predict_match = re.search(r"(def predict\(self,.*?)(?=\ndef |\Z)", raw, re.DOTALL) - - if not load_match or not predict_match: + # Split on explicit 'def ' boundaries (multiline mode to match start of line) + blocks = re.split(r"(?=^def )", raw, flags=re.MULTILINE) + methods = {} + + for block in blocks: + block = block.strip() + if block.startswith("def load(self)"): + methods["load"] = block + elif block.startswith("def predict(self,"): + methods["predict"] = block + + if "load" not in methods or "predict" not in methods: raise ValueError( f"Could not parse load() and predict() from LLM output:\n{raw}" ) - return load_match.group(1).strip(), 
predict_match.group(1).strip() + return methods["load"], methods["predict"] @dataclass From 34f2f5f00216e29cac7a506e8fbf35ae426087aa Mon Sep 17 00:00:00 2001 From: Atharva Kulkarni Date: Thu, 21 May 2026 12:48:11 +0530 Subject: [PATCH 2/4] test: Add edge case tests for _parse_methods Add tests for the three main issues the fix addresses: - Class-wrapped output (LLM wraps methods in class despite prompt) - Missing blank line between methods - Trailing text after predict method These tests verify that the new explicit boundary splitting approach correctly handles all edge cases without breaking existing functionality. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/test_cli_phase3_agent.py | 54 ++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/tests/test_cli_phase3_agent.py b/tests/test_cli_phase3_agent.py index efa1425..1267735 100644 --- a/tests/test_cli_phase3_agent.py +++ b/tests/test_cli_phase3_agent.py @@ -140,6 +140,60 @@ def test_parse_methods_missing_load_raises(): _parse_methods(raw) +def test_parse_methods_no_blank_line_between(): + """Handle case where LLM omits blank line between methods.""" + from app.cli.core.agent import _parse_methods + raw = ( + "def load(self) -> None:\n" + " import joblib\n" + " self._model = joblib.load('x')\n" + "def predict(self, x):\n" + " return self._model.predict([x])[0]\n" + ) + load, predict = _parse_methods(raw) + assert "def load(self)" in load + assert "self._model" in load + assert "def predict(self," in predict + + +def test_parse_methods_class_wrapped(): + """Handle case where LLM wraps methods in a class despite system prompt.""" + from app.cli.core.agent import _parse_methods + raw = ( + "class GeneratedModel:\n" + " def load(self) -> None:\n" + " import joblib\n" + " self._model = joblib.load('x')\n" + "\n" + " def predict(self, x):\n" + " return self._model.predict([x])[0]\n" + ) + load, predict = _parse_methods(raw) + assert "def load(self)" in load 
+ assert "self._model" in load + assert "def predict(self," in predict + + +def test_parse_methods_trailing_text(): + """Handle case where LLM adds trailing text after predict.""" + from app.cli.core.agent import _parse_methods + raw = ( + "def load(self) -> None:\n" + " import joblib\n" + " self._model = joblib.load('x')\n" + "\n" + "def predict(self, x):\n" + " return self._model.predict([x])[0]\n" + "\n" + "# This model works great for sentiment analysis.\n" + ) + load, predict = _parse_methods(raw) + assert "def load(self)" in load + assert "self._model" in load + assert "def predict(self," in predict + assert "# This model" not in predict + + # --------------------------------------------------------------------------- # generate() — mocked Groq client # --------------------------------------------------------------------------- From 2435afbc4933f93cdb713a398663a90279520f30 Mon Sep 17 00:00:00 2001 From: Atharva Kulkarni Date: Thu, 21 May 2026 12:57:38 +0530 Subject: [PATCH 3/4] fix(#21): Enforce strict parsing of LLM method output Split on explicit def boundaries and reject non-compliant output. Prevents conflating methods when LLM violates system prompt (e.g., class wrapper, missing blank lines, trailing text). Raises clear error to trigger LLM retry instead of silent failures. - Split on ^def (start-of-line only, rejects indentation) - Reject trailing content after predict - Validate exactly two methods with correct signatures - Error messages direct to system prompt requirements --- app/cli/core/agent.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/app/cli/core/agent.py b/app/cli/core/agent.py index bf422ad..5e88863 100644 --- a/app/cli/core/agent.py +++ b/app/cli/core/agent.py @@ -150,20 +150,36 @@ def _check_api_key() -> None: def _parse_methods(raw: str) -> tuple[str, str]: - """Extract load() and predict() bodies from raw LLM output.""" + """Extract load() and predict() bodies from raw LLM output. 
+ + Enforces the system prompt contract: + - Return ONLY the two method bodies as plain Python + - No class wrapper, no markdown fences + - Methods must start at column 0 (not indented) + """ # Strip markdown fences if the model added them anyway raw = re.sub(r"```(?:python)?", "", raw).replace("```", "").strip() - # Split on explicit 'def ' boundaries (multiline mode to match start of line) + # Split on 'def ' at start of line (no leading whitespace) + # If LLM adds indentation (class wrapper), this will fail—as intended blocks = re.split(r"(?=^def )", raw, flags=re.MULTILINE) methods = {} for block in blocks: block = block.strip() + if not block: + continue if block.startswith("def load(self)"): methods["load"] = block elif block.startswith("def predict(self,"): methods["predict"] = block + else: + # Non-method content (e.g., class wrapper, trailing text) + # Reject to enforce system prompt compliance + raise ValueError( + f"Unexpected content in LLM output (must be exactly two method bodies, " + f"no class wrapper or trailing text):\n{raw}" + ) if "load" not in methods or "predict" not in methods: raise ValueError( From 4fa1829b5ce28b22f13df59dbf1aa2bb72ecfc5c Mon Sep 17 00:00:00 2001 From: Atharva Kulkarni Date: Thu, 21 May 2026 12:57:59 +0530 Subject: [PATCH 4/4] test: Enforce strict parsing with rejection tests Replace lenient edge-case tests with strict validation tests: - test_parse_methods_class_wrapped_raises: Rejects indented methods (class wrapper) - test_parse_methods_trailing_text_raises: Rejects trailing content Keep test_parse_methods_no_blank_line_between: Valid format with missing blank line. 
Enforces system prompt contract: "Return ONLY the two method bodies as plain Python" --- tests/test_cli_phase3_agent.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/tests/test_cli_phase3_agent.py b/tests/test_cli_phase3_agent.py index 1267735..8af604d 100644 --- a/tests/test_cli_phase3_agent.py +++ b/tests/test_cli_phase3_agent.py @@ -141,7 +141,7 @@ def test_parse_methods_missing_load_raises(): def test_parse_methods_no_blank_line_between(): - """Handle case where LLM omits blank line between methods.""" + """Valid format: LLM omits blank line between methods but starts at column 0.""" from app.cli.core.agent import _parse_methods raw = ( "def load(self) -> None:\n" @@ -156,8 +156,8 @@ def test_parse_methods_no_blank_line_between(): assert "def predict(self," in predict -def test_parse_methods_class_wrapped(): - """Handle case where LLM wraps methods in a class despite system prompt.""" +def test_parse_methods_class_wrapped_raises(): + """Invalid: LLM wraps methods in class (violates system prompt).""" from app.cli.core.agent import _parse_methods raw = ( "class GeneratedModel:\n" @@ -168,14 +168,12 @@ def test_parse_methods_class_wrapped(): " def predict(self, x):\n" " return self._model.predict([x])[0]\n" ) - load, predict = _parse_methods(raw) - assert "def load(self)" in load - assert "self._model" in load - assert "def predict(self," in predict + with pytest.raises(ValueError, match="no class wrapper"): + _parse_methods(raw) -def test_parse_methods_trailing_text(): - """Handle case where LLM adds trailing text after predict.""" +def test_parse_methods_trailing_text_raises(): + """Invalid: LLM adds trailing content (violates 'ONLY the two method bodies').""" from app.cli.core.agent import _parse_methods raw = ( "def load(self) -> None:\n" @@ -187,11 +185,8 @@ def test_parse_methods_trailing_text(): "\n" "# This model works great for sentiment analysis.\n" ) - load, predict = _parse_methods(raw) - assert "def 
load(self)" in load - assert "self._model" in load - assert "def predict(self," in predict - assert "# This model" not in predict + with pytest.raises(ValueError, match="Unexpected content"): + _parse_methods(raw) # ---------------------------------------------------------------------------