From de4bf06b5bb7d745a3e5db6f9dfa553c7e0bf809 Mon Sep 17 00:00:00 2001
From: Atharva Kulkarni <aa.kulkarni11105@gmail.com>
Date: Thu, 21 May 2026 12:47:47 +0530
Subject: [PATCH 1/4] fix(#21): Replace lookahead regex with explicit def
 boundary splitting

- Split on ^def boundaries via re.split with the MULTILINE flag, replacing
  the two re.search calls that each relied on a "\ndef " lookahead
- Avoids conflating methods when LLM wraps output in class body
- Handles missing blank lines between methods
- Properly isolates trailing text after predict()
- Validates both load and predict are found, raising clear error if not

This fixes:
1. Class-wrapped output where both methods get captured together
2. Missing blank line handling (no blank line = no \ndef boundary)
3. Trailing text after predict being included in the method body

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 app/cli/core/agent.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/app/cli/core/agent.py b/app/cli/core/agent.py
index fa44fc3..bf422ad 100644
--- a/app/cli/core/agent.py
+++ b/app/cli/core/agent.py
@@ -154,15 +154,23 @@ def _parse_methods(raw: str) -> tuple[str, str]:
     # Strip markdown fences if the model added them anyway
     raw = re.sub(r"```(?:python)?", "", raw).replace("```", "").strip()
 
-    load_match = re.search(r"(def load\(self\).*?)(?=\ndef |\Z)", raw, re.DOTALL)
-    predict_match = re.search(r"(def predict\(self,.*?)(?=\ndef |\Z)", raw, re.DOTALL)
-
-    if not load_match or not predict_match:
+    # Split on explicit 'def ' boundaries (multiline mode to match start of line)
+    blocks = re.split(r"(?=^def )", raw, flags=re.MULTILINE)
+    methods = {}
+
+    for block in blocks:
+        block = block.strip()
+        if block.startswith("def load(self)"):
+            methods["load"] = block
+        elif block.startswith("def predict(self,"):
+            methods["predict"] = block
+
+    if "load" not in methods or "predict" not in methods:
         raise ValueError(
             f"Could not parse load() and predict() from LLM output:\n{raw}"
         )
 
-    return load_match.group(1).strip(), predict_match.group(1).strip()
+    return methods["load"], methods["predict"]
 
 
 @dataclass

From 34f2f5f00216e29cac7a506e8fbf35ae426087aa Mon Sep 17 00:00:00 2001
From: Atharva Kulkarni <aa.kulkarni11105@gmail.com>
Date: Thu, 21 May 2026 12:48:11 +0530
Subject: [PATCH 2/4] test: Add edge case tests for _parse_methods

Add tests for the three main issues the fix addresses:
- Class-wrapped output (LLM wraps methods in class despite prompt)
- Missing blank line between methods
- Trailing text after predict method

These tests verify that the new explicit boundary splitting approach
correctly handles all edge cases without breaking existing functionality.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 tests/test_cli_phase3_agent.py | 54 ++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/tests/test_cli_phase3_agent.py b/tests/test_cli_phase3_agent.py
index efa1425..1267735 100644
--- a/tests/test_cli_phase3_agent.py
+++ b/tests/test_cli_phase3_agent.py
@@ -140,6 +140,60 @@ def test_parse_methods_missing_load_raises():
         _parse_methods(raw)
 
 
+def test_parse_methods_no_blank_line_between():
+    """Handle case where LLM omits blank line between methods."""
+    from app.cli.core.agent import _parse_methods
+    raw = (
+        "def load(self) -> None:\n"
+        "    import joblib\n"
+        "    self._model = joblib.load('x')\n"
+        "def predict(self, x):\n"
+        "    return self._model.predict([x])[0]\n"
+    )
+    load, predict = _parse_methods(raw)
+    assert "def load(self)" in load
+    assert "self._model" in load
+    assert "def predict(self," in predict
+
+
+def test_parse_methods_class_wrapped():
+    """Handle case where LLM wraps methods in a class despite system prompt."""
+    from app.cli.core.agent import _parse_methods
+    raw = (
+        "class GeneratedModel:\n"
+        "    def load(self) -> None:\n"
+        "        import joblib\n"
+        "        self._model = joblib.load('x')\n"
+        "\n"
+        "    def predict(self, x):\n"
+        "        return self._model.predict([x])[0]\n"
+    )
+    load, predict = _parse_methods(raw)
+    assert "def load(self)" in load
+    assert "self._model" in load
+    assert "def predict(self," in predict
+
+
+def test_parse_methods_trailing_text():
+    """Handle case where LLM adds trailing text after predict."""
+    from app.cli.core.agent import _parse_methods
+    raw = (
+        "def load(self) -> None:\n"
+        "    import joblib\n"
+        "    self._model = joblib.load('x')\n"
+        "\n"
+        "def predict(self, x):\n"
+        "    return self._model.predict([x])[0]\n"
+        "\n"
+        "# This model works great for sentiment analysis.\n"
+    )
+    load, predict = _parse_methods(raw)
+    assert "def load(self)" in load
+    assert "self._model" in load
+    assert "def predict(self," in predict
+    assert "# This model" not in predict
+
+
 # ---------------------------------------------------------------------------
 # generate() — mocked Groq client
 # ---------------------------------------------------------------------------

From 2435afbc4933f93cdb713a398663a90279520f30 Mon Sep 17 00:00:00 2001
From: Atharva Kulkarni <aa.kulkarni11105@gmail.com>
Date: Thu, 21 May 2026 12:57:38 +0530
Subject: [PATCH 3/4] fix(#21): Enforce strict parsing of LLM method output

Split on explicit def boundaries and reject non-compliant output.
Prevents conflating methods when LLM violates system prompt
(e.g., class wrapper, missing blank lines, trailing text).

Raises clear error to trigger LLM retry instead of silent failures.

- Split on ^def (start-of-line only, rejects indentation)
- Reject trailing content after predict
- Validate exactly two methods with correct signatures
- Error messages direct to system prompt requirements
---
 app/cli/core/agent.py | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/app/cli/core/agent.py b/app/cli/core/agent.py
index bf422ad..5e88863 100644
--- a/app/cli/core/agent.py
+++ b/app/cli/core/agent.py
@@ -150,20 +150,36 @@ def _check_api_key() -> None:
 
 
 def _parse_methods(raw: str) -> tuple[str, str]:
-    """Extract load() and predict() bodies from raw LLM output."""
+    """Extract load() and predict() bodies from raw LLM output.
+    
+    Enforces the system prompt contract:
+    - Return ONLY the two method bodies as plain Python
+    - No class wrapper, no markdown fences
+    - Methods must start at column 0 (not indented)
+    """
     # Strip markdown fences if the model added them anyway
     raw = re.sub(r"```(?:python)?", "", raw).replace("```", "").strip()
 
-    # Split on explicit 'def ' boundaries (multiline mode to match start of line)
+    # Split on 'def ' at start of line (no leading whitespace)
+    # If LLM adds indentation (class wrapper), this will fail—as intended
     blocks = re.split(r"(?=^def )", raw, flags=re.MULTILINE)
     methods = {}
 
     for block in blocks:
         block = block.strip()
+        if not block:
+            continue
         if block.startswith("def load(self)"):
             methods["load"] = block
         elif block.startswith("def predict(self,"):
             methods["predict"] = block
+        else:
+            # Non-method content (e.g., class wrapper, trailing text)
+            # Reject to enforce system prompt compliance
+            raise ValueError(
+                f"Unexpected content in LLM output (must be exactly two method bodies, "
+                f"no class wrapper or trailing text):\n{raw}"
+            )
 
     if "load" not in methods or "predict" not in methods:
         raise ValueError(

From 4fa1829b5ce28b22f13df59dbf1aa2bb72ecfc5c Mon Sep 17 00:00:00 2001
From: Atharva Kulkarni <aa.kulkarni11105@gmail.com>
Date: Thu, 21 May 2026 12:57:59 +0530
Subject: [PATCH 4/4] test: Enforce strict parsing with rejection tests

Replace lenient edge-case tests with strict validation tests:
- test_parse_methods_class_wrapped_raises: Rejects indented methods (class wrapper)
- test_parse_methods_trailing_text_raises: Rejects trailing content

Keep test_parse_methods_no_blank_line_between: Valid format with missing blank line.

Enforces system prompt contract: "Return ONLY the two method bodies as plain Python"
---
 tests/test_cli_phase3_agent.py | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/tests/test_cli_phase3_agent.py b/tests/test_cli_phase3_agent.py
index 1267735..8af604d 100644
--- a/tests/test_cli_phase3_agent.py
+++ b/tests/test_cli_phase3_agent.py
@@ -141,7 +141,7 @@ def test_parse_methods_missing_load_raises():
 
 
 def test_parse_methods_no_blank_line_between():
-    """Handle case where LLM omits blank line between methods."""
+    """Valid format: LLM omits blank line between methods but starts at column 0."""
     from app.cli.core.agent import _parse_methods
     raw = (
         "def load(self) -> None:\n"
@@ -156,8 +156,8 @@ def test_parse_methods_no_blank_line_between():
     assert "def predict(self," in predict
 
 
-def test_parse_methods_class_wrapped():
-    """Handle case where LLM wraps methods in a class despite system prompt."""
+def test_parse_methods_class_wrapped_raises():
+    """Invalid: LLM wraps methods in class (violates system prompt)."""
     from app.cli.core.agent import _parse_methods
     raw = (
         "class GeneratedModel:\n"
@@ -168,14 +168,12 @@ def test_parse_methods_class_wrapped():
         "    def predict(self, x):\n"
         "        return self._model.predict([x])[0]\n"
     )
-    load, predict = _parse_methods(raw)
-    assert "def load(self)" in load
-    assert "self._model" in load
-    assert "def predict(self," in predict
+    with pytest.raises(ValueError, match="no class wrapper"):
+        _parse_methods(raw)
 
 
-def test_parse_methods_trailing_text():
-    """Handle case where LLM adds trailing text after predict."""
+def test_parse_methods_trailing_text_raises():
+    """Invalid: LLM adds trailing content (violates 'ONLY the two method bodies')."""
     from app.cli.core.agent import _parse_methods
     raw = (
         "def load(self) -> None:\n"
@@ -187,11 +185,8 @@ def test_parse_methods_trailing_text():
         "\n"
         "# This model works great for sentiment analysis.\n"
     )
-    load, predict = _parse_methods(raw)
-    assert "def load(self)" in load
-    assert "self._model" in load
-    assert "def predict(self," in predict
-    assert "# This model" not in predict
+    with pytest.raises(ValueError, match="Unexpected content"):
+        _parse_methods(raw)
 
 
 # ---------------------------------------------------------------------------