diff --git a/app/cli/core/agent.py b/app/cli/core/agent.py
index fa44fc3..5e88863 100644
--- a/app/cli/core/agent.py
+++ b/app/cli/core/agent.py
@@ -150,19 +150,56 @@ def _check_api_key() -> None:
 
 
 def _parse_methods(raw: str) -> tuple[str, str]:
-    """Extract load() and predict() bodies from raw LLM output."""
+    """Extract load() and predict() bodies from raw LLM output.
+
+    Enforces the system prompt contract:
+    - Return ONLY the two method bodies as plain Python
+    - No class wrapper, no markdown fences
+    - Methods must start at column 0 (not indented)
+
+    Returns:
+        The load() source and the predict() source, in that order.
+
+    Raises:
+        ValueError: If the output holds anything other than one load() and
+            one predict() definition, or if either of them is missing.
+    """
     # Strip markdown fences if the model added them anyway
     raw = re.sub(r"```(?:python)?", "", raw).replace("```", "").strip()
 
-    load_match = re.search(r"(def load\(self\).*?)(?=\ndef |\Z)", raw, re.DOTALL)
-    predict_match = re.search(r"(def predict\(self,.*?)(?=\ndef |\Z)", raw, re.DOTALL)
-
-    if not load_match or not predict_match:
+    # Split on 'def ' at start of line (no leading whitespace)
+    # If LLM adds indentation (class wrapper), this will fail—as intended
+    blocks = re.split(r"(?=^def )", raw, flags=re.MULTILINE)
+    prefixes = {"load": "def load(self)", "predict": "def predict(self,"}
+    methods: dict[str, str] = {}
+
+    for block in blocks:
+        block = block.strip()
+        if not block:
+            continue
+        name = next((n for n, p in prefixes.items() if block.startswith(p)), None)
+        # The split only cuts at a column-0 'def', so trailing text stays glued
+        # to the method before it. After the 'def' line every line must be
+        # blank or indented; a column-0 line is stray content. (This also
+        # rejects multi-line strings whose text sits at column 0.)
+        stray = any(
+            line and not line[0].isspace() for line in block.splitlines()[1:]
+        )
+        if name is None or name in methods or stray:
+            # Non-method content (e.g., class wrapper, trailing text) or a
+            # repeated method: reject to enforce system prompt compliance
+            raise ValueError(
+                f"Unexpected content in LLM output (must be exactly two method bodies, "
+                f"no class wrapper or trailing text):\n{raw}"
+            )
+        methods[name] = block
+
+    if "load" not in methods or "predict" not in methods:
         raise ValueError(
             f"Could not parse load() and predict() from LLM output:\n{raw}"
         )
 
-    return load_match.group(1).strip(), predict_match.group(1).strip()
+    return methods["load"], methods["predict"]
 
 
 @dataclass
diff --git a/tests/test_cli_phase3_agent.py b/tests/test_cli_phase3_agent.py
index efa1425..8af604d 100644
--- a/tests/test_cli_phase3_agent.py
+++ b/tests/test_cli_phase3_agent.py
@@ -140,6 +140,55 @@ def test_parse_methods_missing_load_raises():
         _parse_methods(raw)
 
 
+def test_parse_methods_no_blank_line_between():
+    """Accept two column-0 methods even when no blank line separates them."""
+    from app.cli.core.agent import _parse_methods
+    raw = (
+        "def load(self) -> None:\n"
+        "    import joblib\n"
+        "    self._model = joblib.load('x')\n"
+        "def predict(self, x):\n"  # follows load() directly, no blank line
+        "    return self._model.predict([x])[0]\n"
+    )
+    load, predict = _parse_methods(raw)
+    assert "def load(self)" in load
+    assert "self._model" in load
+    assert "def predict(self," in predict
+
+
+def test_parse_methods_class_wrapped_raises():
+    """Reject output whose methods are nested inside a class wrapper."""
+    from app.cli.core.agent import _parse_methods
+    raw = (
+        "class GeneratedModel:\n"
+        "    def load(self) -> None:\n"  # indented, so not a column-0 'def'
+        "        import joblib\n"
+        "        self._model = joblib.load('x')\n"
+        "\n"
+        "    def predict(self, x):\n"
+        "        return self._model.predict([x])[0]\n"
+    )
+    with pytest.raises(ValueError, match="no class wrapper"):
+        _parse_methods(raw)
+
+
+def test_parse_methods_trailing_text_raises():
+    """Reject output that carries extra text after the two method bodies."""
+    from app.cli.core.agent import _parse_methods
+    raw = (
+        "def load(self) -> None:\n"
+        "    import joblib\n"
+        "    self._model = joblib.load('x')\n"
+        "\n"
+        "def predict(self, x):\n"
+        "    return self._model.predict([x])[0]\n"
+        "\n"
+        "# This model works great for sentiment analysis.\n"  # column-0 text after predict()
+    )
+    with pytest.raises(ValueError, match="Unexpected content"):
+        _parse_methods(raw)
+
+
 # ---------------------------------------------------------------------------
 # generate() — mocked Groq client
 # ---------------------------------------------------------------------------