Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 30 additions & 6 deletions app/cli/core/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,19 +150,43 @@ def _check_api_key() -> None:


def _parse_methods(raw: str) -> tuple[str, str]:
    """Extract the load() and predict() method sources from raw LLM output.

    Enforces the system prompt contract:
    - The output is ONLY the two methods, as plain Python.
    - No class wrapper and no leading/trailing prose or comments.
    - Each ``def`` starts at column 0 (not indented).

    Markdown code fences are tolerated: they are stripped before parsing.

    Args:
        raw: Text returned by the model.

    Returns:
        A ``(load_source, predict_source)`` tuple; each element is the full
        ``def ...`` block with surrounding whitespace stripped.

    Raises:
        ValueError: If the output contains anything other than exactly one
            ``def load(self)`` block and one ``def predict(self, ...)`` block
            (class wrapper, stray text, duplicate method), or if either
            method is missing.
    """
    # Strip markdown fences if the model added them anyway.
    raw = re.sub(r"```(?:python)?", "", raw).replace("```", "").strip()

    unexpected_msg = (
        "Unexpected content in LLM output (must be exactly two method bodies, "
        f"no class wrapper or trailing text):\n{raw}"
    )

    methods: dict[str, str] = {}

    # Zero-width split on 'def ' at column 0 keeps each 'def' with its block.
    # Indented defs (e.g. inside a class wrapper) are deliberately not split
    # points, so wrapped output falls through to the rejection branch below.
    for block in re.split(r"(?=^def )", raw, flags=re.MULTILINE):
        block = block.strip()
        if not block:
            continue

        if block.startswith("def load(self)"):
            name = "load"
        elif block.startswith("def predict(self,"):
            name = "predict"
        else:
            # Non-method content (class wrapper, leading prose, other defs).
            raise ValueError(unexpected_msg)

        # Splitting only on '^def ' would otherwise glue column-0 trailing
        # text (comments, prose) onto the preceding method. Every non-blank
        # line after the signature must therefore be indented.
        # NOTE: this also rejects multi-line string literals whose content
        # sits at column 0 inside a method body.
        _signature, *body_lines = block.splitlines()
        if any(line.strip() and not line[0].isspace() for line in body_lines):
            raise ValueError(unexpected_msg)

        # "Exactly two method bodies": a repeated definition is a violation,
        # not something to resolve by silently keeping the last one.
        if name in methods:
            raise ValueError(unexpected_msg)

        methods[name] = block

    if "load" not in methods or "predict" not in methods:
        raise ValueError(
            f"Could not parse load() and predict() from LLM output:\n{raw}"
        )

    return methods["load"], methods["predict"]


@dataclass
Expand Down
49 changes: 49 additions & 0 deletions tests/test_cli_phase3_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,55 @@ def test_parse_methods_missing_load_raises():
_parse_methods(raw)


def test_parse_methods_no_blank_line_between():
    """Two column-0 methods with no blank line between them still parse."""
    from app.cli.core.agent import _parse_methods

    llm_output = (
        "def load(self) -> None:\n"
        "    import joblib\n"
        "    self._model = joblib.load('x')\n"
        "def predict(self, x):\n"
        "    return self._model.predict([x])[0]\n"
    )

    load_src, predict_src = _parse_methods(llm_output)

    # The load block must carry both its signature and its body.
    assert "def load(self)" in load_src
    assert "self._model" in load_src
    assert "def predict(self," in predict_src


def test_parse_methods_class_wrapped_raises():
    """Output wrapped in a class (indented methods) is rejected with ValueError."""
    from app.cli.core.agent import _parse_methods

    wrapped_output = (
        "class GeneratedModel:\n"
        "    def load(self) -> None:\n"
        "        import joblib\n"
        "        self._model = joblib.load('x')\n"
        "\n"
        "    def predict(self, x):\n"
        "        return self._model.predict([x])[0]\n"
    )

    # The error message is expected to mention the class-wrapper rule.
    with pytest.raises(ValueError, match="no class wrapper"):
        _parse_methods(wrapped_output)


def test_parse_methods_trailing_text_raises():
    """Content after the two methods (here a column-0 comment) must raise ValueError."""
    from app.cli.core.agent import _parse_methods

    output_with_trailer = (
        "def load(self) -> None:\n"
        "    import joblib\n"
        "    self._model = joblib.load('x')\n"
        "\n"
        "def predict(self, x):\n"
        "    return self._model.predict([x])[0]\n"
        "\n"
        "# This model works great for sentiment analysis.\n"
    )

    with pytest.raises(ValueError, match="Unexpected content"):
        _parse_methods(output_with_trailer)


# ---------------------------------------------------------------------------
# generate() — mocked Groq client
# ---------------------------------------------------------------------------
Expand Down
Loading