diff --git a/src/mantis_agent/gym/learning_runner.py b/src/mantis_agent/gym/learning_runner.py index dc7e026c..5ededaef 100644 --- a/src/mantis_agent/gym/learning_runner.py +++ b/src/mantis_agent/gym/learning_runner.py @@ -22,7 +22,7 @@ import logging import time from dataclasses import dataclass -from typing import Any +from typing import TYPE_CHECKING, Any from ..verification.playbook import ( ExtractionPattern, @@ -32,6 +32,9 @@ from ..verification.step_verifier import StepVerifier, VerificationResult from .runner import GymRunner +if TYPE_CHECKING: + from ..sim_envs.cli_integration import EnvSession + logger = logging.getLogger(__name__) @@ -65,6 +68,7 @@ def __init__( grounding: Any = None, on_step: Any = None, max_attempts_per_step: int = 3, + oracle_session: "EnvSession | None" = None, ): self.brain = brain self.env = env @@ -72,6 +76,204 @@ def __init__( self.grounding = grounding self.on_step = on_step self.max_attempts = max_attempts_per_step + # When a sim-env session is attached, the runner replaces + # vision-based ``verify_step`` / ``verify_filter`` / + # ``verify_on_correct_page`` calls with cheap server-side + # probes against ``/__env__/mutations`` plus a URL substring + # check. See :meth:`_oracle_verify_url_signals` and + # :meth:`_oracle_verify_state_change`. The vision verifier + # stays as the fallback for extraction-iteration steps where + # the env can't surface the relevant signal (popup detection, + # gallery traps, etc.). + self.oracle_session = oracle_session + # Monotonic cursor into the env's mutations log so successive + # ``fetch_mutations`` calls only return entries from after + # the previous step. + self._last_mutation_id: int = 0 + + # ── Oracle-path verifier helpers (#594) ───────────────────────────── + + def _env_current_url(self) -> str: + """Best-effort fetch of the env's current URL. + + Different env adapters expose the URL via different accessors: + Playwright + Modal back the property as ``current_url`` (string), + some legacy adapters use ``url`` or a ``get_url()`` method. We + try them in order and return ``""`` if none match — the oracle + verifier degrades gracefully to "verified=False" on an empty + URL rather than crashing the run. + """ + env = self.env + if env is None: + return "" + for attr in ("current_url", "url"): + val = getattr(env, attr, None) + if callable(val): + try: + return str(val() or "") + except Exception: # noqa: BLE001 + continue + if val: + return str(val) + getter = getattr(env, "get_url", None) + if callable(getter): + try: + return str(getter() or "") + except Exception: # noqa: BLE001 + return "" + return "" + + def _oracle_verify_url_signals( + self, + signals: list[str], + ) -> VerificationResult: + """Cheap replacement for ``verify_filter`` / ``verify_on_correct_page``. + + The plan-author hints (``["private seller", "by owner", + "by-owner"]`` etc.) are token substrings expected to appear in + the current URL when the page is in the right state. + Case-insensitive match; ANY signal hit counts as verified. + + Returns ``VerificationResult`` with the same shape as the + vision verifier so callers don't need to branch. + """ + url = self._env_current_url().lower() + if not url: + return VerificationResult( + verified=False, + page_changed=False, + issue="no_url", + suggestion="env did not expose a URL — oracle path can't verify", + confidence=0.5, + details="empty env URL", + ) + if not signals: + return VerificationResult( + verified=True, + page_changed=True, + confidence=0.5, + details=f"no signals supplied; URL={url}", + ) + matches = [s for s in signals if s and s.lower() in url] + verified = bool(matches) + return VerificationResult( + verified=verified, + page_changed=True, + issue="" if verified else "filter_lost", + suggestion="" if verified else ( + f"none of {signals} found in URL {url}" + ), + confidence=0.9 if verified else 0.85, + details=f"matched={matches} url={url}", + ) + + def _oracle_verify_state_change( + self, + expected_ops: set[str] | None = None, + ) -> VerificationResult: + """Cheap replacement for ``verify_step`` on state-changing actions. + + Polls ``GET /__env__/mutations?since=``. If any entries + landed, advances the cursor and returns verified=True (or False + when ``expected_ops`` is set and no entry matches). If no + mutations landed, returns verified=False with + ``issue="no_state_change"``. + + Note this only works for steps the env's harness instruments + with mutations — for boattrader: lead submission, consent set, + phone revealed, env reset. Filter application + plain + navigation are URL-only; callers verify those via + :meth:`_oracle_verify_url_signals` instead. + """ + from ..sim_envs.oracle_client import fetch_mutations, last_mutation_id + + if self.oracle_session is None: + # Caller should have gated on this — fail loud rather than + # silently treating the absence as "verified". + return VerificationResult( + verified=False, + page_changed=False, + issue="no_oracle_session", + confidence=0.0, + details="oracle_session is None", + ) + + resp = fetch_mutations( + self.oracle_session.url, + self.oracle_session.admin_token, + since_id=self._last_mutation_id, + ) + if resp.get("error"): + return VerificationResult( + verified=False, + page_changed=False, + issue="oracle_unreachable", + suggestion=f"fetch_mutations failed: {resp['error']}", + confidence=0.4, + details=resp["error"], + ) + + mutations = resp.get("mutations", []) + if not mutations: + return VerificationResult( + verified=False, + page_changed=False, + issue="no_state_change", + suggestion="mutation log empty — agent's last action had no observable effect", + confidence=0.9, + details="empty mutation delta", + ) + + # Advance cursor BEFORE checking content so a non-matching delta + # still doesn't re-fire on the next poll. + self._last_mutation_id = max(self._last_mutation_id, last_mutation_id(mutations)) + + if expected_ops: + matched = [m for m in mutations + if str(m.get("operation") or "") in expected_ops] + verified = bool(matched) + ops_seen = [m.get("operation") for m in mutations] + return VerificationResult( + verified=verified, + page_changed=True, + issue="" if verified else "wrong_op", + suggestion="" if verified else ( + f"expected one of {sorted(expected_ops)}, got {ops_seen}" + ), + confidence=0.95 if verified else 0.85, + details=f"matched={[m.get('operation') for m in matched]} " + f"all={ops_seen}", + ) + + # No expected_ops filter — any mutation is signal of state change. + return VerificationResult( + verified=True, + page_changed=True, + confidence=0.9, + details=f"{len(mutations)} mutation(s): " + f"{[m.get('operation') for m in mutations]}", + ) + + def _sync_mutation_cursor(self) -> None: + """Advance ``_last_mutation_id`` to the env's current tail + without producing a VerificationResult. + + Called before a phase-start to ignore any prior mutations + (e.g. seeding, reset) so the first step's + :meth:`_oracle_verify_state_change` only sees mutations that + post-date the phase's first action. + """ + if self.oracle_session is None: + return + from ..sim_envs.oracle_client import fetch_mutations, last_mutation_id + resp = fetch_mutations( + self.oracle_session.url, + self.oracle_session.admin_token, + since_id=0, + ) + muts = resp.get("mutations") or [] + if muts: + self._last_mutation_id = last_mutation_id(muts) def learn( self, @@ -141,6 +343,13 @@ def _learn_setup( self.env.reset(task="navigate", start_url=start_url) time.sleep(4) + # On the oracle path, sync the mutation cursor to the env's + # current tail so the first filter step's verification only + # sees mutations from the agent — not the reset that may have + # just stamped ``env_reset``. + if self.oracle_session is not None: + self._sync_mutation_cursor() + # Define atomic filter steps — positive, small instructions atomic_filters = [ ( @@ -182,17 +391,25 @@ def _learn_setup( after = self.env.screenshot() - # Verify this specific filter was applied - verify = self.verifier.verify_step( - before, after, - intent=f"Apply filter: {filter_name}", - action=f"{result.total_steps} steps", - ) + # Verify this specific filter was applied. Oracle path: + # a filter click is a URL change — verify by URL pattern, + # not by mutation log (filters don't stamp mutations in + # the boattrader env). Vision path: before/after screenshot + # diff as before. + if self.oracle_session is not None: + verify = self._oracle_verify_url_signals(verify_signals) + filter_check = verify # same signal on the oracle path + else: + verify = self.verifier.verify_step( + before, after, + intent=f"Apply filter: {filter_name}", + action=f"{result.total_steps} steps", + ) - # Also check page for the expected signals - filter_check = self.verifier.verify_filter( - after, verify_signals, max_results=50000, - ) + # Also check page for the expected signals + filter_check = self.verifier.verify_filter( + after, verify_signals, max_results=50000, + ) step = PlaybookStep( name=filter_name, @@ -233,9 +450,12 @@ def _learn_setup( task_id=f"learn_{filter_name}_retry", ) after_retry = self.env.screenshot() - retry_check = self.verifier.verify_filter( - after_retry, verify_signals, max_results=50000, - ) + if self.oracle_session is not None: + retry_check = self._oracle_verify_url_signals(verify_signals) + else: + retry_check = self.verifier.verify_filter( + after_retry, verify_signals, max_results=50000, + ) step.update_confidence(retry_check.verified) if retry_check.verified: logger.info(" Retry VERIFIED") @@ -263,12 +483,19 @@ def _learn_extraction( # Before: on search results page before = self.env.screenshot() - # Verify we're on a results page - page_check = self.verifier.verify_on_correct_page( - before, - expected_description="Search results page with boat listings", - expected_signals=["listing", "boat", "price", "photo"], - ) + # Verify we're on a results page. Oracle path: URL pattern + # check (results page URLs contain ``/boats`` or similar). + # Vision path: image-content check as before. + if self.oracle_session is not None: + page_check = self._oracle_verify_url_signals( + ["/boats", "/listings", "/search", "boats/"], + ) + else: + page_check = self.verifier.verify_on_correct_page( + before, + expected_description="Search results page with boat listings", + expected_signals=["listing", "boat", "price", "photo"], + ) if not page_check.verified: logger.warning(f" Not on results page: {page_check.issue}") playbook.known_traps.append(f"Lost results page at iteration {i}: {page_check.issue}") @@ -293,7 +520,12 @@ def _learn_extraction( # After: check what happened after = self.env.screenshot() - # Verify the iteration + # Verify the iteration — extraction reads the page and + # produces no mutation, so the oracle path can't tell + # success from failure here. Vision still adds value: + # detects gallery traps / dealer-page drift / popups even + # when the URL hasn't changed. Stays on the vision path + # regardless of ``oracle_session``. verify = self.verifier.verify_step( before, after, intent=f"Extract data from listing #{i}", diff --git a/tests/test_learning_runner_oracle_path.py b/tests/test_learning_runner_oracle_path.py new file mode 100644 index 00000000..d75e20d1 --- /dev/null +++ b/tests/test_learning_runner_oracle_path.py @@ -0,0 +1,330 @@ +"""Unit tests for the oracle-path verifier helpers on ``LearningRunner``. + +Issue #594 wired the per-step oracle primitives shipped in #590 / PR #593 +into ``gym/learning_runner.py``. These tests cover the three private +helpers added there: + +* ``_env_current_url`` — accessor that tolerates env adapters with + different URL conventions (``current_url`` callable / attribute, + ``url`` attribute, ``get_url()`` method, or no surface at all). +* ``_oracle_verify_url_signals`` — case-insensitive substring check + used in place of ``verify_filter`` and ``verify_on_correct_page``. +* ``_oracle_verify_state_change`` — mutation-log delta check used in + place of ``verify_step`` on state-changing actions. +* ``_sync_mutation_cursor`` — phase-start helper that advances the + cursor past any reset/seed mutations so the first step's verifier + doesn't see them. + +The LearningRunner itself isn't booted — these tests instantiate it +directly with stub env / verifier objects and exercise the private +helpers. End-to-end `learn()` coverage stays for the integration +suite under ``tests/integration/`` (not in this repo today). +""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import patch + +# Import ``mantis_agent.rewards`` ahead of ``recipes.marketplace_listings`` +# to sidestep the latent circular noted in #590's PR — not needed here +# (no rewards imports) but keeping the ordering consistent. +from mantis_agent.gym.learning_runner import LearningRunner + + +# ── Test scaffolding ─────────────────────────────────────────────────── + + +def _stub_env(*, url: str = "", url_callable: bool = False): + """Build a minimal env stub exposing the URL via the requested shape.""" + env = SimpleNamespace() + if url_callable: + env.current_url = lambda: url + else: + env.current_url = url + return env + + +def _stub_session(url: str = "http://env.test", token: str = "admin-tok"): + """Build a minimal EnvSession-shaped object.""" + return SimpleNamespace(url=url, admin_token=token) + + +def _make_runner(*, env=None, oracle_session=None) -> LearningRunner: + """Construct LearningRunner without booting any real component.""" + return LearningRunner( + brain=SimpleNamespace(), + env=env if env is not None else _stub_env(url="http://env.test/boats/"), + verifier=SimpleNamespace(), + oracle_session=oracle_session, + ) + + +# ── _env_current_url ─────────────────────────────────────────────────── + + +def test_env_current_url_reads_callable_current_url(): + env = _stub_env(url="http://example.test/boats/?make=Sea+Ray", url_callable=True) + runner = _make_runner(env=env) + assert runner._env_current_url() == "http://example.test/boats/?make=Sea+Ray" + + +def test_env_current_url_reads_attribute_current_url(): + env = _stub_env(url="http://example.test/boats/", url_callable=False) + runner = _make_runner(env=env) + assert runner._env_current_url() == "http://example.test/boats/" + + +def test_env_current_url_falls_back_to_get_url_method(): + env = SimpleNamespace() + env.get_url = lambda: "http://example.test/from-getter" + runner = _make_runner(env=env) + assert runner._env_current_url() == "http://example.test/from-getter" + + +def test_env_current_url_returns_empty_when_no_accessor(): + env = SimpleNamespace() + runner = _make_runner(env=env) + assert runner._env_current_url() == "" + + +def test_env_current_url_swallows_callable_errors(): + """A throwing accessor shouldn't crash the verifier — return empty.""" + env = SimpleNamespace() + def _boom(): + raise RuntimeError("env not ready") + env.current_url = _boom + runner = _make_runner(env=env) + assert runner._env_current_url() == "" + + +def test_env_current_url_handles_none_env(): + """A None env shouldn't crash the accessor — must return ``""``.""" + runner = LearningRunner( + brain=SimpleNamespace(), + env=None, + verifier=SimpleNamespace(), + ) + assert runner._env_current_url() == "" + + +# ── _oracle_verify_url_signals ───────────────────────────────────────── + + +def test_url_signals_verifies_on_substring_match(): + runner = _make_runner(env=_stub_env(url="http://x.test/boats/?seller=by-owner")) + result = runner._oracle_verify_url_signals(["private seller", "by-owner"]) + assert result.verified is True + assert result.issue == "" + + +def test_url_signals_case_insensitive(): + runner = _make_runner(env=_stub_env(url="http://x.test/Boats/?Make=Sea+Ray")) + result = runner._oracle_verify_url_signals(["sea+ray"]) + assert result.verified is True + + +def test_url_signals_fails_with_no_match(): + runner = _make_runner(env=_stub_env(url="http://x.test/about")) + result = runner._oracle_verify_url_signals(["private seller", "by-owner"]) + assert result.verified is False + assert result.issue == "filter_lost" + + +def test_url_signals_returns_no_url_when_env_silent(): + runner = _make_runner(env=SimpleNamespace()) + result = runner._oracle_verify_url_signals(["anything"]) + assert result.verified is False + assert result.issue == "no_url" + + +def test_url_signals_empty_signal_list_is_verified(): + """An empty signal list with a valid URL is a degenerate-but-OK case.""" + runner = _make_runner(env=_stub_env(url="http://x.test/boats/")) + result = runner._oracle_verify_url_signals([]) + assert result.verified is True + + +def test_url_signals_skips_empty_strings(): + """Empty strings in the signal list shouldn't accidentally match the URL.""" + runner = _make_runner(env=_stub_env(url="http://x.test/about")) + result = runner._oracle_verify_url_signals(["", "by-owner"]) + assert result.verified is False + + +# ── _oracle_verify_state_change ──────────────────────────────────────── + + +def test_state_change_no_session_returns_no_oracle(): + """Helper requires an oracle_session; without one, it returns no_oracle_session.""" + runner = _make_runner(oracle_session=None) + result = runner._oracle_verify_state_change() + assert result.verified is False + assert result.issue == "no_oracle_session" + + +def test_state_change_verified_on_any_mutation_without_expected_ops(): + runner = _make_runner(oracle_session=_stub_session()) + fake = {"mutations": [ + {"id": 1, "operation": "consent_set"}, + ]} + with patch( + "mantis_agent.sim_envs.oracle_client.fetch_mutations", + return_value=fake, + ): + result = runner._oracle_verify_state_change() + assert result.verified is True + assert runner._last_mutation_id == 1 + + +def test_state_change_verified_when_expected_op_matches(): + runner = _make_runner(oracle_session=_stub_session()) + fake = {"mutations": [ + {"id": 1, "operation": "consent_set"}, + {"id": 2, "operation": "lead_submitted"}, + ]} + with patch( + "mantis_agent.sim_envs.oracle_client.fetch_mutations", + return_value=fake, + ): + result = runner._oracle_verify_state_change( + expected_ops={"lead_submitted"}, + ) + assert result.verified is True + assert runner._last_mutation_id == 2 + + +def test_state_change_unverified_when_expected_op_missing(): + """Mutations landed but none match the expected set — wrong_op.""" + runner = _make_runner(oracle_session=_stub_session()) + fake = {"mutations": [ + {"id": 1, "operation": "consent_set"}, + ]} + with patch( + "mantis_agent.sim_envs.oracle_client.fetch_mutations", + return_value=fake, + ): + result = runner._oracle_verify_state_change( + expected_ops={"lead_submitted"}, + ) + assert result.verified is False + assert result.issue == "wrong_op" + # Cursor still advances so the non-matching mutation doesn't re-fire. + assert runner._last_mutation_id == 1 + + +def test_state_change_empty_delta_is_no_state_change(): + runner = _make_runner(oracle_session=_stub_session()) + with patch( + "mantis_agent.sim_envs.oracle_client.fetch_mutations", + return_value={"mutations": []}, + ): + result = runner._oracle_verify_state_change() + assert result.verified is False + assert result.issue == "no_state_change" + # Cursor unchanged on empty delta. + assert runner._last_mutation_id == 0 + + +def test_state_change_fetch_error_surfaces_as_oracle_unreachable(): + runner = _make_runner(oracle_session=_stub_session()) + with patch( + "mantis_agent.sim_envs.oracle_client.fetch_mutations", + return_value={"mutations": [], "error": "HTTP 503: down"}, + ): + result = runner._oracle_verify_state_change() + assert result.verified is False + assert result.issue == "oracle_unreachable" + assert "HTTP 503" in result.suggestion + + +def test_state_change_passes_since_id_from_cursor(): + runner = _make_runner(oracle_session=_stub_session()) + runner._last_mutation_id = 42 + captured = {} + + def fake_fetch(url, token, *, since_id, **kw): + captured["since_id"] = since_id + return {"mutations": []} + + with patch( + "mantis_agent.sim_envs.oracle_client.fetch_mutations", + side_effect=fake_fetch, + ): + runner._oracle_verify_state_change() + assert captured["since_id"] == 42 + + +def test_state_change_cursor_monotonic_under_out_of_order_responses(): + """A response with id < current cursor should NOT regress the cursor.""" + runner = _make_runner(oracle_session=_stub_session()) + runner._last_mutation_id = 10 + fake = {"mutations": [{"id": 5, "operation": "consent_set"}]} + with patch( + "mantis_agent.sim_envs.oracle_client.fetch_mutations", + return_value=fake, + ): + runner._oracle_verify_state_change() + assert runner._last_mutation_id == 10 + + +# ── _sync_mutation_cursor ────────────────────────────────────────────── + + +def test_sync_cursor_advances_past_current_tail(): + runner = _make_runner(oracle_session=_stub_session()) + fake = {"mutations": [ + {"id": 1, "operation": "env_reset"}, + {"id": 2, "operation": "consent_set"}, + {"id": 3, "operation": "env_reset"}, + ]} + with patch( + "mantis_agent.sim_envs.oracle_client.fetch_mutations", + return_value=fake, + ): + runner._sync_mutation_cursor() + assert runner._last_mutation_id == 3 + + +def test_sync_cursor_noop_without_session(): + runner = _make_runner(oracle_session=None) + runner._last_mutation_id = 5 + runner._sync_mutation_cursor() + assert runner._last_mutation_id == 5 + + +def test_sync_cursor_noop_on_empty_log(): + runner = _make_runner(oracle_session=_stub_session()) + with patch( + "mantis_agent.sim_envs.oracle_client.fetch_mutations", + return_value={"mutations": []}, + ): + runner._sync_mutation_cursor() + assert runner._last_mutation_id == 0 + + +# ── Constructor wiring ───────────────────────────────────────────────── + + +def test_oracle_session_defaults_to_none(): + """The new constructor param defaults to None — vision-only path + is unchanged for callers that don't pass a session.""" + runner = LearningRunner( + brain=SimpleNamespace(), + env=SimpleNamespace(), + verifier=SimpleNamespace(), + ) + assert runner.oracle_session is None + assert runner._last_mutation_id == 0 + + +def test_oracle_session_accepts_env_session_shape(): + """The runner accepts anything with ``.url`` and ``.admin_token``.""" + session = _stub_session(url="http://env.test", token="tok") + runner = LearningRunner( + brain=SimpleNamespace(), + env=SimpleNamespace(), + verifier=SimpleNamespace(), + oracle_session=session, + ) + assert runner.oracle_session is session