diff --git a/CHANGELOG.md b/CHANGELOG.md index 7c1b27d80..5dc355686 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,20 @@ # agent-browser -## 0.26.0-celeria-camoufox.1 +## 0.26.0-celeria-camoufox.2 +### Bug Fixes + +- **Fixed `internal-error: name 'ref_id' is not defined` on every `page.snapshot` call under the Camoufox engine.** A prior change to the sidecar's handle-resolution loop renamed a loop-local variable but missed the `ref_cache.put` call at the end of the loop, which broke every snapshot-then-click flow in v0.26.0-celeria-camoufox.1. The fix also clarifies the two-ref distinction in the loop (agent-facing `@eN` vs. DOM `data-__ab-ref` attribute) so a future edit is less likely to break this again. Regression test added: `test_interactive_only_snapshot_then_click_by_ref` exercises the full snapshot → click-by-ref path and would have caught the original NameError. (Celeria fork) + +### New Features + +- **`scroll` and `scroll into view` on the Camoufox engine.** Previously returned `not-yet-implemented: action 'Runtime.evaluate' is not yet supported on engine=camoufox`. Parity with the Chrome path: `scroll` accepts `{x, y}` pixel deltas or `{direction: up|down|left|right, amount}` (Rust-side normalisation folds direction/amount into deltas before the sidecar call), with optional `selector` (a CSS selector or `@eN` ref) to scroll inside a specific element rather than the window. `scroll into view` centres the matched element via `scrollIntoView({block:'center', inline:'center'})`, matching the Chrome path's JS exactly. Both return structured errors (`selector-not-found`, `ambiguous-selector`, `ref-stale`, `element-detached`) rather than opaque Playwright exceptions. Still deferred to v2: ref-annotated screenshots (`screenshot --annotate`) which need CDP DOM-box extraction the sidecar doesn't yet expose. 
(Celeria fork) + + +## 0.26.0-celeria-camoufox.1 + + ### New Features - **`--engine camoufox` — third browser backend (Camoufox / patched Firefox).** Adds Camoufox alongside the existing Chrome (CDP) and Lightpanda (CDP) engines for targets that defeat JS-injection stealth. Camoufox's C++-level patches (canvas/WebGL noise, font fingerprint, WebRTC IP, AudioContext) go deeper than our `--stealth` script. Because Camoufox speaks Juggler, not CDP, the daemon drives it via a persistent Python sidecar over JSON-line stdio instead of the existing `CdpClient`. Stealth is implicit when `engine=camoufox`; combining with `--stealth` is a no-op with a warning (the JS injection would fight the engine-level spoofs). (Celeria fork) @@ -15,7 +27,7 @@ ### Requirements - Running `--engine camoufox` outside the Celeria E2B template requires a Python 3 runtime with `pip install camoufox camoufox_sidecar` and a one-time `python -m camoufox fetch` to download the Camoufox browser binary. Follows the Lightpanda "install it yourself" precedent; `agent-browser install` is not extended for Camoufox in v1. 
- + ## 0.26.0-celeria-stealth.1 diff --git a/cli/Cargo.lock b/cli/Cargo.lock index 7fa373661..4e8022f9e 100644 --- a/cli/Cargo.lock +++ b/cli/Cargo.lock @@ -45,7 +45,7 @@ dependencies = [ [[package]] name = "agent-browser" -version = "0.26.0-celeria-camoufox.1" +version = "0.26.0-celeria-camoufox.2" dependencies = [ "aes-gcm", "async-trait", diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 736a7fe87..5bcfbb967 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "agent-browser" -version = "0.26.0-celeria-camoufox.1" +version = "0.26.0-celeria-camoufox.2" edition = "2021" description = "Fast browser automation CLI for AI agents" license = "Apache-2.0" diff --git a/cli/src/native/actions.rs b/cli/src/native/actions.rs index eb114bab8..57520cc0e 100644 --- a/cli/src/native/actions.rs +++ b/cli/src/native/actions.rs @@ -2921,7 +2921,6 @@ async fn handle_hover(cmd: &Value, state: &mut DaemonState) -> Result Result { let mgr = state.browser.as_ref().ok_or("Browser not launched")?; - let session_id = mgr.active_session_id()?.to_string(); let selector = cmd.get("selector").and_then(|v| v.as_str()); let (mut dx, mut dy) = ( @@ -2940,6 +2939,19 @@ async fn handle_scroll(cmd: &Value, state: &mut DaemonState) -> Result Result Result { let mgr = state.browser.as_ref().ok_or("Browser not launched")?; - let session_id = mgr.active_session_id()?.to_string(); let selector = cmd .get("selector") .and_then(|v| v.as_str()) .ok_or("Missing 'selector' parameter")?; + if mgr.backend.is_camoufox() { + return mgr + .camoufox_client() + .call("page.scrollIntoView", json!({ "selector": selector })) + .await; + } + + let session_id = mgr.active_session_id()?.to_string(); interaction::scroll_into_view( &mgr.backend, &session_id, diff --git a/package.json b/package.json index 85bc6275f..450137102 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "agent-browser", - "version": "0.26.0-celeria-camoufox.1", + "version": 
"0.26.0-celeria-camoufox.2", "description": "Browser automation CLI for AI agents", "type": "module", "files": [ diff --git a/packages/camoufox-sidecar/camoufox_sidecar/__init__.py b/packages/camoufox-sidecar/camoufox_sidecar/__init__.py index 9047a4178..d17506b74 100644 --- a/packages/camoufox-sidecar/camoufox_sidecar/__init__.py +++ b/packages/camoufox-sidecar/camoufox_sidecar/__init__.py @@ -1,3 +1,3 @@ """camoufox-sidecar: Playwright+Camoufox driver process for agent-browser.""" -__version__ = "0.26.0+celeria.camoufox.1" +__version__ = "0.26.0+celeria.camoufox.2" diff --git a/packages/camoufox-sidecar/camoufox_sidecar/__main__.py b/packages/camoufox-sidecar/camoufox_sidecar/__main__.py index f36ac13b5..739a269a6 100644 --- a/packages/camoufox-sidecar/camoufox_sidecar/__main__.py +++ b/packages/camoufox-sidecar/camoufox_sidecar/__main__.py @@ -147,6 +147,14 @@ async def _cmd_page_screenshot(sidecar: "Sidecar", args: dict) -> dict: return await sidecar.session.screenshot(args) +async def _cmd_page_scroll(sidecar: "Sidecar", args: dict) -> dict: + return await sidecar.session.scroll(args) + + +async def _cmd_page_scroll_into_view(sidecar: "Sidecar", args: dict) -> dict: + return await sidecar.session.scroll_into_view(args) + + async def _cmd_tab_new(sidecar: "Sidecar", args: dict) -> dict: return await sidecar.session.tab_new(args) @@ -172,6 +180,8 @@ async def _cmd_tab_list(sidecar: "Sidecar", args: dict) -> dict: "page.fill": _cmd_page_fill, "page.getText": _cmd_page_get_text, "page.screenshot": _cmd_page_screenshot, + "page.scroll": _cmd_page_scroll, + "page.scrollIntoView": _cmd_page_scroll_into_view, "tab.new": _cmd_tab_new, "tab.switch": _cmd_tab_switch, "tab.close": _cmd_tab_close, diff --git a/packages/camoufox-sidecar/camoufox_sidecar/session.py b/packages/camoufox-sidecar/camoufox_sidecar/session.py index 3cab786de..90f5c3899 100644 --- a/packages/camoufox-sidecar/camoufox_sidecar/session.py +++ b/packages/camoufox-sidecar/camoufox_sidecar/session.py @@ 
-564,6 +564,106 @@ async def get_text(self, args: Optional[dict] = None) -> dict: ) return {"text": text, "origin": _safe_page_url(tab.page), "tabId": tab.tab_id} + async def scroll(self, args: Optional[dict] = None) -> dict: + """Scroll the page or an element by ``(x, y)`` pixels. + + Parity shape with the Chrome path: ``selector`` is optional; when + absent we scroll ``window``, when present we scroll the matched + element's own scroll container (via ``el.scrollBy``). The Rust side + pre-normalises ``direction`` + ``amount`` into ``x`` / ``y``, so the + sidecar only sees deltas. + """ + args = args or {} + selector_or_ref = args.get("selector") + dx = float(args.get("x", 0) or 0) + dy = float(args.get("y", 0) or 0) + + tab = await self._tab_for(args) + + if selector_or_ref is None or selector_or_ref == "": + try: + await tab.page.evaluate( + "([dx, dy]) => window.scrollBy(dx, dy)", [dx, dy] + ) + except Exception as exc: # noqa: BLE001 + raise LaunchError("action-failed", str(exc)) from exc + return {"scrolled": True, "tabId": tab.tab_id} + + ref_id = parse_ref(selector_or_ref) + if ref_id is not None: + handle = _require_ref(tab, ref_id) + try: + await handle.evaluate( + "(el, [dx, dy]) => el.scrollBy(dx, dy)", [dx, dy] + ) + except Exception as exc: # noqa: BLE001 + raise _classify_playwright_error(exc, "") from exc + return {"scrolled": True, "tabId": tab.tab_id} + + locator = tab.page.locator(selector_or_ref) + try: + count = await locator.count() + except Exception as exc: # noqa: BLE001 + raise _classify_playwright_error(exc, selector_or_ref) from exc + if count == 0: + raise LaunchError( + "selector-not-found", + f"Selector {selector_or_ref!r} did not match any element", + ) + if count > 1: + raise LaunchError( + "ambiguous-selector", + f"Selector {selector_or_ref!r} matched {count} elements; refine it or use a ref", + ) + try: + await locator.evaluate("(el, [dx, dy]) => el.scrollBy(dx, dy)", [dx, dy]) + except Exception as exc: # noqa: BLE001 + raise 
_classify_playwright_error(exc, selector_or_ref) from exc + return {"scrolled": True, "tabId": tab.tab_id} + + async def scroll_into_view(self, args: Optional[dict] = None) -> dict: + """Scroll ``selector`` into view, centred. + + Mirrors the Chrome path's ``scrollIntoView({block:'center', inline:'center'})`` + rather than using Playwright's looser ``scroll_into_view_if_needed``, + so behaviour matches across engines for the same selector. + """ + args = args or {} + selector_or_ref = _require_str(args, "selector") + + tab = await self._tab_for(args) + js = "el => el.scrollIntoView({ block: 'center', inline: 'center' })" + + ref_id = parse_ref(selector_or_ref) + if ref_id is not None: + handle = _require_ref(tab, ref_id) + try: + await handle.evaluate(js) + except Exception as exc: # noqa: BLE001 + raise _classify_playwright_error(exc, "") from exc + return {"scrolled": selector_or_ref, "tabId": tab.tab_id} + + locator = tab.page.locator(selector_or_ref) + try: + count = await locator.count() + except Exception as exc: # noqa: BLE001 + raise _classify_playwright_error(exc, selector_or_ref) from exc + if count == 0: + raise LaunchError( + "selector-not-found", + f"Selector {selector_or_ref!r} did not match any element", + ) + if count > 1: + raise LaunchError( + "ambiguous-selector", + f"Selector {selector_or_ref!r} matched {count} elements; refine it or use a ref", + ) + try: + await locator.evaluate(js) + except Exception as exc: # noqa: BLE001 + raise _classify_playwright_error(exc, selector_or_ref) from exc + return {"scrolled": selector_or_ref, "tabId": tab.tab_id} + async def _tab_for(self, args: dict) -> Tab: tab_id = args.get("tabId") if isinstance(tab_id, str) and tab_id: diff --git a/packages/camoufox-sidecar/camoufox_sidecar/snapshot.py b/packages/camoufox-sidecar/camoufox_sidecar/snapshot.py index c4e1c965e..14cfa047e 100644 --- a/packages/camoufox-sidecar/camoufox_sidecar/snapshot.py +++ b/packages/camoufox-sidecar/camoufox_sidecar/snapshot.py @@ -300,9 
+300,16 @@ async def take_snapshot( entry["ref"] = f"e{new_idx}" # Resolve each ref back to a live ElementHandle so subsequent click/fill - # calls can reach the element without re-running the JS walker. + # calls can reach the element without re-running the JS walker. Two refs + # are in play here: ``entry["ref"]`` is the agent-facing id (``e1..eM``, + # contiguous after any ``interactive_only`` filter), and ``dom_ref`` is + # the attribute value the JS walker stamped on the element before the + # filter re-numbered refs. We query by the DOM ref and cache under the + # agent-facing one, so the next ``click @eN`` from the agent lands on + # the right handle. for entry in entries: - dom_ref = entry.get("_dom_ref", entry["ref"]) + agent_ref = entry["ref"] + dom_ref = entry.get("_dom_ref", agent_ref) handle = await page.query_selector(f"[data-__ab-ref='{dom_ref}']") if handle is None: # Element vanished between the walker and this query_selector — @@ -310,7 +317,7 @@ async def take_snapshot( # synchronously. Drop the entry silently rather than emit a # dangling ref to the agent. 
continue - ref_cache.put(ref_id, handle, role=entry["role"], name=entry["name"]) + ref_cache.put(agent_ref, handle, role=entry["role"], name=entry["name"]) lines = [_format_line(entry) for entry in entries] if not lines: diff --git a/packages/camoufox-sidecar/pyproject.toml b/packages/camoufox-sidecar/pyproject.toml index 89775cb5e..cdf3530cf 100644 --- a/packages/camoufox-sidecar/pyproject.toml +++ b/packages/camoufox-sidecar/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "camoufox-sidecar" -version = "0.26.0+celeria.camoufox.1" +version = "0.26.0+celeria.camoufox.2" description = "Sidecar process that drives Camoufox on behalf of agent-browser" readme = "README.md" requires-python = ">=3.10" diff --git a/packages/camoufox-sidecar/tests/test_commands.py b/packages/camoufox-sidecar/tests/test_commands.py index 80e61e196..405753be7 100644 --- a/packages/camoufox-sidecar/tests/test_commands.py +++ b/packages/camoufox-sidecar/tests/test_commands.py @@ -262,3 +262,160 @@ def _ref_by(refs: dict, *, role: str, name: str) -> str | None: if entry["role"] == role and entry["name"].strip() == name: return ref_id return None + + +# --------------------------------------------------------------------------- +# Regression tests +# --------------------------------------------------------------------------- + + +async def test_interactive_only_snapshot_then_click_by_ref( + camoufox_sidecar: Sidecar, +) -> None: + """Regression: ``snapshot -i`` renumbers refs to ``e1..eM`` on the agent + side while the JS walker's ``data-__ab-ref`` attributes still hold the + pre-filter counter. The handle-resolution loop must query the DOM by the + original attribute and cache handles under the renumbered ref, so a + subsequent ``click @eN`` actually hits the right element. + + A prior fix introduced a ``NameError: name 'ref_id' is not defined`` in + this loop which broke every ``snapshot`` call. 
This test locks in that + (a) interactive_only snapshot doesn't raise, and (b) the renumbered ref + actually clicks the underlying element. + """ + sc = camoufox_sidecar + await _launch_and_goto(sc, FIXTURE_URL) + + # Interactive-only snapshot should not raise and should renumber refs. + result = await _snapshot(sc, 3, interactive=True) + refs = result["refs"] + # The fixture's h1 has the (non-interactive) heading role, so it is filtered out; + # the renumbered refs e1..e5 should all carry interactive roles. + assert all(k.startswith("e") for k in refs.keys()) + assert not any(r["role"] == "heading" for r in refs.values()), refs + + # The submit button's renumbered ref should still click the real element. + submit_ref = _ref_by(refs, role="button", name="Submit") + assert submit_ref, refs + await sc.send( + {"id": 20, "cmd": "page.click", "args": {"selector": f"@{submit_ref}"}} + ) + click_resp = await sc.read_frame(timeout=30.0) + assert click_resp["ok"] is True, click_resp + + await sc.send( + {"id": 21, "cmd": "page.getText", "args": {"selector": "#status"}} + ) + text_resp = await sc.read_frame(timeout=10.0) + assert text_resp["result"]["text"] == "Submitted" + + +# --------------------------------------------------------------------------- +# Scroll — Unit 5 extension for Chrome-parity +# --------------------------------------------------------------------------- + + +SCROLL_PAGE_URL = ( + "data:text/html," + "" + "
top
" + "
" + "
bottom
" + "" +) + + +async def test_scroll_window_by_y_succeeds(camoufox_sidecar: Sidecar) -> None: + """page.scroll without a selector scrolls the window by (x, y).""" + sc = camoufox_sidecar + await _launch_and_goto(sc, SCROLL_PAGE_URL) + + await sc.send( + {"id": 10, "cmd": "page.scroll", "args": {"y": 1500}} + ) + resp = await sc.read_frame(timeout=10.0) + assert resp["ok"] is True, resp + assert resp["result"]["scrolled"] is True + + +async def test_scroll_window_negative_y_scrolls_up(camoufox_sidecar: Sidecar) -> None: + """Negative y scrolls up (after scrolling down first so there's room).""" + sc = camoufox_sidecar + await _launch_and_goto(sc, SCROLL_PAGE_URL) + + # Scroll down 2000, then scroll up 1000 — both should succeed. + await sc.send({"id": 10, "cmd": "page.scroll", "args": {"y": 2000}}) + down_resp = await sc.read_frame(timeout=10.0) + assert down_resp["ok"] is True, down_resp + + await sc.send({"id": 11, "cmd": "page.scroll", "args": {"y": -1000}}) + up_resp = await sc.read_frame(timeout=10.0) + assert up_resp["ok"] is True, up_resp + + +async def test_scroll_by_css_selector_with_missing_element( + camoufox_sidecar: Sidecar, +) -> None: + """Selector that doesn't match surfaces ``selector-not-found``.""" + sc = camoufox_sidecar + await _launch_and_goto(sc, SCROLL_PAGE_URL) + + await sc.send( + { + "id": 10, + "cmd": "page.scroll", + "args": {"selector": "#does-not-exist-42", "y": 100}, + } + ) + resp = await sc.read_frame(timeout=10.0) + assert resp["ok"] is False, resp + assert resp["error"]["code"] == "selector-not-found", resp + + +async def test_scroll_into_view_by_css_selector(camoufox_sidecar: Sidecar) -> None: + """page.scrollIntoView centres the matched element in the viewport.""" + sc = camoufox_sidecar + await _launch_and_goto(sc, SCROLL_PAGE_URL) + + await sc.send( + {"id": 10, "cmd": "page.scrollIntoView", "args": {"selector": "#bot"}} + ) + resp = await sc.read_frame(timeout=10.0) + assert resp["ok"] is True, resp + assert 
resp["result"]["scrolled"] == "#bot" + + +async def test_scroll_into_view_by_ref(camoufox_sidecar: Sidecar) -> None: + """page.scrollIntoView accepts a ``@eN`` ref after a snapshot.""" + sc = camoufox_sidecar + await _launch_and_goto(sc, FIXTURE_URL) + snap = await _snapshot(sc, 3) + submit_ref = _ref_by(snap["refs"], role="button", name="Submit") + assert submit_ref, snap + + await sc.send( + { + "id": 10, + "cmd": "page.scrollIntoView", + "args": {"selector": f"@{submit_ref}"}, + } + ) + resp = await sc.read_frame(timeout=10.0) + assert resp["ok"] is True, resp + assert resp["result"]["scrolled"] == f"@{submit_ref}" + + +async def test_scroll_into_view_ambiguous_selector_errors( + camoufox_sidecar: Sidecar, +) -> None: + """Selector matching multiple elements surfaces ``ambiguous-selector``.""" + sc = camoufox_sidecar + await _launch_and_goto(sc, FIXTURE_URL) + + # ``input`` matches three elements on the fixture. + await sc.send( + {"id": 10, "cmd": "page.scrollIntoView", "args": {"selector": "input"}} + ) + resp = await sc.read_frame(timeout=10.0) + assert resp["ok"] is False, resp + assert resp["error"]["code"] == "ambiguous-selector", resp diff --git a/packages/dashboard/package.json b/packages/dashboard/package.json index 757c9069a..5ce966466 100644 --- a/packages/dashboard/package.json +++ b/packages/dashboard/package.json @@ -1,6 +1,6 @@ { "name": "dashboard", - "version": "0.26.0-celeria-camoufox.1", + "version": "0.26.0-celeria-camoufox.2", "private": true, "scripts": { "dev": "next dev",