diff --git a/.gitignore b/.gitignore index 7bd71de3e..eae6d2d80 100644 --- a/.gitignore +++ b/.gitignore @@ -67,3 +67,11 @@ docs/package-lock.json # next .next/ out/ + +# Python +__pycache__/ +*.py[cod] +*.egg-info/ +.pytest_cache/ +.venv/ +packages/camoufox-sidecar/.venv/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ec3b9df1..7c1b27d80 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,13 +1,30 @@ # agent-browser -## 0.26.0-celeria-stealth.1 +## 0.26.0-celeria-camoufox.1 ### New Features -- **`--stealth` flag and `AGENT_BROWSER_STEALTH` env var** - Opt-in stealth mode that masks the most common bot-detection signals: hides `navigator.webdriver`, restores a fake `chrome.runtime`, spoofs `navigator.plugins`, fixes `navigator.languages`, patches the WebGL vendor/renderer tuple, and prepends `--disable-blink-features=AutomationControlled` to the Chromium launch args. The init script is injected per-page via `Page.addScriptToEvaluateOnNewDocument` from `enable_domains`, so it runs before any document JS on every navigation. Spirit of the closed upstream PR #471, ported to the current Rust codebase. (Celeria fork) +- **`--engine camoufox` — third browser backend (Camoufox / patched Firefox).** Adds Camoufox alongside the existing Chrome (CDP) and Lightpanda (CDP) engines for targets that defeat JS-injection stealth. Camoufox's C++-level patches (canvas/WebGL noise, font fingerprint, WebRTC IP, AudioContext) go deeper than our `--stealth` script. Because Camoufox speaks Juggler, not CDP, the daemon drives it via a persistent Python sidecar over JSON-line stdio instead of the existing `CdpClient`. Stealth is implicit when `engine=camoufox`; combining with `--stealth` is a no-op with a warning (the JS injection would fight the engine-level spoofs). 
(Celeria fork) +- **`BrowserBackend` engine dispatch in the action layer.** `BrowserManager` now holds an engine-tagged backend enum and every action under `cli/src/native/*.rs` (`actions`, `interaction`, `element`, `snapshot`, `screenshot`, `cookies`, `network`) has a per-engine arm. Chrome and Lightpanda paths are byte-for-byte unchanged; `inspect_server` and `stream/cdp_loop` remain Chrome-only and return a structured `engine-incompatible` error when pointed at a Camoufox backend. +- **v1 command surface for Camoufox.** `open`, `navigate`, `snapshot` with `@eN` refs, `click`/`fill`/`get text` by ref and by selector, `screenshot`, `tabs` (list/new/switch/close), `close`. `@eN` refs are sidecar-owned element handles invalidated on navigation — stale access returns `{"code": "ref-stale"}` rather than silently acting on the wrong element. Low-value or engine-incompatible commands (`screencast`, raw `cdp`, devtools introspection) return `{"code": "not-yet-supported", "engine": "camoufox"}`. +- **Sidecar distribution.** The `camoufox_sidecar` Python package is embedded into the Rust binary via `include_dir!` and extracted to `$CACHE_DIR/agent-browser/camoufox-sidecar-<version>/` on first launch. When `camoufox_sidecar` is installed into the system Python (e.g. the E2B sandbox), `python3 -m camoufox_sidecar` is used directly without extraction. Python runtime lookup order: `AGENT_BROWSER_CAMOUFOX_PYTHON` env var → `python3` on PATH. +- **`doctor` reports Camoufox status.** `agent-browser doctor` probes the Python runtime (`--version`), `import camoufox`, and the fetched browser binary (located through `camoufox.pkgman`), reporting each separately with an actionable reason and fix hint when a step is missing. Non-fatal — a missing Camoufox doesn't block Chrome/Lightpanda use. +- **`"engine"` label in `--json` output.** Every daemon response carries `"engine": "<name>"` (for example `"camoufox"`) so downstream consumers can segment telemetry by backend without inspecting request state; responses produced locally without a daemon omit the field. 
+ +### Requirements + +- Running `--engine camoufox` outside the Celeria E2B template requires a Python 3 runtime with `pip install camoufox camoufox_sidecar` and a one-time `python3 -m camoufox fetch` to download the Camoufox browser binary. This follows the Lightpanda "install it yourself" precedent; `agent-browser install` is not extended for Camoufox in v1. + +## 0.26.0-celeria-stealth.1 + + ### New Features + + - **`--stealth` flag and `AGENT_BROWSER_STEALTH` env var** - Opt-in stealth mode that masks the most common bot-detection signals: hides `navigator.webdriver`, restores a fake `chrome.runtime`, spoofs `navigator.plugins`, fixes `navigator.languages`, patches the WebGL vendor/renderer tuple, and prepends `--disable-blink-features=AutomationControlled` to the Chromium launch args. The init script is injected per-page via `Page.addScriptToEvaluateOnNewDocument` from `enable_domains`, so it runs before any document JS on every navigation. Spirit of the closed upstream PR #471, ported to the current Rust codebase. 
(Celeria fork) + + ## 0.26.0 diff --git a/cli/Cargo.lock b/cli/Cargo.lock index 461663f2c..7fa373661 100644 --- a/cli/Cargo.lock +++ b/cli/Cargo.lock @@ -45,7 +45,7 @@ dependencies = [ [[package]] name = "agent-browser" -version = "0.26.0-celeria-stealth.2" +version = "0.26.0-celeria-camoufox.1" dependencies = [ "aes-gcm", "async-trait", @@ -57,6 +57,7 @@ dependencies = [ "hex", "hmac", "image", + "include_dir", "libc", "reqwest", "rust-embed", @@ -1047,6 +1048,25 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e7c5cedc30da3a610cac6b4ba17597bdf7152cf974e8aab3afb3d54455e371c8" +[[package]] +name = "include_dir" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "923d117408f1e49d914f1a379a309cffe4f18c05cf4e3d12e613a15fc81bd0dd" +dependencies = [ + "include_dir_macros", +] + +[[package]] +name = "include_dir_macros" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cab85a7ed0bd5f0e76d93846e0147172bed2e2d3f859bcc33a8d9699cad1a75" +dependencies = [ + "proc-macro2", + "quote", +] + [[package]] name = "indexmap" version = "2.13.0" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 19262fd7f..736a7fe87 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "agent-browser" -version = "0.26.0-celeria-stealth.2" +version = "0.26.0-celeria-camoufox.1" edition = "2021" description = "Fast browser automation CLI for AI agents" license = "Apache-2.0" @@ -35,6 +35,15 @@ hex = "0.4" chrono = "0.4" urlencoding = "2" rust-embed = "8" +include_dir = "0.7" + +[features] +# Enables the `cli/tests/camoufox_launch.rs` integration suite. Off by default +# because it requires a working Python 3 install with the `camoufox` package +# and the browser binary fetched. Turn on locally with +# `cargo test --features camoufox-integration` when testing the Camoufox +# engine end-to-end. 
+camoufox-integration = [] [target.'cfg(unix)'.dependencies] libc = "0.2" diff --git a/cli/src/connection.rs b/cli/src/connection.rs index cac92dd91..c8a885484 100644 --- a/cli/src/connection.rs +++ b/cli/src/connection.rs @@ -33,6 +33,12 @@ pub struct Response { pub error: Option, #[serde(skip_serializing_if = "Option::is_none")] pub warning: Option, + /// Engine label the daemon was driving when this response was produced + /// ("chrome", "lightpanda", "camoufox", "safari"). Optional on the wire + /// so local-only commands and older daemons without engine context still + /// round-trip through this struct cleanly. + #[serde(skip_serializing_if = "Option::is_none")] + pub engine: Option, } #[allow(dead_code)] diff --git a/cli/src/doctor/camoufox.rs b/cli/src/doctor/camoufox.rs new file mode 100644 index 000000000..7b933c564 --- /dev/null +++ b/cli/src/doctor/camoufox.rs @@ -0,0 +1,318 @@ +//! Probe the Camoufox engine availability. +//! +//! Three checks, reported independently so the user can tell exactly which +//! step is missing on a partial install: +//! 1. A Python 3 runtime (either `AGENT_BROWSER_CAMOUFOX_PYTHON` or +//! `python3` on PATH). +//! 2. The `camoufox` Python package imports cleanly. +//! 3. The Camoufox browser binary has been fetched. +//! +//! All failures are non-fatal: we report the distinct reason as `Info` so +//! `doctor` continues and users can still confidently use `--engine chrome` +//! / `--engine lightpanda`. Dependent checks short-circuit: if Python is +//! missing we skip the package and binary probes, since running them would +//! fail for an unrelated reason. 
+
+use std::env;
+use std::io::Read;
+use std::process::{Command, Output, Stdio};
+use std::thread::{self, JoinHandle};
+use std::time::{Duration, Instant};
+
+use super::{Check, Status};
+
+/// Category label attached to every check reported by this module.
+const CATEGORY: &str = "Camoufox";
+/// Wall-clock budget for a single Python probe subprocess.
+const PROBE_TIMEOUT: Duration = Duration::from_secs(10);
+/// Pause between `try_wait` polls while a probe is still running.
+const POLL_INTERVAL: Duration = Duration::from_millis(50);
+/// Fix hint shared by both "no usable Python" outcomes.
+const PYTHON_FIX: &str =
+    "install python3 and `pip install camoufox`, or set AGENT_BROWSER_CAMOUFOX_PYTHON";
+
+/// Run the three Camoufox probes (Python runtime, `camoufox` package,
+/// browser binary) and append one `Check` per probe that ran.
+///
+/// Probes are dependent: the first one that fails is recorded as `Info`
+/// with a fix hint and the remaining probes are skipped.
+pub(super) fn check(checks: &mut Vec<Check>) {
+    let python = match resolve_python() {
+        Some(p) => p,
+        None => {
+            push_not_available(checks, "camoufox.python", "python3 not found", PYTHON_FIX);
+            return;
+        }
+    };
+
+    match probe_python_version(&python) {
+        PythonProbe::Ok(version_label) => checks.push(Check::new(
+            "camoufox.python",
+            CATEGORY,
+            Status::Pass,
+            format!("python3 at {} ({})", python, version_label),
+        )),
+        PythonProbe::Unusable(reason) => {
+            push_not_available(
+                checks,
+                "camoufox.python",
+                &format!("python3 at {} is not runnable ({})", python, reason),
+                PYTHON_FIX,
+            );
+            return;
+        }
+    }
+
+    match import_camoufox(&python) {
+        ProbeOutcome::Ok(detail) => checks.push(Check::new(
+            "camoufox.package",
+            CATEGORY,
+            Status::Pass,
+            format!("camoufox package importable{}", detail),
+        )),
+        ProbeOutcome::Missing(reason) => {
+            push_not_available(checks, "camoufox.package", &reason, "pip install camoufox");
+            return;
+        }
+    }
+
+    match camoufox_binary_path(&python) {
+        ProbeOutcome::Ok(path) => checks.push(Check::new(
+            "camoufox.binary",
+            CATEGORY,
+            Status::Pass,
+            format!("camoufox browser binary at {}", path),
+        )),
+        ProbeOutcome::Missing(reason) => {
+            push_not_available(
+                checks,
+                "camoufox.binary",
+                &reason,
+                "python3 -m camoufox fetch",
+            );
+        }
+    }
+}
+
+/// Record a non-fatal "camoufox: not available" `Info` check carrying the
+/// distinct `reason` and a suggested `fix`.
+fn push_not_available(checks: &mut Vec<Check>, id: &str, reason: &str, fix: &str) {
+    checks.push(
+        Check::new(
+            id.to_string(),
+            CATEGORY,
+            Status::Info,
+            format!("camoufox: not available (reason: {})", reason),
+        )
+        .with_fix(fix.to_string()),
+    );
+}
+
+/// Result of running `<python> --version`.
+enum PythonProbe {
+    /// Version string from `<python> --version`, already trimmed. When the
+    /// interpreter prints nothing this holds `version unknown`.
+    Ok(String),
+    /// Spawn failed, non-zero exit, or probe timed out. The caller treats
+    /// this as equivalent to "python not found" for `doctor` purposes.
+    Unusable(String),
+}
+
+/// Result of a package / binary probe: a detail string to display on
+/// success, or the reason the item is considered missing.
+enum ProbeOutcome {
+    Ok(String),
+    Missing(String),
+}
+
+/// Pick the Python interpreter to probe: `AGENT_BROWSER_CAMOUFOX_PYTHON`
+/// when set to a non-blank value, otherwise `python3` if it is on PATH.
+fn resolve_python() -> Option<String> {
+    if let Ok(explicit) = env::var("AGENT_BROWSER_CAMOUFOX_PYTHON") {
+        // Return the trimmed value: the emptiness check already ignores
+        // surrounding whitespace, and a stray trailing space or newline
+        // would otherwise make the spawn fail with "not found".
+        let trimmed = explicit.trim();
+        if !trimmed.is_empty() {
+            return Some(trimmed.to_string());
+        }
+    }
+    if super::helpers::which_exists("python3") {
+        return Some("python3".to_string());
+    }
+    None
+}
+
+/// Run `<python> --version` and classify the interpreter as usable or not.
+fn probe_python_version(python: &str) -> PythonProbe {
+    let out = match run_with_timeout(Command::new(python).arg("--version")) {
+        RunOutcome::Ok(o) => o,
+        RunOutcome::SpawnFailed(e) => {
+            return PythonProbe::Unusable(format!("spawn failed: {}", e));
+        }
+        RunOutcome::Timeout => {
+            return PythonProbe::Unusable("probe timed out".to_string());
+        }
+    };
+    if !out.status.success() {
+        let stderr = String::from_utf8_lossy(&out.stderr);
+        let msg = first_line(&stderr).unwrap_or_else(|| format!("exit {}", exit_code_label(&out)));
+        return PythonProbe::Unusable(msg);
+    }
+    // Python writes `--version` to stdout on 3.4+ and stderr on older;
+    // prefer stdout, fall back to stderr.
+    let stdout = String::from_utf8_lossy(&out.stdout).trim().to_string();
+    if !stdout.is_empty() {
+        return PythonProbe::Ok(stdout);
+    }
+    let stderr = String::from_utf8_lossy(&out.stderr).trim().to_string();
+    if stderr.is_empty() {
+        PythonProbe::Ok("version unknown".to_string())
+    } else {
+        PythonProbe::Ok(stderr)
+    }
+}
+
+/// Check that `import camoufox` succeeds under `python`.
+///
+/// On success the detail is ` (version X)` when the installed version can
+/// be read, or an empty string otherwise.
+fn import_camoufox(python: &str) -> ProbeOutcome {
+    // `camoufox.__version__` is a submodule (not a string), so use
+    // importlib.metadata to fetch the installed version instead.
+    let probe = r#"
+import sys, importlib
+importlib.import_module('camoufox')
+try:
+    from importlib.metadata import version
+    print(version('camoufox'), end='')
+except Exception:
+    print('', end='')
+"#;
+    let out = match run_with_timeout(Command::new(python).arg("-c").arg(probe)) {
+        RunOutcome::Ok(o) => o,
+        RunOutcome::SpawnFailed(e) => {
+            return ProbeOutcome::Missing(format!("python probe spawn failed: {}", e));
+        }
+        RunOutcome::Timeout => {
+            return ProbeOutcome::Missing("camoufox import probe timed out".to_string());
+        }
+    };
+    if out.status.success() {
+        let version = String::from_utf8_lossy(&out.stdout).trim().to_string();
+        let detail = if version.is_empty() {
+            String::new()
+        } else {
+            format!(" (version {})", version)
+        };
+        return ProbeOutcome::Ok(detail);
+    }
+    let stderr = String::from_utf8_lossy(&out.stderr);
+    // Only report "not installed" when the missing module is camoufox
+    // itself. A ModuleNotFoundError for one of camoufox's dependencies
+    // means the package is present but broken, and the fix differs.
+    if stderr.contains("No module named 'camoufox'") {
+        ProbeOutcome::Missing("camoufox package not installed".to_string())
+    } else {
+        // A Python traceback starts with the constant line
+        // "Traceback (most recent call last):"; the exception type and
+        // message are on the last line, so report that one.
+        ProbeOutcome::Missing(format!(
+            "camoufox import failed: {}",
+            last_line(&stderr).unwrap_or_else(|| "unknown error".to_string())
+        ))
+    }
+}
+
+/// Ask the `camoufox` package where its browser install lives.
+///
+/// Returns the first existing candidate directory, or `Missing` when the
+/// probe reports that nothing has been fetched or fails outright.
+fn camoufox_binary_path(python: &str) -> ProbeOutcome {
+    // Prefer the package's own path resolver so we don't hardcode the cache
+    // layout, then fall back to `~/.cache/camoufox` so an upstream rename
+    // of `pkgman` (or its helpers) can't make a working install look
+    // broken.
+    let probe = r#"
+import sys
+from pathlib import Path
+try:
+    from camoufox.pkgman import installed_verstr, get_path
+    ver = installed_verstr()
+    if not ver:
+        print('__AB_NOT_FETCHED__', end='')
+        sys.exit(0)
+    base = Path(get_path('cache'))
+    candidates = [base, base / ver]
+    for c in candidates:
+        if c.exists():
+            print(str(c), end='')
+            sys.exit(0)
+    print('__AB_NOT_FETCHED__', end='')
+except Exception as exc:
+    home = Path.home()
+    fallback = home / '.cache' / 'camoufox'
+    if fallback.exists() and any(fallback.iterdir()):
+        print(str(fallback), end='')
+        sys.exit(0)
+    sys.stderr.write(f'{type(exc).__name__}: {exc}')
+    sys.exit(2)
+"#;
+
+    let out = match run_with_timeout(Command::new(python).arg("-c").arg(probe)) {
+        RunOutcome::Ok(o) => o,
+        RunOutcome::SpawnFailed(e) => {
+            return ProbeOutcome::Missing(format!("python probe spawn failed: {}", e));
+        }
+        RunOutcome::Timeout => {
+            return ProbeOutcome::Missing("camoufox path probe timed out".to_string());
+        }
+    };
+    if !out.status.success() {
+        let stderr = String::from_utf8_lossy(&out.stderr);
+        return ProbeOutcome::Missing(format!(
+            "camoufox path probe failed: {}",
+            first_line(&stderr).unwrap_or_else(|| "unknown error".to_string())
+        ));
+    }
+    let stdout = String::from_utf8_lossy(&out.stdout).trim().to_string();
+    if stdout.is_empty() || stdout == "__AB_NOT_FETCHED__" {
+        return ProbeOutcome::Missing(
+            "camoufox browser binary not fetched (run `python3 -m camoufox fetch`)".to_string(),
+        );
+    }
+    ProbeOutcome::Ok(stdout)
+}
+
+/// Outcome of `run_with_timeout`.
+enum RunOutcome {
+    /// The child exited (with any status); captured stdout/stderr attached.
+    Ok(Output),
+    /// The child could not be spawned or waited on; carries the OS error.
+    SpawnFailed(String),
+    /// The child was still running at `PROBE_TIMEOUT` and was killed.
+    Timeout,
+}
+
+/// Run `cmd` with piped stdout/stderr and a null stdin, giving up after
+/// `PROBE_TIMEOUT`.
+fn run_with_timeout(cmd: &mut Command) -> RunOutcome {
+    // A deadlocked child (e.g. probe hanging on import of a broken module)
+    // must not hang the whole `doctor` run. Spawn, then poll with a wall
+    // clock; if the deadline fires, kill and return `Timeout`.
+    let mut child = match cmd
+        .stdout(Stdio::piped())
+        .stderr(Stdio::piped())
+        .stdin(Stdio::null())
+        .spawn()
+    {
+        Ok(c) => c,
+        Err(e) => return RunOutcome::SpawnFailed(e.to_string()),
+    };
+
+    // Drain both pipes while we poll. Without this, a child that writes
+    // more than the OS pipe buffer blocks in `write`, never exits, and is
+    // misreported as a timeout.
+    let stdout_reader = child.stdout.take().map(drain_in_background);
+    let stderr_reader = child.stderr.take().map(drain_in_background);
+
+    let started = Instant::now();
+    let status = loop {
+        match child.try_wait() {
+            Ok(Some(status)) => break status,
+            Ok(None) => {
+                if started.elapsed() >= PROBE_TIMEOUT {
+                    // Killing the child closes its pipe ends, so the
+                    // detached reader threads finish on their own.
+                    let _ = child.kill();
+                    let _ = child.wait();
+                    return RunOutcome::Timeout;
+                }
+                thread::sleep(POLL_INTERVAL);
+            }
+            Err(e) => {
+                // Best effort: don't leave the probe running behind us.
+                let _ = child.kill();
+                let _ = child.wait();
+                return RunOutcome::SpawnFailed(e.to_string());
+            }
+        }
+    };
+
+    RunOutcome::Ok(Output {
+        status,
+        stdout: collect_pipe(stdout_reader),
+        stderr: collect_pipe(stderr_reader),
+    })
+}
+
+/// Read `pipe` to EOF on a helper thread and hand back the bytes.
+fn drain_in_background<R>(mut pipe: R) -> JoinHandle<Vec<u8>>
+where
+    R: Read + Send + 'static,
+{
+    thread::spawn(move || {
+        let mut buf = Vec::new();
+        // A read error only truncates the captured output; the exit status
+        // still decides whether the probe succeeded.
+        let _ = pipe.read_to_end(&mut buf);
+        buf
+    })
+}
+
+/// Join a reader thread started by `drain_in_background`; yields an empty
+/// buffer when there was no pipe or the reader panicked.
+fn collect_pipe(reader: Option<JoinHandle<Vec<u8>>>) -> Vec<u8> {
+    reader.and_then(|h| h.join().ok()).unwrap_or_default()
+}
+
+/// Exit code as text, or `signal` when the process has no exit code.
+fn exit_code_label(out: &Output) -> String {
+    match out.status.code() {
+        Some(c) => c.to_string(),
+        None => "signal".to_string(),
+    }
+}
+
+/// First non-blank line of `s`, trimmed; `None` when every line is blank.
+fn first_line(s: &str) -> Option<String> {
+    s.lines()
+        .find(|l| !l.trim().is_empty())
+        .map(|l| l.trim().to_string())
+}
+
+/// Last non-blank line of `s`, trimmed; `None` when every line is blank.
+/// For a Python traceback this is the `ExceptionType: message` line.
+fn last_line(s: &str) -> Option<String> {
+    s.lines()
+        .rev()
+        .find(|l| !l.trim().is_empty())
+        .map(|l| l.trim().to_string())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_first_line_skips_blank_leading_lines() {
+        assert_eq!(first_line(""), None);
+        assert_eq!(first_line("\n\n"), None);
+        assert_eq!(first_line("hello"), Some("hello".to_string()));
+        assert_eq!(first_line("\n first\nsecond"), Some("first".to_string()));
+    }
+
+    #[test]
+    fn test_last_line_picks_exception_line_of_traceback() {
+        assert_eq!(last_line(""), None);
+        assert_eq!(last_line("\n\n"), None);
+        assert_eq!(last_line("hello"), Some("hello".to_string()));
+        let traceback = "Traceback (most recent call last):\n  File \"<string>\", line 3\nImportError: boom\n\n";
+        assert_eq!(last_line(traceback), Some("ImportError: boom".to_string()));
+    }
+}
diff --git a/cli/src/doctor/mod.rs b/cli/src/doctor/mod.rs
index 4aa131e1a..939781c0c 100644
--- a/cli/src/doctor/mod.rs
+++ b/cli/src/doctor/mod.rs
@@ -8,6 +8,7 @@
 //! repairs (reinstalling Chrome, purging old state files, generating a
 //! missing encryption key) are gated behind `--fix`.
+mod camoufox; mod chrome; mod config; mod daemon; @@ -98,6 +99,7 @@ pub fn run_doctor(opts: DoctorOptions) -> i32 { environment::check(&mut checks); chrome::check(&mut checks); + camoufox::check(&mut checks); daemon::check(&mut checks); config::check(&mut checks); security::check(&mut checks); diff --git a/cli/src/main.rs b/cli/src/main.rs index 41e1cb98e..53e88e387 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -703,12 +703,14 @@ fn main() { data: Some(data), error: None, warning: None, + engine: None, }, Err(e) => connection::Response { success: false, data: None, error: Some(e), warning: None, + engine: None, }, }; let output_opts = OutputOptions::from_flags(&flags); @@ -1159,13 +1161,16 @@ fn main() { match send_command(launch_cmd, &flags.session) { Ok(resp) if !resp.success => { - // Launch command failed (e.g., invalid state file, profile error) - let error_msg = resp - .error - .unwrap_or_else(|| "Browser launch failed".to_string()); + // Launch command failed (e.g., invalid state file, profile error). + // Route through `print_response_with_opts` so the engine label + // (set by the daemon before validation) survives to --json + // callers — telemetry needs to know which engine rejected the + // launch. if flags.json { - print_json_error(error_msg); + let output_opts = OutputOptions::from_flags(&flags); + print_response_with_opts(&resp, None, &output_opts); } else { + let error_msg = resp.error.as_deref().unwrap_or("Browser launch failed"); eprintln!("{} {}", color::error_indicator(), error_msg); } exit(1); diff --git a/cli/src/native/actions.rs b/cli/src/native/actions.rs index 5d10e40cc..eb114bab8 100644 --- a/cli/src/native/actions.rs +++ b/cli/src/native/actions.rs @@ -344,7 +344,13 @@ impl DaemonState { fn subscribe_to_browser_events(&mut self) { if let Some(ref browser) = self.browser { - self.event_rx = Some(browser.client.subscribe()); + // Camoufox events are surfaced through `CamoufoxClient::subscribe` + // rather than a CDP broadcast. 
Units 4/5 wire up a sidecar + // event bridge; Unit 3's open+close flow doesn't depend on it, + // so skip silently here on the Camoufox backend. + if let Ok(client) = browser.backend.require_cdp() { + self.event_rx = Some(client.subscribe()); + } } } @@ -362,8 +368,14 @@ impl DaemonState { return; }; - let client = browser.client.clone(); - let mut rx = browser.client.subscribe(); + // Fetch.* is a CDP-only surface; Camoufox routes requests through + // Playwright's Route API (Unit 4+). Leave the handler idle on + // non-CDP backends. + let Ok(cdp) = browser.backend.require_cdp() else { + return; + }; + let client = cdp.clone(); + let mut rx = cdp.subscribe(); let domain_filter = self.domain_filter.clone(); let routes = self.routes.clone(); let origin_headers = self.origin_headers.clone(); @@ -477,8 +489,15 @@ impl DaemonState { return; }; - let client = browser.client.clone(); - let mut rx = browser.client.subscribe(); + // Dialog handling is wired up through CDP's Page.javascriptDialog + // events. Camoufox will eventually surface dialogs through the + // sidecar (Unit 4/5); for now Unit 3's open/close flow doesn't + // depend on this handler. 
+ let Ok(cdp) = browser.backend.require_cdp() else { + return; + }; + let client = cdp.clone(); + let mut rx = cdp.subscribe(); self.dialog_handler_task = Some(tokio::spawn(async move { loop { @@ -524,7 +543,11 @@ impl DaemonState { pub async fn update_stream_client(&self) { if let Some(ref slot) = self.stream_client { let mut guard = slot.write().await; - *guard = self.browser.as_ref().map(|m| Arc::clone(&m.client)); + *guard = self + .browser + .as_ref() + .filter(|m| m.backend.is_cdp()) + .map(|m| Arc::clone(m.client())); } if let Some(ref server) = self.stream_server { // Update the CDP page session ID so screencast commands target the right page @@ -587,7 +610,7 @@ impl DaemonState { if let Some(ref browser) = self.browser { if let Ok(session_id) = browser.active_session_id() { for ack_sid in drained.pending_acks { - let _ = stream::ack_screencast_frame(&browser.client, session_id, ack_sid) + let _ = stream::ack_screencast_frame(&browser.backend, session_id, ack_sid) .await; } } @@ -607,23 +630,23 @@ impl DaemonState { .insert(frame_id.clone(), iframe_sid.clone()); if let Some(ref mgr) = self.browser { let _ = mgr - .client + .client() .send_command_no_params( "Runtime.runIfWaitingForDebugger", Some(iframe_sid.as_str()), ) .await; let _ = mgr - .client + .client() .send_command_no_params("DOM.enable", Some(iframe_sid.as_str())) .await; let _ = mgr - .client + .client() .send_command_no_params("Accessibility.enable", Some(iframe_sid.as_str())) .await; if self.har_recording || self.request_tracking { let _ = mgr - .client + .client() .send_command_no_params("Network.enable", Some(iframe_sid.as_str())) .await; } @@ -637,7 +660,7 @@ impl DaemonState { for te in &drained.new_targets { if let Some(ref mut mgr) = self.browser { let attach_result: Result = mgr - .client + .client() .send_command_typed( "Target.attachToTarget", &AttachToTargetParams { @@ -655,7 +678,7 @@ impl DaemonState { if let Some(ref filter) = *df { let has_proxy_creds = 
self.proxy_credentials.read().await.is_some(); let _ = network::install_domain_filter( - &mgr.client, + &mgr.backend, &attach.session_id, &filter.allowed_domains, has_proxy_creds, @@ -1172,6 +1195,7 @@ pub async fn execute_command(cmd: &Value, state: &mut DaemonState) -> Value { return error_response( &id, &format!("Action '{}' denied by policy: {}", action, reason), + &state.engine, ); } PolicyResult::RequiresConfirmation => { @@ -1253,7 +1277,7 @@ pub async fn execute_command(cmd: &Value, state: &mut DaemonState) -> Value { state.update_stream_client().await; } if let Err(e) = auto_launch(state).await { - return error_response(&id, &format!("Auto-launch failed: {}", e)); + return error_response(&id, &format!("Auto-launch failed: {}", e), &state.engine); } } @@ -1274,6 +1298,7 @@ pub async fn execute_command(cmd: &Value, state: &mut DaemonState) -> Value { "Action '{}' is not supported on the WebDriver backend", action ), + &state.engine, ); } @@ -1436,8 +1461,12 @@ pub async fn execute_command(cmd: &Value, state: &mut DaemonState) -> Value { }; let mut resp = match result { - Ok(data) => success_response(&id, data), - Err(e) => error_response(&id, &super::browser::to_ai_friendly_error(&e)), + Ok(data) => success_response(&id, data, &state.engine), + Err(e) => error_response( + &id, + &super::browser::to_ai_friendly_error(&e), + &state.engine, + ), }; // Auto-report pending JavaScript dialog so agents know why commands may hang @@ -1494,7 +1523,7 @@ async fn connect_auto_with_fresh_tab() -> Result { mgr.tab_new(None, None).await?; let session_id = mgr.active_session_id()?.to_string(); let _ = mgr - .client + .client() .send_command("Page.bringToFront", None, Some(&session_id)) .await; Ok(mgr) @@ -1610,7 +1639,7 @@ async fn auto_launch(state: &mut DaemonState) -> Result<(), String> { if has_proxy_auth { if let Some(ref mgr) = state.browser { if let Ok(session_id) = mgr.active_session_id() { - let _ = network::install_domain_filter_fetch(&mgr.client, session_id, 
true).await; + let _ = network::install_domain_filter_fetch(&mgr.backend, session_id, true).await; } } } @@ -1675,7 +1704,7 @@ async fn try_auto_restore_state(state: &mut DaemonState) { if let Some(path) = state::find_auto_state_file(&session_name) { if let Some(ref mgr) = state.browser { if let Ok(session_id) = mgr.active_session_id() { - let _ = state::load_state(&mgr.client, session_id, &path).await; + let _ = state::load_state(&mgr.backend, session_id, &path).await; } } } @@ -1689,7 +1718,7 @@ async fn load_storage_state(state: &DaemonState, path: &Option) -> Resul if let Some(ref path) = path { if let Some(ref mgr) = state.browser { if let Ok(session_id) = mgr.active_session_id() { - state::load_state(&mgr.client, session_id, path).await?; + state::load_state(&mgr.backend, session_id, path).await?; } } } @@ -2023,18 +2052,18 @@ async fn handle_launch(cmd: &Value, state: &mut DaemonState) -> Result Result Result Result { fn handle_cdp_url(state: &DaemonState) -> Result { let mgr = state.browser.as_ref().ok_or("Browser not launched")?; + // `cdp_url` exposes the raw CDP WebSocket endpoint for DevTools / custom + // clients to attach to. Camoufox's Juggler isn't CDP, so there's no + // WebSocket to hand back. Fail loud rather than return an empty string. + let _ = mgr.backend.require_cdp_for("cdp_url")?; Ok(json!({ "cdpUrl": mgr.get_cdp_url() })) } async fn handle_inspect(state: &mut DaemonState) -> Result { let mgr = state.browser.as_ref().ok_or("Browser not launched")?; + // Chrome-only: the DevTools inspect proxy forwards raw CDP and has no + // Playwright/Camoufox analogue. Fail loud with an actionable error. 
+ let cdp_client = mgr.backend.require_cdp_for("inspect (DevTools proxy)")?; + // Shut down any existing inspect server so we always target the current page if let Some(server) = state.inspect_server.take() { server.shutdown(); @@ -2251,7 +2293,7 @@ async fn handle_inspect(state: &mut DaemonState) -> Result { let target_id = mgr.active_target_id()?.to_string(); let chrome_hp = mgr.chrome_host_port().to_string(); - let proxy_handle = mgr.client.inspect_handle(); + let proxy_handle = cdp_client.inspect_handle(); let server = InspectServer::start(proxy_handle, target_id, chrome_hp).await?; let url = format!("http://127.0.0.1:{}", server.port()); @@ -2334,7 +2376,7 @@ async fn handle_close(state: &mut DaemonState) -> Result { if let Some(ref session_name) = state.session_name { if let Ok(session_id) = mgr.active_session_id() { let _ = state::save_state( - &mgr.client, + &mgr.backend, session_id, None, Some(session_name.as_str()), @@ -2392,6 +2434,23 @@ async fn handle_close(state: &mut DaemonState) -> Result { async fn handle_snapshot(cmd: &Value, state: &mut DaemonState) -> Result { let mgr = state.browser.as_ref().ok_or("Browser not launched")?; + + // Camoufox path: the sidecar owns snapshot + ref assignment. The Rust + // side mirrors the returned `refs` map into `state.ref_map` so that + // anything on the Rust side that introspects ref metadata (diffing, + // screenshot annotation, etc.) keeps working — even though click/fill + // themselves don't use the Rust ref_map on this engine. 
+ if mgr.backend.is_camoufox() { + state.ref_map.clear(); + let args = json!({ + "interactive": cmd.get("interactive").and_then(|v| v.as_bool()).unwrap_or(false), + "selector": cmd.get("selector").and_then(|v| v.as_str()), + }); + let result = mgr.camoufox_client().call("page.snapshot", args).await?; + mirror_camoufox_refs_into(&result, &mut state.ref_map); + return Ok(result); + } + let session_id = mgr.active_session_id()?.to_string(); let options = SnapshotOptions { @@ -2416,7 +2475,7 @@ async fn handle_snapshot(cmd: &Value, state: &mut DaemonState) -> Result Result Result { let annotate = cmd .get("annotate") @@ -2485,6 +2559,44 @@ async fn handle_screenshot(cmd: &Value, state: &mut DaemonState) -> Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result { let mgr = state.browser.as_ref().ok_or("Browser not launched")?; - let session_id = mgr.active_session_id()?.to_string(); let selector = cmd .get("selector") .and_then(|v| v.as_str()) .ok_or("Missing 'selector' parameter")?; + if mgr.backend.is_camoufox() { + let args = json!({ "selector": selector }); + return mgr.camoufox_client().call("page.getText", args).await; + } + + let session_id = mgr.active_session_id()?.to_string(); let text = super::element::get_element_text( - &mgr.client, + &mgr.backend, &session_id, &state.ref_map, selector, @@ -2959,7 +3103,7 @@ async fn handle_getattribute(cmd: &Value, state: &mut DaemonState) -> Result Result Result Result Result { let mgr = state.browser.as_ref().ok_or("Browser not launched")?; let session_id = mgr.active_session_id()?.to_string(); - mgr.client + mgr.client() .send_command_no_params("Page.reload", Some(&session_id)) .await?; - let mut rx = mgr.client.subscribe(); + let mut rx = mgr.client().subscribe(); let _ = tokio::time::timeout(tokio::time::Duration::from_secs(10), async { loop { match rx.recv().await { @@ -3112,7 +3256,7 @@ async fn handle_reload(state: &mut 
DaemonState) -> Result { // --------------------------------------------------------------------------- async fn wait_for_selector( - client: &super::cdp::client::CdpClient, + client: &super::backend::BrowserBackend, session_id: &str, selector: &str, state: &str, @@ -3152,7 +3296,7 @@ async fn wait_for_selector( } async fn wait_for_url( - client: &super::cdp::client::CdpClient, + client: &super::backend::BrowserBackend, session_id: &str, pattern: &str, timeout_ms: u64, @@ -3165,7 +3309,7 @@ async fn wait_for_url( } async fn wait_for_text( - client: &super::cdp::client::CdpClient, + client: &super::backend::BrowserBackend, session_id: &str, text: &str, timeout_ms: u64, @@ -3178,7 +3322,7 @@ async fn wait_for_text( } async fn wait_for_function( - client: &super::cdp::client::CdpClient, + client: &super::backend::BrowserBackend, session_id: &str, fn_str: &str, timeout_ms: u64, @@ -3188,7 +3332,7 @@ async fn wait_for_function( } async fn poll_until_true( - client: &super::cdp::client::CdpClient, + client: &super::backend::BrowserBackend, session_id: &str, expression: &str, timeout_ms: u64, @@ -3246,7 +3390,7 @@ async fn handle_cookies_get(cmd: &Value, state: &DaemonState) -> Result Result Result { let mgr = state.browser.as_ref().ok_or("Browser not launched")?; let session_id = mgr.active_session_id()?.to_string(); - cookies::clear_cookies(&mgr.client, &session_id).await?; + cookies::clear_cookies(&mgr.backend, &session_id).await?; Ok(json!({ "cleared": true })) } @@ -3287,7 +3431,7 @@ async fn handle_storage_get(cmd: &Value, state: &DaemonState) -> Result Result { @@ -3302,7 +3446,7 @@ async fn handle_storage_set(cmd: &Value, state: &DaemonState) -> Result Result Result Result Result Result Result Result Result Result Result let button = cmd.get("button").and_then(|v| v.as_str()).unwrap_or("none"); let click_count = cmd.get("clickCount").and_then(|v| v.as_i64()).unwrap_or(0); - mgr.client + mgr.client() .send_command( "Input.dispatchMouseEvent", Some(json!({ @@ 
-3594,7 +3738,7 @@ async fn handle_keyboard(cmd: &Value, state: &DaemonState) -> Result Result Result Result Result Result Result Result Result { let mgr = state.browser.as_ref().ok_or("Browser not launched")?; let session_id = mgr.active_session_id()?.to_string(); - native_tracing::trace_start(&mgr.client, &session_id, &mut state.tracing_state).await + native_tracing::trace_start(&mgr.backend, &session_id, &mut state.tracing_state).await } async fn handle_trace_stop(cmd: &Value, state: &mut DaemonState) -> Result { let mgr = state.browser.as_ref().ok_or("Browser not launched")?; let session_id = mgr.active_session_id()?.to_string(); let path = cmd.get("path").and_then(|v| v.as_str()); - native_tracing::trace_stop(&mgr.client, &session_id, &mut state.tracing_state, path).await + native_tracing::trace_stop(&mgr.backend, &session_id, &mut state.tracing_state, path).await } async fn handle_profiler_start(cmd: &Value, state: &mut DaemonState) -> Result { @@ -3953,7 +4106,7 @@ async fn handle_profiler_start(cmd: &Value, state: &mut DaemonState) -> Result Result Result { @@ -3996,14 +4149,14 @@ async fn handle_recording_start(cmd: &Value, state: &mut DaemonState) -> Result< // Capture current cookies let cookies_result = mgr - .client + .client() .send_command_no_params("Network.getAllCookies", Some(&old_session_id)) .await .ok(); // Create new browser context let ctx_result = mgr - .client + .client() .send_command_no_params("Target.createBrowserContext", None) .await?; let context_id = ctx_result @@ -4014,7 +4167,7 @@ async fn handle_recording_start(cmd: &Value, state: &mut DaemonState) -> Result< // Create page in new context let create_result: CreateTargetResult = mgr - .client + .client() .send_command_typed( "Target.createTarget", &json!({ "url": "about:blank", "browserContextId": context_id }), @@ -4023,7 +4176,7 @@ async fn handle_recording_start(cmd: &Value, state: &mut DaemonState) -> Result< .await?; let attach_result: AttachToTargetResult = mgr - .client + 
.client() .send_command_typed( "Target.attachToTarget", &AttachToTargetParams { @@ -4042,7 +4195,7 @@ async fn handle_recording_start(cmd: &Value, state: &mut DaemonState) -> Result< // because Browser.setDownloadBehavior at launch only applies to the default context. if let Some(ref dl_path) = mgr.download_path { let _ = mgr - .client + .client() .send_command( "Browser.setDownloadBehavior", Some(json!({ @@ -4060,7 +4213,7 @@ async fn handle_recording_start(cmd: &Value, state: &mut DaemonState) -> Result< // Security.setIgnoreCertificateErrors at launch only applies to the session it was sent on. if mgr.ignore_https_errors { let _ = mgr - .client + .client() .send_command( "Security.setIgnoreCertificateErrors", Some(json!({ "ignore": true })), @@ -4074,7 +4227,7 @@ async fn handle_recording_start(cmd: &Value, state: &mut DaemonState) -> Result< if let Some(cookie_arr) = cr.get("cookies").and_then(|v| v.as_array()) { if !cookie_arr.is_empty() { let _ = mgr - .client + .client() .send_command( "Network.setCookies", Some(json!({ "cookies": cookie_arr })), @@ -4104,7 +4257,7 @@ async fn handle_recording_start(cmd: &Value, state: &mut DaemonState) -> Result< // Navigate to URL if nav_url != "about:blank" { let _ = mgr - .client + .client() .send_command( "Page.navigate", Some(json!({ "url": nav_url })), @@ -4114,7 +4267,7 @@ async fn handle_recording_start(cmd: &Value, state: &mut DaemonState) -> Result< tokio::time::sleep(tokio::time::Duration::from_millis(1000)).await; } - (mgr.client.clone(), new_session_id) + (mgr.client().clone(), new_session_id) }; let result = recording::recording_start(&mut state.recording_state, path)?; @@ -4150,7 +4303,7 @@ async fn handle_recording_restart(cmd: &Value, state: &mut DaemonState) -> Resul if let Some(ref browser) = state.browser { let session_id = browser.active_session_id()?.to_string(); state - .start_recording_task(browser.client.clone(), session_id) + .start_recording_task(browser.client().clone(), session_id) .await?; } @@ 
-4168,7 +4321,7 @@ async fn handle_pdf(cmd: &Value, state: &DaemonState) -> Result { }); let result = mgr - .client + .client() .send_command("Page.printToPDF", Some(params), Some(&session_id)) .await?; @@ -4217,7 +4370,7 @@ async fn handle_focus(cmd: &Value, state: &mut DaemonState) -> Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result { - interaction::press_key_with_modifiers(&mgr.client, &session_id, "c", Some(modifier)) + interaction::press_key_with_modifiers(&mgr.backend, &session_id, "c", Some(modifier)) .await?; Ok(json!({ "copied": true })) } "paste" => { - interaction::press_key_with_modifiers(&mgr.client, &session_id, "v", Some(modifier)) + interaction::press_key_with_modifiers(&mgr.backend, &session_id, "v", Some(modifier)) .await?; Ok(json!({ "pasted": true })) } @@ -4760,7 +4913,7 @@ async fn handle_wheel(cmd: &Value, state: &DaemonState) -> Result let delta_x = cmd.get("deltaX").and_then(|v| v.as_f64()).unwrap_or(0.0); let delta_y = cmd.get("deltaY").and_then(|v| v.as_f64()).unwrap_or(0.0); - mgr.client + mgr.client() .send_command( "Input.dispatchMouseEvent", Some(json!({ @@ -4982,6 +5135,11 @@ async fn handle_stream_status(state: &DaemonState) -> Result { async fn handle_screencast_start(cmd: &Value, state: &mut DaemonState) -> Result { let mgr = state.browser.as_ref().ok_or("Browser not launched")?; + // Screencast is a raw CDP streaming surface (``Page.startScreencast``). + // Playwright's only equivalent is video recording, which has a different + // shape (file-at-end, not frame-by-frame) — not an in-scope swap for + // agent-browser's UI contract. Camoufox will never ship this. 
+ let _ = mgr.backend.require_cdp_for("screencast_start")?; let session_id = mgr.active_session_id()?.to_string(); if state.screencasting { @@ -5006,7 +5164,7 @@ async fn handle_screencast_start(cmd: &Value, state: &mut DaemonState) -> Result .unwrap_or(default_h as i64) as i32; stream::start_screencast( - &mgr.client, + &mgr.backend, &session_id, format, quality, @@ -5034,13 +5192,14 @@ async fn handle_screencast_start(cmd: &Value, state: &mut DaemonState) -> Result async fn handle_screencast_stop(state: &mut DaemonState) -> Result { let mgr = state.browser.as_ref().ok_or("Browser not launched")?; + let _ = mgr.backend.require_cdp_for("screencast_stop")?; let session_id = mgr.active_session_id()?; if !state.screencasting { return Err("No screencast active".to_string()); } - stream::stop_screencast(&mgr.client, session_id).await?; + stream::stop_screencast(&mgr.backend, session_id).await?; state.screencasting = false; if let Some(ref server) = state.stream_server { @@ -5067,7 +5226,7 @@ async fn handle_waitforurl(cmd: &Value, state: &DaemonState) -> Result Result Result Result { interaction::click( - &mgr.client, + &mgr.backend, &session_id, &state.ref_map, selector, @@ -5302,7 +5461,7 @@ async fn execute_subaction( .and_then(|v| v.as_str()) .ok_or("Missing 'value' for fill subaction")?; interaction::fill( - &mgr.client, + &mgr.backend, &session_id, &state.ref_map, selector, @@ -5314,7 +5473,7 @@ async fn execute_subaction( } "check" => { interaction::check( - &mgr.client, + &mgr.backend, &session_id, &state.ref_map, selector, @@ -5325,7 +5484,7 @@ async fn execute_subaction( } "hover" => { interaction::hover( - &mgr.client, + &mgr.backend, &session_id, &state.ref_map, selector, @@ -5336,7 +5495,7 @@ async fn execute_subaction( } "text" => { let text = super::element::get_element_text( - &mgr.client, + &mgr.backend, &session_id, &state.ref_map, selector, @@ -5402,7 +5561,7 @@ async fn handle_getbyrole(cmd: &Value, state: &mut DaemonState) -> Result Result Result 
Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result { async fn handle_har_start(state: &mut DaemonState) -> Result { let mgr = state.browser.as_ref().ok_or("Browser not launched")?; let session_id = mgr.active_session_id()?.to_string(); - mgr.client + mgr.client() .send_command_no_params("Network.enable", Some(&session_id)) .await?; // Also enable Network on cross-origin iframe sessions so their // requests are captured in the HAR output. for iframe_sid in state.iframe_sessions.values() { let _ = mgr - .client + .client() .send_command_no_params("Network.enable", Some(iframe_sid.as_str())) .await; } @@ -6489,7 +6648,7 @@ async fn har_browser_metadata(state: &DaemonState) -> Option { } let version = mgr - .client + .client() .send_command_no_params("Browser.getVersion", None) .await .ok()?; @@ -6770,7 +6929,7 @@ async fn handle_route(cmd: &Value, state: &mut DaemonState) -> Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result("Input.dispatchMouseEvent", ¶ms, Some(&session_id)) .await?; Ok(json!({ "dispatched": event_type })) @@ -7567,7 +7726,7 @@ async fn handle_input_keyboard(cmd: &Value, state: &DaemonState) -> Result Result Result Result .and_then(|v| v.as_str()) .ok_or("Missing 'key' parameter")?; - mgr.client + mgr.client() .send_command( "Input.dispatchKeyEvent", Some(json!({ "type": "keyUp", "key": key })), @@ -7638,7 +7797,7 @@ async fn handle_inserttext(cmd: &Value, state: &DaemonState) -> Result Result("Input.dispatchMouseEvent", ¶ms, Some(&session_id)) .await?; Ok(json!({ "moved": true })) @@ -7689,7 +7848,7 @@ async fn handle_mousedown(cmd: &Value, state: &mut DaemonState) -> Result("Input.dispatchMouseEvent", ¶ms, Some(&session_id)) .await?; Ok(json!({ "pressed": true })) @@ -7712,7 +7871,7 @@ async fn handle_mouseup(cmd: &Value, state: &mut DaemonState) -> Result("Input.dispatchMouseEvent", ¶ms, 
Some(&session_id)) .await?; Ok(json!({ "released": true })) @@ -7722,19 +7881,21 @@ async fn handle_mouseup(cmd: &Value, state: &mut DaemonState) -> Result Value { +fn success_response(id: &str, data: Value, engine: &str) -> Value { json!({ "id": id, "success": true, "data": data, + "engine": engine, }) } -fn error_response(id: &str, error: &str) -> Value { +fn error_response(id: &str, error: &str, engine: &str) -> Value { json!({ "id": id, "success": false, "error": error, + "engine": engine, }) } @@ -7934,19 +8095,21 @@ mod tests { #[test] fn test_success_response_structure() { - let resp = success_response("cmd-1", json!({"url": "https://example.com"})); + let resp = success_response("cmd-1", json!({"url": "https://example.com"}), "chrome"); assert_eq!(resp["id"], "cmd-1"); assert_eq!(resp["success"], true); assert!(resp["data"].is_object()); assert_eq!(resp["data"]["url"], "https://example.com"); + assert_eq!(resp["engine"], "chrome"); } #[test] fn test_error_response_structure() { - let resp = error_response("cmd-2", "Something went wrong"); + let resp = error_response("cmd-2", "Something went wrong", "camoufox"); assert_eq!(resp["id"], "cmd-2"); assert_eq!(resp["success"], false); assert_eq!(resp["error"], "Something went wrong"); + assert_eq!(resp["engine"], "camoufox"); } #[tokio::test] diff --git a/cli/src/native/backend.rs b/cli/src/native/backend.rs new file mode 100644 index 000000000..e4d9295bb --- /dev/null +++ b/cli/src/native/backend.rs @@ -0,0 +1,176 @@ +//! Engine-tagged browser backend. +//! +//! `BrowserBackend` is the single point where the Rust daemon decides whether +//! it is driving Chrome/Lightpanda (CDP) or Camoufox (Playwright sidecar). Every +//! action-layer function that used to accept a bare `&CdpClient` now accepts +//! `&BrowserBackend` and dispatches on the variant; Chrome-only modules assert +//! on the `Cdp` variant at entry and surface an `engine-incompatible` error +//! when pointed at Camoufox. +//! +//! 
In Unit 1 the Camoufox arm is a stub: `require_cdp` returns a structured +//! `not-yet-implemented` error so agents hit a clean failure mode instead of a +//! panic. Unit 3 fills in the real sidecar client and each action's Camoufox +//! arm is fleshed out in later units. + +use std::sync::Arc; + +use serde_json::Value; +use tokio::sync::broadcast; + +use super::camoufox_client::CamoufoxClient; +use super::cdp::client::CdpClient; +use super::cdp::types::CdpEvent; + +/// The engine this daemon session is talking to. +/// +/// `Cdp` covers both Chrome and Lightpanda — they share a single CDP +/// transport. `Camoufox` wraps the Python sidecar client. +#[derive(Clone)] +pub enum BrowserBackend { + Cdp(Arc), + Camoufox(Arc), +} + +impl BrowserBackend { + /// Human-readable engine label, also used as the `"engine"` field in + /// `--json` output so callers can segment telemetry by backend. + pub fn engine_name(&self) -> &'static str { + match self { + BrowserBackend::Cdp(_) => "cdp", + BrowserBackend::Camoufox(_) => "camoufox", + } + } + + pub fn is_cdp(&self) -> bool { + matches!(self, BrowserBackend::Cdp(_)) + } + + pub fn is_camoufox(&self) -> bool { + matches!(self, BrowserBackend::Camoufox(_)) + } + + /// Return the inner CDP client, or a structured `not-yet-implemented` + /// error when the session is running on Camoufox. Action-layer functions + /// call this at the top of their body until their Camoufox arm is + /// implemented; the returned error surfaces to the CLI as a clean failure. + pub fn require_cdp(&self) -> Result<&Arc, String> { + match self { + BrowserBackend::Cdp(c) => Ok(c), + BrowserBackend::Camoufox(_) => Err(not_yet_implemented_error(None)), + } + } + + /// Chrome-only subsystem entry points (`inspect_server`, `stream::cdp_loop`) + /// call this instead of `require_cdp` so the error message makes clear that + /// the feature will not work on Camoufox, rather than "not yet implemented". 
+ pub fn require_cdp_for(&self, operation: &str) -> Result<&Arc, String> { + match self { + BrowserBackend::Cdp(c) => Ok(c), + BrowserBackend::Camoufox(_) => Err(engine_incompatible_error(operation)), + } + } + + /// Option accessor for non-`Result` contexts (e.g. sync setup paths that + /// cannot use `?`). Returns `None` on Camoufox. + pub fn cdp_opt(&self) -> Option<&Arc> { + match self { + BrowserBackend::Cdp(c) => Some(c), + BrowserBackend::Camoufox(_) => None, + } + } + + // --------------------------------------------------------------------- + // Delegating methods: mirror the handful of `CdpClient` methods that + // action-layer code calls on the backend. Each arm of the `match` is the + // "enum arm body" the plan refers to — the Cdp arm forwards to the real + // CDP client; the Camoufox arm returns `not-yet-implemented` until the + // corresponding action is wired up in a later unit. Keeping the dispatch + // at this method level (rather than at each call site) lets us lift + // function signatures from `&CdpClient` to `&BrowserBackend` without + // rewriting 130+ action-body lines. 
+ // --------------------------------------------------------------------- + + pub async fn send_command( + &self, + method: &str, + params: Option, + session_id: Option<&str>, + ) -> Result { + match self { + BrowserBackend::Cdp(c) => c.send_command(method, params, session_id).await, + BrowserBackend::Camoufox(_) => Err(not_yet_implemented_error(Some(method))), + } + } + + pub async fn send_command_typed( + &self, + method: &str, + params: &P, + session_id: Option<&str>, + ) -> Result { + match self { + BrowserBackend::Cdp(c) => c.send_command_typed(method, params, session_id).await, + BrowserBackend::Camoufox(_) => Err(not_yet_implemented_error(Some(method))), + } + } + + pub async fn send_command_no_params( + &self, + method: &str, + session_id: Option<&str>, + ) -> Result { + match self { + BrowserBackend::Cdp(c) => c.send_command_no_params(method, session_id).await, + BrowserBackend::Camoufox(_) => Err(not_yet_implemented_error(Some(method))), + } + } + + /// Subscribe to CDP-shaped events. On Camoufox this surfaces a + /// `not-yet-implemented` error; callers in the action layer already + /// propagate with `?` because they return `Result`. + pub fn subscribe(&self) -> Result, String> { + match self { + BrowserBackend::Cdp(c) => Ok(c.subscribe()), + BrowserBackend::Camoufox(_) => Err(not_yet_implemented_error(Some("subscribe"))), + } + } +} + +impl std::fmt::Debug for BrowserBackend { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + BrowserBackend::Cdp(_) => f + .debug_struct("BrowserBackend::Cdp") + .finish_non_exhaustive(), + BrowserBackend::Camoufox(_) => f + .debug_struct("BrowserBackend::Camoufox") + .finish_non_exhaustive(), + } + } +} + +/// Structured error returned when an action reaches a Camoufox arm that +/// Unit 3+ has not filled in yet. The JSON shape is stable so downstream +/// tooling (celeria API, dashboards) can pattern-match on `code`. 
+pub fn not_yet_implemented_error(action: Option<&str>) -> String { + match action { + Some(a) => format!( + "not-yet-implemented: action `{}` is not yet supported on engine=camoufox", + a + ), + None => { + "not-yet-implemented: this action is not yet supported on engine=camoufox".to_string() + } + } +} + +/// Structured error for Chrome-only subsystems (raw CDP streaming, DevTools +/// inspect proxy) that will never work on Camoufox. Distinguished from +/// `not-yet-implemented` because callers can fall back to `--engine chrome` +/// but should not wait for a Camoufox implementation that isn't coming. +pub fn engine_incompatible_error(operation: &str) -> String { + format!( + "engine-incompatible: `{}` requires engine=chrome (Camoufox does not speak raw CDP)", + operation + ) +} diff --git a/cli/src/native/browser.rs b/cli/src/native/browser.rs index 28178dff7..899db9d12 100644 --- a/cli/src/native/browser.rs +++ b/cli/src/native/browser.rs @@ -5,6 +5,8 @@ use std::sync::Arc; use std::time::{Duration, Instant}; use tokio::sync::{broadcast, Mutex}; +use super::backend::BrowserBackend; +use super::cdp::camoufox::CamoufoxProcess; use super::cdp::chrome::{auto_connect_cdp, launch_chrome, ChromeProcess, LaunchOptions}; use super::cdp::client::CdpClient; use super::cdp::discovery::discover_cdp_url; @@ -88,6 +90,38 @@ fn validate_lightpanda_options(options: &LaunchOptions) -> Result<(), String> { Ok(()) } +/// Mirrors `validate_lightpanda_options`: rejects options that have no +/// equivalent on the Camoufox path. The sidecar re-validates launch kwargs +/// against its own allowlist, but these are shaped at the Rust launch-option +/// level and are easier to reject up-front with a clear message. 
+fn validate_camoufox_options(options: &LaunchOptions) -> Result<(), String> { + if options + .extensions + .as_ref() + .map(|e| !e.is_empty()) + .unwrap_or(false) + { + return Err("Extensions are not supported with Camoufox".to_string()); + } + if options.profile.is_some() { + return Err("Profiles are not supported with Camoufox".to_string()); + } + if options.storage_state.is_some() { + return Err("Storage state is not supported with Camoufox".to_string()); + } + if options.allow_file_access { + return Err("File access is not supported with Camoufox".to_string()); + } + if !options.args.is_empty() { + return Err( + "Custom Chrome arguments (--args) are not supported with Camoufox; \ + pass engine-specific kwargs through the sidecar config instead." + .to_string(), + ); + } + Ok(()) +} + /// Returns true for Chrome internal targets that should not be selected /// during auto-connect (e.g. chrome://, chrome-extension://, devtools://). fn is_internal_chrome_target(url: &str) -> bool { @@ -265,6 +299,10 @@ impl WaitUntil { pub enum BrowserProcess { Chrome(ChromeProcess), Lightpanda(LightpandaProcess), + /// Stub variant for engine=camoufox. Unit 1 never constructs one — launch + /// returns a not-yet-implemented error first — but the variant exists so + /// the enum is total once Unit 3 wires in the real sidecar subprocess. 
+ Camoufox(CamoufoxProcess), } impl BrowserProcess { @@ -272,6 +310,7 @@ impl BrowserProcess { match self { BrowserProcess::Chrome(p) => p.kill(), BrowserProcess::Lightpanda(p) => p.kill(), + BrowserProcess::Camoufox(p) => p.kill(), } } @@ -279,6 +318,7 @@ impl BrowserProcess { match self { BrowserProcess::Chrome(p) => p.wait_or_kill(timeout), BrowserProcess::Lightpanda(p) => p.kill(), + BrowserProcess::Camoufox(p) => p.wait_or_kill(timeout), } } @@ -287,12 +327,16 @@ impl BrowserProcess { match self { BrowserProcess::Chrome(p) => p.has_exited(), BrowserProcess::Lightpanda(_) => false, + BrowserProcess::Camoufox(p) => p.has_exited(), } } } pub struct BrowserManager { - pub client: Arc, + /// Engine-tagged backend. Either a `Cdp` arm (Chrome / Lightpanda) or a + /// `Camoufox` arm (Python sidecar). All engine-specific transport lives + /// here; the rest of the struct is engine-agnostic. + pub backend: BrowserBackend, browser_process: Option, ws_url: String, pages: Vec, @@ -315,6 +359,37 @@ const LIGHTPANDA_CDP_CONNECT_POLL_INTERVAL: Duration = Duration::from_millis(100 const LIGHTPANDA_TARGET_INIT_TIMEOUT: Duration = Duration::from_secs(10); impl BrowserManager { + /// CDP client accessor. Most internal `BrowserManager` methods still + /// assume a CDP backend (Chrome/Lightpanda); this helper centralises the + /// lookup so a future conversion of those methods to `backend`-dispatch + /// only has to touch one site. + /// + /// Panics if called on a Camoufox backend. Callers on mixed-engine paths + /// must guard with `self.backend.require_cdp()?` (or `.is_cdp()`) first; + /// reaching this accessor on Camoufox is a programmer bug, not a runtime + /// failure mode. + pub fn client(&self) -> &Arc { + match &self.backend { + BrowserBackend::Cdp(c) => c, + BrowserBackend::Camoufox(_) => panic!( + "BrowserManager::client() called on Camoufox backend; use self.backend.require_cdp()? first" + ), + } + } + + /// Camoufox sidecar client accessor. 
Symmetric with `client()` above; + /// panics on a CDP backend. Camoufox-specific code paths in this module + /// and in `actions.rs` (e.g. handle_snapshot's Camoufox arm) use this + /// after a `backend.is_camoufox()` check. + pub fn camoufox_client(&self) -> &Arc { + match &self.backend { + BrowserBackend::Camoufox(c) => c, + BrowserBackend::Cdp(_) => panic!( + "BrowserManager::camoufox_client() called on CDP backend; check backend.is_camoufox() first" + ), + } + } + pub async fn launch(options: LaunchOptions, engine: Option<&str>) -> Result { let engine = engine.unwrap_or("chrome"); @@ -332,9 +407,12 @@ impl BrowserManager { "lightpanda" => { validate_lightpanda_options(&options)?; } + "camoufox" => { + validate_camoufox_options(&options)?; + } _ => { return Err(format!( - "Unknown engine '{}'. Supported engines: chrome, lightpanda", + "Unknown engine '{}'. Supported engines: chrome, lightpanda, camoufox", engine )); } @@ -346,6 +424,14 @@ impl BrowserManager { let download_path = options.download_path.clone(); let stealth = options.stealth; + if engine == "camoufox" { + // Camoufox has its own process lifecycle (Python sidecar driving + // a Playwright/Camoufox browser) and doesn't share the CDP + // WebSocket path. Return early with a fully-constructed + // BrowserManager from the sidecar. + return initialize_camoufox_manager(&options).await; + } + let (ws_url, process) = match engine { "lightpanda" => { let lp_options = LightpandaLaunchOptions { @@ -370,8 +456,9 @@ impl BrowserManager { initialize_lightpanda_manager(ws_url, process).await? 
} else { let client = Arc::new(CdpClient::connect(&ws_url).await?); + let backend = BrowserBackend::Cdp(client.clone()); let mut manager = Self { - client, + backend, browser_process: Some(process), ws_url, pages: Vec::new(), @@ -391,7 +478,7 @@ impl BrowserManager { if ignore_https_errors { let _ = manager - .client + .client() .send_command( "Security.setIgnoreCertificateErrors", Some(json!({ "ignore": true })), @@ -402,7 +489,7 @@ impl BrowserManager { if let Some(ref ua) = user_agent { let _ = manager - .client + .client() .send_command( "Emulation.setUserAgentOverride", Some(json!({ "userAgent": ua })), @@ -413,7 +500,7 @@ impl BrowserManager { if let Some(ref scheme) = color_scheme { let _ = manager - .client + .client() .send_command( "Emulation.setEmulatedMedia", Some(json!({ "features": [{ "name": "prefers-color-scheme", "value": scheme }] })), @@ -424,7 +511,7 @@ impl BrowserManager { if let Some(ref path) = download_path { let _ = manager - .client + .client() .send_command( "Browser.setDownloadBehavior", Some(json!({ "behavior": "allow", "downloadPath": path })), @@ -460,11 +547,12 @@ impl BrowserManager { ) -> Result { let ws_url = resolve_cdp_url(url).await?; let client = Arc::new(CdpClient::connect_with_headers(&ws_url, headers).await?); + let backend = BrowserBackend::Cdp(client); let stealth = std::env::var("AGENT_BROWSER_STEALTH") .map(|v| matches!(v.as_str(), "1" | "true" | "TRUE" | "True")) .unwrap_or(false); let mut manager = Self { - client, + backend, browser_process: None, ws_url, pages: Vec::new(), @@ -502,7 +590,7 @@ impl BrowserManager { } async fn discover_and_attach_targets(&mut self) -> Result<(), String> { - self.client + self.client() .send_command_typed::<_, Value>( "Target.setDiscoverTargets", &SetDiscoverTargetsParams { discover: true }, @@ -511,7 +599,7 @@ impl BrowserManager { .await?; let result: GetTargetsResult = self - .client + .client() .send_command_typed("Target.getTargets", &json!({}), None) .await?; @@ -524,7 +612,7 @@ 
impl BrowserManager { if page_targets.is_empty() { // Create a new tab let result: CreateTargetResult = self - .client + .client() .send_command_typed( "Target.createTarget", &CreateTargetParams { @@ -535,7 +623,7 @@ impl BrowserManager { .await?; let attach_result: AttachToTargetResult = self - .client + .client() .send_command_typed( "Target.attachToTarget", &AttachToTargetParams { @@ -562,7 +650,7 @@ impl BrowserManager { } else { for target in &page_targets { let attach_result: AttachToTargetResult = self - .client + .client() .send_command_typed( "Target.attachToTarget", &AttachToTargetParams { @@ -599,12 +687,12 @@ impl BrowserManager { } async fn enable_domains(&self, session_id: &str) -> Result<(), String> { - self.client + self.client() .send_command_no_params("Page.enable", Some(session_id)) .await?; if self.stealth { let _ = self - .client + .client() .send_command( "Page.addScriptToEvaluateOnNewDocument", Some(json!({ "source": super::stealth::STEALTH_INIT_SCRIPT })), @@ -612,24 +700,24 @@ impl BrowserManager { ) .await; } - self.client + self.client() .send_command_no_params("Runtime.enable", Some(session_id)) .await?; // Resume the target if it is paused waiting for the debugger. // This is needed for real browser sessions (Chrome 144+) where targets // are paused after attach until explicitly resumed. No-op otherwise. let _ = self - .client + .client() .send_command_no_params("Runtime.runIfWaitingForDebugger", Some(session_id)) .await; - self.client + self.client() .send_command_no_params("Network.enable", Some(session_id)) .await?; // Enable auto-attach for cross-origin iframe support. // flatten: true gives each iframe its own session_id. // Ignored on engines that don't support it (e.g. Lightpanda). let _ = self - .client + .client() .send_command( "Target.setAutoAttach", Some(json!({ @@ -645,12 +733,12 @@ impl BrowserManager { /// Enable domains on a direct page connection (no session_id needed). 
async fn enable_domains_direct(&self) -> Result<(), String> { - self.client + self.client() .send_command_no_params("Page.enable", None) .await?; if self.stealth { let _ = self - .client + .client() .send_command( "Page.addScriptToEvaluateOnNewDocument", Some(json!({ "source": super::stealth::STEALTH_INIT_SCRIPT })), @@ -658,14 +746,14 @@ impl BrowserManager { ) .await; } - self.client + self.client() .send_command_no_params("Runtime.enable", None) .await?; let _ = self - .client + .client() .send_command_no_params("Runtime.runIfWaitingForDebugger", None) .await; - self.client + self.client() .send_command_no_params("Network.enable", None) .await?; Ok(()) @@ -679,11 +767,14 @@ impl BrowserManager { } pub async fn navigate(&mut self, url: &str, wait_until: WaitUntil) -> Result { + if self.backend.is_camoufox() { + return self.camoufox_navigate(url, wait_until).await; + } let session_id = self.active_session_id()?.to_string(); - let mut lifecycle_rx = self.client.subscribe(); + let mut lifecycle_rx = self.client().subscribe(); let nav_result: PageNavigateResult = self - .client + .client() .send_command_typed( "Page.navigate", &PageNavigateParams { @@ -725,6 +816,69 @@ impl BrowserManager { Ok(json!({ "url": page_url, "title": title })) } + /// Camoufox path for `navigate`: delegates to the sidecar's `page.goto` + /// command. In Unit 3 the response carried `{url, title}` so the CLI + /// output shape matches the Chrome path; Unit 5 adds tab routing — the + /// first navigate with no active tab allocates ``t1`` through the + /// Rust-side counter so subsequent tabs don't collide. + async fn camoufox_navigate( + &mut self, + url: &str, + wait_until: WaitUntil, + ) -> Result { + // Ensure at least one tab exists, allocating its id through the + // authoritative Rust counter. Without this the sidecar would have + // to mint its own "t1", and the "tab ids never reuse" guarantee + // would depend on two counters staying in lockstep across a + // process boundary. 
+ if self.active_page_index >= self.pages.len() { + self.camoufox_tab_new(None, None).await?; + } + let tab_id_str = format_tab_id(self.pages[self.active_page_index].tab_id); + + let wait_until_str = match wait_until { + WaitUntil::Load => "load", + WaitUntil::DomContentLoaded => "domcontentloaded", + WaitUntil::NetworkIdle => "networkidle", + WaitUntil::None => "none", + }; + let args = json!({ + "url": url, + "waitUntil": wait_until_str, + "tabId": tab_id_str, + }); + let result = self.camoufox_client().call("page.goto", args).await?; + + let page_url = result + .get("url") + .and_then(|v| v.as_str()) + .unwrap_or(url) + .to_string(); + let title = result + .get("title") + .and_then(|v| v.as_str()) + .unwrap_or_default() + .to_string(); + + if let Ok(parsed) = url::Url::parse(&page_url) { + let origin = parsed.origin().ascii_serialization(); + if origin != "null" { + self.visited_origins.insert(origin); + } + } + + // Keep Rust-side bookkeeping honest so ``tab_list`` / external state + // queries see the current URL + title. Without this the PageInfo + // entry minted in ``camoufox_tab_new`` stays frozen at its + // registration-time values, silently diverging from reality. 
+ if let Some(page) = self.pages.get_mut(self.active_page_index) { + page.url = page_url.clone(); + page.title = title.clone(); + } + + Ok(json!({ "url": page_url, "title": title })) + } + async fn wait_for_lifecycle( &self, wait_until: WaitUntil, @@ -790,7 +944,7 @@ impl BrowserManager { let session_id = self.active_session_id()?.to_string(); let result: EvaluateResult = self - .client + .client() .send_command_typed( "Runtime.evaluate", &EvaluateParams { @@ -823,18 +977,34 @@ impl BrowserManager { wait_until: WaitUntil, session_id: &str, ) -> Result<(), String> { - let mut rx = self.client.subscribe(); + let mut rx = self.client().subscribe(); self.wait_for_lifecycle(wait_until, session_id, &mut rx) .await } pub async fn close(&mut self) -> Result<(), String> { + if self.backend.is_camoufox() { + // Graceful path: tell the sidecar to close its browser and exit. + // The sidecar acknowledges the `close` command and then drops its + // shutdown event; we still need to wait on the child process + // separately (see CamoufoxProcess::wait_or_kill below). + let _ = self.camoufox_client().close().await; + if let Some(mut process) = self.browser_process.take() { + let timeout = std::time::Duration::from_secs(5); + let _ = tokio::task::spawn_blocking(move || { + process.wait_or_kill(timeout); + }) + .await; + } + return Ok(()); + } + if self.browser_process.is_some() { // Only send Browser.close when we launched the browser ourselves. // For external connections (--auto-connect, --cdp) we just disconnect // without shutting down the user's browser. let _ = self - .client + .client() .send_command_no_params("Browser.close", None) .await; } @@ -861,10 +1031,17 @@ impl BrowserManager { /// Checks if the CDP connection is alive by sending a simple command. /// Returns false if the command times out or fails. pub async fn is_connection_alive(&self) -> bool { + // Camoufox has no CDP connection; we use the sidecar child liveness + // as the proxy for "connection alive". 
If the handshake completed + // and the child hasn't been reaped, consider the session healthy. + if self.backend.is_camoufox() { + return self.browser_process.is_some(); + } + let timeout = tokio::time::Duration::from_secs(3); let result = tokio::time::timeout( timeout, - self.client + self.client() .send_command_no_params("Browser.getVersion", None), ) .await; @@ -919,8 +1096,16 @@ impl BrowserManager { return Ok(()); } + // Camoufox manages its page set via Playwright inside the sidecar; + // tab ids sync in Unit 5. In Unit 3 we short-circuit so the open + // flow (which calls `ensure_page` before navigate) doesn't panic — + // `camoufox_navigate` creates the page lazily in the sidecar. + if self.backend.is_camoufox() { + return Ok(()); + } + let result: CreateTargetResult = self - .client + .client() .send_command_typed( "Target.createTarget", &CreateTargetParams { @@ -931,7 +1116,7 @@ impl BrowserManager { .await?; let attach_result: AttachToTargetResult = self - .client + .client() .send_command_typed( "Target.attachToTarget", &AttachToTargetParams { @@ -1058,7 +1243,7 @@ impl BrowserManager { let target_url = url.unwrap_or("about:blank"); let result: CreateTargetResult = self - .client + .client() .send_command_typed( "Target.createTarget", &CreateTargetParams { @@ -1069,7 +1254,7 @@ impl BrowserManager { .await?; let attach: AttachToTargetResult = self - .client + .client() .send_command_typed( "Target.attachToTarget", &AttachToTargetParams { @@ -1120,7 +1305,7 @@ impl BrowserManager { // Bring tab to front let _ = self - .client + .client() .send_command("Page.bringToFront", None, Some(&session_id)) .await; @@ -1157,7 +1342,7 @@ impl BrowserManager { let closed_tab_id = page.tab_id; let closed_label = page.label.clone(); let _ = self - .client + .client() .send_command_typed::<_, Value>( "Target.closeTarget", &CloseTargetParams { @@ -1189,7 +1374,7 @@ impl BrowserManager { mobile: bool, ) -> Result<(), String> { let session_id = self.active_session_id()?; 
- self.client + self.client() .send_command( "Emulation.setDeviceMetricsOverride", Some(json!({ @@ -1206,7 +1391,7 @@ impl BrowserManager { // viewport, so resize the content area to match. if let Ok(target_id) = self.active_target_id() { if let Ok(window_info) = self - .client + .client() .send_command( "Browser.getWindowForTarget", Some(json!({ "targetId": target_id })), @@ -1216,7 +1401,7 @@ impl BrowserManager { { if let Some(window_id) = window_info.get("windowId").and_then(|v| v.as_i64()) { if let Err(e) = self - .client + .client() .send_command( "Browser.setContentsSize", Some(json!({ @@ -1239,7 +1424,7 @@ impl BrowserManager { pub async fn set_user_agent(&self, user_agent: &str) -> Result<(), String> { let session_id = self.active_session_id()?; - self.client + self.client() .send_command( "Emulation.setUserAgentOverride", Some(json!({ "userAgent": user_agent })), @@ -1266,7 +1451,7 @@ impl BrowserManager { .collect(); params["features"] = Value::Array(features_arr); } - self.client + self.client() .send_command("Emulation.setEmulatedMedia", Some(params), Some(session_id)) .await?; Ok(()) @@ -1274,7 +1459,7 @@ impl BrowserManager { pub async fn bring_to_front(&self) -> Result<(), String> { let session_id = self.active_session_id()?; - self.client + self.client() .send_command("Page.bringToFront", None, Some(session_id)) .await?; Ok(()) @@ -1282,7 +1467,7 @@ impl BrowserManager { pub async fn set_timezone(&self, timezone_id: &str) -> Result<(), String> { let session_id = self.active_session_id()?; - self.client + self.client() .send_command( "Emulation.setTimezoneOverride", Some(json!({ "timezoneId": timezone_id })), @@ -1294,7 +1479,7 @@ impl BrowserManager { pub async fn set_locale(&self, locale: &str) -> Result<(), String> { let session_id = self.active_session_id()?; - self.client + self.client() .send_command( "Emulation.setLocaleOverride", Some(json!({ "locale": locale })), @@ -1311,7 +1496,7 @@ impl BrowserManager { accuracy: Option, ) -> Result<(), 
String> { let session_id = self.active_session_id()?; - self.client + self.client() .send_command( "Emulation.setGeolocationOverride", Some(json!({ @@ -1326,7 +1511,7 @@ impl BrowserManager { } pub async fn grant_permissions(&self, permissions: &[String]) -> Result<(), String> { - self.client + self.client() .send_command( "Browser.grantPermissions", Some(json!({ "permissions": permissions })), @@ -1346,7 +1531,7 @@ impl BrowserManager { if let Some(text) = prompt_text { params["promptText"] = Value::String(text.to_string()); } - self.client + self.client() .send_command( "Page.handleJavaScriptDialog", Some(params), @@ -1365,12 +1550,17 @@ impl BrowserManager { ) -> Result<(), String> { let session_id = self.active_session_id()?; - let (object_id, effective_session_id) = - resolve_element_object_id(&self.client, session_id, ref_map, selector, iframe_sessions) - .await?; + let (object_id, effective_session_id) = resolve_element_object_id( + &self.backend, + session_id, + ref_map, + selector, + iframe_sessions, + ) + .await?; let describe: Value = self - .client + .client() .send_command( "DOM.describeNode", Some(json!({ "objectId": object_id })), @@ -1384,7 +1574,7 @@ impl BrowserManager { .and_then(|v| v.as_i64()) .ok_or("Could not get backendNodeId for file input")?; - self.client + self.client() .send_command( "DOM.setFileInputFiles", Some(json!({ @@ -1401,7 +1591,7 @@ impl BrowserManager { pub async fn add_script_to_evaluate(&self, source: &str) -> Result { let session_id = self.active_session_id()?; let result = self - .client + .client() .send_command( "Page.addScriptToEvaluateOnNewDocument", Some(json!({ "source": source })), @@ -1488,7 +1678,7 @@ impl BrowserManager { pub async fn set_download_behavior(&self, download_path: &str) -> Result<(), String> { let session_id = self.active_session_id()?; - self.client + self.client() .send_command( "Browser.setDownloadBehavior", Some(json!({ @@ -1501,6 +1691,172 @@ impl BrowserManager { .await?; Ok(()) } + + // 
----------------------------------------------------------------------- + // Camoufox tab management (Unit 5) + // + // Tab ID coordination: the Rust side owns ``next_tab_id`` and hands + // each freshly assigned ``t`` string down to the sidecar. The sidecar + // stores Playwright ``Page`` objects keyed by that string. This keeps + // "tab ids never reuse" as a single invariant with one authoritative + // counter; the alternative (sidecar-assigned ids relayed back up) + // splits that invariant across a process boundary for no gain. + // ----------------------------------------------------------------------- + + /// Camoufox analogue of ``tab_new`` — delegates to the sidecar, which + /// creates a Playwright ``Page`` and registers it under the ``t`` + /// string the Rust side assigned. Keeps ``self.pages`` in sync so the + /// rest of the daemon (``tab_list``, ``resolve_tab_ref``, ``has_tab_id``) + /// stays engine-agnostic. + pub async fn camoufox_tab_new( + &mut self, + url: Option<&str>, + label: Option<&str>, + ) -> Result { + if let Some(label) = label { + if !is_valid_label(label) { + return Err(format!( + "Invalid tab label `{}`; labels must start with a letter and contain only \ + letters, digits, `-`, and `_`", + label + )); + } + if self.has_label(label) { + return Err(format!( + "Label `{}` is already used by another tab; labels must be unique within a \ + session", + label + )); + } + } + + let tab_id = self.next_tab_id; + self.next_tab_id += 1; + let tab_id_str = format_tab_id(tab_id); + + let mut args = json!({ "tabId": tab_id_str }); + if let Some(u) = url { + args["url"] = json!(u); + } + + // Roll the counter back on sidecar failure so we don't burn an id + // that never corresponded to a live tab. The counter bump is cheap + // but "tab ids never reuse" is a guarantee agents rely on, so the + // failure path must not leave `next_tab_id` pointing past a gap + // that no agent will ever see. 
+ let result = match self.camoufox_client().call("tab.new", args).await { + Ok(v) => v, + Err(e) => { + self.next_tab_id -= 1; + return Err(e); + } + }; + + let current_url = result + .get("url") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let title = result + .get("title") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + + let index = self.pages.len(); + let label_owned = label.map(|s| s.to_string()); + self.pages.push(PageInfo { + tab_id, + label: label_owned.clone(), + target_id: tab_id_str.clone(), // sidecar doesn't expose CDP targets; reuse id + session_id: String::new(), + url: current_url.clone(), + title, + target_type: "page".to_string(), + }); + self.active_page_index = index; + + Ok(json!({ + "tabId": tab_id_str, + "label": label_owned, + "url": current_url, + "total": self.pages.len(), + })) + } + + pub async fn camoufox_tab_switch(&mut self, tab_id: u32) -> Result { + let index = self + .pages + .iter() + .position(|p| p.tab_id == tab_id) + .ok_or_else(|| format!("Tab ID {} not found", format_tab_id(tab_id)))?; + let tab_id_str = format_tab_id(tab_id); + let result = self + .camoufox_client() + .call("tab.switch", json!({ "tabId": tab_id_str })) + .await?; + self.active_page_index = index; + let url = result + .get("url") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let title = result + .get("title") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + if let Some(page) = self.pages.get_mut(index) { + page.url = url.clone(); + page.title = title.clone(); + } + let label = self.pages[index].label.clone(); + Ok(json!({ + "tabId": tab_id_str, + "label": label, + "url": url, + "title": title, + })) + } + + pub async fn camoufox_tab_close(&mut self, tab_id: Option) -> Result { + let target_index = match tab_id { + Some(id) => self + .pages + .iter() + .position(|p| p.tab_id == id) + .ok_or_else(|| format!("Tab ID {} not found", format_tab_id(id)))?, + None => self.active_page_index, + }; + if 
target_index >= self.pages.len() { + return Err(format!("Tab index {} out of range", target_index)); + } + if self.pages.len() <= 1 { + // Match the Chrome path's behavior: refuse to close the last + // tab via `tab close`. Agents that want the "browser is gone" + // state call `close` instead, which tears down both the sidecar + // and the Playwright browser through a single, tested path. + return Err("Cannot close the last tab".to_string()); + } + let page = &self.pages[target_index]; + let closed_tab_id = page.tab_id; + let closed_tab_id_str = format_tab_id(closed_tab_id); + let closed_label = page.label.clone(); + // Tell the sidecar first; only drop our Rust-side bookkeeping once + // the Playwright page.close() has actually succeeded, otherwise we + // end up with a phantom page in the sidecar. + let _ = self + .camoufox_client() + .call("tab.close", json!({ "tabId": closed_tab_id_str })) + .await?; + self.pages.remove(target_index); + self.update_active_page_after_removal(target_index); + Ok(json!({ + "tabId": closed_tab_id_str, + "label": closed_label, + "closed": true, + })) + } } /// Core network-idle polling loop, extracted so it can be unit-tested without a @@ -1598,6 +1954,56 @@ async fn connect_cdp_with_retry( } } +async fn initialize_camoufox_manager(options: &LaunchOptions) -> Result { + use super::cdp::camoufox::{launch_camoufox_sidecar, CamoufoxLaunchOptions}; + + let mut extra = serde_json::Map::new(); + if let Some(ref scheme) = options.color_scheme { + // Camoufox accepts `locale` / various knobs; colour-scheme is exposed + // as a kwarg-level hint that the sidecar forwards to Playwright's + // BrowserContext options. The sidecar validates the key against its + // allowlist, so the launch fails cleanly if the kwarg is not supported. 
+ extra.insert("color_scheme".to_string(), json!(scheme)); + } + if options.ignore_https_errors { + extra.insert("ignore_https_errors".to_string(), json!(true)); + } + + let cf_options = CamoufoxLaunchOptions { + headless: options.headless, + executable_path: options.executable_path.clone(), + proxy: options.proxy.as_ref().map(|s| json!({ "server": s })), + extra, + }; + + if options.stealth { + eprintln!( + "[agent-browser] warning: --stealth is redundant with --engine camoufox; \ + Camoufox's C++ stealth supersedes the JS init script — proceeding without \ + injected scripts." + ); + } + + let (process, client) = launch_camoufox_sidecar(&cf_options).await?; + let backend = BrowserBackend::Camoufox(client); + + Ok(BrowserManager { + backend, + browser_process: Some(BrowserProcess::Camoufox(process)), + ws_url: String::new(), + pages: Vec::new(), + active_page_index: 0, + default_timeout_ms: 25_000, + download_path: options.download_path.clone(), + ignore_https_errors: options.ignore_https_errors, + visited_origins: HashSet::new(), + next_tab_id: 1, + // Camoufox supersedes JS-injection stealth. Always false so + // `enable_domains` never tries to inject `STEALTH_INIT_SCRIPT`. + stealth: false, + }) +} + async fn initialize_lightpanda_manager( ws_url: String, process: BrowserProcess, @@ -1623,8 +2029,10 @@ async fn initialize_lightpanda_manager( } }; + let client = Arc::new(client); + let backend = BrowserBackend::Cdp(client); let mut manager = BrowserManager { - client: Arc::new(client), + backend, browser_process: None, ws_url: ws_url.clone(), pages: Vec::new(), diff --git a/cli/src/native/camoufox_client.rs b/cli/src/native/camoufox_client.rs new file mode 100644 index 000000000..4ed77ceae --- /dev/null +++ b/cli/src/native/camoufox_client.rs @@ -0,0 +1,470 @@ +//! Client for the Camoufox Python sidecar. +//! +//! Speaks the JSON-line protocol documented in +//! `packages/camoufox-sidecar/camoufox_sidecar/protocol.py`: +//! +//! ```text +//! 
request: {"id": N, "cmd": "", "args": {...}} +//! response: {"id": N, "ok": true, "result": {...}} +//! {"id": N, "ok": false, "error": {"code": "...", "message": "..."}} +//! event: {"event": "", "data": {...}} +//! ``` +//! +//! The client owns the subprocess's stdin (writer) and stdout (reader task). +//! A monotonic request id plus a pending `HashMap` +//! demultiplexes responses back to the matching `call`. Asynchronous frames +//! (the `ready` event carries the sidecar pid; other events like +//! `page.console` are forwarded in later units) fan out on a broadcast +//! channel that callers can `subscribe` to. +//! +//! Errors from the sidecar arrive as `{code, message}` objects; we surface +//! them as `": "` strings to match the rest of the Rust +//! daemon's `Result<_, String>` convention. The error-code catalog the +//! sidecar may emit today is: +//! +//! - `invalid-frame` — malformed JSON on the wire +//! - `not-yet-supported` — unknown command (post-Unit 3 this is the +//! dominant failure while more commands are ported in Units 4–5) +//! - `invalid-args` — well-formed frame with a bad args shape +//! - `unknown-launch-option` — launch kwarg not on the sidecar's allowlist +//! - `unsupported-launch-option` — explicitly rejected launch kwarg +//! (`persistent_context`, `user_data_dir`) +//! - `already-launched` — second `launch` without a `close` +//! - `camoufox-not-installed` — `import camoufox` failed or binary missing +//! - `launch-failed` — any other launch-time failure +//! 
- `internal-error` — uncaught exception inside a handler + +use std::collections::HashMap; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::{Arc, Mutex}; +use std::time::Duration; + +use serde_json::{json, Value}; +use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader}; +use tokio::process::{ChildStdin, ChildStdout}; +use tokio::sync::{broadcast, oneshot}; + +const EVENT_CHANNEL_CAPACITY: usize = 64; +const DEFAULT_CALL_TIMEOUT: Duration = Duration::from_secs(30); +const CLOSE_TIMEOUT: Duration = Duration::from_secs(5); + +/// A named event forwarded from the sidecar. Callers that don't care about +/// events can simply never subscribe; the broadcast channel is bounded, so a +/// slow consumer that lags drops old events rather than backpressuring the +/// reader. +#[derive(Debug, Clone)] +pub struct CamoufoxEvent { + pub name: String, + pub data: Value, +} + +type PendingMap = Arc>>>>; + +/// Sidecar client. Cheap to clone via `Arc`; construct once per session. +pub struct CamoufoxClient { + writer: tokio::sync::Mutex, + pending: PendingMap, + next_id: AtomicU64, + events: broadcast::Sender, + /// Signals the reader loop to shut down on Drop. + shutdown: Arc, + _reader: std::sync::Mutex>>, +} + +impl CamoufoxClient { + /// Consume the sidecar's stdin/stdout handles, wait for the `ready` + /// event (with the reported pid), and spawn the background reader task. + /// On success returns `(Arc, Option)`. The pid is + /// `None` only if the sidecar omits it — older sidecars may, but current + /// ones always attach it. + pub async fn start( + stdin: ChildStdin, + stdout: ChildStdout, + ready_timeout: Duration, + ) -> Result<(Arc, Option), String> { + let mut reader = BufReader::new(stdout); + + // Read the first frame: must be the `ready` event. Anything else is + // either a protocol bug on the sidecar side or a premature exit, + // both of which we treat as a readiness failure so the error is + // actionable. 
+ let ready_line = tokio::time::timeout(ready_timeout, read_one_nonblank_line(&mut reader)) + .await + .map_err(|_| { + format!( + "timed out after {}ms waiting for camoufox-sidecar `ready` event", + ready_timeout.as_millis() + ) + })? + .map_err(|e| format!("reading first sidecar frame: {}", e))?; + + let ready_frame: Value = serde_json::from_str(&ready_line).map_err(|e| { + format!( + "first sidecar frame was not valid JSON: {} (frame: {:?})", + e, ready_line + ) + })?; + let pid = parse_ready_frame(&ready_frame)?; + + let (events_tx, _) = broadcast::channel(EVENT_CHANNEL_CAPACITY); + let pending: PendingMap = Arc::new(Mutex::new(HashMap::new())); + let shutdown = Arc::new(tokio::sync::Notify::new()); + + let reader_task = + spawn_reader(reader, pending.clone(), events_tx.clone(), shutdown.clone()); + + let client = Arc::new(Self { + writer: tokio::sync::Mutex::new(stdin), + pending, + next_id: AtomicU64::new(1), + events: events_tx, + shutdown, + _reader: std::sync::Mutex::new(Some(reader_task)), + }); + Ok((client, pid)) + } + + /// Send a request and await the response. Times out after + /// `DEFAULT_CALL_TIMEOUT` so a misbehaving sidecar cannot wedge a caller + /// indefinitely. 
+    pub async fn call(&self, cmd: &str, args: Value) -> Result<Value, String> {
+        self.call_with_timeout(cmd, args, DEFAULT_CALL_TIMEOUT)
+            .await
+    }
+
+    /// Send `cmd` with `args` to the sidecar and wait up to `timeout` for the
+    /// response frame carrying the same request id.
+    ///
+    /// Returns the response's `result` value on success. Returns `Err` when
+    /// the frame cannot be written to the sidecar's stdin, when the sidecar
+    /// answers with an error frame, when the reader task exits before a
+    /// response arrives, or when `timeout` elapses.
+    ///
+    /// The pending-map entry registered for this request is removed on every
+    /// failure path, including write failures, so failed calls do not
+    /// accumulate stale senders in the map.
+    pub async fn call_with_timeout(
+        &self,
+        cmd: &str,
+        args: Value,
+        timeout: Duration,
+    ) -> Result<Value, String> {
+        let id = self.next_id.fetch_add(1, Ordering::SeqCst);
+
+        // Register the response slot before writing so a fast reply can
+        // never arrive ahead of its pending entry.
+        let (tx, rx) = oneshot::channel();
+        {
+            let mut pending = self.pending.lock().expect("camoufox pending map poisoned");
+            pending.insert(id, tx);
+        }
+
+        let frame = json!({ "id": id, "cmd": cmd, "args": args });
+        if let Err(e) = self.write_frame(cmd, &frame).await {
+            // Nothing was (fully) sent, so no response will ever claim this
+            // slot; remove it instead of leaking the sender.
+            self.drop_pending(id);
+            return Err(e);
+        }
+
+        match tokio::time::timeout(timeout, rx).await {
+            Ok(Ok(result)) => result,
+            Ok(Err(_canceled)) => {
+                self.drop_pending(id);
+                Err(format!(
+                    "camoufox-sidecar dropped response for `{}` (reader task exited)",
+                    cmd
+                ))
+            }
+            Err(_) => {
+                self.drop_pending(id);
+                Err(format!(
+                    "camoufox-sidecar `{}` timed out after {}ms",
+                    cmd,
+                    timeout.as_millis()
+                ))
+            }
+        }
+    }
+
+    /// Serialize `frame` as one JSON line and write it to the sidecar's stdin.
+    ///
+    /// The frame and its terminating newline are sent as a single buffer so
+    /// there is no await point between the JSON text and the line terminator.
+    async fn write_frame(&self, cmd: &str, frame: &Value) -> Result<(), String> {
+        let mut serialized = serde_json::to_string(frame).expect("frame serializes");
+        serialized.push('\n');
+
+        let mut writer = self.writer.lock().await;
+        writer
+            .write_all(serialized.as_bytes())
+            .await
+            .map_err(|e| format!("sending `{}` to camoufox-sidecar: {}", cmd, e))?;
+        writer
+            .flush()
+            .await
+            .map_err(|e| format!("flushing camoufox-sidecar stdin: {}", e))?;
+        Ok(())
+    }
+
+    /// Remove the pending response slot for `id`, if it is still registered.
+    /// A poisoned map lock is ignored: there is nothing useful to clean up.
+    fn drop_pending(&self, id: u64) {
+        if let Ok(mut pending) = self.pending.lock() {
+            pending.remove(&id);
+        }
+    }
+
+    /// Subscribe to events emitted by the sidecar. New subscribers only see
+    /// events sent after they subscribe (consistent with `tokio::broadcast`
+    /// semantics); the `ready` event is consumed during `start()` and does
+    /// not reach subscribers.
+    pub fn subscribe(&self) -> broadcast::Receiver<CamoufoxEvent> {
+        self.events.subscribe()
+    }
+
+    /// Send the `close` command and wait for its response.
Note that + /// receiving the response does NOT imply the sidecar process has + /// exited — the sidecar sets its shutdown event right after responding, + /// and the OS-level reap happens shortly after. Callers must wait on the + /// process handle separately if they need that guarantee (see + /// `CamoufoxProcess::wait_or_kill`). + pub async fn close(&self) -> Result { + self.call_with_timeout("close", json!({}), CLOSE_TIMEOUT) + .await + } +} + +impl std::fmt::Debug for CamoufoxClient { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CamoufoxClient").finish_non_exhaustive() + } +} + +impl Drop for CamoufoxClient { + fn drop(&mut self) { + // Signal the reader task to exit and fail any in-flight calls so + // awaiters get a clean error instead of hanging forever. + self.shutdown.notify_waiters(); + if let Ok(mut pending) = self.pending.lock() { + for (_, tx) in pending.drain() { + let _ = tx.send(Err( + "camoufox-sidecar client dropped while request was in flight".to_string(), + )); + } + } + if let Ok(mut slot) = self._reader.lock() { + if let Some(handle) = slot.take() { + handle.abort(); + } + } + } +} + +async fn read_one_nonblank_line( + reader: &mut R, +) -> std::io::Result { + loop { + let mut buf = String::new(); + let n = reader.read_line(&mut buf).await?; + if n == 0 { + return Err(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "camoufox-sidecar stdout closed before first frame", + )); + } + let trimmed = buf.trim_end_matches(['\r', '\n']).to_string(); + if trimmed.trim().is_empty() { + continue; + } + return Ok(trimmed); + } +} + +fn parse_ready_frame(frame: &Value) -> Result, String> { + let event = frame + .get("event") + .and_then(|v| v.as_str()) + .ok_or_else(|| format!("expected first frame to be a ready event, got: {}", frame))?; + if event != "ready" { + return Err(format!( + "expected first frame to be `ready`, got event `{}`", + event + )); + } + let pid = frame + .get("data") + 
.and_then(|d| d.get("pid")) + .and_then(|v| v.as_u64()) + .and_then(|v| u32::try_from(v).ok()); + Ok(pid) +} + +fn spawn_reader( + mut reader: BufReader, + pending: PendingMap, + events: broadcast::Sender, + shutdown: Arc, +) -> tokio::task::JoinHandle<()> { + tokio::spawn(async move { + let mut line = String::new(); + loop { + line.clear(); + let read = tokio::select! { + biased; + _ = shutdown.notified() => { return; } + r = reader.read_line(&mut line) => r, + }; + match read { + Ok(0) => break, // stdout closed → sidecar exited + Ok(_) => { + let trimmed = line.trim_end_matches(['\r', '\n']).trim(); + if trimmed.is_empty() { + continue; + } + dispatch_frame(trimmed, &pending, &events); + } + Err(_) => break, + } + } + // Sidecar exited: fail every pending call so callers don't hang. + if let Ok(mut p) = pending.lock() { + for (_, tx) in p.drain() { + let _ = tx.send(Err( + "camoufox-sidecar closed stdout before responding".to_string() + )); + } + } + }) +} + +fn dispatch_frame(line: &str, pending: &PendingMap, events: &broadcast::Sender) { + let frame: Value = match serde_json::from_str(line) { + Ok(v) => v, + Err(e) => { + eprintln!( + "[agent-browser] camoufox-sidecar sent malformed JSON on stdout: {} ({:?})", + e, line + ); + return; + } + }; + + if let Some(event) = frame.get("event").and_then(|v| v.as_str()) { + let data = frame.get("data").cloned().unwrap_or(Value::Null); + // Ignore send errors: if no subscribers are attached we just drop the + // event, which is the intended behavior. + let _ = events.send(CamoufoxEvent { + name: event.to_string(), + data, + }); + return; + } + + let Some(id) = frame.get("id").and_then(|v| v.as_u64()) else { + // Responses must carry an id; if not, log and drop. 
+ eprintln!( + "[agent-browser] camoufox-sidecar response had no id: {:?}", + line + ); + return; + }; + + let tx = { + let mut p = match pending.lock() { + Ok(g) => g, + Err(_) => return, + }; + p.remove(&id) + }; + + let Some(tx) = tx else { + // Late response for a request we already timed out on. + return; + }; + + let ok = frame.get("ok").and_then(|v| v.as_bool()).unwrap_or(false); + let result = if ok { + let value = frame.get("result").cloned().unwrap_or(Value::Null); + Ok(value) + } else { + let err = frame.get("error"); + let code = err + .and_then(|e| e.get("code")) + .and_then(|v| v.as_str()) + .unwrap_or("unknown"); + let message = err + .and_then(|e| e.get("message")) + .and_then(|v| v.as_str()) + .unwrap_or("no message provided"); + Err(format!("{}: {}", code, message)) + }; + let _ = tx.send(result); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_ready_frame_extracts_pid() { + let frame = json!({"event": "ready", "data": {"pid": 12345}}); + assert_eq!(parse_ready_frame(&frame).unwrap(), Some(12345)); + } + + #[test] + fn parse_ready_frame_rejects_wrong_event() { + let frame = json!({"event": "closed", "data": {}}); + let err = parse_ready_frame(&frame).unwrap_err(); + assert!(err.contains("expected first frame to be `ready`")); + } + + #[test] + fn parse_ready_frame_rejects_response_frame() { + let frame = json!({"id": 1, "ok": true, "result": {}}); + let err = parse_ready_frame(&frame).unwrap_err(); + assert!(err.contains("ready event")); + } + + #[test] + fn parse_ready_frame_tolerates_missing_pid() { + let frame = json!({"event": "ready", "data": {}}); + assert_eq!(parse_ready_frame(&frame).unwrap(), None); + } + + #[test] + fn dispatch_frame_routes_response_to_pending() { + let pending: PendingMap = Arc::new(Mutex::new(HashMap::new())); + let (events_tx, _) = broadcast::channel::(8); + + let (tx, rx) = oneshot::channel(); + pending.lock().unwrap().insert(7, tx); + + dispatch_frame( + 
r#"{"id":7,"ok":true,"result":{"hello":"world"}}"#, + &pending, + &events_tx, + ); + let got = rx.blocking_recv().unwrap().unwrap(); + assert_eq!(got["hello"], json!("world")); + assert!(pending.lock().unwrap().is_empty()); + } + + #[test] + fn dispatch_frame_surfaces_error_code() { + let pending: PendingMap = Arc::new(Mutex::new(HashMap::new())); + let (events_tx, _) = broadcast::channel::(8); + + let (tx, rx) = oneshot::channel(); + pending.lock().unwrap().insert(9, tx); + + dispatch_frame( + r#"{"id":9,"ok":false,"error":{"code":"launch-failed","message":"boom"}}"#, + &pending, + &events_tx, + ); + let err = rx.blocking_recv().unwrap().unwrap_err(); + assert_eq!(err, "launch-failed: boom"); + } + + #[test] + fn dispatch_frame_fans_events() { + let pending: PendingMap = Arc::new(Mutex::new(HashMap::new())); + let (events_tx, mut events_rx) = broadcast::channel::(8); + + dispatch_frame( + r#"{"event":"page.console","data":{"level":"warn","text":"hi"}}"#, + &pending, + &events_tx, + ); + + let evt = events_rx.try_recv().unwrap(); + assert_eq!(evt.name, "page.console"); + assert_eq!(evt.data["text"], json!("hi")); + } + + #[test] + fn dispatch_frame_ignores_unknown_id() { + let pending: PendingMap = Arc::new(Mutex::new(HashMap::new())); + let (events_tx, _) = broadcast::channel::(8); + + // Should not panic or crash. + dispatch_frame(r#"{"id":999,"ok":true,"result":{}}"#, &pending, &events_tx); + assert!(pending.lock().unwrap().is_empty()); + } +} diff --git a/cli/src/native/camoufox_embed/mod.rs b/cli/src/native/camoufox_embed/mod.rs new file mode 100644 index 000000000..09aded3f5 --- /dev/null +++ b/cli/src/native/camoufox_embed/mod.rs @@ -0,0 +1,171 @@ +//! Embedded Camoufox sidecar package. +//! +//! The full `camoufox_sidecar` Python package (multi-file, sibling imports) +//! is baked into the agent-browser binary via `include_dir!` so users who +//! install only the Rust binary still get a working sidecar to spawn. On +//! 
first launch we extract the tree into a version-keyed cache directory and
+//! spawn `python3 -m camoufox_sidecar` with `PYTHONPATH` pointed at the
+//! extraction root so the package and its sibling imports resolve.
+//!
+//! The extraction dir is keyed by the crate version so upgrades re-extract
+//! deterministically. A `.extracted` sentinel marks a completed extraction;
+//! subsequent launches observing the sentinel skip re-extraction so process
+//! startup stays fast and the files' mtimes are stable.
+//!
+//! In E2B (and other environments where the sidecar is `pip install`'d) we
+//! prefer `python3 -m camoufox_sidecar` and only fall back to the extracted
+//! tree if the module import fails — handled by `spawn_sidecar` in
+//! `cdp/camoufox.rs`.
+
+use std::fs;
+use std::io;
+use std::path::{Path, PathBuf};
+
+use include_dir::{include_dir, Dir};
+
+/// Embedded Python package. Path is resolved at compile time by `include_dir!`
+/// against `$CARGO_MANIFEST_DIR` (the `cli/` crate root).
+static SIDECAR_PACKAGE: Dir<'_> =
+    include_dir!("$CARGO_MANIFEST_DIR/../packages/camoufox-sidecar/camoufox_sidecar");
+
+/// Filename written inside the extracted tree once extraction has completed
+/// successfully. Its presence is the signal that the tree is safe to use.
+const EXTRACTED_SENTINEL: &str = ".extracted";
+
+/// Root of the version-keyed extraction tree for this crate build. The
+/// sidecar package itself lives in a `camoufox_sidecar/` subdirectory of
+/// this root; callers point `PYTHONPATH` at the root and spawn
+/// `python3 -m camoufox_sidecar`.
+///
+/// Returns `NotFound` when the platform exposes no user cache directory.
+pub fn extraction_root() -> io::Result<PathBuf> {
+    let base = dirs::cache_dir().ok_or_else(|| {
+        io::Error::new(
+            io::ErrorKind::NotFound,
+            "no user cache directory available (dirs::cache_dir returned None)",
+        )
+    })?;
+    Ok(base.join(format!(
+        "agent-browser/camoufox-sidecar-{}",
+        env!("CARGO_PKG_VERSION")
+    )))
+}
+
+/// Path to the extracted `camoufox_sidecar` Python package directory. This
+/// is `extraction_root()/camoufox_sidecar/` — the name must stay in sync
+/// with the Python import name (hence the underscore rather than the dash
+/// the outer directory uses for the crate version).
+pub fn package_dir() -> io::Result<PathBuf> {
+    Ok(extraction_root()?.join("camoufox_sidecar"))
+}
+
+/// Ensure the embedded sidecar package is laid out on disk and return the
+/// PYTHONPATH root (the directory that contains the `camoufox_sidecar`
+/// package). If the sentinel file is already present we skip extraction so
+/// mtimes stay stable (see the "running twice in a row" test scenario in
+/// the Camoufox engine plan).
+///
+/// Extraction is best-effort atomic: we extract into a per-process staging
+/// directory and rename into place, so a crash mid-extraction cannot leave
+/// a half-populated tree that is then reused on the next launch. The
+/// staging directory is removed again on every failure path, and losing a
+/// race against another process that completed the same extraction is
+/// treated as success rather than as an error.
+pub fn ensure_extracted() -> io::Result<PathBuf> {
+    let root = extraction_root()?;
+    if is_already_extracted(&root) {
+        return Ok(root);
+    }
+
+    if let Some(parent) = root.parent() {
+        fs::create_dir_all(parent)?;
+    }
+
+    let staging = staging_dir_for(&root);
+    // A stale staging dir can only be left over from a crashed process that
+    // happened to share our pid; it is never a valid tree.
+    let _ = fs::remove_dir_all(&staging);
+
+    if let Err(err) = populate_staging(&staging) {
+        let _ = fs::remove_dir_all(&staging);
+        return Err(err);
+    }
+
+    // Another process may have finished extracting while we were busy. Do
+    // not delete a tree that is complete (and possibly already in use).
+    if is_already_extracted(&root) {
+        let _ = fs::remove_dir_all(&staging);
+        return Ok(root);
+    }
+
+    if root.exists() {
+        // Present but without a sentinel: remnant of an interrupted run.
+        let _ = fs::remove_dir_all(&root);
+    }
+    if let Err(err) = fs::rename(&staging, &root) {
+        let _ = fs::remove_dir_all(&staging);
+        // The rename fails if a concurrent process won the race between our
+        // check above and the rename; its tree is just as good as ours.
+        if is_already_extracted(&root) {
+            return Ok(root);
+        }
+        return Err(err);
+    }
+
+    Ok(root)
+}
+
+/// Write the embedded package and the completion sentinel into `staging`.
+/// The sentinel is written last so its presence implies a full tree.
+fn populate_staging(staging: &Path) -> io::Result<()> {
+    let package_in_staging = staging.join("camoufox_sidecar");
+    fs::create_dir_all(&package_in_staging)?;
+
+    SIDECAR_PACKAGE.extract(&package_in_staging)?;
+    fs::write(
+        staging.join(EXTRACTED_SENTINEL),
+        env!("CARGO_PKG_VERSION").as_bytes(),
+    )
+}
+
+/// True if `path` already hosts a successfully-extracted package. We check
+/// the sentinel specifically because `camoufox_sidecar/__main__.py` alone
+/// could be the remnant of an interrupted extraction.
+fn is_already_extracted(path: &Path) -> bool {
+    path.join(EXTRACTED_SENTINEL).is_file()
+        && path.join("camoufox_sidecar").join("__main__.py").is_file()
+}
+
+/// Sibling of `target` named `<target>.staging-<pid>`. Keeping it next to
+/// the target keeps the final `rename` on one filesystem.
+fn staging_dir_for(target: &Path) -> PathBuf {
+    let mut staging = target.as_os_str().to_owned();
+    staging.push(format!(".staging-{}", std::process::id()));
+    PathBuf::from(staging)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::sync::Mutex;
+
+    // `std::env::set_var` is process-global. Serialise the tests that touch
+    // $XDG_CACHE_HOME so parallel cargo test runs don't race.
+    static ENV_LOCK: Mutex<()> = Mutex::new(());
+
+    fn with_temp_cache<F: FnOnce(&Path)>(f: F) {
+        let guard = ENV_LOCK.lock().unwrap();
+        let tmp = tempfile::tempdir().unwrap();
+        let prev_xdg = std::env::var_os("XDG_CACHE_HOME");
+        let prev_home = std::env::var_os("HOME");
+        std::env::set_var("XDG_CACHE_HOME", tmp.path());
+        std::env::set_var("HOME", tmp.path());
+        f(tmp.path());
+        if let Some(v) = prev_xdg {
+            std::env::set_var("XDG_CACHE_HOME", v);
+        } else {
+            std::env::remove_var("XDG_CACHE_HOME");
+        }
+        if let Some(v) = prev_home {
+            std::env::set_var("HOME", v);
+        } else {
+            std::env::remove_var("HOME");
+        }
+        drop(guard);
+    }
+
+    #[test]
+    fn extracts_all_expected_files() {
+        with_temp_cache(|_| {
+            let root = ensure_extracted().unwrap();
+            let pkg = root.join("camoufox_sidecar");
+            assert!(pkg.join("__main__.py").is_file());
+            assert!(pkg.join("protocol.py").is_file());
+            assert!(pkg.join("session.py").is_file());
+            assert!(root.join(EXTRACTED_SENTINEL).is_file());
+        });
+    }
+
+    #[test]
+    fn second_call_is_idempotent_and_preserves_mtime() {
+        with_temp_cache(|_| {
+            let dir = ensure_extracted().unwrap();
+            let marker = dir.join(EXTRACTED_SENTINEL);
+            let first = fs::metadata(&marker).unwrap().modified().unwrap();
+
+            // Small sleep so a re-extract would show up as a newer mtime on
+            // filesystems with second-level precision.
+            std::thread::sleep(std::time::Duration::from_millis(1100));
+
+            let dir2 = ensure_extracted().unwrap();
+            assert_eq!(dir, dir2);
+            let second = fs::metadata(&marker).unwrap().modified().unwrap();
+            assert_eq!(first, second, "sentinel mtime should be unchanged");
+        });
+    }
+}
diff --git a/cli/src/native/cdp/camoufox.rs b/cli/src/native/cdp/camoufox.rs
new file mode 100644
index 000000000..9f4bf6fff
--- /dev/null
+++ b/cli/src/native/cdp/camoufox.rs
@@ -0,0 +1,590 @@
+//! Camoufox Python sidecar: process lifecycle + launch pipeline.
+//!
+//! The sidecar is a long-lived `python3` child that holds a
+//! Playwright+Camoufox browser open and speaks the JSON-line protocol
+//! documented in `packages/camoufox-sidecar/camoufox_sidecar/protocol.py`.
+//! This module mirrors `lightpanda.rs` in shape (process ownership, bounded
+//! log drainer, structured readiness error) and adds the Python-specific
+//! dispatch logic: `python3 -m camoufox_sidecar` first, with a fallback to
+//! the same command run with `PYTHONPATH` pointed at the embedded copy when
+//! the package is not pip-installed.
+//!
+//! `CamoufoxProcess` owns the `Child` (and kills it on drop). The
+//! `CamoufoxClient` that rides on top is constructed inside
+//! `launch_camoufox_sidecar` from the child's stdio and returned alongside.
+
+use std::collections::VecDeque;
+use std::path::{Path, PathBuf};
+use std::process::Stdio;
+use std::sync::{Arc, Mutex};
+use std::time::Duration;
+
+use serde_json::{json, Value};
+use tokio::io::{AsyncBufReadExt, BufReader};
+use tokio::process::{Child, Command};
+
+use crate::native::camoufox_client::CamoufoxClient;
+use crate::native::camoufox_embed;
+
+const READY_TIMEOUT: Duration = Duration::from_secs(15);
+const LAUNCH_TIMEOUT: Duration = Duration::from_secs(60);
+const MAX_LOG_LINES: usize = 40;
+const GRACEFUL_EXIT_WAIT: Duration = Duration::from_millis(500);
+
+#[cfg(unix)]
+const SIGNAL_TERMINATE: i32 = libc::SIGTERM;
+#[cfg(unix)]
+const SIGNAL_FORCE_KILL: i32 = libc::SIGKILL;
+#[cfg(not(unix))]
+const SIGNAL_TERMINATE: i32 = 15;
+#[cfg(not(unix))]
+const SIGNAL_FORCE_KILL: i32 = 9;
+
+/// Send `signal` to the process group led by `pid`. Because the sidecar is
+/// spawned with `setpgid(0, 0)`, its pid and pgid are the same and this
+/// hits every descendant (Python → Camoufox → plugin-container helpers).
+#[cfg(unix)]
+fn send_signal_to_group(pid: u32, signal: i32) {
+    unsafe {
+        libc::killpg(pid as libc::pid_t, signal);
+    }
+}
+
+#[cfg(not(unix))]
+fn send_signal_to_group(_pid: u32, _signal: i32) {
+    // Windows path not yet supported. The sidecar is not expected to run on
+    // Windows in Unit 3 (E2B is Linux, dev is Linux/macOS); when Windows
+    // support is added, use `TerminateProcess` + job objects here.
+}
+
+/// Poll-reap the group leader `pid` for up to `grace`; if it has not exited
+/// by then, SIGKILL the whole process group and block until the leader is
+/// reaped. Shared by `CamoufoxProcess::wait_or_kill` and its `Drop` impl so
+/// the graceful and ungraceful shutdown paths cannot drift apart.
+///
+/// A `waitpid` result of `-1` is treated as "already gone": the usual cause
+/// is ECHILD because tokio reaped the child first.
+#[cfg(unix)]
+fn reap_leader_or_force_kill(pid: u32, grace: Duration) {
+    const POLL: Duration = Duration::from_millis(100);
+    let leader = pid as libc::pid_t;
+
+    let start = std::time::Instant::now();
+    while start.elapsed() < grace {
+        let mut status: libc::c_int = 0;
+        let ret = unsafe { libc::waitpid(leader, &mut status, libc::WNOHANG) };
+        if ret == leader || ret == -1 {
+            return;
+        }
+        std::thread::sleep(POLL);
+    }
+
+    send_signal_to_group(pid, SIGNAL_FORCE_KILL);
+    let mut status: libc::c_int = 0;
+    unsafe { libc::waitpid(leader, &mut status, 0) };
+}
+
+/// Owns the Python sidecar subprocess and its stderr log drainer.
+///
+/// `CamoufoxClient` owns the stdio half of the relationship (writer +
+/// demultiplexing reader). `CamoufoxProcess` owns the OS-level child: it is
+/// responsible for killing the process on drop so a panicking daemon cannot
+/// leak a Python+Firefox grandchild tree.
+pub struct CamoufoxProcess {
+    child: Option<Child>,
+    /// PID reported by the sidecar's `ready` event. Mostly useful for
+    /// integration tests that assert the process tree is gone after close.
+    // NOTE(review): integer width assumed to be u32 — must match the pid
+    // type returned by `CamoufoxClient::start`.
+    pub sidecar_pid: Option<u32>,
+    _stderr_drainer: Option<tokio::task::JoinHandle<()>>,
+    stderr_log: SharedLog,
+}
+
+impl CamoufoxProcess {
+    /// Best-effort terminate. Sends SIGTERM to the sidecar's process group
+    /// so the entire descendant tree (Python → Camoufox → plugin-container
+    /// helpers) shuts down together. Call `wait_or_kill` afterwards if you
+    /// need the OS-level reap to complete before returning.
+    pub fn kill(&mut self) {
+        if let Some(pid) = self.child.as_ref().and_then(|c| c.id()) {
+            send_signal_to_group(pid, SIGNAL_TERMINATE);
+        }
+    }
+
+    /// Graceful-then-forceful shutdown. Sends SIGTERM to the process group,
+    /// waits up to `timeout` for the sidecar (the group leader) to exit,
+    /// then SIGKILLs the process group if the leader is still alive.
+    ///
+    /// Purely synchronous — the caller is expected to invoke this from a
+    /// blocking context (e.g. `tokio::task::spawn_blocking`). We use raw
+    /// `libc::waitpid` rather than tokio's `Child::wait` because wiring
+    /// futures through a potentially-detached `tokio::spawn` was a
+    /// persistent source of racy teardown where the process wasn't
+    /// actually gone by the time `mgr.close()` returned. Synchronous
+    /// waitpid blocks on the kernel and returns deterministically.
+    pub fn wait_or_kill(&mut self, timeout: Duration) {
+        let Some(child) = self.child.take() else {
+            return;
+        };
+        let Some(pid) = child.id() else {
+            // Already reaped; tokio may have taken the exit status.
+            return;
+        };
+        // We only need the pid from here on. The tokio handle is released
+        // *before* the process is reaped; the wait below is done with raw
+        // `waitpid`, which tolerates tokio reaping the child first.
+        drop(child);
+
+        send_signal_to_group(pid, SIGNAL_TERMINATE);
+
+        #[cfg(unix)]
+        reap_leader_or_force_kill(pid, timeout);
+        #[cfg(not(unix))]
+        let _ = timeout;
+    }
+
+    /// Snapshot of the last few stderr lines — used to build a detailed
+    /// error message when readiness times out or the child exits early.
+    pub fn snapshot_stderr(&self) -> Vec<String> {
+        self.stderr_log
+            .lock()
+            .expect("stderr log poisoned")
+            .iter()
+            .cloned()
+            .collect()
+    }
+
+    /// Non-blocking probe: has the sidecar subprocess exited? Also reaps
+    /// the zombie if so, matching Chrome/Lightpanda semantics.
+    pub fn has_exited(&mut self) -> bool {
+        let Some(child) = self.child.as_mut() else {
+            return true;
+        };
+        matches!(child.try_wait(), Ok(Some(_)))
+    }
+}
+
+impl Drop for CamoufoxProcess {
+    /// Synchronous cleanup path for the ungraceful case (daemon panic, the
+    /// `BrowserManager` being dropped without a `close()` call). Sends
+    /// SIGTERM to the sidecar's process group, waits briefly for its
+    /// asyncio cleanup + Playwright Firefox teardown to complete, and
+    /// escalates to SIGKILL if that times out. We use `libc::waitpid`
+    /// directly rather than `Child::wait` so this stays cheap in Drop —
+    /// spinning up a fresh tokio runtime from a destructor has historically
+    /// been a source of subtle deadlocks.
+    ///
+    /// A no-op after `wait_or_kill`, which takes the child out of `self`.
+    fn drop(&mut self) {
+        let Some(pid) = self.child.as_ref().and_then(|c| c.id()) else {
+            return;
+        };
+
+        send_signal_to_group(pid, SIGNAL_TERMINATE);
+
+        #[cfg(unix)]
+        {
+            const DROP_GRACEFUL_WAIT: Duration = Duration::from_secs(3);
+            reap_leader_or_force_kill(pid, DROP_GRACEFUL_WAIT);
+        }
+    }
+}
+
+/// Bounded ring of the sidecar's most recent stderr lines, shared between
+/// the drainer task and error reporting.
+type SharedLog = Arc<Mutex<VecDeque<String>>>;
+
+fn empty_log() -> SharedLog {
+    Arc::new(Mutex::new(VecDeque::with_capacity(MAX_LOG_LINES)))
+}
+
+/// Append `line`, evicting the oldest entry once `MAX_LOG_LINES` is reached.
+fn push_bounded(log: &SharedLog, line: String) {
+    let mut g = log.lock().expect("stderr log poisoned");
+    if g.len() >= MAX_LOG_LINES {
+        g.pop_front();
+    }
+    g.push_back(line);
+}
+
+/// Validated Camoufox launch kwargs passed through to the sidecar `launch`
+/// command. `args` contains exactly the object the sidecar will feed into
+/// `AsyncCamoufox(**kwargs)`; the Python side re-validates against its own
+/// allowlist so new options can be rolled out from the sidecar without a
+/// Rust release.
+#[derive(Debug, Default, Clone)]
+pub struct CamoufoxLaunchOptions {
+    pub headless: bool,
+    // NOTE(review): element type assumed to be `String`; callers build it
+    // with `.into()` and it is serialised via `json!` — confirm against the
+    // call sites.
+    pub executable_path: Option<String>,
+    /// Forwarded to the sidecar as-is (inserted into the launch args as a
+    /// JSON value).
+    pub proxy: Option<Value>,
+    /// Extra allowed kwargs forwarded verbatim to the sidecar. Left open
+    /// (instead of strongly typed) because the sidecar already enforces the
+    /// allowlist; adding fields here just duplicates validation.
+    pub extra: serde_json::Map<String, Value>,
+}
+
+impl CamoufoxLaunchOptions {
+    /// Build the JSON object sent as the `launch` command's arguments.
+    /// Entries in `extra` are applied last, so they override the typed
+    /// fields when keys collide.
+    fn to_launch_args(&self) -> Value {
+        let mut args = serde_json::Map::new();
+        args.insert("headless".to_string(), json!(self.headless));
+        if let Some(path) = &self.executable_path {
+            args.insert("executable_path".to_string(), json!(path));
+        }
+        if let Some(proxy) = &self.proxy {
+            args.insert("proxy".to_string(), proxy.clone());
+        }
+        for (k, v) in &self.extra {
+            args.insert(k.clone(), v.clone());
+        }
+        Value::Object(args)
+    }
+}
+
+/// Tear down a sidecar whose startup failed. SIGKILLs the sidecar's whole
+/// process group first — `Child::start_kill` alone only hits the Python
+/// leader and would leak any Camoufox processes it already spawned — then
+/// kills and reaps the leader through tokio.
+async fn kill_sidecar_tree(child: &mut Child) {
+    if let Some(pid) = child.id() {
+        send_signal_to_group(pid, SIGNAL_FORCE_KILL);
+    }
+    let _ = child.start_kill();
+    let _ = child.wait().await;
+}
+
+/// Launch the Python sidecar, wait for its `ready` event, then send the
+/// `launch` command to bring up the Camoufox browser. Returns the owning
+/// process handle paired with the client that the rest of the daemon drives.
+///
+/// Failure cleans up the subprocess (and its process group) before
+/// returning; callers never receive a `CamoufoxProcess` whose `ready`
+/// handshake did not complete. Errors carry the dispatch path used and the
+/// tail of the sidecar's stderr.
+pub async fn launch_camoufox_sidecar(
+    options: &CamoufoxLaunchOptions,
+) -> Result<(CamoufoxProcess, Arc<CamoufoxClient>), String> {
+    let python = resolve_python_executable()?;
+    let extracted = camoufox_embed::ensure_extracted()
+        .map_err(|e| format!("Failed to extract embedded camoufox-sidecar: {}", e))?;
+
+    let (mut child, dispatch) = spawn_sidecar(&python, &extracted).await?;
+
+    let stdin = child
+        .stdin
+        .take()
+        .ok_or_else(|| "Failed to capture camoufox-sidecar stdin".to_string())?;
+    let stdout = child
+        .stdout
+        .take()
+        .ok_or_else(|| "Failed to capture camoufox-sidecar stdout".to_string())?;
+    let stderr = child
+        .stderr
+        .take()
+        .ok_or_else(|| "Failed to capture camoufox-sidecar stderr".to_string())?;
+
+    let stderr_log = empty_log();
+    let stderr_drainer = spawn_stderr_drainer(stderr, stderr_log.clone());
+
+    let (client, ready_pid) = match CamoufoxClient::start(stdin, stdout, READY_TIMEOUT).await {
+        Ok(c) => c,
+        Err(e) => {
+            kill_sidecar_tree(&mut child).await;
+            let stderr = snapshot(&stderr_log);
+            return Err(decorate_error(
+                format!("camoufox-sidecar failed readiness handshake: {}", e),
+                dispatch,
+                &stderr,
+            ));
+        }
+    };
+
+    let launch_args = options.to_launch_args();
+    let launch_result = tokio::time::timeout(LAUNCH_TIMEOUT, client.call("launch", launch_args))
+        .await
+        .map_err(|_| {
+            format!(
+                "Camoufox launch timed out after {}s",
+                LAUNCH_TIMEOUT.as_secs()
+            )
+        })
+        .and_then(|r| r);
+
+    if let Err(err) = launch_result {
+        // Attempt a graceful close; whether or not it succeeds, make sure
+        // nothing from the sidecar's process group survives. A failed or
+        // timed-out launch may already have started browser processes.
+        let _ = tokio::time::timeout(GRACEFUL_EXIT_WAIT, client.close()).await;
+        kill_sidecar_tree(&mut child).await;
+        let stderr = snapshot(&stderr_log);
+        return Err(decorate_error(
+            format!("Camoufox launch failed: {}", err),
+            dispatch,
+            &stderr,
+        ));
+    }
+
+    Ok((
+        CamoufoxProcess {
+            child: Some(child),
+            sidecar_pid: ready_pid,
+            _stderr_drainer: Some(stderr_drainer),
+            stderr_log,
+        },
+        client,
+    ))
+}
+
+/// Describes which invocation path the sidecar used. Retained only for the
+/// error message — callers don't care beyond that.
+#[derive(Debug, Clone)]
+enum SidecarDispatch {
+    /// The module was importable from the interpreter's own environment.
+    Module(String),
+    /// The embedded copy was used; `script` is its extracted `__main__.py`.
+    Script { script: PathBuf },
+}
+
+impl SidecarDispatch {
+    /// Human-readable form of the command that was actually run.
+    fn describe(&self) -> String {
+        match self {
+            SidecarDispatch::Module(m) => format!("python3 -m {}", m),
+            // The fallback also runs `-m camoufox_sidecar`; only the module
+            // search path differs, so report that rather than a script
+            // invocation that never happened.
+            SidecarDispatch::Script { script } => {
+                format!(
+                    "python3 -m camoufox_sidecar (embedded copy via PYTHONPATH: {})",
+                    script.display()
+                )
+            }
+        }
+    }
+}
+
+/// Spawn the sidecar, trying `-m camoufox_sidecar` first (works when the
+/// package is pip-installed, as in E2B) and falling back to the embedded
+/// copy extracted to the user cache (works when only the Rust binary is
+/// installed).
+async fn spawn_sidecar(
+    python: &Path,
+    extracted: &Path,
+) -> Result<(Child, SidecarDispatch), String> {
+    // Probe: can Python find `camoufox_sidecar` on its own? We do a cheap
+    // `-c "import camoufox_sidecar"` first so the fallback doesn't require
+    // swallowing a startup crash. A probe that times out, fails to spawn or
+    // exits non-zero all count as "not importable".
+    let probe_ok = tokio::time::timeout(
+        Duration::from_secs(5),
+        Command::new(python)
+            .args(["-c", "import camoufox_sidecar"])
+            .stdin(Stdio::null())
+            .stdout(Stdio::null())
+            .stderr(Stdio::null())
+            .status(),
+    )
+    .await
+    .ok()
+    .and_then(|r| r.ok())
+    .map(|s| s.success())
+    .unwrap_or(false);
+
+    if probe_ok {
+        let child = build_command(python)
+            .args(["-m", "camoufox_sidecar"])
+            .spawn()
+            .map_err(|e| format!("Failed to spawn `python3 -m camoufox_sidecar`: {}", e))?;
+        return Ok((
+            child,
+            SidecarDispatch::Module("camoufox_sidecar".to_string()),
+        ));
+    }
+
+    // Fallback: `extracted` is the PYTHONPATH root — it contains a
+    // `camoufox_sidecar/` package directory. We set PYTHONPATH and invoke
+    // `python3 -m camoufox_sidecar` so Python loads the module as a proper
+    // package (relative imports like `from .protocol import ...` resolve).
+    let package_init = extracted.join("camoufox_sidecar").join("__main__.py");
+    if !package_init.is_file() {
+        return Err(format!(
+            "Embedded camoufox-sidecar is missing __main__.py at {}",
+            package_init.display()
+        ));
+    }
+    let pythonpath = prepend_pythonpath(extracted);
+
+    let child = build_command(python)
+        .args(["-m", "camoufox_sidecar"])
+        .env("PYTHONPATH", pythonpath)
+        .spawn()
+        .map_err(|e| {
+            format!(
+                "Failed to spawn fallback `python3 -m camoufox_sidecar` (PYTHONPATH={}): {}",
+                extracted.display(),
+                e
+            )
+        })?;
+    Ok((
+        child,
+        SidecarDispatch::Script {
+            script: package_init,
+        },
+    ))
+}
+
+/// Base `Command` for every sidecar invocation: piped stdio, unbuffered
+/// Python output, and (on Unix) a fresh process group led by the child.
+fn build_command(python: &Path) -> Command {
+    let mut cmd = Command::new(python);
+    cmd.stdin(Stdio::piped())
+        .stdout(Stdio::piped())
+        .stderr(Stdio::piped())
+        // Python must flush stdout after every frame — otherwise the sidecar
+        // protocol deadlocks on buffered output. The sidecar itself calls
+        // `sys.stdout.flush()` but we set this too as belt-and-braces.
+        .env("PYTHONUNBUFFERED", "1");
+
+    // Make the sidecar the leader of its own process group so we can signal
+    // the entire descendant tree (Python → Camoufox → plugin-container
+    // helpers) with one kill. Without this, on macOS the Firefox
+    // grandchildren survive when we SIGKILL only the Python parent and
+    // leak across test runs. `kill_on_drop` is deliberately NOT set — it
+    // uses SIGKILL, which gives the sidecar no chance to run its asyncio
+    // cleanup (which is how Playwright closes Firefox cleanly).
+    #[cfg(unix)]
+    {
+        unsafe {
+            cmd.pre_exec(|| {
+                if libc::setpgid(0, 0) != 0 {
+                    return Err(std::io::Error::last_os_error());
+                }
+                Ok(())
+            });
+        }
+    }
+    cmd
+}
+
+/// `dir` followed by the inherited `PYTHONPATH` (if non-empty), joined with
+/// the platform's path-list separator, so `dir` takes precedence.
+fn prepend_pythonpath(dir: &Path) -> std::ffi::OsString {
+    let existing = std::env::var_os("PYTHONPATH");
+    let sep = if cfg!(windows) { ";" } else { ":" };
+    let mut out = std::ffi::OsString::from(dir.as_os_str());
+    if let Some(v) = existing {
+        if !v.is_empty() {
+            out.push(sep);
+            out.push(v);
+        }
+    }
+    out
+}
+
+/// Discovery order per the plan: env var → `python3` (then `python`) on
+/// PATH → error.
+fn resolve_python_executable() -> Result<PathBuf, String> {
+    // `var_os` rather than `var`: a non-UTF-8 override path must be honoured
+    // (or rejected loudly below), not silently skipped in favour of PATH.
+    if let Some(v) = std::env::var_os("AGENT_BROWSER_CAMOUFOX_PYTHON") {
+        if !v.is_empty() {
+            let p = PathBuf::from(v);
+            if p.exists() {
+                return Ok(p);
+            }
+            return Err(format!(
+                "AGENT_BROWSER_CAMOUFOX_PYTHON points to a path that does not exist: {}",
+                p.display()
+            ));
+        }
+    }
+
+    ["python3", "python"]
+        .iter()
+        .find_map(|candidate| find_on_path(candidate))
+        .ok_or_else(|| {
+            "Camoufox requires a Python 3 runtime with the `camoufox` package installed. \
+             Set AGENT_BROWSER_CAMOUFOX_PYTHON to your python3 binary or install python3 on PATH. \
+             See docs/engines/camoufox.md."
+                .to_string()
+        })
+}
+
+/// Resolve `candidate` through the platform's PATH lookup tool (`where` on
+/// Windows, `which` elsewhere). Returns the first path it prints, or `None`
+/// when the tool is unavailable, exits non-zero, or prints nothing.
+fn find_on_path(candidate: &str) -> Option<PathBuf> {
+    let locator = if cfg!(windows) { "where" } else { "which" };
+    let output = std::process::Command::new(locator)
+        .arg(candidate)
+        .output()
+        .ok()?;
+    if !output.status.success() {
+        return None;
+    }
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    let first = stdout.lines().next().unwrap_or("").trim();
+    if first.is_empty() {
+        None
+    } else {
+        Some(PathBuf::from(first))
+    }
+}
+
+/// Forward the sidecar's stderr, line by line, into the bounded `log` until
+/// the stream closes or a read fails.
+fn spawn_stderr_drainer(
+    stderr: tokio::process::ChildStderr,
+    log: SharedLog,
+) -> tokio::task::JoinHandle<()> {
+    tokio::spawn(async move {
+        let mut reader = BufReader::new(stderr).lines();
+        while let Ok(Some(line)) = reader.next_line().await {
+            push_bounded(&log, line);
+        }
+    })
+}
+
+/// Copy of the currently buffered stderr lines, oldest first.
+fn snapshot(log: &SharedLog) -> Vec<String> {
+    log.lock()
+        .expect("stderr log poisoned")
+        .iter()
+        .cloned()
+        .collect()
+}
+
+/// Append the dispatch description and, when present, the captured stderr
+/// tail to `message`.
+fn decorate_error(message: String, dispatch: SidecarDispatch, stderr: &[String]) -> String {
+    let mut out = format!("{}\n dispatch: {}", message, dispatch.describe());
+    if !stderr.is_empty() {
+        out.push_str(&format!(
+            "\n sidecar stderr (last {} lines):\n {}",
+            stderr.len(),
+            stderr.join("\n ")
+        ));
+    }
+    out
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // `std::env::set_var` is process-global and cargo runs tests in
+    // parallel. Serialise the tests that touch
+    // AGENT_BROWSER_CAMOUFOX_PYTHON so one cannot observe the other's value.
+    static ENV_LOCK: Mutex<()> = Mutex::new(());
+
+    #[test]
+    fn launch_options_marshals_headless() {
+        let opts = CamoufoxLaunchOptions {
+            headless: true,
+            executable_path: Some("/tmp/cf".into()),
+            proxy: None,
+            extra: serde_json::Map::new(),
+        };
+        let args = opts.to_launch_args();
+        assert_eq!(args["headless"], json!(true));
+        assert_eq!(args["executable_path"], json!("/tmp/cf"));
+    }
+
+    #[test]
+    fn launch_options_preserves_extra() {
+        let mut extra = serde_json::Map::new();
+        extra.insert("humanize".into(), json!(true));
+        let opts = CamoufoxLaunchOptions {
+            headless: false,
+            executable_path: None,
+            proxy: None,
+            extra,
+        };
+        let args = opts.to_launch_args();
+        assert_eq!(args["humanize"], json!(true));
+    }
+
+    #[test]
+    fn resolve_python_returns_env_var_when_set() {
+        let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
+        let tmp = tempfile::NamedTempFile::new().unwrap();
+        std::env::set_var("AGENT_BROWSER_CAMOUFOX_PYTHON", tmp.path());
+        let got = resolve_python_executable().unwrap();
+        assert_eq!(got, tmp.path());
+        std::env::remove_var("AGENT_BROWSER_CAMOUFOX_PYTHON");
+    }
+
+    #[test]
+    fn resolve_python_rejects_missing_env_path() {
+        let _guard = ENV_LOCK.lock().unwrap_or_else(|e| e.into_inner());
+        std::env::set_var(
+            "AGENT_BROWSER_CAMOUFOX_PYTHON",
+            "/nonexistent/python3-no-such-file",
+        );
+        let err = resolve_python_executable().unwrap_err();
+        assert!(err.contains("does not exist"));
+        std::env::remove_var("AGENT_BROWSER_CAMOUFOX_PYTHON");
+    }
+}
diff --git a/cli/src/native/cdp/mod.rs b/cli/src/native/cdp/mod.rs index 2f097372e..98cf6391f 100644 --- a/cli/src/native/cdp/mod.rs +++ b/cli/src/native/cdp/mod.rs @@ -1,3 +1,4 @@ +pub mod camoufox; pub mod chrome; pub mod client; pub mod discovery; diff --git a/cli/src/native/cookies.rs b/cli/src/native/cookies.rs index b0fdbeda4..957fa43da 100644 --- a/cli/src/native/cookies.rs +++ b/cli/src/native/cookies.rs @@ -1,7 +1,7 @@ use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; -use super::cdp::client::CdpClient; +use super::backend::BrowserBackend; #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] @@ -24,7 +24,11 @@ pub struct Cookie { pub same_site: Option, } -pub async fn get_all_cookies(client: &CdpClient, session_id: &str) -> Result, String> { +pub async fn get_all_cookies( + backend: &BrowserBackend, + session_id: &str, +) -> Result, String> { + let client = backend.require_cdp()?; let result = client .send_command_no_params("Network.getAllCookies", Some(session_id)) .await?; @@ -38,10 +42,11 @@ pub async fn get_all_cookies(client: &CdpClient, session_id: &str) -> Result>, ) -> Result, String> { + let client = backend.require_cdp()?; let params = match urls { Some(ref u) if !u.is_empty() => json!({ "urls": u }), _ => json!({}), @@ -60,11 +65,12 @@ pub async fn get_cookies( } pub async fn set_cookies( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, cookies: Vec, current_url: Option<&str>, ) -> Result<(), String> { + let client = backend.require_cdp()?; let cookies: Vec = cookies .into_iter() .map(|mut c| { @@ -92,7 +98,8 @@ pub async fn set_cookies( Ok(()) } -pub async fn clear_cookies(client: &CdpClient, session_id: &str) -> Result<(), String> { +pub async fn clear_cookies(backend: &BrowserBackend, session_id: &str) -> Result<(), String> { +
let client = backend.require_cdp()?; client .send_command_no_params("Network.clearBrowserCookies", Some(session_id)) .await?; diff --git a/cli/src/native/e2e_tests.rs b/cli/src/native/e2e_tests.rs index 21b0d7a5b..76db6521d 100644 --- a/cli/src/native/e2e_tests.rs +++ b/cli/src/native/e2e_tests.rs @@ -4127,7 +4127,7 @@ async fn e2e_externally_opened_tab_detected() { // opens a tab while agent-browser is connected via --cdp. let browser = state.browser.as_ref().expect("browser should be launched"); let _: Value = browser - .client + .client() .send_command( "Target.createTarget", Some(json!({ "url": "data:text/html,

External Tab

" })), diff --git a/cli/src/native/element.rs b/cli/src/native/element.rs index 7b22ed9ad..6756bf321 100644 --- a/cli/src/native/element.rs +++ b/cli/src/native/element.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use serde_json::Value; -use super::cdp::client::CdpClient; +use super::backend::BrowserBackend; use super::cdp::types::*; #[derive(Debug, Clone)] @@ -147,7 +147,7 @@ pub fn parse_ref(input: &str) -> Option { } pub async fn resolve_element_center( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -214,7 +214,7 @@ pub async fn resolve_element_center( } pub async fn resolve_element_object_id( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -338,7 +338,7 @@ fn resolve_frame_session<'a>( /// (Accessibility.getFullAXTree) that built the ref map during snapshot, /// so role/name matching is guaranteed to be consistent. async fn find_node_id_by_role_name( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, role: &str, name: &str, @@ -439,7 +439,7 @@ fn build_selector_js(selector: &str) -> String { } async fn resolve_by_selector( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, selector: &str, ) -> Result<(f64, f64), String> { @@ -479,7 +479,7 @@ fn box_model_center(model: &BoxModel) -> (f64, f64) { } pub async fn get_element_text( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -517,7 +517,7 @@ pub async fn get_element_text( } pub async fn get_element_attribute( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -554,7 +554,7 @@ pub async fn get_element_attribute( } pub async fn is_element_visible( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -599,7 +599,7 @@ pub async fn is_element_visible( } pub async fn 
is_element_enabled( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -636,7 +636,7 @@ pub async fn is_element_enabled( } pub async fn is_element_checked( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -706,7 +706,7 @@ pub async fn is_element_checked( } pub async fn get_element_inner_text( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -743,7 +743,7 @@ pub async fn get_element_inner_text( } pub async fn get_element_inner_html( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -780,7 +780,7 @@ pub async fn get_element_inner_html( } pub async fn get_element_input_value( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -819,7 +819,7 @@ pub async fn get_element_input_value( } pub async fn set_element_value( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -858,7 +858,7 @@ pub async fn set_element_value( } pub async fn get_element_bounding_box( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -898,7 +898,7 @@ pub async fn get_element_bounding_box( } pub async fn get_element_count( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, selector: &str, ) -> Result { @@ -920,7 +920,7 @@ pub async fn get_element_count( } pub async fn get_element_styles( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, diff --git a/cli/src/native/interaction.rs b/cli/src/native/interaction.rs index 0c51ec7d0..dd1dc94e4 100644 --- a/cli/src/native/interaction.rs +++ b/cli/src/native/interaction.rs @@ -2,12 +2,12 @@ use std::collections::HashMap; use serde_json::Value; -use 
super::cdp::client::CdpClient; +use super::backend::BrowserBackend; use super::cdp::types::*; use super::element::{resolve_element_center, resolve_element_object_id, RefMap}; pub async fn click( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -27,7 +27,7 @@ pub async fn click( } pub async fn dblclick( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -46,7 +46,7 @@ pub async fn dblclick( } pub async fn hover( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -81,7 +81,7 @@ pub async fn hover( } pub async fn fill( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -148,7 +148,7 @@ pub async fn fill( #[allow(clippy::too_many_arguments)] pub async fn type_text( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -206,7 +206,7 @@ pub async fn type_text( } pub async fn type_text_into_active_context( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, text: &str, delay_ms: Option, @@ -273,7 +273,7 @@ pub async fn type_text_into_active_context( Ok(()) } -pub async fn press_key(client: &CdpClient, session_id: &str, key: &str) -> Result<(), String> { +pub async fn press_key(client: &BrowserBackend, session_id: &str, key: &str) -> Result<(), String> { press_key_with_modifiers(client, session_id, key, None).await } @@ -285,7 +285,7 @@ pub async fn press_key(client: &CdpClient, session_id: &str, key: &str) -> Resul /// Callers that need a platform-appropriate modifier (e.g. Cmd on macOS, /// Ctrl elsewhere) must choose the value themselves -- see `cfg!(target_os)`. 
pub async fn press_key_with_modifiers( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, key: &str, modifiers: Option, @@ -339,7 +339,7 @@ pub async fn press_key_with_modifiers( } pub async fn scroll( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: Option<&str>, @@ -391,7 +391,7 @@ pub async fn scroll( } pub async fn select_option( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -437,7 +437,7 @@ pub async fn select_option( } pub async fn check( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -489,7 +489,7 @@ pub async fn check( } pub async fn uncheck( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -548,7 +548,7 @@ pub async fn uncheck( /// 3. If the element has a nested `` → `.click()` that input. /// 4. Otherwise → `.click()` the element itself (handles ARIA role controls). 
async fn js_click_checkbox( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -605,7 +605,7 @@ async fn js_click_checkbox( } pub async fn focus( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -638,7 +638,7 @@ pub async fn focus( } pub async fn clear( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -677,7 +677,7 @@ pub async fn clear( } pub async fn select_all( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -722,7 +722,7 @@ pub async fn select_all( } pub async fn scroll_into_view( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -757,7 +757,7 @@ pub async fn scroll_into_view( } pub async fn dispatch_event( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -802,7 +802,7 @@ pub async fn dispatch_event( } pub async fn highlight( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -844,7 +844,7 @@ pub async fn highlight( } pub async fn tap_touch( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -885,7 +885,7 @@ pub async fn tap_touch( } async fn dispatch_click( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, x: f64, y: f64, diff --git a/cli/src/native/mod.rs b/cli/src/native/mod.rs index 5979da0d0..202edab79 100644 --- a/cli/src/native/mod.rs +++ b/cli/src/native/mod.rs @@ -3,8 +3,14 @@ pub mod actions; #[allow(dead_code)] pub mod auth; #[allow(dead_code)] +pub mod backend; +#[allow(dead_code)] pub mod browser; #[allow(dead_code)] +pub mod camoufox_client; +#[allow(dead_code)] +pub mod camoufox_embed; +#[allow(dead_code)] pub mod cdp; #[allow(dead_code)] pub mod 
cookies; diff --git a/cli/src/native/network.rs b/cli/src/native/network.rs index 39c69df38..47a368924 100644 --- a/cli/src/native/network.rs +++ b/cli/src/native/network.rs @@ -1,13 +1,14 @@ use serde_json::{json, Value}; use std::collections::HashMap; -use super::cdp::client::CdpClient; +use super::backend::BrowserBackend; pub async fn set_extra_headers( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, headers: &HashMap, ) -> Result<(), String> { + let client = backend.require_cdp()?; let headers_value: Value = headers .iter() .map(|(k, v)| (k.clone(), Value::String(v.clone()))) @@ -26,10 +27,11 @@ pub async fn set_extra_headers( } pub async fn set_offline( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, offline: bool, ) -> Result<(), String> { + let client = backend.require_cdp()?; client .send_command( "Network.emulateNetworkConditions", @@ -45,7 +47,12 @@ pub async fn set_offline( Ok(()) } -pub async fn set_content(client: &CdpClient, session_id: &str, html: &str) -> Result<(), String> { +pub async fn set_content( + backend: &BrowserBackend, + session_id: &str, + html: &str, +) -> Result<(), String> { + let client = backend.require_cdp()?; // Get current frame ID let tree_result = client .send_command_no_params("Page.getFrameTree", Some(session_id)) @@ -134,10 +141,13 @@ fn parse_domain_list(input: &str) -> Vec { } pub async fn sanitize_existing_pages( - client: &CdpClient, + backend: &BrowserBackend, pages: &[super::browser::PageInfo], filter: &DomainFilter, ) { + let Ok(client) = backend.require_cdp() else { + return; + }; for page in pages { if page.url.is_empty() || page.url == "about:blank" { continue; @@ -159,10 +169,11 @@ pub async fn sanitize_existing_pages( } pub async fn install_domain_filter_script( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, allowed_domains: &[String], ) -> Result<(), String> { + let client = backend.require_cdp()?; if allowed_domains.is_empty() { return Ok(()); } @@ 
-231,10 +242,11 @@ pub async fn install_domain_filter_script( /// The actual handling of `Fetch.requestPaused` events happens in /// `resolve_fetch_paused` in the actions module. pub async fn install_domain_filter_fetch( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, handle_auth_requests: bool, ) -> Result<(), String> { + let client = backend.require_cdp()?; let mut params = json!({ "patterns": [{ "urlPattern": "*" }] }); @@ -251,13 +263,13 @@ pub async fn install_domain_filter_fetch( /// 1. JS patching (WebSocket, EventSource, sendBeacon) /// 2. Fetch-based network interception pub async fn install_domain_filter( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, allowed_domains: &[String], handle_auth_requests: bool, ) -> Result<(), String> { - install_domain_filter_script(client, session_id, allowed_domains).await?; - install_domain_filter_fetch(client, session_id, handle_auth_requests).await?; + install_domain_filter_script(backend, session_id, allowed_domains).await?; + install_domain_filter_fetch(backend, session_id, handle_auth_requests).await?; Ok(()) } diff --git a/cli/src/native/screenshot.rs b/cli/src/native/screenshot.rs index 0736691f3..2fa8e241e 100644 --- a/cli/src/native/screenshot.rs +++ b/cli/src/native/screenshot.rs @@ -4,6 +4,7 @@ use std::path::PathBuf; use std::collections::HashMap; +use super::backend::BrowserBackend; use super::cdp::client::CdpClient; use super::cdp::types::*; use super::element::RefMap; @@ -98,16 +99,17 @@ impl Serialize for ScreenshotAnnotation { /// Captures a screenshot via CDP and optionally overlays numbered annotations /// that mirror the Node.js screenshot `annotate` mode. 
pub async fn take_screenshot( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, ref_map: &RefMap, options: &ScreenshotOptions, iframe_sessions: &HashMap, ) -> Result { + let client = backend.require_cdp()?; let target_rect = if options.annotate { match options.selector.as_deref() { Some(selector) => { - get_rect_for_selector(client, session_id, ref_map, selector, iframe_sessions) + get_rect_for_selector(backend, session_id, ref_map, selector, iframe_sessions) .await? } None => None, @@ -117,7 +119,7 @@ pub async fn take_screenshot( }; let raw_annotations = if options.annotate { - collect_annotations(client, session_id, ref_map).await? + collect_annotations(backend, session_id, ref_map).await? } else { Vec::new() }; @@ -131,7 +133,7 @@ pub async fn take_screenshot( }; let base64 = - capture_screenshot_base64(client, session_id, ref_map, options, iframe_sessions).await; + capture_screenshot_base64(backend, session_id, ref_map, options, iframe_sessions).await; if overlay_injected { let _ = remove_annotation_overlay(client, session_id).await; @@ -169,12 +171,13 @@ pub async fn take_screenshot( } async fn capture_screenshot_base64( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, ref_map: &RefMap, options: &ScreenshotOptions, iframe_sessions: &HashMap, ) -> Result { + let client = backend.require_cdp()?; let mut params = CaptureScreenshotParams { format: Some(options.format.clone()), quality: if options.format == "jpeg" { @@ -209,7 +212,7 @@ async fn capture_screenshot_base64( } } else if let Some(ref selector) = options.selector { if let Some(rect) = - get_rect_for_selector(client, session_id, ref_map, selector, iframe_sessions).await? + get_rect_for_selector(backend, session_id, ref_map, selector, iframe_sessions).await? 
{ params.clip = Some(Viewport { x: rect.x, @@ -229,10 +232,11 @@ async fn capture_screenshot_base64( } async fn collect_annotations( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, ref_map: &RefMap, ) -> Result, String> { + let client = backend.require_cdp()?; let entries = ref_map.entries_sorted(); if entries.is_empty() { return Ok(Vec::new()); @@ -322,20 +326,21 @@ async fn collect_annotations( } async fn get_rect_for_selector( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector: &str, iframe_sessions: &HashMap, ) -> Result, String> { let (object_id, effective_session_id) = super::element::resolve_element_object_id( - client, + backend, session_id, ref_map, selector, iframe_sessions, ) .await?; + let client = backend.require_cdp()?; get_rect_for_object(client, &effective_session_id, &object_id).await } diff --git a/cli/src/native/snapshot.rs b/cli/src/native/snapshot.rs index 8aec758e9..84c41978c 100644 --- a/cli/src/native/snapshot.rs +++ b/cli/src/native/snapshot.rs @@ -2,6 +2,7 @@ use std::collections::HashMap; use serde_json::Value; +use super::backend::BrowserBackend; use super::cdp::client::CdpClient; use super::cdp::types::{ AXNode, AXProperty, AXValue, EvaluateParams, EvaluateResult, GetFullAXTreeResult, @@ -214,13 +215,14 @@ impl RoleNameTracker { } pub async fn take_snapshot( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, options: &SnapshotOptions, ref_map: &mut RefMap, frame_id: Option<&str>, iframe_sessions: &HashMap, ) -> Result { + let client = backend.require_cdp()?; client .send_command_no_params("DOM.enable", Some(session_id)) .await?; @@ -505,7 +507,7 @@ pub async fn take_snapshot( // Snapshot the child frame; errors are silently ignored // (e.g. 
cross-origin iframes) if let Ok(child_text) = Box::pin(take_snapshot( - client, + backend, session_id, options, ref_map, diff --git a/cli/src/native/state.rs b/cli/src/native/state.rs index 882764b9a..bf4645ad0 100644 --- a/cli/src/native/state.rs +++ b/cli/src/native/state.rs @@ -7,6 +7,7 @@ use std::collections::HashSet; use std::fs; use std::path::PathBuf; +use super::backend::BrowserBackend; use super::cdp::client::CdpClient; use super::cdp::types::{ AttachToTargetParams, AttachToTargetResult, CloseTargetParams, CreateTargetParams, @@ -245,14 +246,15 @@ async fn collect_storage_in_target( } pub async fn save_state( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, path: Option<&str>, session_name: Option<&str>, session_id_str: &str, visited_origins: &HashSet, ) -> Result { - let cookies = cookies::get_all_cookies(client, session_id).await?; + let client = backend.require_cdp()?; + let cookies = cookies::get_all_cookies(backend, session_id).await?; let origin_js = r#"(() => { const result = { origin: location.origin, localStorage: [], sessionStorage: [] }; @@ -333,7 +335,12 @@ pub async fn save_state( Ok(save_path) } -pub async fn load_state(client: &CdpClient, session_id: &str, path: &str) -> Result<(), String> { +pub async fn load_state( + backend: &BrowserBackend, + session_id: &str, + path: &str, +) -> Result<(), String> { + let client = backend.require_cdp()?; let json_str = if path.ends_with(".enc") { let key = std::env::var("AGENT_BROWSER_ENCRYPTION_KEY").map_err(|_| { "Encrypted state file requires AGENT_BROWSER_ENCRYPTION_KEY".to_string() @@ -373,7 +380,7 @@ pub async fn load_state(client: &CdpClient, session_id: &str, path: &str) -> Res .iter() .map(|c| serde_json::to_value(c).unwrap_or(Value::Null)) .collect(); - cookies::set_cookies(client, session_id, cookie_values, None).await?; + cookies::set_cookies(backend, session_id, cookie_values, None).await?; } // Load storage per origin diff --git a/cli/src/native/storage.rs 
b/cli/src/native/storage.rs index 5b6ffe5cc..f70fbbb87 100644 --- a/cli/src/native/storage.rs +++ b/cli/src/native/storage.rs @@ -1,14 +1,16 @@ use serde_json::{json, Value}; +use super::backend::BrowserBackend; use super::cdp::client::CdpClient; use super::cdp::types::EvaluateParams; pub async fn storage_get( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, storage_type: &str, key: Option<&str>, ) -> Result { + let client = backend.require_cdp()?; let st = storage_js_name(storage_type); if let Some(k) = key { @@ -38,12 +40,13 @@ pub async fn storage_get( } pub async fn storage_set( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, storage_type: &str, key: &str, value: &str, ) -> Result<(), String> { + let client = backend.require_cdp()?; let st = storage_js_name(storage_type); let js = format!( "{}.setItem({}, {})", @@ -56,10 +59,11 @@ pub async fn storage_set( } pub async fn storage_clear( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, storage_type: &str, ) -> Result<(), String> { + let client = backend.require_cdp()?; let st = storage_js_name(storage_type); let js = format!("{}.clear()", st); eval_simple(client, session_id, &js).await?; @@ -73,6 +77,9 @@ fn storage_js_name(storage_type: &str) -> &str { } } +/// Internal helper: only reachable once the caller has already extracted a +/// CDP client from `BrowserBackend::Cdp`, so the raw `&CdpClient` is an +/// enum-arm body, not a user-facing interface. 
async fn eval_simple(client: &CdpClient, session_id: &str, js: &str) -> Result { let result: super::cdp::types::EvaluateResult = client .send_command_typed( diff --git a/cli/src/native/stream/cdp_loop.rs b/cli/src/native/stream/cdp_loop.rs index e3ce56566..166942a87 100644 --- a/cli/src/native/stream/cdp_loop.rs +++ b/cli/src/native/stream/cdp_loop.rs @@ -3,6 +3,7 @@ use std::sync::Arc; use tokio::sync::{broadcast, watch, Mutex, RwLock}; +use crate::native::backend::BrowserBackend; use crate::native::cdp::client::CdpClient; use crate::native::network; @@ -278,14 +279,19 @@ pub(super) async fn cdp_event_loop( } } +/// Chrome-only entry point — asserts the backend is `BrowserBackend::Cdp` +/// and surfaces `engine-incompatible` on Camoufox, which does not speak raw +/// CDP screencast. Internal helpers below still take `&CdpClient` directly +/// because they are only reachable through this assertion. pub async fn start_screencast( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, format: &str, quality: i32, max_width: i32, max_height: i32, ) -> Result<(), String> { + let client = backend.require_cdp_for("screencast")?; client .send_command( "Page.startScreencast", @@ -302,7 +308,8 @@ pub async fn start_screencast( Ok(()) } -pub async fn stop_screencast(client: &CdpClient, session_id: &str) -> Result<(), String> { +pub async fn stop_screencast(backend: &BrowserBackend, session_id: &str) -> Result<(), String> { + let client = backend.require_cdp_for("screencast")?; client .send_command_no_params("Page.stopScreencast", Some(session_id)) .await?; @@ -310,10 +317,11 @@ pub async fn stop_screencast(client: &CdpClient, session_id: &str) -> Result<(), } pub async fn ack_screencast_frame( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, screencast_session_id: i64, ) -> Result<(), String> { + let client = backend.require_cdp_for("screencast")?; client .send_command( "Page.screencastFrameAck", diff --git a/cli/src/native/stream/chat.rs 
b/cli/src/native/stream/chat.rs index 884f22a8d..5336c16d5 100644 --- a/cli/src/native/stream/chat.rs +++ b/cli/src/native/stream/chat.rs @@ -145,7 +145,7 @@ RULES: - Keep responses concise. - For screenshots, omit the path argument so they save to the default location (which will be displayed inline). Screenshots from tool calls are ALREADY shown to the user. Do NOT re-display them with markdown image syntax in your text response. Never use `![...]()` to reference screenshots. - To create a new session: add `--session ` to any command (e.g. `agent-browser --session my-session open https://example.com`). If the session does not exist, it will be created automatically. -- To use a different browser engine: add `--engine ` (e.g. `agent-browser --session lp-session --engine lightpanda open https://example.com`). Supported engines: chrome (default), lightpanda. +- To use a different browser engine: add `--engine ` (e.g. `agent-browser --session lp-session --engine lightpanda open https://example.com`). Supported engines: chrome (default), lightpanda, camoufox. The following skill references describe agent-browser capabilities in detail. Use them when deciding which commands to run and how to approach tasks. 
{sections}"#, diff --git a/cli/src/native/tracing.rs b/cli/src/native/tracing.rs index 6737c7801..f32f12749 100644 --- a/cli/src/native/tracing.rs +++ b/cli/src/native/tracing.rs @@ -1,6 +1,7 @@ use serde_json::{json, Value}; use std::path::PathBuf; +use super::backend::BrowserBackend; use super::cdp::client::CdpClient; const MAX_PROFILE_EVENTS: usize = 5_000_000; @@ -40,10 +41,11 @@ impl TracingState { } pub async fn trace_start( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, tracing_state: &mut TracingState, ) -> Result { + let client = backend.require_cdp()?; if tracing_state.active { return Err("Tracing already active".to_string()); } @@ -69,11 +71,12 @@ pub async fn trace_start( } pub async fn trace_stop( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, tracing_state: &mut TracingState, path: Option<&str>, ) -> Result { + let client = backend.require_cdp()?; if !tracing_state.active { return Err("No tracing in progress".to_string()); } @@ -181,11 +184,12 @@ pub async fn trace_stop( } pub async fn profiler_start( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, tracing_state: &mut TracingState, categories: Option>, ) -> Result { + let client = backend.require_cdp()?; if tracing_state.active { return Err("Profiling/tracing already active".to_string()); } @@ -219,11 +223,12 @@ pub async fn profiler_start( } pub async fn profiler_stop( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, tracing_state: &mut TracingState, path: Option<&str>, ) -> Result { + let client = backend.require_cdp()?; if !tracing_state.active { return Err("No profiling in progress".to_string()); } diff --git a/cli/src/output.rs b/cli/src/output.rs index 743638d85..db5f02d6f 100644 --- a/cli/src/output.rs +++ b/cli/src/output.rs @@ -3069,7 +3069,7 @@ Options: --action-policy Action policy JSON file (or AGENT_BROWSER_ACTION_POLICY) --confirm-actions Categories requiring confirmation (or AGENT_BROWSER_CONFIRM_ACTIONS) 
--confirm-interactive Interactive confirmation prompts; auto-denies if stdin is not a TTY (or AGENT_BROWSER_CONFIRM_INTERACTIVE) - --engine Browser engine: chrome (default), lightpanda (or AGENT_BROWSER_ENGINE) + --engine Browser engine: chrome (default), lightpanda, camoufox (or AGENT_BROWSER_ENGINE) --no-auto-dialog Disable automatic dismissal of alert/beforeunload dialogs (or AGENT_BROWSER_NO_AUTO_DIALOG) --stealth Mask common bot-detection signals (webdriver, chrome.runtime, plugins, languages, WebGL) (or AGENT_BROWSER_STEALTH) @@ -3133,7 +3133,7 @@ Environment: AGENT_BROWSER_CONFIRM_ACTIONS Action categories requiring confirmation AGENT_BROWSER_CONFIRM_INTERACTIVE Enable interactive confirmation prompts AGENT_BROWSER_NO_AUTO_DIALOG Disable automatic dismissal of alert/beforeunload dialogs - AGENT_BROWSER_ENGINE Browser engine: chrome (default), lightpanda + AGENT_BROWSER_ENGINE Browser engine: chrome (default), lightpanda, camoufox HTTP_PROXY / HTTPS_PROXY Standard proxy env vars (fallback if AGENT_BROWSER_PROXY not set) ALL_PROXY SOCKS proxy (fallback for proxy) NO_PROXY Bypass proxy for hosts (fallback for proxy-bypass) diff --git a/cli/tests/backend_refactor_smoke.rs b/cli/tests/backend_refactor_smoke.rs new file mode 100644 index 000000000..a09ac8267 --- /dev/null +++ b/cli/tests/backend_refactor_smoke.rs @@ -0,0 +1,149 @@ +//! Smoke + characterization tests for the `BrowserBackend` refactor (Units 1 +//! and 3 of the Camoufox engine plan). +//! +//! These tests cover two things the refactor must guarantee: +//! +//! 1. `agent-browser --engine camoufox open ` does **not** panic. It +//! must exit cleanly with a structured JSON error whose message +//! mentions Camoufox — either the Unit 1 `not-yet-implemented` stub or +//! the Unit 3 launch-failure message (Python missing / sidecar failed +//! readiness) depending on how much of the plan has landed. +//! +//! 2. Unknown engines are rejected with a message that enumerates +//! 
`chrome, lightpanda, camoufox` — proves the launch dispatch table +//! has the new arm wired up. +//! +//! Both tests spawn the real CLI binary (no Chrome required) so they run in +//! CI without infrastructure. Chrome + Lightpanda happy-path parity is covered +//! by the existing `#[ignore]`d integration suite in `cli/src/native/e2e_tests.rs` +//! which we ran manually against this refactor to produce the characterization +//! baseline — the invariant those tests enforce (execute_command returns the +//! same response shape before/after Unit 1) is what this smoke file locks in +//! cheaply. + +use std::process::Command; +use tempfile::TempDir; + +const BIN: &str = env!("CARGO_BIN_EXE_agent-browser"); + +fn build_cmd(tmp: &TempDir, args: &[&str]) -> Command { + let socket_dir = tmp.path().join("sockets"); + let home = tmp.path().join("home"); + std::fs::create_dir_all(&socket_dir).unwrap(); + std::fs::create_dir_all(&home).unwrap(); + + let mut cmd = Command::new(BIN); + cmd.args(args) + .env("AGENT_BROWSER_SOCKET_DIR", &socket_dir) + .env("HOME", &home) + .env("USERPROFILE", &home) + .env_remove("AGENT_BROWSER_PROVIDER") + .env_remove("AGENT_BROWSER_CDP") + .env_remove("AGENT_BROWSER_AUTO_CONNECT") + .env_remove("AGENT_BROWSER_ENGINE") + .env("NO_COLOR", "1"); + cmd +} + +#[test] +fn camoufox_engine_returns_structured_error_without_panic() { + let tmp = TempDir::new().unwrap(); + + // Point the sidecar at a non-existent python so Unit 3's launch path + // exits cleanly on environments that don't have Camoufox installed — + // CI will otherwise spend minutes in the Python probe. + let output = build_cmd( + &tmp, + &[ + "--engine", + "camoufox", + "--json", + "open", + "https://example.com", + ], + ) + .env( + "AGENT_BROWSER_CAMOUFOX_PYTHON", + "/definitely/not/a/real/python3", + ) + .output() + .expect("failed to invoke agent-browser"); + + // The command must not panic. 
A panic surfaces as signal-death (exit code + // 101 for explicit panics, 134/137/139 for signals, or None on Unix signal + // termination). A non-zero but structured exit is fine. + assert!( + !matches!(output.status.code(), Some(101)), + "--engine camoufox open panicked (exit 101)\nstdout:\n{}\nstderr:\n{}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + assert!( + output.status.code().is_some(), + "--engine camoufox open died from a signal (no exit code)\nstderr:\n{}", + String::from_utf8_lossy(&output.stderr) + ); + + let stdout = String::from_utf8(output.stdout).expect("stdout should be utf8"); + + // JSON output must parse and carry a failure payload. + let payload: serde_json::Value = serde_json::from_str(&stdout) + .unwrap_or_else(|e| panic!("stdout was not JSON: {}\n---\n{}", e, stdout)); + + assert_eq!( + payload.get("success").and_then(|v| v.as_bool()), + Some(false), + "expected success:false for camoufox launch failure, got payload:\n{}", + stdout + ); + + let error = payload + .get("error") + .and_then(|v| v.as_str()) + .expect("payload must contain an error string"); + // Accept either the Unit 1 stub shape or the Unit 3 "python missing" + // shape; both are characterised by a mention of camoufox or the python + // env var we set above. 
+ assert!( + error.to_lowercase().contains("camoufox") + || error.contains("AGENT_BROWSER_CAMOUFOX_PYTHON"), + "error message did not mention camoufox/python: {:?}", + error + ); +} + +#[test] +fn unknown_engine_lists_camoufox_in_supported_engines() { + let tmp = TempDir::new().unwrap(); + + let output = build_cmd( + &tmp, + &[ + "--engine", + "nonsense", + "--json", + "open", + "https://example.com", + ], + ) + .output() + .expect("failed to invoke agent-browser"); + + let stdout = String::from_utf8(output.stdout).expect("stdout should be utf8"); + + // Either the flag layer rejects it or the launch layer does; both should + // surface a user-visible message that enumerates the valid engines, + // including `camoufox` now that Unit 1 has wired it in. + let payload: serde_json::Value = serde_json::from_str(&stdout) + .unwrap_or_else(|e| panic!("stdout was not JSON: {}\n---\n{}", e, stdout)); + let error = payload + .get("error") + .and_then(|v| v.as_str()) + .unwrap_or_default(); + + assert!( + error.contains("camoufox"), + "unknown-engine error should enumerate `camoufox` among supported engines, got: {:?}", + error + ); +} diff --git a/cli/tests/camoufox_launch.rs b/cli/tests/camoufox_launch.rs new file mode 100644 index 000000000..66470480f --- /dev/null +++ b/cli/tests/camoufox_launch.rs @@ -0,0 +1,313 @@ +//! Camoufox engine integration tests (Unit 3 of the engine plan). +//! +//! Feature-gated: requires `--features camoufox-integration` to run, since +//! they spawn a real Python sidecar + Camoufox browser. On a development +//! machine set `AGENT_BROWSER_CAMOUFOX_PYTHON` to the venv under +//! `packages/camoufox-sidecar/.venv/bin/python3` so the tests don't depend +//! on the system Python. +//! +//! The non-gated tests in this file (error/validation paths that don't need +//! Camoufox installed) always run so regressions in Rust-side wiring surface +//! in CI. 
+ +#![cfg_attr( + not(feature = "camoufox-integration"), + allow(dead_code, unused_imports) +)] + +use std::process::Command; +use tempfile::TempDir; + +const BIN: &str = env!("CARGO_BIN_EXE_agent-browser"); + +fn build_cmd(tmp: &TempDir, args: &[&str]) -> Command { + let socket_dir = tmp.path().join("sockets"); + let home = tmp.path().join("home"); + std::fs::create_dir_all(&socket_dir).unwrap(); + std::fs::create_dir_all(&home).unwrap(); + + let mut cmd = Command::new(BIN); + cmd.args(args) + .env("AGENT_BROWSER_SOCKET_DIR", &socket_dir) + .env("HOME", &home) + .env("USERPROFILE", &home) + .env_remove("AGENT_BROWSER_PROVIDER") + .env_remove("AGENT_BROWSER_CDP") + .env_remove("AGENT_BROWSER_AUTO_CONNECT") + .env_remove("AGENT_BROWSER_ENGINE") + .env("NO_COLOR", "1"); + cmd +} + +/// These tests run unconditionally — they exercise error paths that don't +/// depend on Camoufox being installed, so they catch plumbing regressions +/// without the integration harness. +mod rust_only { + use super::*; + + /// `--engine camoufox --extension foo.crx` must be rejected by + /// `validate_camoufox_options` with a clear message. This is the + /// "Error path" R4-parity test from the plan. + #[test] + fn rejects_extensions_with_camoufox() { + let tmp = TempDir::new().unwrap(); + let output = build_cmd( + &tmp, + &[ + "--engine", + "camoufox", + "--extension", + "/nonexistent/ext", + "--json", + "open", + "https://example.com", + ], + ) + // Pointing at a missing python short-circuits the launch path on + // test environments that don't have Camoufox installed, so the + // error comes from `validate_camoufox_options` rather than the + // sidecar spawn probe. 
+ .env( + "AGENT_BROWSER_CAMOUFOX_PYTHON", + "/definitely/not/a/real/python3", + ) + .output() + .expect("invoke agent-browser"); + + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + stdout.contains("Extensions are not supported with Camoufox"), + "expected extensions-rejection error message, got: {}", + stdout + ); + } + + /// `AGENT_BROWSER_CAMOUFOX_PYTHON=/nonexistent` must surface an + /// actionable error and not partially start any process. + #[test] + fn missing_python_surfaces_actionable_error() { + let tmp = TempDir::new().unwrap(); + let output = build_cmd( + &tmp, + &[ + "--engine", + "camoufox", + "--json", + "open", + "https://example.com", + ], + ) + .env("AGENT_BROWSER_CAMOUFOX_PYTHON", "/nonexistent/python3-xyz") + .output() + .expect("invoke agent-browser"); + + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + stdout.contains("does not exist") || stdout.contains("AGENT_BROWSER_CAMOUFOX_PYTHON"), + "expected python-not-found error, got: {}", + stdout + ); + // Must return a structured error (non-panic, non-signal exit). + assert_ne!(output.status.code(), Some(101), "should not panic"); + } +} + +// ----------------------------------------------------------------------------- +// Feature-gated integration tests. These require a real Camoufox install. +// ----------------------------------------------------------------------------- + +#[cfg(feature = "camoufox-integration")] +mod integration { + use super::*; + use std::sync::Mutex; + use std::thread::sleep; + use std::time::Duration; + + /// Integration tests share the Camoufox browser binary cache and can + /// each leak stray sidecar / Firefox processes if they run concurrently. + /// Cargo's default parallel runner would also make "no process leaked" + /// assertions non-deterministic because each test's ps snapshot would + /// see other tests' in-flight sidecars. Serialise the whole integration + /// suite behind this mutex so each test sees a clean slate. 
+ static INTEGRATION_LOCK: Mutex<()> = Mutex::new(()); + + fn acquire() -> std::sync::MutexGuard<'static, ()> { + match INTEGRATION_LOCK.lock() { + Ok(g) => g, + Err(poisoned) => poisoned.into_inner(), + } + } + + fn fixture_python() -> Option { + // Prefer the package's dev venv if it exists — faster than spinning up + // a new environment per run. + let crate_root = env!("CARGO_MANIFEST_DIR"); + let repo_root = std::path::Path::new(crate_root).parent()?; + let venv_python = repo_root.join("packages/camoufox-sidecar/.venv/bin/python3"); + if venv_python.is_file() { + return Some(venv_python); + } + std::env::var("AGENT_BROWSER_CAMOUFOX_PYTHON") + .ok() + .map(std::path::PathBuf::from) + } + + fn cmd_with_python(tmp: &TempDir, args: &[&str]) -> Command { + let mut cmd = build_cmd(tmp, args); + if let Some(py) = fixture_python() { + cmd.env("AGENT_BROWSER_CAMOUFOX_PYTHON", py); + } + cmd + } + + /// Happy path: open + close completes, and the child Python/Firefox + /// processes belonging to our daemon are gone afterwards. + #[test] + fn open_and_close_cleans_up_children() { + let _guard = acquire(); + let tmp = TempDir::new().unwrap(); + + let open = cmd_with_python( + &tmp, + &[ + "--engine", + "camoufox", + "--session", + "ce_open", + "--json", + "open", + "https://example.com", + ], + ) + .output() + .expect("open"); + assert!( + open.status.success(), + "open failed: stdout={} stderr={}", + String::from_utf8_lossy(&open.stdout), + String::from_utf8_lossy(&open.stderr) + ); + let out = String::from_utf8_lossy(&open.stdout); + assert!( + out.contains("\"success\":true") || out.contains("\"success\": true"), + "open output did not indicate success: {}", + out + ); + + let close = cmd_with_python(&tmp, &["--session", "ce_open", "close"]) + .output() + .expect("close"); + assert!(close.status.success(), "close failed"); + + // Give the OS a moment to reap the grandchildren. 
+ sleep(Duration::from_secs(2)); + let daemon_pids = pgrep_contains("agent-browser --daemon"); + assert!( + daemon_pids.is_empty(), + "daemon process survived close: {:?}", + daemon_pids + ); + let sidecar_pids = pgrep_contains("camoufox_sidecar"); + assert!( + sidecar_pids.is_empty(), + "camoufox_sidecar process survived close: {:?}", + sidecar_pids + ); + } + + /// Loop smoke test from the plan: open → close → reopen 10× with no + /// process leak between iterations. + #[test] + fn loop_smoke_no_process_leaks() { + let _guard = acquire(); + let tmp = TempDir::new().unwrap(); + + for iteration in 0..10 { + let open = cmd_with_python( + &tmp, + &[ + "--engine", + "camoufox", + "--session", + "ce_loop", + "--json", + "open", + "about:blank", + ], + ) + .output() + .unwrap_or_else(|e| panic!("iter {}: open failed: {}", iteration, e)); + assert!( + open.status.success(), + "iter {}: open non-zero: {}", + iteration, + String::from_utf8_lossy(&open.stdout) + ); + + let close = cmd_with_python(&tmp, &["--session", "ce_loop", "close"]) + .output() + .unwrap_or_else(|e| panic!("iter {}: close failed: {}", iteration, e)); + assert!(close.status.success(), "iter {}: close non-zero", iteration); + + sleep(Duration::from_secs(2)); + let sidecar_pids = pgrep_contains("camoufox_sidecar"); + assert!( + sidecar_pids.is_empty(), + "iter {}: camoufox_sidecar survived close: {:?}", + iteration, + sidecar_pids + ); + } + } + + /// `--stealth --engine camoufox` should still succeed. The warning + /// itself is emitted from the daemon process (not the CLI client), so + /// asserting on its text would require parsing the daemon debug log — + /// we leave the warning's string contents locked in by the unit tests + /// on `initialize_camoufox_manager` and limit this integration check + /// to the observable outcome: the combination does not fail. 
+    #[test]
+    fn stealth_plus_camoufox_still_succeeds() {
+        let _guard = acquire();
+        let tmp = TempDir::new().unwrap();
+        let out = cmd_with_python(
+            &tmp,
+            &[
+                "--engine",
+                "camoufox",
+                "--stealth",
+                "--session",
+                "ce_stealth",
+                "--json",
+                "open",
+                "about:blank",
+            ],
+        )
+        .output()
+        .expect("open");
+        assert!(
+            out.status.success(),
+            "open with --stealth failed: {}",
+            String::from_utf8_lossy(&out.stdout)
+        );
+        // Best-effort teardown; this test makes no claim about close itself.
+        let _ = cmd_with_python(&tmp, &["--session", "ce_stealth", "close"]).output();
+    }
+}
+
+/// Runs `pgrep -f <needle>` and returns the matching PIDs as strings. We prefer
+/// `pgrep` over parsing `ps -A` output because `pgrep`'s exit code is
+/// unambiguous (0 = found, 1 = none) and its matching scope is the full
+/// command line, which is what we need to pick up `python -m camoufox_sidecar`.
+///
+/// Panics if `pgrep` cannot be spawned or exits with any other status, so a
+/// broken probe cannot make the leak assertions pass vacuously.
+#[cfg(feature = "camoufox-integration")]
+fn pgrep_contains(needle: &str) -> Vec<String> {
+    let output = Command::new("pgrep")
+        .args(["-f", needle])
+        .output()
+        .expect("pgrep");
+    // Anything other than 0 (matches) or 1 (no matches) means the probe
+    // itself failed; an empty stdout must not be read as "no processes".
+    assert!(
+        matches!(output.status.code(), Some(0) | Some(1)),
+        "pgrep -f {:?} failed with {}: {}",
+        needle,
+        output.status,
+        String::from_utf8_lossy(&output.stderr)
+    );
+    String::from_utf8_lossy(&output.stdout)
+        .lines()
+        .map(str::trim)
+        .filter(|line| !line.is_empty())
+        .map(str::to_string)
+        .collect()
+}
diff --git a/cli/tests/camoufox_parity.rs b/cli/tests/camoufox_parity.rs
new file mode 100644
index 000000000..525f404ba
--- /dev/null
+++ b/cli/tests/camoufox_parity.rs
@@ -0,0 +1,309 @@
+//! Rust-level parity + command-surface tests for the Camoufox engine (Unit 4
+//! of the engine plan).
+//!
+//! The *happy-path* tests drive an actual Camoufox browser via the `--engine
+//! camoufox` CLI surface and compare the snapshot output against the Chrome
+//! golden at `cli/tests/fixtures/form-chrome-golden.json`. They're gated on
+//! `--features camoufox-integration` to match the existing Unit 3 suite at
+//! `cli/tests/camoufox_launch.rs`.
+//!
+//! Structural parity is the contract we care about:
+//!
+//! - same number of `@eN` refs,
+//! - same set of `(role, name)` pairs,
+//!
+//! **not** identical ref ordering. Chrome's accessibility tree walk ends up
+//! visiting cursor-interactive elements after AX-native ones, so the Submit
+//! button lands at `e3` on Chrome and `e6` on Camoufox for this fixture.
+//! Comparing anything finer than "did both engines see the same set of
+//! interactive things?" is a recipe for flakes on engine upgrades.
+
+#![cfg_attr(
+    not(feature = "camoufox-integration"),
+    allow(dead_code, unused_imports)
+)]
+
+use std::process::Command;
+use tempfile::TempDir;
+
+const BIN: &str = env!("CARGO_BIN_EXE_agent-browser");
+
+fn build_cmd(tmp: &TempDir, args: &[&str]) -> Command {
+    let socket_dir = tmp.path().join("sockets");
+    let home = tmp.path().join("home");
+    std::fs::create_dir_all(&socket_dir).unwrap();
+    std::fs::create_dir_all(&home).unwrap();
+
+    let mut cmd = Command::new(BIN);
+    cmd.args(args)
+        .env("AGENT_BROWSER_SOCKET_DIR", &socket_dir)
+        .env("HOME", &home)
+        .env("USERPROFILE", &home)
+        .env_remove("AGENT_BROWSER_PROVIDER")
+        .env_remove("AGENT_BROWSER_CDP")
+        .env_remove("AGENT_BROWSER_AUTO_CONNECT")
+        .env_remove("AGENT_BROWSER_ENGINE")
+        .env("NO_COLOR", "1");
+    cmd
+}
+
+#[cfg(feature = "camoufox-integration")]
+mod integration {
+    use super::*;
+    use serde_json::Value;
+    use std::collections::BTreeSet;
+    use std::path::PathBuf;
+    use std::sync::Mutex;
+    use std::thread::sleep;
+    use std::time::Duration;
+
+    /// Camoufox integration tests cannot run in parallel — they share the
+    /// same Camoufox browser cache and any overlapping ``ps`` probes (the
+    /// ``leak`` assertions in Unit 3) would race. Mirror the
+    /// ``camoufox_launch.rs`` INTEGRATION_LOCK so Cargo's default parallel
+    /// runner doesn't wedge the suite.
+    static INTEGRATION_LOCK: Mutex<()> = Mutex::new(());
+
+    /// Takes `INTEGRATION_LOCK`, recovering the guard if a previous test
+    /// panicked while holding it.
+    fn acquire() -> std::sync::MutexGuard<'static, ()> {
+        match INTEGRATION_LOCK.lock() {
+            Ok(g) => g,
+            Err(poisoned) => poisoned.into_inner(),
+        }
+    }
+
+    /// Picks the sidecar interpreter: the repo's dev venv when it exists,
+    /// otherwise the value of `AGENT_BROWSER_CAMOUFOX_PYTHON`, if set.
+    fn fixture_python() -> Option<PathBuf> {
+        let crate_root = env!("CARGO_MANIFEST_DIR");
+        let repo_root = std::path::Path::new(crate_root).parent()?;
+        let venv_python = repo_root.join("packages/camoufox-sidecar/.venv/bin/python3");
+        if venv_python.is_file() {
+            return Some(venv_python);
+        }
+        std::env::var("AGENT_BROWSER_CAMOUFOX_PYTHON")
+            .ok()
+            .map(PathBuf::from)
+    }
+
+    /// `build_cmd` plus `AGENT_BROWSER_CAMOUFOX_PYTHON` when an interpreter
+    /// was found by `fixture_python`.
+    fn cmd_with_python(tmp: &TempDir, args: &[&str]) -> Command {
+        let mut cmd = build_cmd(tmp, args);
+        if let Some(py) = fixture_python() {
+            cmd.env("AGENT_BROWSER_CAMOUFOX_PYTHON", py);
+        }
+        cmd
+    }
+
+    /// `file://` URL of `tests/fixtures/form.html` under the crate root.
+    fn fixture_url() -> String {
+        let crate_root = env!("CARGO_MANIFEST_DIR");
+        let p = std::path::Path::new(crate_root).join("tests/fixtures/form.html");
+        format!("file://{}", p.display())
+    }
+
+    /// Loads and parses `tests/fixtures/form-chrome-golden.json`; panics if
+    /// the file is missing or is not valid JSON.
+    fn chrome_golden() -> Value {
+        let crate_root = env!("CARGO_MANIFEST_DIR");
+        let p = std::path::Path::new(crate_root).join("tests/fixtures/form-chrome-golden.json");
+        let raw = std::fs::read_to_string(p).expect("read chrome golden");
+        serde_json::from_str(&raw).expect("parse chrome golden")
+    }
+
+    /// Collapses a `refs` object into the set of `(role, name)` pairs it
+    /// contains, ignoring the ref ids. Names are trimmed; a missing or
+    /// non-string field becomes the empty string.
+    fn role_name_set(refs: &Value) -> BTreeSet<(String, String)> {
+        let obj = refs.as_object().expect("refs is object");
+        obj.values()
+            .map(|entry| {
+                let role = entry
+                    .get("role")
+                    .and_then(|v| v.as_str())
+                    .unwrap_or("")
+                    .to_string();
+                let name = entry
+                    .get("name")
+                    .and_then(|v| v.as_str())
+                    .unwrap_or("")
+                    .trim()
+                    .to_string();
+                (role, name)
+            })
+            .collect()
+    }
+
+    /// Argument prefix for the command that starts a Camoufox session,
+    /// followed by `extras`.
+    fn session_args<'a>(session: &'a str, extras: &'a [&'a str]) -> Vec<&'a str> {
+        let mut v: Vec<&str> = vec!["--engine", "camoufox", "--session", session, "--json"];
+        v.extend(extras);
+        v
+    }
+
+    /// Opens the form fixture in a new Camoufox session; panics with the
+    /// captured stdout/stderr if the CLI exits non-zero.
+    fn open_fixture(tmp: &TempDir, session: &str) {
+        let url = fixture_url();
+        let open_args = ["open", url.as_str()];
+        let args = session_args(session, &open_args);
+        let out = cmd_with_python(tmp, &args).output().expect("open");
+        assert!(
+            out.status.success(),
+            "open failed: stdout={} stderr={}",
+            String::from_utf8_lossy(&out.stdout),
+            String::from_utf8_lossy(&out.stderr),
+        );
+    }
+
+    /// Runs `extras` against an existing session with `--json` and returns
+    /// the parsed response. Panics on a non-zero exit or unparsable stdout.
+    fn run_json(tmp: &TempDir, session: &str, extras: &[&str]) -> Value {
+        let args: Vec<&str> = {
+            let mut v: Vec<&str> = vec!["--session", session, "--json"];
+            v.extend(extras);
+            v
+        };
+        let out = cmd_with_python(tmp, &args).output().expect("run_json");
+        assert!(
+            out.status.success(),
+            "cmd {:?} failed: stdout={} stderr={}",
+            extras,
+            String::from_utf8_lossy(&out.stdout),
+            String::from_utf8_lossy(&out.stderr),
+        );
+        let stdout = String::from_utf8_lossy(&out.stdout).to_string();
+        serde_json::from_str(&stdout)
+            .unwrap_or_else(|e| panic!("invalid JSON response: {} — body: {}", e, stdout))
+    }
+
+    /// Best-effort session teardown followed by a one-second pause; the
+    /// result of the `close` command is deliberately ignored.
+    fn close(tmp: &TempDir, session: &str) {
+        let _ = cmd_with_python(tmp, &["--session", session, "close"]).output();
+        sleep(Duration::from_secs(1));
+    }
+
+    /// Parity: snapshot role/name set matches Chrome golden on the form fixture.
+    #[test]
+    fn snapshot_refs_match_chrome_golden_on_fixture() {
+        let _guard = acquire();
+        let tmp = TempDir::new().unwrap();
+        let session = "cam_parity_refs";
+        open_fixture(&tmp, session);
+
+        let snap = run_json(&tmp, session, &["snapshot"]);
+        // Tear down before asserting so a failure doesn't leak the session.
+        close(&tmp, session);
+
+        let refs = snap
+            .get("data")
+            .and_then(|d| d.get("refs"))
+            .expect("response has data.refs");
+        let got = role_name_set(refs);
+
+        let golden = chrome_golden();
+        let golden_refs = golden
+            .get("data")
+            .and_then(|d| d.get("refs"))
+            .expect("golden has data.refs");
+        let expected = role_name_set(golden_refs);
+
+        assert_eq!(
+            got, expected,
+            "Camoufox snapshot refs diverge from Chrome golden (set-level parity)",
+        );
+    }
+
+    /// Ref-based click+fill+gettext pipeline exercises the sidecar's ref cache
+    /// via the CLI surface.
+    #[test]
+    fn click_fill_gettext_by_ref_roundtrip() {
+        let _guard = acquire();
+        let tmp = TempDir::new().unwrap();
+        let session = "cam_parity_click";
+        open_fixture(&tmp, session);
+
+        let snapshot = run_json(&tmp, session, &["snapshot"]);
+        let ref_map = snapshot["data"]["refs"].as_object().expect("refs");
+
+        // First ref id whose entry has exactly this role and (trimmed) name.
+        let lookup = |role: &str, name: &str| -> Option<String> {
+            ref_map.iter().find_map(|(id, entry)| {
+                let role_matches = entry["role"].as_str() == Some(role);
+                let name_matches = entry["name"].as_str().map(str::trim) == Some(name);
+                (role_matches && name_matches).then(|| id.clone())
+            })
+        };
+        let email_ref = lookup("textbox", "Email").expect("email textbox ref");
+        let submit_ref = lookup("button", "Submit").expect("submit button ref");
+
+        // The CLI addresses snapshot refs as `@<id>`.
+        let email_token = format!("@{}", email_ref);
+        let submit_token = format!("@{}", submit_ref);
+
+        run_json(&tmp, session, &["fill", &email_token, "test@example.com"]);
+        run_json(&tmp, session, &["click", &submit_token]);
+
+        let status = run_json(&tmp, session, &["get", "text", "#status"]);
+        close(&tmp, session);
+
+        let text = status["data"]["text"].as_str().unwrap_or_default();
+        assert_eq!(text, "Submitted", "status didn't update after ref-click");
+    }
+
+    /// CSS-selector path: ``click "#submit"`` must work without a prior
+    /// snapshot.
+    #[test]
+    fn click_by_css_selector_without_snapshot() {
+        let _guard = acquire();
+        let tmp = TempDir::new().unwrap();
+        let session = "cam_parity_css";
+        open_fixture(&tmp, session);
+
+        let _ = run_json(&tmp, session, &["click", "#submit"]);
+        let status = run_json(&tmp, session, &["get", "text", "#status"]);
+        close(&tmp, session);
+
+        let text = status
+            .get("data")
+            .and_then(|d| d.get("text"))
+            .and_then(|v| v.as_str())
+            .unwrap_or_default();
+        assert_eq!(text, "Submitted");
+    }
+
+    /// Stale-ref contract: refs from before a navigation must surface
+    /// ``ref-stale`` rather than silently acting on a reloaded element.
+    #[test]
+    fn ref_stale_after_navigation() {
+        let _guard = acquire();
+        let tmp = TempDir::new().unwrap();
+        let session = "cam_parity_stale";
+        open_fixture(&tmp, session);
+
+        let snap = run_json(&tmp, session, &["snapshot"]);
+        let refs = snap
+            .get("data")
+            .and_then(|d| d.get("refs"))
+            .and_then(|v| v.as_object())
+            .expect("refs");
+        let any_ref = refs.keys().next().cloned().expect("at least one ref");
+        let token = format!("@{}", any_ref);
+
+        // data: URL dodges the "navigating to about:blank from about:blank"
+        // Playwright interruption.
+        let _ = run_json(
+            &tmp,
+            session,
+            &["navigate", "data:text/html,after"],
+        );
+
+        // The CLI turns `success:false` responses into a non-zero exit
+        // status, which `run_json` treats as a failure, so invoke the
+        // command directly and inspect its stdout.
+        let out = cmd_with_python(&tmp, &["--session", session, "--json", "click", &token])
+            .output()
+            .expect("click after nav");
+        // Tear down before asserting, like the sibling tests, so a failed
+        // assertion does not leave the session running.
+        close(&tmp, session);
+
+        let stdout = String::from_utf8_lossy(&out.stdout);
+        assert!(
+            stdout.contains("ref-stale"),
+            "expected ref-stale error, got: {}",
+            stdout
+        );
+    }
+}
diff --git a/cli/tests/camoufox_tabs.rs b/cli/tests/camoufox_tabs.rs
new file mode 100644
index 000000000..6fb206e69
--- /dev/null
+++ b/cli/tests/camoufox_tabs.rs
@@ -0,0 +1,288 @@
+//! Rust-level tab + screenshot + engine-incompatibility tests for the
+//! Camoufox engine (Unit 5 of the engine plan).
+//!
+//! The *integration* block drives an actual Camoufox browser via the
+//! `--engine camoufox` CLI surface and needs the sidecar + browser binary
+//! available. It's gated on `--features camoufox-integration` so CI only
+//! runs it when the environment is provisioned.
+//!
+//! The always-on `guards` module is currently an empty placeholder: unit
+//! coverage for the Chrome-only surfaces (`cdp_url`, `screencast_*`,
+//! `inspect`) lives in the backend module itself.
+
+#![cfg_attr(
+    not(feature = "camoufox-integration"),
+    allow(dead_code, unused_imports)
+)]
+
+use std::process::Command;
+use tempfile::TempDir;
+
+const BIN: &str = env!("CARGO_BIN_EXE_agent-browser");
+
+fn build_cmd(tmp: &TempDir, args: &[&str]) -> Command {
+    let socket_dir = tmp.path().join("sockets");
+    let home = tmp.path().join("home");
+    std::fs::create_dir_all(&socket_dir).unwrap();
+    std::fs::create_dir_all(&home).unwrap();
+
+    let mut cmd = Command::new(BIN);
+    cmd.args(args)
+        .env("AGENT_BROWSER_SOCKET_DIR", &socket_dir)
+        .env("HOME", &home)
+        .env("USERPROFILE", &home)
+        .env_remove("AGENT_BROWSER_PROVIDER")
+        .env_remove("AGENT_BROWSER_CDP")
+        .env_remove("AGENT_BROWSER_AUTO_CONNECT")
+        .env_remove("AGENT_BROWSER_ENGINE")
+        .env("NO_COLOR", "1");
+    cmd
+}
+
+#[cfg(feature = "camoufox-integration")]
+mod integration {
+    use super::*;
+    use serde_json::Value;
+    use std::path::PathBuf;
+    use std::sync::Mutex;
+    use std::thread::sleep;
+    use std::time::Duration;
+
+    /// Serialise with the other Camoufox integration suites — they share the
+    /// Camoufox browser cache and any parallel ``ps`` probes would race.
+    static INTEGRATION_LOCK: Mutex<()> = Mutex::new(());
+
+    /// Takes `INTEGRATION_LOCK`, recovering the guard if a previous test
+    /// panicked while holding it.
+    fn acquire() -> std::sync::MutexGuard<'static, ()> {
+        match INTEGRATION_LOCK.lock() {
+            Ok(g) => g,
+            Err(poisoned) => poisoned.into_inner(),
+        }
+    }
+
+    /// Picks the sidecar interpreter: the repo's dev venv when it exists,
+    /// otherwise the value of `AGENT_BROWSER_CAMOUFOX_PYTHON`, if set.
+    fn fixture_python() -> Option<PathBuf> {
+        let crate_root = env!("CARGO_MANIFEST_DIR");
+        let repo_root = std::path::Path::new(crate_root).parent()?;
+        let venv_python = repo_root.join("packages/camoufox-sidecar/.venv/bin/python3");
+        if venv_python.is_file() {
+            return Some(venv_python);
+        }
+        std::env::var("AGENT_BROWSER_CAMOUFOX_PYTHON")
+            .ok()
+            .map(PathBuf::from)
+    }
+
+    /// `build_cmd` plus `AGENT_BROWSER_CAMOUFOX_PYTHON` when an interpreter
+    /// was found by `fixture_python`.
+    fn cmd_with_python(tmp: &TempDir, args: &[&str]) -> Command {
+        let mut cmd = build_cmd(tmp, args);
+        if let Some(py) = fixture_python() {
+            cmd.env("AGENT_BROWSER_CAMOUFOX_PYTHON", py);
+        }
+        cmd
+    }
+
+    /// Argument prefix for the command that starts a Camoufox session,
+    /// followed by `extras`.
+    fn session_args<'a>(session: &'a str, extras: &'a [&'a str]) -> Vec<&'a str> {
+        let mut v: Vec<&str> = vec!["--engine", "camoufox", "--session", session, "--json"];
+        v.extend(extras);
+        v
+    }
+
+    /// Opens a small `data:` page in a new Camoufox session; panics with the
+    /// captured stdout/stderr if the CLI exits non-zero.
+    fn open_blank(tmp: &TempDir, session: &str) {
+        let open_args = ["open", "data:text/html,t1"];
+        let args = session_args(session, &open_args);
+        let out = cmd_with_python(tmp, &args).output().expect("open");
+        assert!(
+            out.status.success(),
+            "open failed: stdout={} stderr={}",
+            String::from_utf8_lossy(&out.stdout),
+            String::from_utf8_lossy(&out.stderr),
+        );
+    }
+
+    /// Runs `extras` against an existing session with `--json` and returns
+    /// the parsed response. Panics on a non-zero exit or unparsable stdout.
+    fn run_json(tmp: &TempDir, session: &str, extras: &[&str]) -> Value {
+        let args: Vec<&str> = {
+            let mut v: Vec<&str> = vec!["--session", session, "--json"];
+            v.extend(extras);
+            v
+        };
+        let out = cmd_with_python(tmp, &args).output().expect("run_json");
+        assert!(
+            out.status.success(),
+            "cmd {:?} failed: stdout={} stderr={}",
+            extras,
+            String::from_utf8_lossy(&out.stdout),
+            String::from_utf8_lossy(&out.stderr),
+        );
+        let stdout = String::from_utf8_lossy(&out.stdout).to_string();
+        serde_json::from_str(&stdout)
+            .unwrap_or_else(|e| panic!("invalid JSON response: {} — body: {}", e, stdout))
+    }
+
+    /// Like `run_json`, but returns the raw process output without checking
+    /// the exit status — for commands that are expected to fail.
+    fn run_raw(tmp: &TempDir, session: &str, extras: &[&str]) -> std::process::Output {
+        let args: Vec<&str> = {
+            let mut v: Vec<&str> = vec!["--session", session, "--json"];
+            v.extend(extras);
+            v
+        };
+        cmd_with_python(tmp, &args).output().expect("run_raw")
+    }
+
+    /// Best-effort session teardown followed by a one-second pause; the
+    /// result of the `close` command is deliberately ignored.
+    fn close(tmp: &TempDir, session: &str) {
+        let _ = cmd_with_python(tmp, &["--session", session, "close"]).output();
+        sleep(Duration::from_secs(1));
+    }
+
+    /// `open` + `tab new` + `tab list` reports both `t1` and `t2`.
+    #[test]
+    fn tab_list_after_open_and_new() {
+        let _guard = acquire();
+        let tmp = TempDir::new().unwrap();
+        let session = "cam_tabs_list";
+        open_blank(&tmp, session);
+
+        let _ = run_json(
+            &tmp,
+            session,
+            &["tab", "new", "data:text/html,t2"],
+        );
+        let list = run_json(&tmp, session, &["tab", "list"]);
+        close(&tmp, session);
+
+        let tabs = list
+            .get("data")
+            .and_then(|d| d.get("tabs"))
+            .and_then(|v| v.as_array())
+            .expect("tabs array");
+        // Entries without a string `tabId` are dropped, so a malformed entry
+        // shows up as a length mismatch in the assertion below.
+        let ids: Vec<String> = tabs
+            .iter()
+            .filter_map(|t| t.get("tabId").and_then(|v| v.as_str()).map(str::to_string))
+            .collect();
+        assert_eq!(ids, vec!["t1".to_string(), "t2".to_string()]);
+    }
+
+    /// Tab ids are never reused after close (`open`, `new`, close `t2`, `new` → `t3`).
+    #[test]
+    fn tab_ids_never_reused_after_close() {
+        let _guard = acquire();
+        let tmp = TempDir::new().unwrap();
+        let session = "cam_tabs_never_reuse";
+        open_blank(&tmp, session);
+
+        let _ = run_json(
+            &tmp,
+            session,
+            &["tab", "new", "data:text/html,t2"],
+        );
+        let _ = run_json(&tmp, session, &["tab", "close", "t2"]);
+        let created = run_json(
+            &tmp,
+            session,
+            &["tab", "new", "data:text/html,t3"],
+        );
+        close(&tmp, session);
+
+        let new_id = created
+            .get("data")
+            .and_then(|d| d.get("tabId"))
+            .and_then(|v| v.as_str())
+            .unwrap_or_default();
+        assert_eq!(new_id, "t3", "counter must advance past the closed t2 slot");
+    }
+
+    /// `tab close` on the only remaining tab errors — tearing the session
+    /// down is the explicit `close` action's job, not `tab close`'s.
+    #[test]
+    fn tab_close_refuses_last_tab() {
+        let _guard = acquire();
+        let tmp = TempDir::new().unwrap();
+        let session = "cam_tabs_last";
+        open_blank(&tmp, session);
+
+        // `run_raw` because the command is expected to fail.
+        let out = run_raw(&tmp, session, &["tab", "close", "t1"]);
+        close(&tmp, session);
+
+        let stdout = String::from_utf8_lossy(&out.stdout);
+        assert!(
+            // Also covers the longer "Cannot close the last tab" wording.
+            stdout.contains("last tab"),
+            "expected last-tab error, got: {}",
+            stdout,
+        );
+    }
+
+    /// `screenshot out.png` writes a PNG; `screenshot --full out.png` produces
+    /// a larger file than the viewport-only variant.
+    #[test]
+    fn screenshot_and_full_page_variants() {
+        let _guard = acquire();
+        let tmp = TempDir::new().unwrap();
+        let session = "cam_tabs_shot";
+        // The page must be taller than the viewport, otherwise both captures
+        // cover the same area and the size comparison below cannot hold.
+        let open_args = [
+            "open",
+            "data:text/html,<div style=height:4000px>tall</div>",
+        ];
+        let args = session_args(session, &open_args);
+        let out = cmd_with_python(&tmp, &args).output().expect("open");
+        assert!(
+            out.status.success(),
+            "open failed: stdout={} stderr={}",
+            String::from_utf8_lossy(&out.stdout),
+            String::from_utf8_lossy(&out.stderr),
+        );
+
+        let viewport_path = tmp.path().join("viewport.png");
+        let full_path = tmp.path().join("full.png");
+        let _ = run_json(
+            &tmp,
+            session,
+            &["screenshot", viewport_path.to_str().unwrap()],
+        );
+        let _ = run_json(
+            &tmp,
+            session,
+            &["screenshot", "--full", full_path.to_str().unwrap()],
+        );
+        close(&tmp, session);
+
+        let vp = std::fs::read(&viewport_path).expect("viewport png written");
+        let fp = std::fs::read(&full_path).expect("full-page png written");
+        // `starts_with` instead of slicing `[..8]`: a truncated or empty file
+        // fails with the assertion message rather than an index panic.
+        assert!(vp.starts_with(b"\x89PNG\r\n\x1a\n"), "viewport is a PNG");
+        assert!(fp.starts_with(b"\x89PNG\r\n\x1a\n"), "full-page is a PNG");
+        assert!(
+            fp.len() > vp.len(),
+            "full-page PNG ({}) should be larger than viewport ({})",
+            fp.len(),
+            vp.len(),
+        );
+    }
+
+    /// `cdp_url` (the only Chrome-only surface reachable from the CLI today)
+    /// surfaces an `engine-incompatible` error on Camoufox, not a panic.
+    /// ``screencast_*`` and ``inspect`` share the same ``require_cdp_for``
+    /// gate but aren't exposed as CLI verbs at the time this test was written.
+    #[test]
+    fn cdp_url_returns_engine_incompatible() {
+        let _guard = acquire();
+        let tmp = TempDir::new().unwrap();
+        let session = "cam_tabs_cdp_url";
+        open_blank(&tmp, session);
+
+        // `run_raw`: the command is expected to fail, so the exit status is
+        // not checked here.
+        let out = run_raw(&tmp, session, &["get", "cdp-url"]);
+        close(&tmp, session);
+
+        // Accept the error code on either stream.
+        let stdout = String::from_utf8_lossy(&out.stdout);
+        let stderr = String::from_utf8_lossy(&out.stderr);
+        assert!(
+            stdout.contains("engine-incompatible") || stderr.contains("engine-incompatible"),
+            "expected engine-incompatible error, got stdout={} stderr={}",
+            stdout,
+            stderr,
+        );
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Always-on placeholder for Chrome-only surface gating on BrowserBackend
+// variants. The module below contains no tests today; see the note inside
+// it for where the `require_cdp_for` coverage lives.
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod guards {
+    // Unit-test-level coverage lives in the backend module itself; the CLI
+    // end-to-end "engine-incompatible" assertion is in the integration
+    // block above. Keeping this module present keeps the file compiling
+    // even when the integration feature is off.
+}
diff --git a/cli/tests/doctor_camoufox.rs b/cli/tests/doctor_camoufox.rs
new file mode 100644
index 000000000..2a300195a
--- /dev/null
+++ b/cli/tests/doctor_camoufox.rs
@@ -0,0 +1,404 @@
+//! Integration tests for Unit 6: `doctor` Camoufox probe + `"engine"` label
+//! in `--json` payloads.
+//!
+//! The CLI binary is invoked via `env!("CARGO_BIN_EXE_*")`. We override
+//! `AGENT_BROWSER_SOCKET_DIR`, `HOME`, and (where needed) `PATH` so the
+//! tests don't observe or mutate the host's real agent-browser state.
+//!
+//! The Chrome `--json` engine-label assertion falls back gracefully if
+//! Chrome isn't installed on this machine — we still verify the CLI's
+//! engine label shape, just from the daemon's error response instead of a
+//! successful navigation.
+
+use std::process::Command;
+use tempfile::TempDir;
+
+const BIN: &str = env!("CARGO_BIN_EXE_agent-browser");
+
+/// Builds an `agent-browser` invocation isolated under `tmp`: the socket dir
+/// and home directory point into the temp dir, and the provider / CDP /
+/// auto-connect / engine environment overrides are removed.
+fn build_doctor_cmd(tmp: &TempDir, args: &[&str]) -> Command {
+    let socket_dir = tmp.path().join("sockets");
+    let home = tmp.path().join("home");
+    std::fs::create_dir_all(&socket_dir).unwrap();
+    std::fs::create_dir_all(&home).unwrap();
+
+    let mut cmd = Command::new(BIN);
+    cmd.args(args)
+        .env("AGENT_BROWSER_SOCKET_DIR", &socket_dir)
+        .env("HOME", &home)
+        .env("USERPROFILE", &home)
+        .env_remove("AGENT_BROWSER_PROVIDER")
+        .env_remove("AGENT_BROWSER_CDP")
+        .env_remove("AGENT_BROWSER_AUTO_CONNECT")
+        .env_remove("AGENT_BROWSER_ENGINE")
+        .env("NO_COLOR", "1");
+    cmd
+}
+
+/// Parses captured stdout as a single JSON document; panics with the raw
+/// text when it is not valid UTF-8 JSON.
+fn parse_doctor_json(stdout: &[u8]) -> serde_json::Value {
+    let s = std::str::from_utf8(stdout).expect("stdout utf8");
+    serde_json::from_str(s).unwrap_or_else(|e| {
+        panic!("stdout was not JSON: {}\n---\n{}", e, s);
+    })
+}
+
+/// Returns every entry of `payload["checks"]` whose `id` equals `id`.
+/// Panics if `checks` is not an array.
+fn checks_by_id<'a>(payload: &'a serde_json::Value, id: &str) -> Vec<&'a serde_json::Value> {
+    payload["checks"]
+        .as_array()
+        .expect("checks is array")
+        .iter()
+        .filter(|c| c["id"].as_str() == Some(id))
+        .collect()
+}
+
+/// Returns the first entry of `payload["checks"]` whose `category` is
+/// `"Camoufox"` and whose `id` starts with `camoufox.`, if any.
+fn find_camoufox_check(payload: &serde_json::Value) -> Option<&serde_json::Value> {
+    payload["checks"]
+        .as_array()
+        .expect("checks is array")
+        .iter()
+        .find(|c| {
+            c["category"].as_str() == Some("Camoufox")
+                && c["id"]
+                    .as_str()
+                    .map(|s| s.starts_with("camoufox."))
+                    .unwrap_or(false)
+        })
+}
+
+// ---------------------------------------------------------------------------
+// Scenario 1 (happy path): doctor reports a present camoufox install.
+// ---------------------------------------------------------------------------
+
+/// Camoufox installed in the fixture venv → doctor should pass all three
+/// probes (python / package / binary). Feature-gated because the probe
+/// depends on a real Camoufox fetch, which is only guaranteed to be
+/// available in the same CI profile as the other camoufox integration
+/// suites.
+#[cfg(feature = "camoufox-integration")]
+#[test]
+fn doctor_reports_camoufox_present_when_installed() {
+    let tmp = TempDir::new().unwrap();
+
+    let mut cmd = build_doctor_cmd(&tmp, &["doctor", "--offline", "--quick", "--json"]);
+    let crate_root = env!("CARGO_MANIFEST_DIR");
+    let repo_root = std::path::Path::new(crate_root)
+        .parent()
+        .expect("repo root");
+    let venv_python = repo_root.join("packages/camoufox-sidecar/.venv/bin/python3");
+    // Fail fast with an actionable message instead of a confusing probe result.
+    assert!(
+        venv_python.is_file(),
+        "fixture venv missing at {}; run the package tests once to bootstrap it",
+        venv_python.display()
+    );
+    cmd.env("AGENT_BROWSER_CAMOUFOX_PYTHON", &venv_python);
+
+    let output = cmd.output().expect("invoke doctor");
+    let payload = parse_doctor_json(&output.stdout);
+
+    let python = checks_by_id(&payload, "camoufox.python");
+    assert_eq!(python.len(), 1, "expected one camoufox.python check");
+    assert_eq!(
+        python[0]["status"].as_str(),
+        Some("pass"),
+        "camoufox.python should be pass, got {}",
+        python[0]
+    );
+
+    let package = checks_by_id(&payload, "camoufox.package");
+    assert_eq!(package.len(), 1);
+    assert_eq!(package[0]["status"].as_str(), Some("pass"));
+
+    let binary = checks_by_id(&payload, "camoufox.binary");
+    assert_eq!(binary.len(), 1);
+    assert_eq!(
+        binary[0]["status"].as_str(),
+        Some("pass"),
+        "camoufox.binary should be pass, got {}",
+        binary[0]
+    );
+    let msg = binary[0]["message"].as_str().unwrap();
+    assert!(
+        msg.contains("browser binary at"),
+        "binary message should include path, got: {}",
+        msg
+    );
+}
+
+// ---------------------------------------------------------------------------
+// Scenario 2 (error paths): each failure mode produces a distinct reason.
+// ---------------------------------------------------------------------------
+
+/// Missing python → only the python check appears, as a non-fatal `info`
+/// with a distinct reason mentioning `python3 not found`.
+#[test]
+fn doctor_missing_python_reports_distinct_reason() {
+    let tmp = TempDir::new().unwrap();
+
+    let mut cmd = build_doctor_cmd(&tmp, &["doctor", "--offline", "--quick", "--json"]);
+    // Clear PATH so the PATH fallback can't find python3. Also clear the
+    // explicit env var so resolve_python()'s first branch doesn't fire.
+    cmd.env("PATH", "")
+        .env_remove("AGENT_BROWSER_CAMOUFOX_PYTHON");
+
+    let output = cmd.output().expect("invoke doctor");
+    // `doctor` may still exit non-zero due to other host checks (e.g. no
+    // Chrome). That's fine — we only care about the camoufox probe.
+    let payload = parse_doctor_json(&output.stdout);
+
+    let python = checks_by_id(&payload, "camoufox.python");
+    assert_eq!(
+        python.len(),
+        1,
+        "expected one camoufox.python check, got {:?}",
+        payload["checks"]
+    );
+    assert_eq!(python[0]["status"].as_str(), Some("info"));
+    let msg = python[0]["message"].as_str().unwrap();
+    assert!(
+        msg.contains("camoufox: not available") && msg.contains("python3 not found"),
+        "missing-python reason should be distinct, got: {}",
+        msg
+    );
+
+    // When python is missing we short-circuit — package/binary checks must
+    // not appear, otherwise the user can't tell the root cause.
+    assert!(
+        checks_by_id(&payload, "camoufox.package").is_empty(),
+        "camoufox.package should be skipped when python is missing"
+    );
+    assert!(
+        checks_by_id(&payload, "camoufox.binary").is_empty(),
+        "camoufox.binary should be skipped when python is missing"
+    );
+}
+
+/// Python path pointing at a non-existent file is the same category as
+/// "no python3 on PATH" for doctor purposes: the probe can't run and we
+/// must say so clearly. Uses a distinct reason (spawn-failed-shape) from
+/// "package missing" / "binary missing".
+#[test]
+fn doctor_nonexistent_python_path_reports_distinct_reason() {
+    let tmp = TempDir::new().unwrap();
+
+    let mut cmd = build_doctor_cmd(&tmp, &["doctor", "--offline", "--quick", "--json"]);
+    cmd.env("AGENT_BROWSER_CAMOUFOX_PYTHON", "/does/not/exist/python3");
+
+    let output = cmd.output().expect("invoke doctor");
+    let payload = parse_doctor_json(&output.stdout);
+
+    let python = checks_by_id(&payload, "camoufox.python");
+    assert_eq!(python.len(), 1);
+    assert_eq!(python[0]["status"].as_str(), Some("info"));
+    let msg = python[0]["message"].as_str().unwrap();
+    // The message must name the offending path so the user can fix it.
+    assert!(
+        msg.contains("not runnable") && msg.contains("/does/not/exist/python3"),
+        "bad python path should surface `not runnable` reason, got: {}",
+        msg
+    );
+
+    // The dependent probes must not be reported when python can't run.
+    assert!(checks_by_id(&payload, "camoufox.package").is_empty());
+    assert!(checks_by_id(&payload, "camoufox.binary").is_empty());
+}
+
+/// Python present but `import camoufox` fails → package probe surfaces a
+/// distinct reason, and the binary probe is skipped.
+#[test]
+fn doctor_missing_camoufox_package_reports_distinct_reason() {
+    // Run on a python that is nearly certain not to have camoufox
+    // installed: the system python3 (as opposed to the fixture venv that
+    // the camoufox-integration tests use). If the host doesn't have
+    // python3 at all, we skip — the missing-python scenario covers it.
+    let Some(system_python) = which("python3") else {
+        eprintln!("skipping: no python3 on PATH");
+        return;
+    };
+
+    // Skip if camoufox happens to be installed into the system python
+    // already (unlikely on CI but possible on the maintainer's machine).
+    let has_camoufox = Command::new(&system_python)
+        .args(["-c", "import camoufox"])
+        .output()
+        .map(|o| o.status.success())
+        .unwrap_or(false);
+    if has_camoufox {
+        eprintln!("skipping: system python has camoufox installed");
+        return;
+    }
+
+    let tmp = TempDir::new().unwrap();
+    let mut cmd = build_doctor_cmd(&tmp, &["doctor", "--offline", "--quick", "--json"]);
+    cmd.env("AGENT_BROWSER_CAMOUFOX_PYTHON", &system_python);
+
+    let output = cmd.output().expect("invoke doctor");
+    let payload = parse_doctor_json(&output.stdout);
+
+    // The interpreter itself is fine, so the python probe passes.
+    let python = checks_by_id(&payload, "camoufox.python");
+    assert_eq!(python.len(), 1);
+    assert_eq!(python[0]["status"].as_str(), Some("pass"));
+
+    let package = checks_by_id(&payload, "camoufox.package");
+    assert_eq!(
+        package.len(),
+        1,
+        "expected one camoufox.package check, got {:?}",
+        payload["checks"]
+    );
+    assert_eq!(package[0]["status"].as_str(), Some("info"));
+    let msg = package[0]["message"].as_str().unwrap();
+    assert!(
+        msg.contains("camoufox: not available") && msg.contains("camoufox package not installed"),
+        "package-missing reason should be distinct, got: {}",
+        msg
+    );
+
+    assert!(
+        checks_by_id(&payload, "camoufox.binary").is_empty(),
+        "camoufox.binary should be skipped when package is missing"
+    );
+}
+
+// ---------------------------------------------------------------------------
+// Scenario 3 (validation-error path): --json payload carries
+// "engine": "camoufox".
+// ---------------------------------------------------------------------------
+
+/// Any `--json` response produced by `--engine camoufox` must carry
+/// `"engine": "camoufox"` at top level so downstream telemetry can segment.
+/// We use the Camoufox + missing-extensions validation error to get a
+/// deterministic response without requiring a real browser launch.
+#[test] +fn camoufox_json_payload_carries_engine_label() { + let tmp = TempDir::new().unwrap(); + + let output = build_doctor_cmd( + &tmp, + &[ + "--engine", + "camoufox", + "--extension", + "/nonexistent/ext.crx", + "--json", + "open", + "https://example.com", + ], + ) + // Point at a missing python so we don't actually spawn a sidecar; + // `validate_camoufox_options` rejects --extension before that path. + .env("AGENT_BROWSER_CAMOUFOX_PYTHON", "/nonexistent/python3-xyz") + .output() + .expect("invoke agent-browser"); + + let stdout = std::str::from_utf8(&output.stdout).expect("stdout utf8"); + let payload: serde_json::Value = serde_json::from_str(stdout) + .unwrap_or_else(|e| panic!("expected JSON, got: {}\n---\n{}", e, stdout)); + + assert_eq!( + payload["engine"].as_str(), + Some("camoufox"), + "--engine camoufox payload should carry `engine: camoufox`, got: {}", + stdout + ); + // Sanity: the validation rejection is what we expected to trigger. + assert_eq!(payload["success"].as_bool(), Some(false)); +} + +// --------------------------------------------------------------------------- +// Scenario 4 (structure-insensitive): chrome payload still carries +// "engine": "chrome". +// --------------------------------------------------------------------------- + +/// Chrome `--json` responses must carry `"engine": "chrome"` at top level. +/// We don't want this test to depend on a working Chrome install, so we +/// trigger a validation error that goes through the same response path — +/// any action dispatched against a Chrome-engine daemon produces the same +/// shape. +#[test] +fn chrome_json_payload_carries_engine_label() { + let tmp = TempDir::new().unwrap(); + + // `screencast` is not available without a live session; the daemon + // will return a structured error. The exact action doesn't matter — + // what matters is that the response envelope carries an engine label. 
+ // Use a local-only command that doesn't need Chrome: `state list` + // runs without a daemon, so we pick a command that *does* hit the + // daemon path. `session list` does. + // + // Simplest: use `--engine chrome` explicitly and rely on the + // missing-chrome auto-launch error. That response goes through + // `error_response` which carries the engine label. + let output = build_doctor_cmd( + &tmp, + &[ + "--engine", + "chrome", + "--json", + "navigate", + "https://example.com", + ], + ) + // Prevent the daemon from finding a real Chrome install — forces a + // structured error rather than actually launching a browser. + .env("AGENT_BROWSER_NO_AUTO_CONNECT", "1") + .env("PUPPETEER_EXECUTABLE_PATH", "/nonexistent/chrome") + .output() + .expect("invoke agent-browser"); + + let stdout = std::str::from_utf8(&output.stdout).expect("stdout utf8"); + if stdout.trim().is_empty() { + // On hosts without Chrome, the CLI may fail before producing a + // JSON payload (e.g. refused to start daemon). In that case + // the test is not meaningful on this host. The camoufox label + // scenario is the load-bearing one; chrome label parity is + // verified by the exhaustive daemon-side unit test + // (`test_success_response_structure`, `test_error_response_structure`). + eprintln!("skipping: chrome path produced no JSON on this host"); + return; + } + + // The daemon may retry and emit multiple JSON lines; parse the first + // complete object that contains an `engine` field. + let label = stdout + .lines() + .filter_map(|line| serde_json::from_str::(line).ok()) + .find_map(|v| v.get("engine").and_then(|e| e.as_str()).map(str::to_string)); + + match label { + Some(engine) => assert_eq!( + engine, "chrome", + "chrome payload should carry engine=chrome, got {} in output: {}", + engine, stdout + ), + None => { + // Host did not reach a daemon response, see comment above. 
+ eprintln!( + "skipping: no JSON response from daemon on this host (output: {})", + stdout + ); + } + } +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +fn which(name: &str) -> Option<String> { + let which_cmd = if cfg!(target_os = "windows") { + "where" + } else { + "which" + }; + let out = Command::new(which_cmd).arg(name).output().ok()?; + if !out.status.success() { + return None; + } + let s = String::from_utf8_lossy(&out.stdout); + s.lines().next().map(|l| l.trim().to_string()) +} + +// The `find_camoufox_check` helper is used only by the integration-gated +// happy-path test above; silence dead-code warnings when that feature is +// off. +#[cfg(not(feature = "camoufox-integration"))] +#[allow(dead_code)] +fn _suppress_dead_code_without_feature(p: &serde_json::Value) -> Option<&serde_json::Value> { + find_camoufox_check(p) +} diff --git a/cli/tests/fixtures/form-chrome-golden.json b/cli/tests/fixtures/form-chrome-golden.json new file mode 100644 index 000000000..426511d73 --- /dev/null +++ b/cli/tests/fixtures/form-chrome-golden.json @@ -0,0 +1,34 @@ +{ + "success": true, + "data": { + "origin": "file:///Users/davide/git/agent-browser/cli/tests/fixtures/form.html", + "refs": { + "e1": { + "name": "Contact Form", + "role": "heading" + }, + "e2": { + "name": " Subscribe to updates", + "role": "checkbox" + }, + "e3": { + "name": "Submit", + "role": "button" + }, + "e4": { + "name": "Name", + "role": "textbox" + }, + "e5": { + "name": "Email", + "role": "textbox" + }, + "e6": { + "name": "Message", + "role": "textbox" + } + }, + "snapshot": "- heading \"Contact Form\" [level=1, ref=e1]\n- paragraph\n - StaticText \"Fill out the form below.\"\n- generic\n - LabelText\n - StaticText \"Name\"\n - textbox \"Name\" [ref=e4]\n - LabelText\n - StaticText \"Email\"\n - textbox \"Email\" [ref=e5]\n - LabelText\n - StaticText \"Message\"\n - textbox 
\"Message\" [ref=e6]\n - checkbox \" Subscribe to updates\" [checked=false, ref=e2]\n - button \"Submit\" [ref=e3]\n- paragraph\n - StaticText \"Idle\"" + }, + "error": null +} diff --git a/cli/tests/fixtures/form.html b/cli/tests/fixtures/form.html new file mode 100644 index 000000000..314bd84a9 --- /dev/null +++ b/cli/tests/fixtures/form.html @@ -0,0 +1,24 @@ + + + + + Form Parity Fixture + + +

Contact Form

+

Fill out the form below.

+
+ + + + + +
+

Idle

+ + + diff --git a/package.json b/package.json index 1ca55e938..85bc6275f 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "agent-browser", - "version": "0.26.0-celeria-stealth.2", + "version": "0.26.0-celeria-camoufox.1", "description": "Browser automation CLI for AI agents", "type": "module", "files": [ diff --git a/packages/camoufox-sidecar/README.md b/packages/camoufox-sidecar/README.md new file mode 100644 index 000000000..f205c1846 --- /dev/null +++ b/packages/camoufox-sidecar/README.md @@ -0,0 +1,32 @@ +# camoufox-sidecar + +Python sidecar that agent-browser spawns when `--engine camoufox` is selected. +It drives [Camoufox](https://camoufox.com/) via Playwright and speaks a +JSON-line protocol over stdio to the Rust daemon. + +This package is not meant to be used directly by humans. See +`docs/engines/camoufox.md` in the agent-browser repo for the user-facing docs. + +## Install + +``` +pip install -U "camoufox[geoip]" +python -m camoufox fetch +pip install -e packages/camoufox-sidecar +``` + +## Run + +``` +python -m camoufox_sidecar +``` + +Emits `{"event": "ready"}` on startup, then reads JSON-line commands from +stdin. + +## Test + +``` +pip install -e 'packages/camoufox-sidecar[test]' +pytest packages/camoufox-sidecar/tests/ +``` diff --git a/packages/camoufox-sidecar/camoufox_sidecar/__init__.py b/packages/camoufox-sidecar/camoufox_sidecar/__init__.py new file mode 100644 index 000000000..9047a4178 --- /dev/null +++ b/packages/camoufox-sidecar/camoufox_sidecar/__init__.py @@ -0,0 +1,3 @@ +"""camoufox-sidecar: Playwright+Camoufox driver process for agent-browser.""" + +__version__ = "0.26.0+celeria.camoufox.1" diff --git a/packages/camoufox-sidecar/camoufox_sidecar/__main__.py b/packages/camoufox-sidecar/camoufox_sidecar/__main__.py new file mode 100644 index 000000000..f36ac13b5 --- /dev/null +++ b/packages/camoufox-sidecar/camoufox_sidecar/__main__.py @@ -0,0 +1,207 @@ +"""Sidecar entry point. + +Lifecycle: + + 1. 
Attach to stdin/stdout, emit `{"event": "ready"}`. + 2. Read command frames from stdin; dispatch to Session handlers. + 3. Exit cleanly on stdin EOF, SIGTERM, SIGINT, or `{"cmd": "close"}`. + +Unit 2 only ships lifecycle commands (`launch`, `close`). Anything else is +responded to with `not-yet-supported` so agents get a clear signal rather than +silent drops; later units replace those stubs. +""" + +from __future__ import annotations + +import asyncio +import signal +import sys +from typing import Any, Awaitable, Callable, Optional + +from .protocol import Protocol, log +from .session import LaunchError, Session + + +class Sidecar: + def __init__(self) -> None: + self.protocol = Protocol() + # Hand the protocol to the session so per-page events (console, crash) + # can fan out to the Rust daemon without the session holding a + # stdout handle of its own. + self.session = Session(protocol=self.protocol) + self._shutdown = asyncio.Event() + + async def run(self) -> int: + await self.protocol.start() + _install_signal_handlers(self._shutdown) + + await self.protocol.write_event("ready", {"pid": _own_pid()}) + + reader_task = asyncio.create_task(self._read_loop(), name="sidecar-reader") + shutdown_task = asyncio.create_task( + self._shutdown.wait(), name="sidecar-shutdown" + ) + try: + done, _ = await asyncio.wait( + {reader_task, shutdown_task}, + return_when=asyncio.FIRST_COMPLETED, + ) + for task in done: + exc = task.exception() + if exc is not None: + log(f"sidecar task raised: {exc!r}") + finally: + reader_task.cancel() + shutdown_task.cancel() + for task in (reader_task, shutdown_task): + try: + await task + except (asyncio.CancelledError, Exception): # noqa: BLE001 + pass + await self.session.close() + return 0 + + async def _read_loop(self) -> None: + try: + async for frame in self.protocol.messages(): + await self._dispatch(frame) + finally: + # stdin closed → daemon gone → we shut down + self._shutdown.set() + + async def _dispatch(self, frame: dict) -> 
None: + cmd = frame.get("cmd") + req_id = frame.get("id") + args = frame.get("args") or {} + + if cmd == "close": + await self.protocol.write_response(req_id, ok=True, result={"closed": True}) + self._shutdown.set() + return + + handler = _HANDLERS.get(cmd) # type: ignore[arg-type] + if handler is None: + await self.protocol.write_response( + req_id, + ok=False, + error={ + "code": "not-yet-supported" if isinstance(cmd, str) else "invalid-frame", + "message": ( + f"command {cmd!r} is not implemented in this sidecar version" + if isinstance(cmd, str) + else "frame is missing a 'cmd' field" + ), + }, + ) + return + + try: + result = await handler(self, args) + except LaunchError as exc: + await self.protocol.write_response( + req_id, + ok=False, + error={"code": exc.code, "message": exc.message}, + ) + return + except Exception as exc: # noqa: BLE001 + log(f"handler {cmd} raised: {exc!r}") + await self.protocol.write_response( + req_id, + ok=False, + error={"code": "internal-error", "message": str(exc)}, + ) + return + + await self.protocol.write_response(req_id, ok=True, result=result) + + +Handler = Callable[["Sidecar", dict], Awaitable[Any]] + + +async def _cmd_launch(sidecar: "Sidecar", args: dict) -> dict: + return await sidecar.session.launch(args) + + +async def _cmd_page_goto(sidecar: "Sidecar", args: dict) -> dict: + return await sidecar.session.goto(args) + + +async def _cmd_page_snapshot(sidecar: "Sidecar", args: dict) -> dict: + return await sidecar.session.snapshot(args) + + +async def _cmd_page_click(sidecar: "Sidecar", args: dict) -> dict: + return await sidecar.session.click(args) + + +async def _cmd_page_fill(sidecar: "Sidecar", args: dict) -> dict: + return await sidecar.session.fill(args) + + +async def _cmd_page_get_text(sidecar: "Sidecar", args: dict) -> dict: + return await sidecar.session.get_text(args) + + +async def _cmd_page_screenshot(sidecar: "Sidecar", args: dict) -> dict: + return await sidecar.session.screenshot(args) + + +async def 
_cmd_tab_new(sidecar: "Sidecar", args: dict) -> dict: + return await sidecar.session.tab_new(args) + + +async def _cmd_tab_switch(sidecar: "Sidecar", args: dict) -> dict: + return await sidecar.session.tab_switch(args) + + +async def _cmd_tab_close(sidecar: "Sidecar", args: dict) -> dict: + return await sidecar.session.tab_close(args) + + +async def _cmd_tab_list(sidecar: "Sidecar", args: dict) -> dict: + return sidecar.session.tab_list(args) + + +_HANDLERS: dict[str, Handler] = { + "launch": _cmd_launch, + "page.goto": _cmd_page_goto, + "page.navigate": _cmd_page_goto, # alias for CDP-side naming parity + "page.snapshot": _cmd_page_snapshot, + "page.click": _cmd_page_click, + "page.fill": _cmd_page_fill, + "page.getText": _cmd_page_get_text, + "page.screenshot": _cmd_page_screenshot, + "tab.new": _cmd_tab_new, + "tab.switch": _cmd_tab_switch, + "tab.close": _cmd_tab_close, + "tab.list": _cmd_tab_list, +} + + +def _own_pid() -> int: + import os + + return os.getpid() + + +def _install_signal_handlers(shutdown: asyncio.Event) -> None: + loop = asyncio.get_event_loop() + for sig in (signal.SIGTERM, signal.SIGINT): + try: + loop.add_signal_handler(sig, shutdown.set) + except (NotImplementedError, RuntimeError): + # Windows / non-main thread: fall back to default disposition. + pass + + +def main(argv: Optional[list[str]] = None) -> int: + _ = argv # reserved for future flags; the sidecar takes config via stdio + try: + return asyncio.run(Sidecar().run()) + except KeyboardInterrupt: + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/packages/camoufox-sidecar/camoufox_sidecar/protocol.py b/packages/camoufox-sidecar/camoufox_sidecar/protocol.py new file mode 100644 index 000000000..18d3c30db --- /dev/null +++ b/packages/camoufox-sidecar/camoufox_sidecar/protocol.py @@ -0,0 +1,107 @@ +"""JSON-line stdio protocol used by the Rust daemon <-> Python sidecar. + +Frames are single-line JSON documents. 
Requests and responses carry a +monotonic `id`; events are unsolicited and carry no `id`. + + request: {"id": 42, "cmd": "<name>", "args": {...}} + response: {"id": 42, "ok": true, "result": {...}} + {"id": 42, "ok": false, "error": {"code": "...", "message": "..."}} + event: {"event": "<name>", "data": {...}} + +stdout is reserved for these frames. stderr is free-form diagnostic logging +that the Rust side captures when --verbose is on. +""" + +from __future__ import annotations + +import asyncio +import json +import sys +from typing import Any, AsyncIterator, Optional + + +async def _stdin_reader() -> asyncio.StreamReader: + """Attach an asyncio StreamReader to sys.stdin.""" + loop = asyncio.get_event_loop() + reader = asyncio.StreamReader() + protocol = asyncio.StreamReaderProtocol(reader) + await loop.connect_read_pipe(lambda: protocol, sys.stdin) + return reader + + +class Protocol: + """Async JSON-line protocol bound to stdin/stdout. + + Writes are synchronous and flushed — correctness beats throughput here, + since the Rust side relies on line-boundary framing and the volume is low. + """ + + def __init__(self) -> None: + self._reader: Optional[asyncio.StreamReader] = None + self._write_lock = asyncio.Lock() + + async def start(self) -> None: + if self._reader is None: + self._reader = await _stdin_reader() + + async def messages(self) -> AsyncIterator[dict]: + """Yield incoming frames until stdin EOF. + + Lines that fail to parse as JSON are logged to stderr and answered + with a {"code": "invalid-frame"} response whose id is null, because + no id can be recovered from them; valid JSON that is not an object is + logged and skipped. The iterator itself does not raise on parse errors. 
+ """ + assert self._reader is not None, "Protocol.start() must be called first" + while True: + raw = await self._reader.readline() + if not raw: + return + line = raw.decode("utf-8", errors="replace").rstrip("\r\n") + if not line.strip(): + continue + try: + frame = json.loads(line) + except json.JSONDecodeError as exc: + log(f"invalid JSON on stdin: {exc}: {line!r}") + await self.write_response( + req_id=None, + ok=False, + error={ + "code": "invalid-frame", + "message": f"could not parse JSON: {exc}", + }, + ) + continue + if not isinstance(frame, dict): + log(f"non-object frame on stdin: {line!r}") + continue + yield frame + + async def write_event(self, name: str, data: Optional[dict] = None) -> None: + await self._write({"event": name, "data": data or {}}) + + async def write_response( + self, + req_id: Optional[int], + ok: bool, + result: Optional[Any] = None, + error: Optional[dict] = None, + ) -> None: + frame: dict[str, Any] = {"id": req_id, "ok": ok} + if ok: + frame["result"] = result if result is not None else {} + else: + frame["error"] = error or {"code": "unknown", "message": ""} + await self._write(frame) + + async def _write(self, frame: dict) -> None: + encoded = json.dumps(frame, separators=(",", ":"), ensure_ascii=False) + async with self._write_lock: + sys.stdout.write(encoded + "\n") + sys.stdout.flush() + + +def log(message: str) -> None: + """Diagnostic logging. Goes to stderr; never touches the protocol pipe.""" + sys.stderr.write(f"[camoufox-sidecar] {message}\n") + sys.stderr.flush() diff --git a/packages/camoufox-sidecar/camoufox_sidecar/refs.py b/packages/camoufox-sidecar/camoufox_sidecar/refs.py new file mode 100644 index 000000000..fbfa83f48 --- /dev/null +++ b/packages/camoufox-sidecar/camoufox_sidecar/refs.py @@ -0,0 +1,110 @@ +"""`@eN` ref cache for the Camoufox sidecar. 
+ +The CDP path in agent-browser hands agents ``@e1``, ``@e2`` … tokens that +survive beyond the snapshot that created them, backed by Chrome's cross-tree +``backend_node_id`` identity. Playwright exposes no equivalent — an +``ElementHandle`` is the closest thing, and it has narrower semantics: handles +only remain valid while their element stays attached to the same document. + +Per the plan's Key Technical Decisions, the sidecar therefore: + + * caches an ``ElementHandle`` per ``@eN`` during ``page.snapshot``; + * clears the cache on ``frame.navigated`` so cross-navigation refs become + structurally unavailable rather than silently pointing at a new element; + * surfaces ``{"code": "ref-stale"}`` when a caller reaches for a ref that + is either missing from the cache or whose handle Playwright reports as + detached. + +This is a narrower semantic than Chrome's ``backend_node_id``. The narrower +shape is documented in ``docs/engines/camoufox.md`` (planned Unit 8) and +surfaces as ``ref-stale`` rather than a silent cross-navigation mismatch. +""" + +from __future__ import annotations + +import re +from typing import Any, Optional + + +# Recognise ``@e1``, ``e1``, or ``ref=e1`` — mirrors ``parse_ref`` on the +# Rust side (``cli/src/native/element.rs``) so both engines accept the same +# agent-facing token shapes. +_REF_RE = re.compile(r"^(?:@|ref=)?(e[0-9]+)$") + + +def parse_ref(selector_or_ref: str) -> Optional[str]: + """Return ``"eN"`` if the input looks like an agent-browser ref, else ``None``.""" + if not isinstance(selector_or_ref, str): + return None + match = _REF_RE.match(selector_or_ref.strip()) + return match.group(1) if match else None + + +class RefStale(Exception): + """Raised by ``RefCache.require`` when a ref is missing or detached.""" + + def __init__(self, message: str) -> None: + super().__init__(message) + self.message = message + + +class RefCache: + """ElementHandle cache keyed by ``@eN``. + + Cheap to construct; no background tasks. 
The owner is expected to call + :meth:`invalidate` whenever the browser navigates so callers see an honest + ``ref-stale`` error instead of a silently-rebound handle. + """ + + def __init__(self) -> None: + self._handles: dict[str, Any] = {} + self._metadata: dict[str, dict[str, Any]] = {} + self._next_id: int = 1 + + def __contains__(self, ref_id: str) -> bool: + return ref_id in self._handles + + def invalidate(self) -> None: + """Drop all cached handles. + + We don't ``await handle.dispose()`` here because callers hit this on + the sync ``framenavigated`` event path; Playwright cleans up detached + handles on its own. This method is cheap to call more than once. + """ + self._handles.clear() + self._metadata.clear() + self._next_id = 1 + + def next_ref_id(self) -> str: + ref_id = f"e{self._next_id}" + self._next_id += 1 + return ref_id + + def put(self, ref_id: str, handle: Any, *, role: str, name: str) -> None: + self._handles[ref_id] = handle + self._metadata[ref_id] = {"role": role, "name": name} + + def get(self, ref_id: str) -> Optional[Any]: + return self._handles.get(ref_id) + + def metadata(self, ref_id: str) -> Optional[dict[str, Any]]: + return self._metadata.get(ref_id) + + def entries(self) -> dict[str, dict[str, Any]]: + """Return a ``{ref_id: {role, name}}`` view suitable for the ``refs`` response field.""" + return {k: dict(v) for k, v in self._metadata.items()} + + def require(self, ref_id: str) -> Any: + """Return the handle for ``ref_id`` or raise :class:`RefStale`. + + Callers should catch Playwright errors when *using* the returned handle + and translate them into ``RefStale`` as well — this method only covers + the "not in cache" failure mode. 
+ """ + handle = self._handles.get(ref_id) + if handle is None: + raise RefStale( + f"ref {ref_id!r} is not in the snapshot cache " + "(may have been invalidated by a navigation; re-snapshot)" + ) + return handle diff --git a/packages/camoufox-sidecar/camoufox_sidecar/session.py b/packages/camoufox-sidecar/camoufox_sidecar/session.py new file mode 100644 index 000000000..3cab786de --- /dev/null +++ b/packages/camoufox-sidecar/camoufox_sidecar/session.py @@ -0,0 +1,794 @@ +"""Session holds the AsyncCamoufox browser + per-tab pages for the sidecar. + +Unit 2 owned lifecycle (launch / close); Unit 4 grew the per-page command +surface (snapshot, click, fill, get_text, navigate); Unit 5 replaces the +Unit-4 single ``self._page`` stopgap with a per-tab map keyed by the +agent-browser ``t<N>`` scheme. The Rust daemon owns the counter (reusing +``BrowserManager::format_tab_id`` / ``resolve_tab_ref``) and passes stable +string tab ids in to every command; the sidecar is a pure map from those +ids to Playwright ``Page`` instances, with a per-tab :class:`RefCache` so +a ``click @e1`` on ``t2`` cannot resolve against a stale ``t1`` snapshot. + +Why Rust owns the counter (deferred decision from the plan): the Rust +``BrowserManager`` already tracks ``next_tab_id``, formats ids with +``t<N>``, and resolves labels via ``TabRef::parse``. Duplicating any of +that in the sidecar would split a single invariant ("tab ids never +reused") across a process boundary. Instead the sidecar stores pages +under whatever string the Rust side hands it; on ``tab.new`` the Rust +side assigns the next id and tells the sidecar to register the Playwright +``Page`` under that id. +""" + +from __future__ import annotations + +from typing import Any, Optional + +from .protocol import Protocol, log +from .refs import RefCache, RefStale, parse_ref +from .snapshot import SnapshotError, take_snapshot + +# Allowlist derived from https://camoufox.com/python/usage/ — keep in sync with +# the plan's Unit 2 Approach. 
New kwargs must be added deliberately so the +# Rust side knows to expose them; silently passing unknown kwargs through is a +# footgun when Camoufox bumps and adds options we haven't reviewed. +ALLOWED_LAUNCH_KWARGS: frozenset[str] = frozenset( + { + "headless", + "humanize", + "os", + "locale", + "geoip", + "screen", + "window", + "webgl_config", + "fonts", + "block_images", + "block_webrtc", + "block_webgl", + "disable_coop", + "executable_path", + "proxy", + "addons", + "exclude_default_addons", + "main_world_eval", + "enable_cache", + "config", + } +) + +# Explicitly rejected in v1 (see plan). Surfacing a distinct code makes the +# "not-yet-supported" state obvious rather than conflating it with typos. +REJECTED_LAUNCH_KWARGS: frozenset[str] = frozenset( + { + "persistent_context", + "user_data_dir", + } +) + +# Default timeout for per-element actions (ms). Matches agent-browser's +# default_timeout_ms on the Rust side (see BrowserManager::launch). +DEFAULT_ACTION_TIMEOUT_MS: int = 25_000 + + +class LaunchError(Exception): + """Structured error surfaced as a {"ok": false, "error": {...}} response.""" + + def __init__(self, code: str, message: str) -> None: + super().__init__(message) + self.code = code + self.message = message + + +class Tab: + """A single Playwright :class:`Page` with its own ref cache. + + Keeping the cache per-tab (not per-session, as Unit 4 had it) prevents + a ``click @e1`` on ``t2`` from resolving against a handle cached by a + snapshot taken on ``t1``. That failure mode is silent on CDP because + refs live on the Rust side; it would be silent on Camoufox too without + this split. + """ + + def __init__(self, tab_id: str, page: Any) -> None: + self.tab_id = tab_id + self.page = page + self.refs = RefCache() + + +class Session: + """Holds the AsyncCamoufox browser + a ``{tab_id: Tab}`` map. 
+ + The browser is launched lazily on the first `launch` command so that + bringing up the sidecar process itself does not require Camoufox to be + installed — useful for the startup-and-close lifecycle test. + """ + + def __init__(self, protocol: Optional[Protocol] = None) -> None: + self._camoufox_cm: Optional[Any] = None # AsyncCamoufox context manager + self._browser: Optional[Any] = None + self._tabs: dict[str, Tab] = {} + self._active_tab_id: Optional[str] = None + self._launched: bool = False + # Retained to broadcast page.console / page.crashed back to the Rust + # daemon. Unit 4 wired framenavigated internally only; Unit 5 finishes + # the observability story. + self._protocol: Optional[Protocol] = protocol + + @property + def is_launched(self) -> bool: + return self._launched + + # ------------------------------------------------------------------ + # Lifecycle + # ------------------------------------------------------------------ + + async def launch(self, args: Optional[dict] = None) -> dict: + """Launch the Camoufox browser with validated kwargs. + + Returns a result dict for the response frame. Raises LaunchError for + validation or environment failures that should surface as structured + errors to the Rust side. + """ + if self._launched: + raise LaunchError( + "already-launched", + "sidecar already has an active Camoufox browser; close it first", + ) + + kwargs = _validate_launch_args(args or {}) + + try: + from camoufox.async_api import AsyncCamoufox # type: ignore + except ImportError as exc: + raise LaunchError( + "camoufox-not-installed", + ( + "camoufox Python package is not importable: " + f"{exc}. Install with `pip install -U 'camoufox[geoip]'`." + ), + ) from exc + + cm = AsyncCamoufox(**kwargs) + try: + browser = await cm.__aenter__() + except FileNotFoundError as exc: + # Camoufox raises FileNotFoundError when the browser binary has + # not been fetched. Surface the actionable message. 
+ raise LaunchError( + "camoufox-not-installed", + ( + f"Camoufox browser binary not found: {exc}. " + "Run `python -m camoufox fetch`." + ), + ) from exc + except Exception as exc: # noqa: BLE001 + message = str(exc) + if _looks_like_missing_binary(message): + raise LaunchError( + "camoufox-not-installed", + ( + f"Camoufox browser binary not available: {message}. " + "Run `python -m camoufox fetch`." + ), + ) from exc + raise LaunchError("launch-failed", message) from exc + + self._camoufox_cm = cm + self._browser = browser + self._launched = True + log("camoufox launched") + return {"launched": True} + + async def close(self) -> dict: + """Close the browser if launched. Safe to call when never launched.""" + cm = self._camoufox_cm + self._camoufox_cm = None + self._browser = None + self._tabs.clear() + self._active_tab_id = None + self._launched = False + if cm is None: + return {"closed": False} + try: + await cm.__aexit__(None, None, None) + except Exception as exc: # noqa: BLE001 + log(f"error during close: {exc}") + # Don't re-raise; the sidecar is shutting down either way and + # leaving a half-closed state just masks the root cause. + return {"closed": True} + + # ------------------------------------------------------------------ + # Tab management (Unit 5) + # ------------------------------------------------------------------ + + async def tab_new(self, args: Optional[dict] = None) -> dict: + """Create a new tab and register it under ``args['tabId']``. + + Rust has already assigned the stable ``t<N>`` id before calling — see + ``BrowserManager::camoufox_tab_new`` in ``browser.rs``. The sidecar + rejects reused ids so double-registration shows up as a structured + error rather than a silent handle swap. 
+ """ + args = args or {} + browser = self._require_browser() + tab_id = _require_str(args, "tabId") + url = args.get("url") + if tab_id in self._tabs: + raise LaunchError( + "tab-id-in-use", + f"tab id {tab_id!r} is already registered in the sidecar", + ) + page = await browser.new_page() + tab = Tab(tab_id, page) + self._wire_page_events(tab) + self._tabs[tab_id] = tab + self._active_tab_id = tab_id + + if isinstance(url, str) and url and url != "about:blank": + try: + await page.goto(url, wait_until=_wait_until(args.get("waitUntil", "load"))) + except Exception as exc: # noqa: BLE001 + # Roll the registration back so Rust doesn't end up thinking + # the sidecar has a tab it can target. + await self._close_tab_silently(tab_id) + raise LaunchError("navigation-failed", str(exc)) from exc + + current_url = _safe_page_url(page) + title = await _safe_page_title(page) + return { + "tabId": tab_id, + "url": current_url, + "title": title, + } + + async def tab_switch(self, args: Optional[dict] = None) -> dict: + args = args or {} + tab_id = _require_str(args, "tabId") + tab = self._require_tab(tab_id) + try: + await tab.page.bring_to_front() + except Exception as exc: # noqa: BLE001 + # bring_to_front failing usually means the page has been + # externally closed (e.g. window.close()). Report a structured + # error; the caller can re-issue tab.list. + raise LaunchError("tab-gone", str(exc)) from exc + self._active_tab_id = tab_id + return { + "tabId": tab_id, + "url": _safe_page_url(tab.page), + "title": await _safe_page_title(tab.page), + } + + async def tab_close(self, args: Optional[dict] = None) -> dict: + args = args or {} + tab_id = args.get("tabId") + if not isinstance(tab_id, str) or not tab_id: + # Default to the active tab, matching the Chrome path's + # ``mgr.tab_close_by_id(None)`` semantics. 
+ tab_id = self._active_tab_id + if tab_id is None: + raise LaunchError("no-active-tab", "no tabs are open in the sidecar") + tab = self._require_tab(tab_id) + try: + await tab.page.close() + except Exception as exc: # noqa: BLE001 + log(f"page.close({tab_id!r}) raised: {exc}") + # ``page.close()`` also fires the ``close`` event wired in + # ``_wire_page_events``, which pops the tab out of ``self._tabs`` + # before we get here. Use ``pop(..., None)`` so the idempotent path + # doesn't race a KeyError on whichever handler won. + self._tabs.pop(tab_id, None) + if self._active_tab_id == tab_id: + # Promote the first remaining tab to active (arbitrary but + # deterministic order — Python dicts preserve insertion order). + self._active_tab_id = next(iter(self._tabs), None) + return {"tabId": tab_id, "closed": True, "remaining": len(self._tabs)} + + def tab_list(self, args: Optional[dict] = None) -> dict: + _ = args + tabs = [ + { + "tabId": tab.tab_id, + "url": _safe_page_url(tab.page), + "active": tab.tab_id == self._active_tab_id, + } + for tab in self._tabs.values() + ] + return {"tabs": tabs, "active": self._active_tab_id} + + async def screenshot(self, args: Optional[dict] = None) -> dict: + args = args or {} + page = await self._page_for(args) + full_page = bool(args.get("fullPage", False)) + fmt = (args.get("format") or "png").lower() + if fmt not in ("png", "jpeg"): + raise LaunchError( + "invalid-args", + f"screenshot format must be `png` or `jpeg`, got {fmt!r}", + ) + path = args.get("path") + # Auto-allocate a temp path when the caller didn't pass one. Keeping + # screenshots on disk (not base64 in the response frame) matches the + # Chrome CDP path and avoids blowing past the asyncio stdio reader's + # default line-length limit on full-page captures. 
+ if not isinstance(path, str) or not path: + import tempfile + import time + + suffix = ".jpg" if fmt == "jpeg" else ".png" + fd, path = tempfile.mkstemp(prefix=f"ab-camoufox-{int(time.time() * 1000)}-", suffix=suffix) + import os + + os.close(fd) + kwargs: dict[str, Any] = { + "full_page": full_page, + "type": fmt, + "path": path, + } + quality = args.get("quality") + if fmt == "jpeg" and isinstance(quality, int): + kwargs["quality"] = quality + try: + await page.screenshot(**kwargs) + except Exception as exc: # noqa: BLE001 + raise LaunchError("screenshot-failed", str(exc)) from exc + + return { + "path": path, + "format": fmt, + "fullPage": full_page, + } + + async def _page_for(self, args: dict) -> Any: + """Resolve the target page for a command. + + Commands may pass an explicit ``tabId``; otherwise we fall back to + the active tab, creating it on demand for the first `page.goto` + that happens before `tab.new` (preserves the Unit-4 flow where + `agent-browser --engine camoufox open <url>` doesn't issue an + explicit tab.new first). + """ + tab_id = args.get("tabId") + if isinstance(tab_id, str) and tab_id: + return self._require_tab(tab_id).page + # Lazy default tab: only created when someone actually needs a page. + if self._active_tab_id is None: + await self._ensure_default_tab() + return self._require_tab(self._active_tab_id).page # type: ignore[arg-type] + + async def _ensure_default_tab(self) -> None: + """Create an implicit ``t1`` tab on first use. + + The Unit-4 smoke path (`agent-browser --engine camoufox open <url>`) + issues a raw `page.goto` without first calling `tab.new`. We keep + that working by auto-creating a page under the canonical first id; + once Rust issues an explicit `tab.new` the implicit tab stays + registered under its id and the counter continues from there. 
        """
        browser = self._require_browser()
        page = await browser.new_page()
        # The default tab is always registered under the fixed id "t1".
        tab = Tab("t1", page)
        self._wire_page_events(tab)
        self._tabs["t1"] = tab
        self._active_tab_id = "t1"

    def _require_browser(self) -> Any:
        """Return the launched browser object.

        Raises:
            LaunchError: ``not-launched`` when ``self._launched`` is falsy or
                ``self._browser`` is ``None``.
        """
        if not self._launched or self._browser is None:
            raise LaunchError(
                "not-launched",
                "Camoufox browser is not launched; send `launch` first",
            )
        return self._browser

    def _require_tab(self, tab_id: str) -> Tab:
        """Look up ``tab_id`` in ``self._tabs``.

        Raises:
            LaunchError: ``tab-not-found`` when no tab is registered under
                that id.
        """
        tab = self._tabs.get(tab_id)
        if tab is None:
            raise LaunchError(
                "tab-not-found",
                f"no tab registered with id {tab_id!r}",
            )
        return tab

    async def _close_tab_silently(self, tab_id: str) -> None:
        """Unregister ``tab_id`` and close its page without raising.

        Unknown ids are a no-op. When the closed tab was the active one, the
        active id moves to the first remaining entry of ``self._tabs`` (or
        ``None`` when no tabs are left). A failure from ``page.close()`` is
        logged, never propagated.
        """
        tab = self._tabs.pop(tab_id, None)
        if tab is None:
            return
        if self._active_tab_id == tab_id:
            self._active_tab_id = next(iter(self._tabs), None)
        try:
            await tab.page.close()
        except Exception as exc:  # noqa: BLE001
            log(f"silent tab close ({tab_id!r}) raised: {exc}")

    def _wire_page_events(self, tab: Tab) -> None:
        """Invalidate the tab's ref cache on nav + forward console/crash events.

        Playwright's ``framenavigated`` fires for every frame; we invalidate
        only on main-frame navigations. ``console`` and ``crash`` events
        fan out to the Rust daemon via the shared :class:`Protocol`. A
        ``close`` handler additionally drops the tab from ``self._tabs``.
        Handler-attachment failures are logged and do not abort the wiring of
        the remaining handlers.
        """
        page = tab.page

        def _on_framenavigated(frame: Any) -> None:
            # Sub-frame navigations leave the cached refs untouched.
            try:
                if frame == page.main_frame:
                    tab.refs.invalidate()
            except Exception as exc:  # noqa: BLE001
                log(f"framenavigated handler: {exc}")

        def _on_console(msg: Any) -> None:
            # Without a protocol there is nowhere to forward the event.
            if self._protocol is None:
                return
            try:
                # ``type`` / ``text`` are read either as attributes or as
                # zero-argument callables, whichever the message exposes.
                data = {
                    "tabId": tab.tab_id,
                    "level": getattr(msg, "type", lambda: "log")()
                    if callable(getattr(msg, "type", None))
                    else getattr(msg, "type", "log"),
                    "text": getattr(msg, "text", lambda: "")()
                    if callable(getattr(msg, "text", None))
                    else getattr(msg, "text", ""),
                }
            except Exception as exc:  # noqa: BLE001
                log(f"console handler payload build failed: {exc}")
                return
            _schedule_event(self._protocol, "page.console", data)

        def _on_crash(_page: Any) -> None:
            if self._protocol is None:
                return
            _schedule_event(self._protocol, "page.crashed", {"tabId": tab.tab_id})

        def _on_close(_page: Any) -> None:
            # A tab closed out from under us (window.close(), target_blank
            # cascade, etc.): drop our reference so later commands see a
            # clean `tab-not-found` rather than a dangling Playwright handle.
            self._tabs.pop(tab.tab_id, None)
            if self._active_tab_id == tab.tab_id:
                self._active_tab_id = next(iter(self._tabs), None)

        # Attach each handler independently so one failed registration does
        # not prevent the others from being wired.
        for event_name, handler in (
            ("framenavigated", _on_framenavigated),
            ("console", _on_console),
            ("crash", _on_crash),
            ("close", _on_close),
        ):
            try:
                page.on(event_name, handler)
            except Exception as exc:  # noqa: BLE001
                log(f"could not attach {event_name} handler: {exc}")

    # ------------------------------------------------------------------
    # Commands that operate on a tab's page
    # ------------------------------------------------------------------

    async def goto(self, args: Optional[dict] = None) -> dict:
        """Navigate the target page to ``args['url']``.

        ``args['tabId']`` selects the tab; defaulting to the active tab
        (which is auto-created on first use) keeps the single-tab open flow
        from Unit 3 working unchanged.

        ``args['waitUntil']`` (default ``"load"``) is normalised through
        ``_wait_until`` before being handed to ``page.goto``.

        Returns:
            ``{"url", "title", "status", "tabId"}``; ``status`` is ``None``
            when ``page.goto`` returns no response object.

        Raises:
            LaunchError: ``invalid-args`` for a missing/empty/non-string
                ``url``; ``navigation-failed`` when ``page.goto`` raises.
        """
        args = args or {}
        url = args.get("url")
        if not isinstance(url, str) or not url:
            raise LaunchError(
                "invalid-args",
                "`page.goto` requires a non-empty `url` string",
            )
        wait_until = _wait_until(args.get("waitUntil", "load"))

        tab = await self._tab_for(args)
        # Any navigation request invalidates prior refs, even before
        # ``framenavigated`` fires; clearing here closes the window in which
        # an agent could click on a stale ref after issuing ``navigate``.
        tab.refs.invalidate()

        try:
            response = await tab.page.goto(url, wait_until=wait_until)
        except Exception as exc:  # noqa: BLE001
            raise LaunchError("navigation-failed", str(exc)) from exc

        title = await _safe_page_title(tab.page)
        final_url = _safe_page_url(tab.page)
        status = response.status if response is not None else None
        return {"url": final_url, "title": title, "status": status, "tabId": tab.tab_id}

    async def snapshot(self, args: Optional[dict] = None) -> dict:
        """Take a snapshot of the target tab via ``take_snapshot``.

        ``args['interactive']`` is coerced to ``bool`` and passed as
        ``interactive_only``; ``args['selector']`` is passed through as-is.

        Raises:
            LaunchError: carrying the ``code`` / ``message`` of any
                ``SnapshotError`` raised by ``take_snapshot``.
        """
        args = args or {}
        tab = await self._tab_for(args)
        try:
            return await take_snapshot(
                tab.page,
                tab.refs,
                interactive_only=bool(args.get("interactive", False)),
                selector=args.get("selector"),
            )
        except SnapshotError as exc:
            raise LaunchError(exc.code, exc.message) from exc

    async def click(self, args: Optional[dict] = None) -> dict:
        """Click the element named by ``args['selector']``.

        When ``parse_ref`` recognises the value as a ref, the cached handle
        for that ref is clicked; otherwise the string is used as a
        ``page.locator`` selector. ``button`` defaults to ``"left"``,
        ``clickCount`` to ``1`` and ``timeoutMs`` to
        ``DEFAULT_ACTION_TIMEOUT_MS``.

        Returns:
            ``{"clicked": <selector or ref>, "tabId": <tab id>}``.
        """
        args = args or {}
        selector_or_ref = _require_str(args, "selector")
        button = args.get("button", "left")
        # NOTE(review): ``int()`` raises ValueError/TypeError for non-numeric
        # input and that is not translated into ``LaunchError`` here — confirm
        # the dispatcher reports it sensibly.
        click_count = int(args.get("clickCount", 1) or 1)
        timeout = int(args.get("timeoutMs") or DEFAULT_ACTION_TIMEOUT_MS)

        tab = await self._tab_for(args)
        ref_id = parse_ref(selector_or_ref)
        if ref_id is not None:
            handle = _require_ref(tab, ref_id)
            await _try_click_handle(handle, button, click_count, timeout)
        else:
            await _try_click_locator(
                tab.page.locator(selector_or_ref),
                selector_or_ref,
                button,
                click_count,
                timeout,
            )
        return {"clicked": selector_or_ref, "tabId": tab.tab_id}

    async def fill(self, args: Optional[dict] = None) -> dict:
        """Fill the element named by ``args['selector']`` with ``args['value']``.

        Ref-vs-selector resolution follows the same ``parse_ref`` branch as
        ``click``. An empty string is an accepted ``value``.

        Returns:
            ``{"filled": <selector or ref>, "tabId": <tab id>}``.

        Raises:
            LaunchError: ``invalid-args`` when ``value`` is not a string.
        """
        args = args or {}
        selector_or_ref = _require_str(args, "selector")
        value = args.get("value")
        if not isinstance(value, str):
            raise LaunchError("invalid-args", "`fill` requires a string `value` argument")
        timeout = int(args.get("timeoutMs") or DEFAULT_ACTION_TIMEOUT_MS)

        tab = await self._tab_for(args)
        ref_id = parse_ref(selector_or_ref)
        if ref_id is not None:
            handle = _require_ref(tab, ref_id)
            await _try_fill_handle(handle, value, timeout)
        else:
            await _try_fill_locator(
                tab.page.locator(selector_or_ref), selector_or_ref, value, timeout
            )
        return {"filled": selector_or_ref, "tabId": tab.tab_id}

    async def get_text(self, args: Optional[dict] = None) -> dict:
        """Read the text of the element named by ``args['selector']``.

        Returns:
            ``{"text", "origin", "tabId"}`` where ``origin`` is the page URL
            as reported by ``_safe_page_url``.
        """
        args = args or {}
        selector_or_ref = _require_str(args, "selector")
        timeout = int(args.get("timeoutMs") or DEFAULT_ACTION_TIMEOUT_MS)

        tab = await self._tab_for(args)
        ref_id = parse_ref(selector_or_ref)
        if ref_id is not None:
            handle = _require_ref(tab, ref_id)
            text = await _handle_text(handle, timeout)
        else:
            text = await _locator_text(
                tab.page.locator(selector_or_ref), selector_or_ref, timeout
            )
        return {"text": text, "origin": _safe_page_url(tab.page), "tabId": tab.tab_id}

    async def _tab_for(self, args: dict) -> Tab:
        """Resolve the tab a command should act on.

        A non-empty string ``args['tabId']`` is looked up directly. Otherwise
        the active tab is used, and ``_ensure_default_tab`` is awaited first
        when no tab is active yet.
        """
        tab_id = args.get("tabId")
        if isinstance(tab_id, str) and tab_id:
            return self._require_tab(tab_id)
        if self._active_tab_id is None:
            await self._ensure_default_tab()
        return self._require_tab(self._active_tab_id)  # type: ignore[arg-type]


# ---------------------------------------------------------------------------
# Internal helpers — kept module-level so Session stays focused on lifecycle
# and command dispatch, not Playwright error translation.
+# --------------------------------------------------------------------------- + + +def _require_str(args: dict, key: str) -> str: + value = args.get(key) + if not isinstance(value, str) or not value: + raise LaunchError("invalid-args", f"missing required `{key}` string argument") + return value + + +def _require_ref(tab: Tab, ref_id: str) -> Any: + try: + return tab.refs.require(ref_id) + except RefStale as exc: + raise LaunchError("ref-stale", exc.message) from exc + + +def _classify_playwright_error(exc: Exception, selector_or_ref: str) -> LaunchError: + """Translate Playwright errors into agent-browser error codes. + + Keeping this logic in one place means new error codes (e.g. ``timeout``) + pick up the same behaviour across click/fill/get_text without each handler + reimplementing the pattern match. + """ + msg = str(exc) + lowered = msg.lower() + if "strict mode violation" in lowered or "resolved to" in lowered and "elements" in lowered: + # Try to parse the match count from the message ("resolved to N elements"). 
+ import re + + match = re.search(r"resolved to\s+(\d+)\s+elements", msg) + count = int(match.group(1)) if match else 0 + return LaunchError( + "ambiguous-selector", + f"Selector {selector_or_ref!r} matched {count} elements; refine it or use a ref", + ) + if "element is not attached" in lowered or "node is detached" in lowered or "detached" in lowered: + return LaunchError("element-detached", msg) + if "timeout" in lowered and "exceeded" in lowered: + return LaunchError("timeout", msg) + if "no element matches" in lowered or "no elements match" in lowered: + return LaunchError("selector-not-found", msg) + return LaunchError("action-failed", msg) + + +async def _try_click_handle(handle: Any, button: str, click_count: int, timeout: int) -> None: + try: + await handle.click(button=button, click_count=click_count, timeout=timeout) + except Exception as exc: # noqa: BLE001 + raise _classify_playwright_error(exc, "") from exc + + +async def _try_click_locator( + locator: Any, selector: str, button: str, click_count: int, timeout: int +) -> None: + try: + count = await locator.count() + except Exception as exc: # noqa: BLE001 + raise _classify_playwright_error(exc, selector) from exc + if count == 0: + raise LaunchError( + "selector-not-found", + f"Selector {selector!r} did not match any element", + ) + if count > 1: + raise LaunchError( + "ambiguous-selector", + f"Selector {selector!r} matched {count} elements; refine it or use a ref", + ) + try: + await locator.click(button=button, click_count=click_count, timeout=timeout) + except Exception as exc: # noqa: BLE001 + raise _classify_playwright_error(exc, selector) from exc + + +async def _try_fill_handle(handle: Any, value: str, timeout: int) -> None: + try: + await handle.fill(value, timeout=timeout) + except Exception as exc: # noqa: BLE001 + raise _classify_playwright_error(exc, "") from exc + + +async def _try_fill_locator(locator: Any, selector: str, value: str, timeout: int) -> None: + try: + count = await 
locator.count() + except Exception as exc: # noqa: BLE001 + raise _classify_playwright_error(exc, selector) from exc + if count == 0: + raise LaunchError( + "selector-not-found", + f"Selector {selector!r} did not match any element", + ) + if count > 1: + raise LaunchError( + "ambiguous-selector", + f"Selector {selector!r} matched {count} elements; refine it or use a ref", + ) + try: + await locator.fill(value, timeout=timeout) + except Exception as exc: # noqa: BLE001 + raise _classify_playwright_error(exc, selector) from exc + + +async def _handle_text(handle: Any, timeout: int) -> str: + # ElementHandle.text_content does not accept a ``timeout`` kwarg (only the + # Locator variant does) — if the handle is still live in the cache we + # already know the element is attached, so no additional timeout gymnastics + # are needed. + _ = timeout + try: + raw = await handle.text_content() + except Exception as exc: # noqa: BLE001 + raise _classify_playwright_error(exc, "") from exc + return (raw or "").strip() + + +async def _locator_text(locator: Any, selector: str, timeout: int) -> str: + try: + count = await locator.count() + except Exception as exc: # noqa: BLE001 + raise _classify_playwright_error(exc, selector) from exc + if count == 0: + raise LaunchError( + "selector-not-found", + f"Selector {selector!r} did not match any element", + ) + if count > 1: + raise LaunchError( + "ambiguous-selector", + f"Selector {selector!r} matched {count} elements; refine it or use a ref", + ) + try: + raw = await locator.text_content(timeout=timeout) + except Exception as exc: # noqa: BLE001 + raise _classify_playwright_error(exc, selector) from exc + return (raw or "").strip() + + +def _validate_launch_args(args: dict) -> dict: + if not isinstance(args, dict): + raise LaunchError( + "invalid-args", + f"launch args must be an object, got {type(args).__name__}", + ) + rejected = sorted(set(args) & REJECTED_LAUNCH_KWARGS) + if rejected: + raise LaunchError( + 
"unsupported-launch-option", + ( + f"launch options not supported in v1: {rejected}. " + "persistent_context / user_data_dir are tracked as a v2 item." + ), + ) + unknown = sorted(set(args) - ALLOWED_LAUNCH_KWARGS - REJECTED_LAUNCH_KWARGS) + if unknown: + raise LaunchError( + "unknown-launch-option", + f"unknown launch option(s): {unknown}", + ) + return dict(args) + + +def _looks_like_missing_binary(message: str) -> bool: + """Heuristic for Camoufox's 'please run camoufox fetch' family of errors.""" + lowered = message.lower() + return any( + needle in lowered + for needle in ( + "camoufox fetch", + "no camoufox", + "camoufox is not installed", + "executable doesn't exist", + ) + ) + + +def _wait_until(raw: Any) -> str: + if not isinstance(raw, str): + return "load" + return "commit" if raw == "none" else raw + + +def _safe_page_url(page: Any) -> str: + try: + return page.url or "" + except Exception: # noqa: BLE001 - Playwright raises when the page has closed + return "" + + +async def _safe_page_title(page: Any) -> str: + try: + return await page.title() + except Exception: # noqa: BLE001 + return "" + + +def _schedule_event(protocol: Protocol, name: str, data: dict) -> None: + """Fire-and-forget an event frame from a sync Playwright callback. + + Playwright's ``page.on(...)`` handlers are invoked synchronously from + Playwright's dispatcher task, so we drop onto the running event loop + via ``asyncio.ensure_future``. Errors are swallowed because a console + event that doesn't reach the daemon must not take down a live session. 
+ """ + import asyncio + + try: + loop = asyncio.get_event_loop() + except RuntimeError: + return + try: + loop.create_task(protocol.write_event(name, data)) + except Exception as exc: # noqa: BLE001 + log(f"could not schedule {name} event: {exc}") + + diff --git a/packages/camoufox-sidecar/camoufox_sidecar/snapshot.py b/packages/camoufox-sidecar/camoufox_sidecar/snapshot.py new file mode 100644 index 000000000..c4e1c965e --- /dev/null +++ b/packages/camoufox-sidecar/camoufox_sidecar/snapshot.py @@ -0,0 +1,347 @@ +"""Accessibility snapshot for the Camoufox sidecar. + +Playwright's ``page.accessibility.snapshot()`` gives us a Firefox-side AX tree, +but it doesn't return ``ElementHandle`` instances, so we can't use it to drive +subsequent ``click``/``fill``/``gettext`` commands. + +Instead we run a single ``page.evaluate`` that: + + * walks the DOM; + * classifies each element using a minimal ARIA role mapping that mirrors + what Chrome's AX tree produces for the same markup; + * tags every ref-worthy element with ``data-__ab-ref="eN"``; + * returns one metadata row per tagged element. + +The Python side then resolves each row back to an ``ElementHandle`` via +``page.query_selector("[data-__ab-ref='eN']")`` and populates the +:class:`~camoufox_sidecar.refs.RefCache`. Future ``click``/``fill`` calls by +``@eN`` pull the handle out of the cache; by CSS selector they hit +``page.locator(selector)`` directly. + +The public contract is the same JSON shape the Chrome path emits on +``{"success": true, "data": { snapshot, origin, refs }}``: a preformatted text +tree, a navigation origin, and a ``{ref: {role, name}}`` map for agent +consumption. Parity is measured at the ``refs`` level — exact text matches +aren't expected because Firefox's AX tree differs structurally from Chrome's. +""" + +from __future__ import annotations + +from typing import Any, Optional + +from .refs import RefCache + + +# Interactive roles that always get a ref. 
# Mirrors the Chrome-path ``INTERACTIVE_ROLES`` list in
# ``cli/src/native/snapshot.rs`` so the sidecar emits the same agent-facing
# role names Chrome does.
INTERACTIVE_ROLES: frozenset[str] = frozenset(
    (
        # Activation targets.
        "button",
        "link",
        # Text and numeric entry.
        "textbox",
        "searchbox",
        "spinbutton",
        "slider",
        # Toggles.
        "checkbox",
        "radio",
        "switch",
        # Pickers and their items.
        "combobox",
        "listbox",
        "option",
        # Menu entries.
        "menuitem",
        "menuitemcheckbox",
        "menuitemradio",
        # Composite-widget children.
        "tab",
        "treeitem",
    )
)

# Content roles: a ref is assigned only when the element also has a non-empty
# accessible name. Chrome includes ``heading`` + several landmarks here; v1
# keeps the list small so parity against Firefox is tractable. ``generic`` and
# ``group`` are deliberately left out — without names they only add noise.
CONTENT_ROLES_WITH_NAMES: frozenset[str] = frozenset(
    (
        # Headings and table/list cells.
        "heading",
        "cell",
        "gridcell",
        "columnheader",
        "rowheader",
        "listitem",
        # Sectioning content and landmarks.
        "article",
        "region",
        "main",
        "navigation",
    )
)


# Executed inside the browser context. Returns a list of metadata dicts, one
# per ref-worthy element. The element retains a ``data-__ab-ref`` attribute so
# Python can re-resolve an ``ElementHandle`` for each ref via
# ``page.query_selector("[data-__ab-ref='eN']")``. The attribute is deliberately
# left in place until the next snapshot — Playwright ``Locator`` objects built
# from a ref selector stay valid as long as the page doesn't mutate the
# attribute away, and any mutation (navigation, innerHTML overwrite) is covered
# by the ``framenavigated`` invalidation.
+_SNAPSHOT_JS = r""" +((root, { interactiveRoles, contentRolesWithNames }) => { + const IMPLICIT_ROLES = { + 'a': 'link', + 'button': 'button', + 'select': 'combobox', + 'textarea': 'textbox', + 'h1': 'heading', 'h2': 'heading', 'h3': 'heading', 'h4': 'heading', + 'h5': 'heading', 'h6': 'heading', + 'nav': 'navigation', + 'main': 'main', + 'article': 'article', + 'li': 'listitem', + }; + const INTERACTIVE = new Set(interactiveRoles); + const CONTENT_WITH_NAMES = new Set(contentRolesWithNames); + + const roleFor = (el) => { + const explicit = el.getAttribute('role'); + if (explicit) return explicit.trim().toLowerCase(); + const tag = el.tagName.toLowerCase(); + if (tag === 'a') return el.hasAttribute('href') ? 'link' : null; + if (tag === 'input') { + const t = (el.getAttribute('type') || 'text').toLowerCase(); + if (t === 'checkbox') return 'checkbox'; + if (t === 'radio') return 'radio'; + if (t === 'button' || t === 'submit' || t === 'reset') return 'button'; + if (t === 'range') return 'slider'; + if (t === 'search') return 'searchbox'; + if (t === 'number') return 'spinbutton'; + if (t === 'hidden' || t === 'file') return null; + return 'textbox'; + } + return IMPLICIT_ROLES[tag] || null; + }; + + const stripRefAttr = (node) => { + // Clone and strip our own marker attribute plus any nested inputs so the + // wrapping