From 6efbbf1e627fe306b8eaf843b0097408b69ca85a Mon Sep 17 00:00:00 2001 From: davide Date: Mon, 20 Apr 2026 15:36:07 -0400 Subject: [PATCH 1/9] refactor: introduce BrowserBackend enum for engine dispatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unit 1 of the Camoufox engine plan. Gives BrowserManager an engine-tagged BrowserBackend value so every action-layer function that used to accept a bare &CdpClient now accepts &BrowserBackend and dispatches on the variant. - New modules: native/backend.rs (enum + delegating send_command family), native/camoufox_client.rs (stub for Unit 3), native/cdp/camoufox.rs (stub process for Unit 3). - BrowserProcess grows a Camoufox variant so the enum is total once the sidecar lands. - `launch` matches "camoufox" after validation and returns a structured not-yet-implemented error rather than panicking. - Chrome-only subsystems (inspect proxy, screencast stream) assert on BrowserBackend::Cdp at entry via `require_cdp_for` and surface `engine-incompatible` on Camoufox. - Action-layer modules (actions, interaction, element, snapshot, screenshot, cookies, network, storage, state, tracing) now take &BrowserBackend. Internal helpers keep &CdpClient because they are reachable only through a public entry that has already extracted the CDP client — i.e. they are enum-arm-body helpers. - New smoke test `backend_refactor_smoke.rs` locks in the structured-error shape for `--engine camoufox open` so later units cannot regress the characterization snapshot without an intentional update. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- cli/src/native/actions.rs | 198 ++++++++++++++-------------- cli/src/native/backend.rs | 174 ++++++++++++++++++++++++ cli/src/native/browser.rs | 38 +++++- cli/src/native/camoufox_client.rs | 29 ++++ cli/src/native/cdp/camoufox.rs | 33 +++++ cli/src/native/cdp/mod.rs | 1 + cli/src/native/cookies.rs | 17 ++- cli/src/native/element.rs | 34 ++--- cli/src/native/interaction.rs | 44 +++---- cli/src/native/mod.rs | 4 + cli/src/native/network.rs | 32 +++-- cli/src/native/screenshot.rs | 23 ++-- cli/src/native/snapshot.rs | 6 +- cli/src/native/state.rs | 15 ++- cli/src/native/storage.rs | 13 +- cli/src/native/stream/cdp_loop.rs | 14 +- cli/src/native/tracing.rs | 13 +- cli/tests/backend_refactor_smoke.rs | 125 ++++++++++++++++++ 18 files changed, 634 insertions(+), 179 deletions(-) create mode 100644 cli/src/native/backend.rs create mode 100644 cli/src/native/camoufox_client.rs create mode 100644 cli/src/native/cdp/camoufox.rs create mode 100644 cli/tests/backend_refactor_smoke.rs diff --git a/cli/src/native/actions.rs b/cli/src/native/actions.rs index 5d10e40cc..205fcf871 100644 --- a/cli/src/native/actions.rs +++ b/cli/src/native/actions.rs @@ -587,7 +587,7 @@ impl DaemonState { if let Some(ref browser) = self.browser { if let Ok(session_id) = browser.active_session_id() { for ack_sid in drained.pending_acks { - let _ = stream::ack_screencast_frame(&browser.client, session_id, ack_sid) + let _ = stream::ack_screencast_frame(&browser.backend, session_id, ack_sid) .await; } } @@ -655,7 +655,7 @@ impl DaemonState { if let Some(ref filter) = *df { let has_proxy_creds = self.proxy_credentials.read().await.is_some(); let _ = network::install_domain_filter( - &mgr.client, + &mgr.backend, &attach.session_id, &filter.allowed_domains, has_proxy_creds, @@ -1610,7 +1610,7 @@ async fn auto_launch(state: &mut DaemonState) -> Result<(), String> { if has_proxy_auth { if let Some(ref mgr) = state.browser { if let Ok(session_id) = 
mgr.active_session_id() { - let _ = network::install_domain_filter_fetch(&mgr.client, session_id, true).await; + let _ = network::install_domain_filter_fetch(&mgr.backend, session_id, true).await; } } } @@ -1675,7 +1675,7 @@ async fn try_auto_restore_state(state: &mut DaemonState) { if let Some(path) = state::find_auto_state_file(&session_name) { if let Some(ref mgr) = state.browser { if let Ok(session_id) = mgr.active_session_id() { - let _ = state::load_state(&mgr.client, session_id, &path).await; + let _ = state::load_state(&mgr.backend, session_id, &path).await; } } } @@ -1689,7 +1689,7 @@ async fn load_storage_state(state: &DaemonState, path: &Option) -> Resul if let Some(ref path) = path { if let Some(ref mgr) = state.browser { if let Ok(session_id) = mgr.active_session_id() { - state::load_state(&mgr.client, session_id, path).await?; + state::load_state(&mgr.backend, session_id, path).await?; } } } @@ -2023,18 +2023,18 @@ async fn handle_launch(cmd: &Value, state: &mut DaemonState) -> Result Result { async fn handle_inspect(state: &mut DaemonState) -> Result { let mgr = state.browser.as_ref().ok_or("Browser not launched")?; + // Chrome-only: the DevTools inspect proxy forwards raw CDP and has no + // Playwright/Camoufox analogue. Fail loud with an actionable error. 
+ let cdp_client = mgr.backend.require_cdp_for("inspect (DevTools proxy)")?; + // Shut down any existing inspect server so we always target the current page if let Some(server) = state.inspect_server.take() { server.shutdown(); @@ -2251,7 +2255,7 @@ async fn handle_inspect(state: &mut DaemonState) -> Result { let target_id = mgr.active_target_id()?.to_string(); let chrome_hp = mgr.chrome_host_port().to_string(); - let proxy_handle = mgr.client.inspect_handle(); + let proxy_handle = cdp_client.inspect_handle(); let server = InspectServer::start(proxy_handle, target_id, chrome_hp).await?; let url = format!("http://127.0.0.1:{}", server.port()); @@ -2334,7 +2338,7 @@ async fn handle_close(state: &mut DaemonState) -> Result { if let Some(ref session_name) = state.session_name { if let Ok(session_id) = mgr.active_session_id() { let _ = state::save_state( - &mgr.client, + &mgr.backend, session_id, None, Some(session_name.as_str()), @@ -2416,7 +2420,7 @@ async fn handle_snapshot(cmd: &Value, state: &mut DaemonState) -> Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result { // --------------------------------------------------------------------------- async fn wait_for_selector( - client: &super::cdp::client::CdpClient, + client: &super::backend::BrowserBackend, session_id: &str, selector: &str, state: &str, @@ -3152,7 +3156,7 @@ async fn wait_for_selector( } async fn wait_for_url( - client: &super::cdp::client::CdpClient, + client: &super::backend::BrowserBackend, session_id: &str, pattern: &str, timeout_ms: u64, @@ -3165,7 +3169,7 @@ async fn wait_for_url( } async fn wait_for_text( - client: &super::cdp::client::CdpClient, + client: &super::backend::BrowserBackend, session_id: &str, text: &str, timeout_ms: u64, @@ -3178,7 +3182,7 @@ async fn wait_for_text( } async fn wait_for_function( - client: &super::cdp::client::CdpClient, + client: &super::backend::BrowserBackend, 
session_id: &str, fn_str: &str, timeout_ms: u64, @@ -3188,7 +3192,7 @@ async fn wait_for_function( } async fn poll_until_true( - client: &super::cdp::client::CdpClient, + client: &super::backend::BrowserBackend, session_id: &str, expression: &str, timeout_ms: u64, @@ -3246,7 +3250,7 @@ async fn handle_cookies_get(cmd: &Value, state: &DaemonState) -> Result Result Result { let mgr = state.browser.as_ref().ok_or("Browser not launched")?; let session_id = mgr.active_session_id()?.to_string(); - cookies::clear_cookies(&mgr.client, &session_id).await?; + cookies::clear_cookies(&mgr.backend, &session_id).await?; Ok(json!({ "cleared": true })) } @@ -3287,7 +3291,7 @@ async fn handle_storage_get(cmd: &Value, state: &DaemonState) -> Result Result { @@ -3302,7 +3306,7 @@ async fn handle_storage_set(cmd: &Value, state: &DaemonState) -> Result Result Result Result Result Result Result Result Result Result Result Result Result Result { let mgr = state.browser.as_ref().ok_or("Browser not launched")?; let session_id = mgr.active_session_id()?.to_string(); - native_tracing::trace_start(&mgr.client, &session_id, &mut state.tracing_state).await + native_tracing::trace_start(&mgr.backend, &session_id, &mut state.tracing_state).await } async fn handle_trace_stop(cmd: &Value, state: &mut DaemonState) -> Result { let mgr = state.browser.as_ref().ok_or("Browser not launched")?; let session_id = mgr.active_session_id()?.to_string(); let path = cmd.get("path").and_then(|v| v.as_str()); - native_tracing::trace_stop(&mgr.client, &session_id, &mut state.tracing_state, path).await + native_tracing::trace_stop(&mgr.backend, &session_id, &mut state.tracing_state, path).await } async fn handle_profiler_start(cmd: &Value, state: &mut DaemonState) -> Result { @@ -3953,7 +3957,7 @@ async fn handle_profiler_start(cmd: &Value, state: &mut DaemonState) -> Result Result Result { @@ -4217,7 +4221,7 @@ async fn handle_focus(cmd: &Value, state: &mut DaemonState) -> Result Result Result Result Result Result 
Result Result Result Result Result Result Result Result Result { - interaction::press_key_with_modifiers(&mgr.client, &session_id, "c", Some(modifier)) + interaction::press_key_with_modifiers(&mgr.backend, &session_id, "c", Some(modifier)) .await?; Ok(json!({ "copied": true })) } "paste" => { - interaction::press_key_with_modifiers(&mgr.client, &session_id, "v", Some(modifier)) + interaction::press_key_with_modifiers(&mgr.backend, &session_id, "v", Some(modifier)) .await?; Ok(json!({ "pasted": true })) } @@ -5006,7 +5010,7 @@ async fn handle_screencast_start(cmd: &Value, state: &mut DaemonState) -> Result .unwrap_or(default_h as i64) as i32; stream::start_screencast( - &mgr.client, + &mgr.backend, &session_id, format, quality, @@ -5040,7 +5044,7 @@ async fn handle_screencast_stop(state: &mut DaemonState) -> Result Result Result { interaction::click( - &mgr.client, + &mgr.backend, &session_id, &state.ref_map, selector, @@ -5302,7 +5306,7 @@ async fn execute_subaction( .and_then(|v| v.as_str()) .ok_or("Missing 'value' for fill subaction")?; interaction::fill( - &mgr.client, + &mgr.backend, &session_id, &state.ref_map, selector, @@ -5314,7 +5318,7 @@ async fn execute_subaction( } "check" => { interaction::check( - &mgr.client, + &mgr.backend, &session_id, &state.ref_map, selector, @@ -5325,7 +5329,7 @@ async fn execute_subaction( } "hover" => { interaction::hover( - &mgr.client, + &mgr.backend, &session_id, &state.ref_map, selector, @@ -5336,7 +5340,7 @@ async fn execute_subaction( } "text" => { let text = super::element::get_element_text( - &mgr.client, + &mgr.backend, &session_id, &state.ref_map, selector, @@ -5724,7 +5728,7 @@ async fn handle_drag(cmd: &Value, state: &mut DaemonState) -> Result Result Result Result Result Result Result Result Result Result Result Result Result), + Camoufox(Arc), +} + +impl BrowserBackend { + /// Human-readable engine label, also used as the `"engine"` field in + /// `--json` output so callers can segment telemetry by backend. 
+ pub fn engine_name(&self) -> &'static str { + match self { + BrowserBackend::Cdp(_) => "cdp", + BrowserBackend::Camoufox(_) => "camoufox", + } + } + + pub fn is_cdp(&self) -> bool { + matches!(self, BrowserBackend::Cdp(_)) + } + + pub fn is_camoufox(&self) -> bool { + matches!(self, BrowserBackend::Camoufox(_)) + } + + /// Return the inner CDP client, or a structured `not-yet-implemented` + /// error when the session is running on Camoufox. Action-layer functions + /// call this at the top of their body until their Camoufox arm is + /// implemented; the returned error surfaces to the CLI as a clean failure. + pub fn require_cdp(&self) -> Result<&Arc, String> { + match self { + BrowserBackend::Cdp(c) => Ok(c), + BrowserBackend::Camoufox(_) => Err(not_yet_implemented_error(None)), + } + } + + /// Chrome-only subsystem entry points (`inspect_server`, `stream::cdp_loop`) + /// call this instead of `require_cdp` so the error message makes clear that + /// the feature will not work on Camoufox, rather than "not yet implemented". + pub fn require_cdp_for(&self, operation: &str) -> Result<&Arc, String> { + match self { + BrowserBackend::Cdp(c) => Ok(c), + BrowserBackend::Camoufox(_) => Err(engine_incompatible_error(operation)), + } + } + + /// Option accessor for non-`Result` contexts (e.g. sync setup paths that + /// cannot use `?`). Returns `None` on Camoufox. + pub fn cdp_opt(&self) -> Option<&Arc> { + match self { + BrowserBackend::Cdp(c) => Some(c), + BrowserBackend::Camoufox(_) => None, + } + } + + // --------------------------------------------------------------------- + // Delegating methods: mirror the handful of `CdpClient` methods that + // action-layer code calls on the backend. Each arm of the `match` is the + // "enum arm body" the plan refers to — the Cdp arm forwards to the real + // CDP client; the Camoufox arm returns `not-yet-implemented` until the + // corresponding action is wired up in a later unit. 
Keeping the dispatch + // at this method level (rather than at each call site) lets us lift + // function signatures from `&CdpClient` to `&BrowserBackend` without + // rewriting 130+ action-body lines. + // --------------------------------------------------------------------- + + pub async fn send_command( + &self, + method: &str, + params: Option, + session_id: Option<&str>, + ) -> Result { + match self { + BrowserBackend::Cdp(c) => c.send_command(method, params, session_id).await, + BrowserBackend::Camoufox(_) => Err(not_yet_implemented_error(Some(method))), + } + } + + pub async fn send_command_typed( + &self, + method: &str, + params: &P, + session_id: Option<&str>, + ) -> Result { + match self { + BrowserBackend::Cdp(c) => c.send_command_typed(method, params, session_id).await, + BrowserBackend::Camoufox(_) => Err(not_yet_implemented_error(Some(method))), + } + } + + pub async fn send_command_no_params( + &self, + method: &str, + session_id: Option<&str>, + ) -> Result { + match self { + BrowserBackend::Cdp(c) => c.send_command_no_params(method, session_id).await, + BrowserBackend::Camoufox(_) => Err(not_yet_implemented_error(Some(method))), + } + } + + /// Subscribe to CDP-shaped events. On Camoufox this surfaces a + /// `not-yet-implemented` error; callers in the action layer already + /// propagate with `?` because they return `Result`. 
+ pub fn subscribe(&self) -> Result, String> { + match self { + BrowserBackend::Cdp(c) => Ok(c.subscribe()), + BrowserBackend::Camoufox(_) => Err(not_yet_implemented_error(Some("subscribe"))), + } + } +} + +impl std::fmt::Debug for BrowserBackend { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + BrowserBackend::Cdp(_) => f.debug_struct("BrowserBackend::Cdp").finish_non_exhaustive(), + BrowserBackend::Camoufox(_) => f + .debug_struct("BrowserBackend::Camoufox") + .finish_non_exhaustive(), + } + } +} + +/// Structured error returned when an action reaches a Camoufox arm that +/// Unit 3+ has not filled in yet. The JSON shape is stable so downstream +/// tooling (celeria API, dashboards) can pattern-match on `code`. +pub fn not_yet_implemented_error(action: Option<&str>) -> String { + match action { + Some(a) => format!( + "not-yet-implemented: action `{}` is not yet supported on engine=camoufox", + a + ), + None => { + "not-yet-implemented: this action is not yet supported on engine=camoufox".to_string() + } + } +} + +/// Structured error for Chrome-only subsystems (raw CDP streaming, DevTools +/// inspect proxy) that will never work on Camoufox. Distinguished from +/// `not-yet-implemented` because callers can fall back to `--engine chrome` +/// but should not wait for a Camoufox implementation that isn't coming. 
+pub fn engine_incompatible_error(operation: &str) -> String { + format!( + "engine-incompatible: `{}` requires engine=chrome (Camoufox does not speak raw CDP)", + operation + ) +} diff --git a/cli/src/native/browser.rs b/cli/src/native/browser.rs index 28178dff7..4de241f27 100644 --- a/cli/src/native/browser.rs +++ b/cli/src/native/browser.rs @@ -5,6 +5,8 @@ use std::sync::Arc; use std::time::{Duration, Instant}; use tokio::sync::{broadcast, Mutex}; +use super::backend::{not_yet_implemented_error, BrowserBackend}; +use super::cdp::camoufox::CamoufoxProcess; use super::cdp::chrome::{auto_connect_cdp, launch_chrome, ChromeProcess, LaunchOptions}; use super::cdp::client::CdpClient; use super::cdp::discovery::discover_cdp_url; @@ -265,6 +267,10 @@ impl WaitUntil { pub enum BrowserProcess { Chrome(ChromeProcess), Lightpanda(LightpandaProcess), + /// Stub variant for engine=camoufox. Unit 1 never constructs one — launch + /// returns a not-yet-implemented error first — but the variant exists so + /// the enum is total once Unit 3 wires in the real sidecar subprocess. + Camoufox(CamoufoxProcess), } impl BrowserProcess { @@ -272,6 +278,7 @@ impl BrowserProcess { match self { BrowserProcess::Chrome(p) => p.kill(), BrowserProcess::Lightpanda(p) => p.kill(), + BrowserProcess::Camoufox(p) => p.kill(), } } @@ -279,6 +286,7 @@ impl BrowserProcess { match self { BrowserProcess::Chrome(p) => p.wait_or_kill(timeout), BrowserProcess::Lightpanda(p) => p.kill(), + BrowserProcess::Camoufox(p) => p.wait_or_kill(timeout), } } @@ -287,11 +295,22 @@ impl BrowserProcess { match self { BrowserProcess::Chrome(p) => p.has_exited(), BrowserProcess::Lightpanda(_) => false, + BrowserProcess::Camoufox(_) => false, } } } pub struct BrowserManager { + /// Engine-tagged backend. In Unit 1 this is always `BrowserBackend::Cdp` + /// whenever a `BrowserManager` exists, since `launch` for engine=camoufox + /// returns a structured `not-yet-implemented` error before constructing + /// one. 
Unit 3 begins returning `BrowserBackend::Camoufox` here. + pub backend: BrowserBackend, + /// Direct handle to the CDP client. Kept as a convenience for the + /// Chrome-only code inside this module; always matches the `Cdp` arm of + /// `backend`. Once Camoufox launches land (Unit 3), methods that reach for + /// `self.client` on a non-CDP backend will move to `self.backend` dispatch + /// or early-return with `backend.require_cdp()?`. pub client: Arc, browser_process: Option, ws_url: String, @@ -332,9 +351,15 @@ impl BrowserManager { "lightpanda" => { validate_lightpanda_options(&options)?; } + "camoufox" => { + // Unit 1: validation is deferred to Unit 3; we stop here with a + // structured error so `agent-browser --engine camoufox open ` + // reaches this arm, surfaces a clean failure, and does not panic. + return Err(not_yet_implemented_error(Some("launch"))); + } _ => { return Err(format!( - "Unknown engine '{}'. Supported engines: chrome, lightpanda", + "Unknown engine '{}'. Supported engines: chrome, lightpanda, camoufox", engine )); } @@ -370,7 +395,9 @@ impl BrowserManager { initialize_lightpanda_manager(ws_url, process).await? 
} else { let client = Arc::new(CdpClient::connect(&ws_url).await?); + let backend = BrowserBackend::Cdp(client.clone()); let mut manager = Self { + backend, client, browser_process: Some(process), ws_url, @@ -460,10 +487,12 @@ impl BrowserManager { ) -> Result { let ws_url = resolve_cdp_url(url).await?; let client = Arc::new(CdpClient::connect_with_headers(&ws_url, headers).await?); + let backend = BrowserBackend::Cdp(client.clone()); let stealth = std::env::var("AGENT_BROWSER_STEALTH") .map(|v| matches!(v.as_str(), "1" | "true" | "TRUE" | "True")) .unwrap_or(false); let mut manager = Self { + backend, client, browser_process: None, ws_url, @@ -1366,7 +1395,7 @@ impl BrowserManager { let session_id = self.active_session_id()?; let (object_id, effective_session_id) = - resolve_element_object_id(&self.client, session_id, ref_map, selector, iframe_sessions) + resolve_element_object_id(&self.backend, session_id, ref_map, selector, iframe_sessions) .await?; let describe: Value = self @@ -1623,8 +1652,11 @@ async fn initialize_lightpanda_manager( } }; + let client = Arc::new(client); + let backend = BrowserBackend::Cdp(client.clone()); let mut manager = BrowserManager { - client: Arc::new(client), + backend, + client, browser_process: None, ws_url: ws_url.clone(), pages: Vec::new(), diff --git a/cli/src/native/camoufox_client.rs b/cli/src/native/camoufox_client.rs new file mode 100644 index 000000000..1261c8185 --- /dev/null +++ b/cli/src/native/camoufox_client.rs @@ -0,0 +1,29 @@ +//! Placeholder client for the Camoufox backend. +//! +//! Unit 1 defines the type so that `BrowserBackend::Camoufox(Arc)` +//! compiles and action-layer dispatch can grow a `Camoufox` arm. The real +//! sidecar-driven implementation (reader/writer tasks, JSON-line protocol, +//! request/response demux) lands in Unit 3. + +/// Marker client for engine=camoufox. 
No state in Unit 1 — Unit 3 fills in +/// stdio handles to the Python sidecar, a pending-request map, and broadcast +/// channels for asynchronous `{"event": ...}` frames. +pub struct CamoufoxClient { + _private: (), +} + +impl CamoufoxClient { + /// Construct a stub client. This is the only way to produce a + /// `CamoufoxClient` in Unit 1; action-layer code that matches on + /// `BrowserBackend::Camoufox` surfaces a structured + /// `not-yet-implemented` error rather than touching this value. + pub fn stub() -> Self { + Self { _private: () } + } +} + +impl std::fmt::Debug for CamoufoxClient { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CamoufoxClient").finish_non_exhaustive() + } +} diff --git a/cli/src/native/cdp/camoufox.rs b/cli/src/native/cdp/camoufox.rs new file mode 100644 index 000000000..fb63b416a --- /dev/null +++ b/cli/src/native/cdp/camoufox.rs @@ -0,0 +1,33 @@ +//! Camoufox sidecar process lifecycle (stubbed in Unit 1). +//! +//! Unit 1 only defines the type so `BrowserProcess::Camoufox(CamoufoxProcess)` +//! compiles. `BrowserManager::launch` returns a structured +//! `not-yet-implemented` error before anything tries to construct this value. +//! Unit 3 fills in the real Python-sidecar child-process lifecycle, mirroring +//! `LightpandaProcess`. + +use std::time::Duration; + +/// Placeholder for the Python sidecar subprocess. In Unit 3 this gains +/// ownership of the `std::process::Child`, the stdio handles, and bounded +/// log-drainer threads (mirrors `LightpandaProcess`). +pub struct CamoufoxProcess { + _private: (), +} + +impl CamoufoxProcess { + pub fn kill(&mut self) { + // No-op: Unit 1 cannot construct a live sidecar. Unit 3 replaces this + // with `child.kill()` + drainer teardown. + } + + pub fn wait_or_kill(&mut self, _timeout: Duration) { + // No-op for the same reason as `kill`. 
+ } +} + +impl Drop for CamoufoxProcess { + fn drop(&mut self) { + self.kill(); + } +} diff --git a/cli/src/native/cdp/mod.rs b/cli/src/native/cdp/mod.rs index 2f097372e..98cf6391f 100644 --- a/cli/src/native/cdp/mod.rs +++ b/cli/src/native/cdp/mod.rs @@ -1,3 +1,4 @@ +pub mod camoufox; pub mod chrome; pub mod client; pub mod discovery; diff --git a/cli/src/native/cookies.rs b/cli/src/native/cookies.rs index b0fdbeda4..957fa43da 100644 --- a/cli/src/native/cookies.rs +++ b/cli/src/native/cookies.rs @@ -1,7 +1,7 @@ use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; -use super::cdp::client::CdpClient; +use super::backend::BrowserBackend; #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] @@ -24,7 +24,11 @@ pub struct Cookie { pub same_site: Option, } -pub async fn get_all_cookies(client: &CdpClient, session_id: &str) -> Result, String> { +pub async fn get_all_cookies( + backend: &BrowserBackend, + session_id: &str, +) -> Result, String> { + let client = backend.require_cdp()?; let result = client .send_command_no_params("Network.getAllCookies", Some(session_id)) .await?; @@ -38,10 +42,11 @@ pub async fn get_all_cookies(client: &CdpClient, session_id: &str) -> Result>, ) -> Result, String> { + let client = backend.require_cdp()?; let params = match urls { Some(ref u) if !u.is_empty() => json!({ "urls": u }), _ => json!({}), @@ -60,11 +65,12 @@ pub async fn get_cookies( } pub async fn set_cookies( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, cookies: Vec, current_url: Option<&str>, ) -> Result<(), String> { + let client = backend.require_cdp()?; let cookies: Vec = cookies .into_iter() .map(|mut c| { @@ -92,7 +98,8 @@ pub async fn set_cookies( Ok(()) } -pub async fn clear_cookies(client: &CdpClient, session_id: &str) -> Result<(), String> { +pub async fn clear_cookies(backend: &BrowserBackend, session_id: &str) -> Result<(), String> { + let client = backend.require_cdp()?; client 
.send_command_no_params("Network.clearBrowserCookies", Some(session_id)) .await?; diff --git a/cli/src/native/element.rs b/cli/src/native/element.rs index 7b22ed9ad..6756bf321 100644 --- a/cli/src/native/element.rs +++ b/cli/src/native/element.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use serde_json::Value; -use super::cdp::client::CdpClient; +use super::backend::BrowserBackend; use super::cdp::types::*; #[derive(Debug, Clone)] @@ -147,7 +147,7 @@ pub fn parse_ref(input: &str) -> Option { } pub async fn resolve_element_center( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -214,7 +214,7 @@ pub async fn resolve_element_center( } pub async fn resolve_element_object_id( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -338,7 +338,7 @@ fn resolve_frame_session<'a>( /// (Accessibility.getFullAXTree) that built the ref map during snapshot, /// so role/name matching is guaranteed to be consistent. 
async fn find_node_id_by_role_name( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, role: &str, name: &str, @@ -439,7 +439,7 @@ fn build_selector_js(selector: &str) -> String { } async fn resolve_by_selector( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, selector: &str, ) -> Result<(f64, f64), String> { @@ -479,7 +479,7 @@ fn box_model_center(model: &BoxModel) -> (f64, f64) { } pub async fn get_element_text( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -517,7 +517,7 @@ pub async fn get_element_text( } pub async fn get_element_attribute( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -554,7 +554,7 @@ pub async fn get_element_attribute( } pub async fn is_element_visible( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -599,7 +599,7 @@ pub async fn is_element_visible( } pub async fn is_element_enabled( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -636,7 +636,7 @@ pub async fn is_element_enabled( } pub async fn is_element_checked( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -706,7 +706,7 @@ pub async fn is_element_checked( } pub async fn get_element_inner_text( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -743,7 +743,7 @@ pub async fn get_element_inner_text( } pub async fn get_element_inner_html( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -780,7 +780,7 @@ pub async fn get_element_inner_html( } pub async fn get_element_input_value( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -819,7 +819,7 @@ pub async fn 
get_element_input_value( } pub async fn set_element_value( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -858,7 +858,7 @@ pub async fn set_element_value( } pub async fn get_element_bounding_box( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -898,7 +898,7 @@ pub async fn get_element_bounding_box( } pub async fn get_element_count( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, selector: &str, ) -> Result { @@ -920,7 +920,7 @@ pub async fn get_element_count( } pub async fn get_element_styles( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, diff --git a/cli/src/native/interaction.rs b/cli/src/native/interaction.rs index 0c51ec7d0..dd1dc94e4 100644 --- a/cli/src/native/interaction.rs +++ b/cli/src/native/interaction.rs @@ -2,12 +2,12 @@ use std::collections::HashMap; use serde_json::Value; -use super::cdp::client::CdpClient; +use super::backend::BrowserBackend; use super::cdp::types::*; use super::element::{resolve_element_center, resolve_element_object_id, RefMap}; pub async fn click( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -27,7 +27,7 @@ pub async fn click( } pub async fn dblclick( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -46,7 +46,7 @@ pub async fn dblclick( } pub async fn hover( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -81,7 +81,7 @@ pub async fn hover( } pub async fn fill( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -148,7 +148,7 @@ pub async fn fill( #[allow(clippy::too_many_arguments)] pub async fn type_text( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: 
&RefMap, selector_or_ref: &str, @@ -206,7 +206,7 @@ pub async fn type_text( } pub async fn type_text_into_active_context( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, text: &str, delay_ms: Option, @@ -273,7 +273,7 @@ pub async fn type_text_into_active_context( Ok(()) } -pub async fn press_key(client: &CdpClient, session_id: &str, key: &str) -> Result<(), String> { +pub async fn press_key(client: &BrowserBackend, session_id: &str, key: &str) -> Result<(), String> { press_key_with_modifiers(client, session_id, key, None).await } @@ -285,7 +285,7 @@ pub async fn press_key(client: &CdpClient, session_id: &str, key: &str) -> Resul /// Callers that need a platform-appropriate modifier (e.g. Cmd on macOS, /// Ctrl elsewhere) must choose the value themselves -- see `cfg!(target_os)`. pub async fn press_key_with_modifiers( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, key: &str, modifiers: Option, @@ -339,7 +339,7 @@ pub async fn press_key_with_modifiers( } pub async fn scroll( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: Option<&str>, @@ -391,7 +391,7 @@ pub async fn scroll( } pub async fn select_option( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -437,7 +437,7 @@ pub async fn select_option( } pub async fn check( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -489,7 +489,7 @@ pub async fn check( } pub async fn uncheck( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -548,7 +548,7 @@ pub async fn uncheck( /// 3. If the element has a nested `` → `.click()` that input. /// 4. Otherwise → `.click()` the element itself (handles ARIA role controls). 
async fn js_click_checkbox( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -605,7 +605,7 @@ async fn js_click_checkbox( } pub async fn focus( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -638,7 +638,7 @@ pub async fn focus( } pub async fn clear( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -677,7 +677,7 @@ pub async fn clear( } pub async fn select_all( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -722,7 +722,7 @@ pub async fn select_all( } pub async fn scroll_into_view( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -757,7 +757,7 @@ pub async fn scroll_into_view( } pub async fn dispatch_event( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -802,7 +802,7 @@ pub async fn dispatch_event( } pub async fn highlight( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -844,7 +844,7 @@ pub async fn highlight( } pub async fn tap_touch( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector_or_ref: &str, @@ -885,7 +885,7 @@ pub async fn tap_touch( } async fn dispatch_click( - client: &CdpClient, + client: &BrowserBackend, session_id: &str, x: f64, y: f64, diff --git a/cli/src/native/mod.rs b/cli/src/native/mod.rs index 5979da0d0..f32a649df 100644 --- a/cli/src/native/mod.rs +++ b/cli/src/native/mod.rs @@ -3,8 +3,12 @@ pub mod actions; #[allow(dead_code)] pub mod auth; #[allow(dead_code)] +pub mod backend; +#[allow(dead_code)] pub mod browser; #[allow(dead_code)] +pub mod camoufox_client; +#[allow(dead_code)] pub mod cdp; #[allow(dead_code)] pub mod cookies; diff --git a/cli/src/native/network.rs 
b/cli/src/native/network.rs index 39c69df38..47a368924 100644 --- a/cli/src/native/network.rs +++ b/cli/src/native/network.rs @@ -1,13 +1,14 @@ use serde_json::{json, Value}; use std::collections::HashMap; -use super::cdp::client::CdpClient; +use super::backend::BrowserBackend; pub async fn set_extra_headers( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, headers: &HashMap, ) -> Result<(), String> { + let client = backend.require_cdp()?; let headers_value: Value = headers .iter() .map(|(k, v)| (k.clone(), Value::String(v.clone()))) @@ -26,10 +27,11 @@ pub async fn set_extra_headers( } pub async fn set_offline( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, offline: bool, ) -> Result<(), String> { + let client = backend.require_cdp()?; client .send_command( "Network.emulateNetworkConditions", @@ -45,7 +47,12 @@ pub async fn set_offline( Ok(()) } -pub async fn set_content(client: &CdpClient, session_id: &str, html: &str) -> Result<(), String> { +pub async fn set_content( + backend: &BrowserBackend, + session_id: &str, + html: &str, +) -> Result<(), String> { + let client = backend.require_cdp()?; // Get current frame ID let tree_result = client .send_command_no_params("Page.getFrameTree", Some(session_id)) @@ -134,10 +141,13 @@ fn parse_domain_list(input: &str) -> Vec { } pub async fn sanitize_existing_pages( - client: &CdpClient, + backend: &BrowserBackend, pages: &[super::browser::PageInfo], filter: &DomainFilter, ) { + let Ok(client) = backend.require_cdp() else { + return; + }; for page in pages { if page.url.is_empty() || page.url == "about:blank" { continue; @@ -159,10 +169,11 @@ pub async fn sanitize_existing_pages( } pub async fn install_domain_filter_script( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, allowed_domains: &[String], ) -> Result<(), String> { + let client = backend.require_cdp()?; if allowed_domains.is_empty() { return Ok(()); } @@ -231,10 +242,11 @@ pub async fn 
install_domain_filter_script( /// The actual handling of `Fetch.requestPaused` events happens in /// `resolve_fetch_paused` in the actions module. pub async fn install_domain_filter_fetch( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, handle_auth_requests: bool, ) -> Result<(), String> { + let client = backend.require_cdp()?; let mut params = json!({ "patterns": [{ "urlPattern": "*" }] }); @@ -251,13 +263,13 @@ pub async fn install_domain_filter_fetch( /// 1. JS patching (WebSocket, EventSource, sendBeacon) /// 2. Fetch-based network interception pub async fn install_domain_filter( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, allowed_domains: &[String], handle_auth_requests: bool, ) -> Result<(), String> { - install_domain_filter_script(client, session_id, allowed_domains).await?; - install_domain_filter_fetch(client, session_id, handle_auth_requests).await?; + install_domain_filter_script(backend, session_id, allowed_domains).await?; + install_domain_filter_fetch(backend, session_id, handle_auth_requests).await?; Ok(()) } diff --git a/cli/src/native/screenshot.rs b/cli/src/native/screenshot.rs index 0736691f3..2fa8e241e 100644 --- a/cli/src/native/screenshot.rs +++ b/cli/src/native/screenshot.rs @@ -4,6 +4,7 @@ use std::path::PathBuf; use std::collections::HashMap; +use super::backend::BrowserBackend; use super::cdp::client::CdpClient; use super::cdp::types::*; use super::element::RefMap; @@ -98,16 +99,17 @@ impl Serialize for ScreenshotAnnotation { /// Captures a screenshot via CDP and optionally overlays numbered annotations /// that mirror the Node.js screenshot `annotate` mode. 
pub async fn take_screenshot( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, ref_map: &RefMap, options: &ScreenshotOptions, iframe_sessions: &HashMap, ) -> Result { + let client = backend.require_cdp()?; let target_rect = if options.annotate { match options.selector.as_deref() { Some(selector) => { - get_rect_for_selector(client, session_id, ref_map, selector, iframe_sessions) + get_rect_for_selector(backend, session_id, ref_map, selector, iframe_sessions) .await? } None => None, @@ -117,7 +119,7 @@ pub async fn take_screenshot( }; let raw_annotations = if options.annotate { - collect_annotations(client, session_id, ref_map).await? + collect_annotations(backend, session_id, ref_map).await? } else { Vec::new() }; @@ -131,7 +133,7 @@ pub async fn take_screenshot( }; let base64 = - capture_screenshot_base64(client, session_id, ref_map, options, iframe_sessions).await; + capture_screenshot_base64(backend, session_id, ref_map, options, iframe_sessions).await; if overlay_injected { let _ = remove_annotation_overlay(client, session_id).await; @@ -169,12 +171,13 @@ pub async fn take_screenshot( } async fn capture_screenshot_base64( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, ref_map: &RefMap, options: &ScreenshotOptions, iframe_sessions: &HashMap, ) -> Result { + let client = backend.require_cdp()?; let mut params = CaptureScreenshotParams { format: Some(options.format.clone()), quality: if options.format == "jpeg" { @@ -209,7 +212,7 @@ async fn capture_screenshot_base64( } } else if let Some(ref selector) = options.selector { if let Some(rect) = - get_rect_for_selector(client, session_id, ref_map, selector, iframe_sessions).await? + get_rect_for_selector(backend, session_id, ref_map, selector, iframe_sessions).await? 
{ params.clip = Some(Viewport { x: rect.x, @@ -229,10 +232,11 @@ async fn capture_screenshot_base64( } async fn collect_annotations( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, ref_map: &RefMap, ) -> Result, String> { + let client = backend.require_cdp()?; let entries = ref_map.entries_sorted(); if entries.is_empty() { return Ok(Vec::new()); @@ -322,20 +326,21 @@ async fn collect_annotations( } async fn get_rect_for_selector( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, ref_map: &RefMap, selector: &str, iframe_sessions: &HashMap, ) -> Result, String> { let (object_id, effective_session_id) = super::element::resolve_element_object_id( - client, + backend, session_id, ref_map, selector, iframe_sessions, ) .await?; + let client = backend.require_cdp()?; get_rect_for_object(client, &effective_session_id, &object_id).await } diff --git a/cli/src/native/snapshot.rs b/cli/src/native/snapshot.rs index 8aec758e9..84c41978c 100644 --- a/cli/src/native/snapshot.rs +++ b/cli/src/native/snapshot.rs @@ -2,6 +2,7 @@ use std::collections::HashMap; use serde_json::Value; +use super::backend::BrowserBackend; use super::cdp::client::CdpClient; use super::cdp::types::{ AXNode, AXProperty, AXValue, EvaluateParams, EvaluateResult, GetFullAXTreeResult, @@ -214,13 +215,14 @@ impl RoleNameTracker { } pub async fn take_snapshot( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, options: &SnapshotOptions, ref_map: &mut RefMap, frame_id: Option<&str>, iframe_sessions: &HashMap, ) -> Result { + let client = backend.require_cdp()?; client .send_command_no_params("DOM.enable", Some(session_id)) .await?; @@ -505,7 +507,7 @@ pub async fn take_snapshot( // Snapshot the child frame; errors are silently ignored // (e.g. 
cross-origin iframes) if let Ok(child_text) = Box::pin(take_snapshot( - client, + backend, session_id, options, ref_map, diff --git a/cli/src/native/state.rs b/cli/src/native/state.rs index 882764b9a..bf4645ad0 100644 --- a/cli/src/native/state.rs +++ b/cli/src/native/state.rs @@ -7,6 +7,7 @@ use std::collections::HashSet; use std::fs; use std::path::PathBuf; +use super::backend::BrowserBackend; use super::cdp::client::CdpClient; use super::cdp::types::{ AttachToTargetParams, AttachToTargetResult, CloseTargetParams, CreateTargetParams, @@ -245,14 +246,15 @@ async fn collect_storage_in_target( } pub async fn save_state( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, path: Option<&str>, session_name: Option<&str>, session_id_str: &str, visited_origins: &HashSet, ) -> Result { - let cookies = cookies::get_all_cookies(client, session_id).await?; + let client = backend.require_cdp()?; + let cookies = cookies::get_all_cookies(backend, session_id).await?; let origin_js = r#"(() => { const result = { origin: location.origin, localStorage: [], sessionStorage: [] }; @@ -333,7 +335,12 @@ pub async fn save_state( Ok(save_path) } -pub async fn load_state(client: &CdpClient, session_id: &str, path: &str) -> Result<(), String> { +pub async fn load_state( + backend: &BrowserBackend, + session_id: &str, + path: &str, +) -> Result<(), String> { + let client = backend.require_cdp()?; let json_str = if path.ends_with(".enc") { let key = std::env::var("AGENT_BROWSER_ENCRYPTION_KEY").map_err(|_| { "Encrypted state file requires AGENT_BROWSER_ENCRYPTION_KEY".to_string() @@ -373,7 +380,7 @@ pub async fn load_state(client: &CdpClient, session_id: &str, path: &str) -> Res .iter() .map(|c| serde_json::to_value(c).unwrap_or(Value::Null)) .collect(); - cookies::set_cookies(client, session_id, cookie_values, None).await?; + cookies::set_cookies(backend, session_id, cookie_values, None).await?; } // Load storage per origin diff --git a/cli/src/native/storage.rs 
b/cli/src/native/storage.rs index 5b6ffe5cc..f70fbbb87 100644 --- a/cli/src/native/storage.rs +++ b/cli/src/native/storage.rs @@ -1,14 +1,16 @@ use serde_json::{json, Value}; +use super::backend::BrowserBackend; use super::cdp::client::CdpClient; use super::cdp::types::EvaluateParams; pub async fn storage_get( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, storage_type: &str, key: Option<&str>, ) -> Result { + let client = backend.require_cdp()?; let st = storage_js_name(storage_type); if let Some(k) = key { @@ -38,12 +40,13 @@ pub async fn storage_get( } pub async fn storage_set( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, storage_type: &str, key: &str, value: &str, ) -> Result<(), String> { + let client = backend.require_cdp()?; let st = storage_js_name(storage_type); let js = format!( "{}.setItem({}, {})", @@ -56,10 +59,11 @@ pub async fn storage_set( } pub async fn storage_clear( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, storage_type: &str, ) -> Result<(), String> { + let client = backend.require_cdp()?; let st = storage_js_name(storage_type); let js = format!("{}.clear()", st); eval_simple(client, session_id, &js).await?; @@ -73,6 +77,9 @@ fn storage_js_name(storage_type: &str) -> &str { } } +/// Internal helper: only reachable once the caller has already extracted a +/// CDP client from `BrowserBackend::Cdp`, so the raw `&CdpClient` is an +/// enum-arm body, not a user-facing interface. 
async fn eval_simple(client: &CdpClient, session_id: &str, js: &str) -> Result { let result: super::cdp::types::EvaluateResult = client .send_command_typed( diff --git a/cli/src/native/stream/cdp_loop.rs b/cli/src/native/stream/cdp_loop.rs index e3ce56566..166942a87 100644 --- a/cli/src/native/stream/cdp_loop.rs +++ b/cli/src/native/stream/cdp_loop.rs @@ -3,6 +3,7 @@ use std::sync::Arc; use tokio::sync::{broadcast, watch, Mutex, RwLock}; +use crate::native::backend::BrowserBackend; use crate::native::cdp::client::CdpClient; use crate::native::network; @@ -278,14 +279,19 @@ pub(super) async fn cdp_event_loop( } } +/// Chrome-only entry point — asserts the backend is `BrowserBackend::Cdp` +/// and surfaces `engine-incompatible` on Camoufox, which does not speak raw +/// CDP screencast. Internal helpers below still take `&CdpClient` directly +/// because they are only reachable through this assertion. pub async fn start_screencast( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, format: &str, quality: i32, max_width: i32, max_height: i32, ) -> Result<(), String> { + let client = backend.require_cdp_for("screencast")?; client .send_command( "Page.startScreencast", @@ -302,7 +308,8 @@ pub async fn start_screencast( Ok(()) } -pub async fn stop_screencast(client: &CdpClient, session_id: &str) -> Result<(), String> { +pub async fn stop_screencast(backend: &BrowserBackend, session_id: &str) -> Result<(), String> { + let client = backend.require_cdp_for("screencast")?; client .send_command_no_params("Page.stopScreencast", Some(session_id)) .await?; @@ -310,10 +317,11 @@ pub async fn stop_screencast(client: &CdpClient, session_id: &str) -> Result<(), } pub async fn ack_screencast_frame( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, screencast_session_id: i64, ) -> Result<(), String> { + let client = backend.require_cdp_for("screencast")?; client .send_command( "Page.screencastFrameAck", diff --git a/cli/src/native/tracing.rs 
b/cli/src/native/tracing.rs index 6737c7801..f32f12749 100644 --- a/cli/src/native/tracing.rs +++ b/cli/src/native/tracing.rs @@ -1,6 +1,7 @@ use serde_json::{json, Value}; use std::path::PathBuf; +use super::backend::BrowserBackend; use super::cdp::client::CdpClient; const MAX_PROFILE_EVENTS: usize = 5_000_000; @@ -40,10 +41,11 @@ impl TracingState { } pub async fn trace_start( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, tracing_state: &mut TracingState, ) -> Result { + let client = backend.require_cdp()?; if tracing_state.active { return Err("Tracing already active".to_string()); } @@ -69,11 +71,12 @@ pub async fn trace_start( } pub async fn trace_stop( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, tracing_state: &mut TracingState, path: Option<&str>, ) -> Result { + let client = backend.require_cdp()?; if !tracing_state.active { return Err("No tracing in progress".to_string()); } @@ -181,11 +184,12 @@ pub async fn trace_stop( } pub async fn profiler_start( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, tracing_state: &mut TracingState, categories: Option>, ) -> Result { + let client = backend.require_cdp()?; if tracing_state.active { return Err("Profiling/tracing already active".to_string()); } @@ -219,11 +223,12 @@ pub async fn profiler_start( } pub async fn profiler_stop( - client: &CdpClient, + backend: &BrowserBackend, session_id: &str, tracing_state: &mut TracingState, path: Option<&str>, ) -> Result { + let client = backend.require_cdp()?; if !tracing_state.active { return Err("No profiling in progress".to_string()); } diff --git a/cli/tests/backend_refactor_smoke.rs b/cli/tests/backend_refactor_smoke.rs new file mode 100644 index 000000000..342ecf437 --- /dev/null +++ b/cli/tests/backend_refactor_smoke.rs @@ -0,0 +1,125 @@ +//! Smoke + characterization tests for the `BrowserBackend` refactor (Unit 1 +//! of the Camoufox engine plan). +//! +//! 
These tests cover two things Unit 1 must guarantee: +//! +//! 1. `agent-browser --engine camoufox open <url>` reaches the stub and +//! returns a **structured** `not-yet-implemented` error, not a panic. +//! This is the plan's exit criterion for Unit 1 and also the characterization +//! snapshot: every subsequent commit must keep this error shape stable. +//! +//! 2. Unknown engines are rejected with a message that enumerates +//! `chrome, lightpanda, camoufox` — proves the launch dispatch table +//! has the new arm wired up. +//! +//! Both tests spawn the real CLI binary (no Chrome required) so they run in +//! CI without infrastructure. Chrome + Lightpanda happy-path parity is covered +//! by the existing `#[ignore]`d integration suite in `cli/src/native/e2e_tests.rs` +//! which we ran manually against this refactor to produce the characterization +//! baseline — the invariant those tests enforce (execute_command returns the +//! same response shape before/after Unit 1) is what this smoke file locks in +//! cheaply.
+ +use std::process::Command; +use tempfile::TempDir; + +const BIN: &str = env!("CARGO_BIN_EXE_agent-browser"); + +fn build_cmd(tmp: &TempDir, args: &[&str]) -> Command { + let socket_dir = tmp.path().join("sockets"); + let home = tmp.path().join("home"); + std::fs::create_dir_all(&socket_dir).unwrap(); + std::fs::create_dir_all(&home).unwrap(); + + let mut cmd = Command::new(BIN); + cmd.args(args) + .env("AGENT_BROWSER_SOCKET_DIR", &socket_dir) + .env("HOME", &home) + .env("USERPROFILE", &home) + .env_remove("AGENT_BROWSER_PROVIDER") + .env_remove("AGENT_BROWSER_CDP") + .env_remove("AGENT_BROWSER_AUTO_CONNECT") + .env_remove("AGENT_BROWSER_ENGINE") + .env("NO_COLOR", "1"); + cmd +} + +#[test] +fn camoufox_engine_returns_structured_not_yet_implemented_error() { + let tmp = TempDir::new().unwrap(); + + let output = build_cmd( + &tmp, + &["--engine", "camoufox", "--json", "open", "https://example.com"], + ) + .output() + .expect("failed to invoke agent-browser"); + + // The command must not panic or crash. A Rust panic exits with code 101; + // death by signal leaves no exit code on Unix (a shell would report it as + // 134/137/139). A non-zero but structured exit is fine. + assert!( + !matches!(output.status.code(), Some(101)), + "--engine camoufox open panicked (exit 101)\nstdout:\n{}\nstderr:\n{}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + assert!( + output.status.code().is_some(), + "--engine camoufox open died from a signal (no exit code)\nstderr:\n{}", + String::from_utf8_lossy(&output.stderr) + ); + + let stdout = String::from_utf8(output.stdout).expect("stdout should be utf8"); + + // JSON output must parse and carry the not-yet-implemented marker.
+ let payload: serde_json::Value = serde_json::from_str(&stdout) + .unwrap_or_else(|e| panic!("stdout was not JSON: {}\n---\n{}", e, stdout)); + + assert_eq!( + payload.get("success").and_then(|v| v.as_bool()), + Some(false), + "expected success:false for camoufox stub, got payload:\n{}", + stdout + ); + + let error = payload + .get("error") + .and_then(|v| v.as_str()) + .expect("payload must contain an error string"); + assert!( + error.contains("not-yet-implemented") && error.contains("camoufox"), + "error message did not mention not-yet-implemented/camoufox: {:?}", + error + ); +} + +#[test] +fn unknown_engine_lists_camoufox_in_supported_engines() { + let tmp = TempDir::new().unwrap(); + + let output = build_cmd( + &tmp, + &["--engine", "nonsense", "--json", "open", "https://example.com"], + ) + .output() + .expect("failed to invoke agent-browser"); + + let stdout = String::from_utf8(output.stdout).expect("stdout should be utf8"); + + // Either the flag layer rejects it or the launch layer does; both should + // surface a user-visible message that enumerates the valid engines, + // including `camoufox` now that Unit 1 has wired it in. + let payload: serde_json::Value = serde_json::from_str(&stdout) + .unwrap_or_else(|e| panic!("stdout was not JSON: {}\n---\n{}", e, stdout)); + let error = payload + .get("error") + .and_then(|v| v.as_str()) + .unwrap_or_default(); + + assert!( + error.contains("camoufox"), + "unknown-engine error should enumerate `camoufox` among supported engines, got: {:?}", + error + ); +} From ef9a80f14862339c649996b39bbba0df58d66df1 Mon Sep 17 00:00:00 2001 From: davide Date: Mon, 20 Apr 2026 15:58:50 -0400 Subject: [PATCH 2/9] feat(camoufox): sidecar Python package skeleton + handshake Lands Unit 2 of the Camoufox engine plan: a minimal Python sidecar that agent-browser will spawn when --engine camoufox is selected. 
The sidecar emits {"event": "ready"} on startup, speaks JSON-line frames over stdio, and cleanly shuts down on stdin EOF, SIGTERM, or a close command. - packages/camoufox-sidecar/ (new): protocol.py, session.py, __main__.py, __init__.py, pyproject.toml, README.md - Launch-kwarg allowlist from camoufox.com/python/usage; persistent_context and user_data_dir explicitly rejected in v1 via distinct error code - tests/test_lifecycle.py covers all 5 Unit 2 scenarios (ready+close, stdin-EOF cleanup, unknown-launch-option, camoufox-not-installed, SIGTERM child cleanup) - scripts/sync-version.js: syncs pyproject + __init__ version alongside the crate, converting the npm-style version to PEP 440 local form (0.26.0-celeria-stealth.2 -> 0.26.0+celeria.stealth.2) Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitignore | 8 + packages/camoufox-sidecar/README.md | 32 +++ .../camoufox_sidecar/__init__.py | 3 + .../camoufox_sidecar/__main__.py | 153 ++++++++++++++ .../camoufox_sidecar/protocol.py | 107 ++++++++++ .../camoufox_sidecar/session.py | 188 ++++++++++++++++++ packages/camoufox-sidecar/pyproject.toml | 33 +++ packages/camoufox-sidecar/tests/conftest.py | 106 ++++++++++ .../camoufox-sidecar/tests/test_lifecycle.py | 155 +++++++++++++++ scripts/sync-version.js | 77 +++++++ 10 files changed, 862 insertions(+) create mode 100644 packages/camoufox-sidecar/README.md create mode 100644 packages/camoufox-sidecar/camoufox_sidecar/__init__.py create mode 100644 packages/camoufox-sidecar/camoufox_sidecar/__main__.py create mode 100644 packages/camoufox-sidecar/camoufox_sidecar/protocol.py create mode 100644 packages/camoufox-sidecar/camoufox_sidecar/session.py create mode 100644 packages/camoufox-sidecar/pyproject.toml create mode 100644 packages/camoufox-sidecar/tests/conftest.py create mode 100644 packages/camoufox-sidecar/tests/test_lifecycle.py diff --git a/.gitignore b/.gitignore index 7bd71de3e..eae6d2d80 100644 --- a/.gitignore +++ b/.gitignore @@ -67,3 +67,11 @@ 
docs/package-lock.json # next .next/ out/ + +# Python +__pycache__/ +*.py[cod] +*.egg-info/ +.pytest_cache/ +.venv/ +packages/camoufox-sidecar/.venv/ diff --git a/packages/camoufox-sidecar/README.md b/packages/camoufox-sidecar/README.md new file mode 100644 index 000000000..f205c1846 --- /dev/null +++ b/packages/camoufox-sidecar/README.md @@ -0,0 +1,32 @@ +# camoufox-sidecar + +Python sidecar that agent-browser spawns when `--engine camoufox` is selected. +It drives [Camoufox](https://camoufox.com/) via Playwright and speaks a +JSON-line protocol over stdio to the Rust daemon. + +This package is not meant to be used directly by humans. See +`docs/engines/camoufox.md` in the agent-browser repo for the user-facing docs. + +## Install + +``` +pip install -U "camoufox[geoip]" +python -m camoufox fetch +pip install -e packages/camoufox-sidecar +``` + +## Run + +``` +python -m camoufox_sidecar +``` + +Emits `{"event": "ready"}` on startup, then reads JSON-line commands from +stdin. + +## Test + +``` +pip install -e 'packages/camoufox-sidecar[test]' +pytest packages/camoufox-sidecar/tests/ +``` diff --git a/packages/camoufox-sidecar/camoufox_sidecar/__init__.py b/packages/camoufox-sidecar/camoufox_sidecar/__init__.py new file mode 100644 index 000000000..03772ccb8 --- /dev/null +++ b/packages/camoufox-sidecar/camoufox_sidecar/__init__.py @@ -0,0 +1,3 @@ +"""camoufox-sidecar: Playwright+Camoufox driver process for agent-browser.""" + +__version__ = "0.26.0+celeria.stealth.2" diff --git a/packages/camoufox-sidecar/camoufox_sidecar/__main__.py b/packages/camoufox-sidecar/camoufox_sidecar/__main__.py new file mode 100644 index 000000000..e2556b03b --- /dev/null +++ b/packages/camoufox-sidecar/camoufox_sidecar/__main__.py @@ -0,0 +1,153 @@ +"""Sidecar entry point. + +Lifecycle: + + 1. Attach to stdin/stdout, emit `{"event": "ready"}`. + 2. Read command frames from stdin; dispatch to Session handlers. + 3. Exit cleanly on stdin EOF, SIGTERM, SIGINT, or `{"cmd": "close"}`. 
+ +Unit 2 only ships lifecycle commands (`launch`, `close`). Anything else is +responded to with `not-yet-supported` so agents get a clear signal rather than +silent drops; later units replace those stubs. +""" + +from __future__ import annotations + +import asyncio +import signal +import sys +from typing import Any, Awaitable, Callable, Optional + +from .protocol import Protocol, log +from .session import LaunchError, Session + + +class Sidecar: + def __init__(self) -> None: + self.protocol = Protocol() + self.session = Session() + self._shutdown = asyncio.Event() + + async def run(self) -> int: + await self.protocol.start() + _install_signal_handlers(self._shutdown) + + await self.protocol.write_event("ready", {"pid": _own_pid()}) + + reader_task = asyncio.create_task(self._read_loop(), name="sidecar-reader") + shutdown_task = asyncio.create_task( + self._shutdown.wait(), name="sidecar-shutdown" + ) + try: + done, _ = await asyncio.wait( + {reader_task, shutdown_task}, + return_when=asyncio.FIRST_COMPLETED, + ) + for task in done: + exc = task.exception() + if exc is not None: + log(f"sidecar task raised: {exc!r}") + finally: + reader_task.cancel() + shutdown_task.cancel() + for task in (reader_task, shutdown_task): + try: + await task + except (asyncio.CancelledError, Exception): # noqa: BLE001 + pass + await self.session.close() + return 0 + + async def _read_loop(self) -> None: + try: + async for frame in self.protocol.messages(): + await self._dispatch(frame) + finally: + # stdin closed → daemon gone → we shut down + self._shutdown.set() + + async def _dispatch(self, frame: dict) -> None: + cmd = frame.get("cmd") + req_id = frame.get("id") + args = frame.get("args") or {} + + if cmd == "close": + await self.protocol.write_response(req_id, ok=True, result={"closed": True}) + self._shutdown.set() + return + + handler = _HANDLERS.get(cmd) # type: ignore[arg-type] + if handler is None: + await self.protocol.write_response( + req_id, + ok=False, + error={ + 
"code": "not-yet-supported" if isinstance(cmd, str) else "invalid-frame", + "message": ( + f"command {cmd!r} is not implemented in this sidecar version" + if isinstance(cmd, str) + else "frame is missing a 'cmd' field" + ), + }, + ) + return + + try: + result = await handler(self, args) + except LaunchError as exc: + await self.protocol.write_response( + req_id, + ok=False, + error={"code": exc.code, "message": exc.message}, + ) + return + except Exception as exc: # noqa: BLE001 + log(f"handler {cmd} raised: {exc!r}") + await self.protocol.write_response( + req_id, + ok=False, + error={"code": "internal-error", "message": str(exc)}, + ) + return + + await self.protocol.write_response(req_id, ok=True, result=result) + + +Handler = Callable[["Sidecar", dict], Awaitable[Any]] + + +async def _cmd_launch(sidecar: "Sidecar", args: dict) -> dict: + return await sidecar.session.launch(args) + + +_HANDLERS: dict[str, Handler] = { + "launch": _cmd_launch, +} + + +def _own_pid() -> int: + import os + + return os.getpid() + + +def _install_signal_handlers(shutdown: asyncio.Event) -> None: + loop = asyncio.get_event_loop() + for sig in (signal.SIGTERM, signal.SIGINT): + try: + loop.add_signal_handler(sig, shutdown.set) + except (NotImplementedError, RuntimeError): + # Windows / non-main thread: fall back to default disposition. + pass + + +def main(argv: Optional[list[str]] = None) -> int: + _ = argv # reserved for future flags; the sidecar takes config via stdio + try: + return asyncio.run(Sidecar().run()) + except KeyboardInterrupt: + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/packages/camoufox-sidecar/camoufox_sidecar/protocol.py b/packages/camoufox-sidecar/camoufox_sidecar/protocol.py new file mode 100644 index 000000000..18d3c30db --- /dev/null +++ b/packages/camoufox-sidecar/camoufox_sidecar/protocol.py @@ -0,0 +1,107 @@ +"""JSON-line stdio protocol used by the Rust daemon <-> Python sidecar. + +Frames are single-line JSON documents. 
Requests and responses carry a +monotonic `id`; events are unsolicited and carry no `id`. + + request: {"id": 42, "cmd": "<name>", "args": {...}} + response: {"id": 42, "ok": true, "result": {...}} + {"id": 42, "ok": false, "error": {"code": "...", "message": "..."}} + event: {"event": "<name>", "data": {...}} + +stdout is reserved for these frames. stderr is free-form diagnostic logging +that the Rust side captures when --verbose is on. +""" + +from __future__ import annotations + +import asyncio +import json +import sys +from typing import Any, AsyncIterator, Optional + + +async def _stdin_reader() -> asyncio.StreamReader: + """Attach an asyncio StreamReader to sys.stdin.""" + loop = asyncio.get_event_loop() + reader = asyncio.StreamReader() + protocol = asyncio.StreamReaderProtocol(reader) + await loop.connect_read_pipe(lambda: protocol, sys.stdin) + return reader + + +class Protocol: + """Async JSON-line protocol bound to stdin/stdout. + + Writes are synchronous and flushed — correctness beats throughput here, + since the Rust side relies on line-boundary framing and the volume is low. + """ + + def __init__(self) -> None: + self._reader: Optional[asyncio.StreamReader] = None + self._write_lock = asyncio.Lock() + + async def start(self) -> None: + if self._reader is None: + self._reader = await _stdin_reader() + + async def messages(self) -> AsyncIterator[dict]: + """Yield incoming frames until stdin EOF. + + Lines that are not valid JSON are logged to stderr and answered with an + {"code": "invalid-frame"} response whose id is null; non-object JSON is + only logged. The iterator itself does not raise on parse errors.
+ """ + assert self._reader is not None, "Protocol.start() must be called first" + while True: + raw = await self._reader.readline() + if not raw: + return + line = raw.decode("utf-8", errors="replace").rstrip("\r\n") + if not line.strip(): + continue + try: + frame = json.loads(line) + except json.JSONDecodeError as exc: + log(f"invalid JSON on stdin: {exc}: {line!r}") + await self.write_response( + req_id=None, + ok=False, + error={ + "code": "invalid-frame", + "message": f"could not parse JSON: {exc}", + }, + ) + continue + if not isinstance(frame, dict): + log(f"non-object frame on stdin: {line!r}") + continue + yield frame + + async def write_event(self, name: str, data: Optional[dict] = None) -> None: + await self._write({"event": name, "data": data or {}}) + + async def write_response( + self, + req_id: Optional[int], + ok: bool, + result: Optional[Any] = None, + error: Optional[dict] = None, + ) -> None: + frame: dict[str, Any] = {"id": req_id, "ok": ok} + if ok: + frame["result"] = result if result is not None else {} + else: + frame["error"] = error or {"code": "unknown", "message": ""} + await self._write(frame) + + async def _write(self, frame: dict) -> None: + encoded = json.dumps(frame, separators=(",", ":"), ensure_ascii=False) + async with self._write_lock: + sys.stdout.write(encoded + "\n") + sys.stdout.flush() + + +def log(message: str) -> None: + """Diagnostic logging. Goes to stderr; never touches the protocol pipe.""" + sys.stderr.write(f"[camoufox-sidecar] {message}\n") + sys.stderr.flush() diff --git a/packages/camoufox-sidecar/camoufox_sidecar/session.py b/packages/camoufox-sidecar/camoufox_sidecar/session.py new file mode 100644 index 000000000..54b882254 --- /dev/null +++ b/packages/camoufox-sidecar/camoufox_sidecar/session.py @@ -0,0 +1,188 @@ +"""Session holds the AsyncCamoufox browser for the sidecar's lifetime. 
+ +Unit 2 owns the lifecycle (launch / close / cleanup); later units add the +command handlers (navigate, snapshot, click, ...). The launch-kwarg allowlist +lives here because it's the public contract with the Rust side. +""" + +from __future__ import annotations + +from typing import Any, Optional + +from .protocol import log + +# Allowlist derived from https://camoufox.com/python/usage/ — keep in sync with +# the plan's Unit 2 Approach. New kwargs must be added deliberately so the +# Rust side knows to expose them; silently passing unknown kwargs through is a +# footgun when Camoufox bumps and adds options we haven't reviewed. +ALLOWED_LAUNCH_KWARGS: frozenset[str] = frozenset( + { + "headless", + "humanize", + "os", + "locale", + "geoip", + "screen", + "window", + "webgl_config", + "fonts", + "block_images", + "block_webrtc", + "block_webgl", + "disable_coop", + "executable_path", + "proxy", + "addons", + "exclude_default_addons", + "main_world_eval", + "enable_cache", + "config", + } +) + +# Explicitly rejected in v1 (see plan). Surfacing a distinct code makes the +# "not-yet-supported" state obvious rather than conflating it with typos. +REJECTED_LAUNCH_KWARGS: frozenset[str] = frozenset( + { + "persistent_context", + "user_data_dir", + } +) + + +class LaunchError(Exception): + """Structured error surfaced as a {"ok": false, "error": {...}} response.""" + + def __init__(self, code: str, message: str) -> None: + super().__init__(message) + self.code = code + self.message = message + + +class Session: + """Holds the single AsyncCamoufox browser + its playwright context. + + The browser is launched lazily on the first `launch` command so that + bringing up the sidecar process itself does not require Camoufox to be + installed — useful for the startup-and-close lifecycle test. 
+ """ + + def __init__(self) -> None: + self._camoufox_cm: Optional[Any] = None # AsyncCamoufox context manager + self._browser: Optional[Any] = None + self._launched: bool = False + + @property + def is_launched(self) -> bool: + return self._launched + + async def launch(self, args: Optional[dict] = None) -> dict: + """Launch the Camoufox browser with validated kwargs. + + Returns a result dict for the response frame. Raises LaunchError for + validation or environment failures that should surface as structured + errors to the Rust side. + """ + if self._launched: + raise LaunchError( + "already-launched", + "sidecar already has an active Camoufox browser; close it first", + ) + + kwargs = _validate_launch_args(args or {}) + + try: + from camoufox.async_api import AsyncCamoufox # type: ignore + except ImportError as exc: + raise LaunchError( + "camoufox-not-installed", + ( + "camoufox Python package is not importable: " + f"{exc}. Install with `pip install -U 'camoufox[geoip]'`." + ), + ) from exc + + cm = AsyncCamoufox(**kwargs) + try: + browser = await cm.__aenter__() + except FileNotFoundError as exc: + # Camoufox raises FileNotFoundError when the browser binary has + # not been fetched. Surface the actionable message. + raise LaunchError( + "camoufox-not-installed", + ( + f"Camoufox browser binary not found: {exc}. " + "Run `python -m camoufox fetch`." + ), + ) from exc + except Exception as exc: # noqa: BLE001 + message = str(exc) + if _looks_like_missing_binary(message): + raise LaunchError( + "camoufox-not-installed", + ( + f"Camoufox browser binary not available: {message}. " + "Run `python -m camoufox fetch`." + ), + ) from exc + raise LaunchError("launch-failed", message) from exc + + self._camoufox_cm = cm + self._browser = browser + self._launched = True + log("camoufox launched") + return {"launched": True} + + async def close(self) -> dict: + """Close the browser if launched. 
Safe to call when never launched.""" + cm = self._camoufox_cm + self._camoufox_cm = None + self._browser = None + self._launched = False + if cm is None: + return {"closed": False} + try: + await cm.__aexit__(None, None, None) + except Exception as exc: # noqa: BLE001 + log(f"error during close: {exc}") + # Don't re-raise; the sidecar is shutting down either way and + # leaving a half-closed state just masks the root cause. + return {"closed": True} + + +def _validate_launch_args(args: dict) -> dict: + if not isinstance(args, dict): + raise LaunchError( + "invalid-args", + f"launch args must be an object, got {type(args).__name__}", + ) + rejected = sorted(set(args) & REJECTED_LAUNCH_KWARGS) + if rejected: + raise LaunchError( + "unsupported-launch-option", + ( + f"launch options not supported in v1: {rejected}. " + "persistent_context / user_data_dir are tracked as a v2 item." + ), + ) + unknown = sorted(set(args) - ALLOWED_LAUNCH_KWARGS - REJECTED_LAUNCH_KWARGS) + if unknown: + raise LaunchError( + "unknown-launch-option", + f"unknown launch option(s): {unknown}", + ) + return dict(args) + + +def _looks_like_missing_binary(message: str) -> bool: + """Heuristic for Camoufox's 'please run camoufox fetch' family of errors.""" + lowered = message.lower() + return any( + needle in lowered + for needle in ( + "camoufox fetch", + "no camoufox", + "camoufox is not installed", + "executable doesn't exist", + ) + ) diff --git a/packages/camoufox-sidecar/pyproject.toml b/packages/camoufox-sidecar/pyproject.toml new file mode 100644 index 000000000..6511fa938 --- /dev/null +++ b/packages/camoufox-sidecar/pyproject.toml @@ -0,0 +1,33 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "camoufox-sidecar" +version = "0.26.0+celeria.stealth.2" +description = "Sidecar process that drives Camoufox on behalf of agent-browser" +readme = "README.md" +requires-python = ">=3.10" +license = { text = "Apache-2.0" } +authors = 
[{ name = "Celeria" }] +dependencies = [ + "camoufox[geoip]>=0.4.11", + "playwright>=1.48", +] + +[project.optional-dependencies] +test = [ + "pytest>=7.4", + "pytest-asyncio>=0.23", + "psutil>=5.9", +] + +[project.scripts] +camoufox-sidecar = "camoufox_sidecar.__main__:main" + +[tool.setuptools.packages.find] +include = ["camoufox_sidecar*"] + +[tool.pytest.ini_options] +asyncio_mode = "auto" +testpaths = ["tests"] diff --git a/packages/camoufox-sidecar/tests/conftest.py b/packages/camoufox-sidecar/tests/conftest.py new file mode 100644 index 000000000..ab90b759f --- /dev/null +++ b/packages/camoufox-sidecar/tests/conftest.py @@ -0,0 +1,106 @@ +"""Shared pytest helpers for the sidecar test suite. + +The helpers launch the real `camoufox_sidecar` module as a subprocess and +talk to it over stdio. Tests that need an actual Camoufox browser are gated +on the `camoufox_available` fixture so the suite remains informative when +run on a machine that only has the sidecar package installed. +""" + +from __future__ import annotations + +import asyncio +import importlib.util +import json +import os +import sys +from dataclasses import dataclass +from typing import Any, AsyncIterator, Optional + +import pytest + + +SIDECAR_MODULE = "camoufox_sidecar" + + +def _camoufox_importable() -> bool: + return importlib.util.find_spec("camoufox") is not None + + +@pytest.fixture +def camoufox_available() -> bool: + return _camoufox_importable() + + +@pytest.fixture +def requires_camoufox(camoufox_available: bool) -> None: + if not camoufox_available: + pytest.skip("camoufox package not installed") + + +@dataclass +class Sidecar: + proc: asyncio.subprocess.Process + + @property + def pid(self) -> int: + return self.proc.pid + + async def read_frame(self, timeout: float = 5.0) -> dict: + assert self.proc.stdout is not None + line = await asyncio.wait_for(self.proc.stdout.readline(), timeout=timeout) + if not line: + raise RuntimeError("sidecar closed stdout before sending a frame") + 
return json.loads(line.decode("utf-8")) + + async def expect_event(self, name: str, timeout: float = 5.0) -> dict: + frame = await self.read_frame(timeout=timeout) + assert frame.get("event") == name, f"expected event {name!r}, got {frame!r}" + return frame + + async def send(self, frame: dict) -> None: + assert self.proc.stdin is not None + self.proc.stdin.write((json.dumps(frame) + "\n").encode("utf-8")) + await self.proc.stdin.drain() + + async def close_stdin(self) -> None: + assert self.proc.stdin is not None + self.proc.stdin.close() + try: + await self.proc.stdin.wait_closed() + except Exception: # noqa: BLE001 + pass + + async def wait(self, timeout: float = 5.0) -> int: + return await asyncio.wait_for(self.proc.wait(), timeout=timeout) + + async def kill(self) -> None: + if self.proc.returncode is None: + self.proc.kill() + try: + await asyncio.wait_for(self.proc.wait(), timeout=5.0) + except asyncio.TimeoutError: + pass + + +async def spawn_sidecar(env: Optional[dict] = None) -> Sidecar: + env_vars = {**os.environ, **(env or {})} + proc = await asyncio.create_subprocess_exec( + sys.executable, + "-u", + "-m", + SIDECAR_MODULE, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + env=env_vars, + ) + return Sidecar(proc=proc) + + +@pytest.fixture +async def sidecar() -> AsyncIterator[Sidecar]: + sc = await spawn_sidecar() + try: + yield sc + finally: + await sc.kill() diff --git a/packages/camoufox-sidecar/tests/test_lifecycle.py b/packages/camoufox-sidecar/tests/test_lifecycle.py new file mode 100644 index 000000000..6621d9c2f --- /dev/null +++ b/packages/camoufox-sidecar/tests/test_lifecycle.py @@ -0,0 +1,155 @@ +"""Lifecycle tests for the Camoufox sidecar. + +Covers the 5 scenarios from Unit 2 of the plan: + + 1. ready + close happy path + 2. stdin EOF triggers cleanup + 3. Unknown launch option → structured error, session still usable + 4. Camoufox binary missing → actionable error + 5. 
SIGTERM cleans up the Firefox child within 5s +""" + +from __future__ import annotations + +import asyncio +import os +import signal + +import pytest + +try: + import psutil # type: ignore +except ImportError: # pragma: no cover - test-only dep + psutil = None # type: ignore + +from conftest import Sidecar, spawn_sidecar # noqa: E402 — pytest injects tests/ onto sys.path + + +pytestmark = pytest.mark.asyncio + + +async def test_ready_and_close(sidecar: Sidecar) -> None: + """#1: Sidecar starts, emits `ready`, accepts `close`, exits 0 fast.""" + frame = await asyncio.wait_for(sidecar.expect_event("ready"), timeout=2.0) + assert isinstance(frame.get("data"), dict) + assert frame["data"].get("pid") == sidecar.pid + + await sidecar.send({"id": 1, "cmd": "close"}) + + response = await sidecar.read_frame(timeout=2.0) + assert response == {"id": 1, "ok": True, "result": {"closed": True}} + + rc = await sidecar.wait(timeout=2.0) + assert rc == 0 + + +async def test_stdin_eof_triggers_cleanup(sidecar: Sidecar) -> None: + """#2: Closing stdin shuts the sidecar down within 1s even without a browser.""" + await sidecar.expect_event("ready") + + await sidecar.close_stdin() + + rc = await sidecar.wait(timeout=2.0) + assert rc == 0 + + +async def test_unknown_launch_option_returns_structured_error(sidecar: Sidecar) -> None: + """#3: An unknown kwarg is rejected and the session remains usable.""" + await sidecar.expect_event("ready") + + await sidecar.send( + {"id": 1, "cmd": "launch", "args": {"totally_made_up_option": True}} + ) + response = await sidecar.read_frame(timeout=2.0) + assert response["id"] == 1 + assert response["ok"] is False + assert response["error"]["code"] == "unknown-launch-option" + assert "totally_made_up_option" in response["error"]["message"] + + # Session still usable: close cleanly. 
+ await sidecar.send({"id": 2, "cmd": "close"}) + close_resp = await sidecar.read_frame(timeout=2.0) + assert close_resp["id"] == 2 and close_resp["ok"] is True + + rc = await sidecar.wait(timeout=2.0) + assert rc == 0 + + +async def test_rejected_launch_option_uses_distinct_code(sidecar: Sidecar) -> None: + """#3b: persistent_context / user_data_dir are explicitly rejected in v1.""" + await sidecar.expect_event("ready") + + await sidecar.send( + {"id": 1, "cmd": "launch", "args": {"persistent_context": True}} + ) + response = await sidecar.read_frame(timeout=2.0) + assert response["ok"] is False + assert response["error"]["code"] == "unsupported-launch-option" + + +async def test_missing_camoufox_binary_reports_actionable_error( + sidecar: Sidecar, camoufox_available: bool +) -> None: + """#4: When Camoufox can't find its binary, error mentions `camoufox fetch`.""" + if not camoufox_available: + pytest.skip("requires the camoufox python package to exercise launch") + + await sidecar.expect_event("ready") + + # Force the "binary missing" failure mode by pointing Camoufox at an + # executable that doesn't exist. Camoufox itself raises when launch time + # can't find a real browser. + await sidecar.send( + { + "id": 1, + "cmd": "launch", + "args": { + "headless": True, + "executable_path": "/nonexistent/camoufox-binary-for-test", + }, + } + ) + response = await sidecar.read_frame(timeout=30.0) + assert response["id"] == 1 + assert response["ok"] is False + # Accept either the specific mapping or a launch-failed fallback whose + # message still points the user at `camoufox fetch`. 
+ code = response["error"]["code"] + msg = response["error"]["message"].lower() + assert code in {"camoufox-not-installed", "launch-failed"}, response + assert "camoufox" in msg + + +async def test_sigterm_cleans_up_firefox_child(requires_camoufox: None) -> None: + """#5: SIGTERM tears down the sidecar and its Firefox grandchild in <5s.""" + if psutil is None: + pytest.skip("psutil is required for process-tree assertions") + + sc = await spawn_sidecar() + try: + await sc.expect_event("ready") + await sc.send({"id": 1, "cmd": "launch", "args": {"headless": True}}) + response = await sc.read_frame(timeout=60.0) + assert response["ok"] is True, response + + parent = psutil.Process(sc.pid) + children_before = parent.children(recursive=True) + assert children_before, "expected camoufox to have spawned at least one child" + child_pids = [c.pid for c in children_before] + + os.kill(sc.pid, signal.SIGTERM) + + rc = await sc.wait(timeout=5.0) + assert rc == 0 + + # Children must be gone within 5s of the parent exit. + deadline = asyncio.get_event_loop().time() + 5.0 + while asyncio.get_event_loop().time() < deadline: + alive = [pid for pid in child_pids if psutil.pid_exists(pid)] + if not alive: + break + await asyncio.sleep(0.1) + alive = [pid for pid in child_pids if psutil.pid_exists(pid)] + assert not alive, f"child processes still alive after SIGTERM: {alive}" + finally: + await sc.kill() diff --git a/scripts/sync-version.js b/scripts/sync-version.js index 5cafc0605..322bfa9bf 100644 --- a/scripts/sync-version.js +++ b/scripts/sync-version.js @@ -56,6 +56,83 @@ if (dashboardPkg.version !== version) { console.log(` packages/dashboard/package.json already up to date`); } +// Convert the npm-style version (which may contain multiple `-` separators +// like `0.26.0-celeria-stealth.2`) into a PEP 440-compliant local version +// identifier (`0.26.0+celeria.stealth.2`). 
PEP 440 does not allow hyphens +// inside the public release segment, but `+` followed by dot-separated +// alphanumerics is valid and preserves our pre-release labeling intent. +function toPep440(npmVersion) { + const dashIdx = npmVersion.indexOf("-"); + if (dashIdx === -1) return npmVersion; + const release = npmVersion.slice(0, dashIdx); + const local = npmVersion.slice(dashIdx + 1).replace(/-/g, "."); + return `${release}+${local}`; +} + +const pep440Version = toPep440(version); + +// Update packages/camoufox-sidecar/pyproject.toml +const sidecarPyprojectPath = join( + rootDir, + "packages", + "camoufox-sidecar", + "pyproject.toml" +); +let sidecarPyproject = readFileSync(sidecarPyprojectPath, "utf-8"); +const sidecarVersionRegex = /^version\s*=\s*"[^"]*"/m; +const newSidecarVersion = `version = "${pep440Version}"`; +if (sidecarVersionRegex.test(sidecarPyproject)) { + const oldMatch = sidecarPyproject.match(sidecarVersionRegex)?.[0]; + if (oldMatch !== newSidecarVersion) { + sidecarPyproject = sidecarPyproject.replace( + sidecarVersionRegex, + newSidecarVersion + ); + writeFileSync(sidecarPyprojectPath, sidecarPyproject); + console.log( + ` Updated packages/camoufox-sidecar/pyproject.toml: ${oldMatch} -> ${newSidecarVersion}` + ); + } else { + console.log(` packages/camoufox-sidecar/pyproject.toml already up to date`); + } +} else { + console.error( + " Could not find version field in packages/camoufox-sidecar/pyproject.toml" + ); + process.exit(1); +} + +// Update packages/camoufox-sidecar/camoufox_sidecar/__init__.py +const sidecarInitPath = join( + rootDir, + "packages", + "camoufox-sidecar", + "camoufox_sidecar", + "__init__.py" +); +let sidecarInit = readFileSync(sidecarInitPath, "utf-8"); +const sidecarInitRegex = /^__version__\s*=\s*"[^"]*"/m; +const newSidecarInit = `__version__ = "${pep440Version}"`; +if (sidecarInitRegex.test(sidecarInit)) { + const oldMatch = sidecarInit.match(sidecarInitRegex)?.[0]; + if (oldMatch !== newSidecarInit) { + sidecarInit 
= sidecarInit.replace(sidecarInitRegex, newSidecarInit); + writeFileSync(sidecarInitPath, sidecarInit); + console.log( + ` Updated packages/camoufox-sidecar/camoufox_sidecar/__init__.py: ${oldMatch} -> ${newSidecarInit}` + ); + } else { + console.log( + ` packages/camoufox-sidecar/camoufox_sidecar/__init__.py already up to date` + ); + } +} else { + console.error( + " Could not find __version__ in packages/camoufox-sidecar/camoufox_sidecar/__init__.py" + ); + process.exit(1); +} + // Update Cargo.lock to match Cargo.toml if (cargoTomlUpdated) { try { From a08660f2305956acdab9ec349860fc419e44169d Mon Sep 17 00:00:00 2001 From: davide Date: Mon, 20 Apr 2026 18:17:54 -0400 Subject: [PATCH 3/9] feat(camoufox): wire engine=camoufox through BrowserManager (Unit 3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fills in the stubs from Unit 1: CamoufoxClient now drives the Python sidecar over a JSON-line protocol, and BrowserManager::launch spawns the sidecar, waits for its ready event, and hands back a working BrowserManager whose navigate/close dispatch through the sidecar. After this unit, `agent-browser --engine camoufox open <url>` actually launches Camoufox and loads the page; open/close/reopen in a loop produces zero leaked processes. Process lifetime: the sidecar is spawned as its own process-group leader via setpgid, so SIGTERM → (graceful wait) → SIGKILL on the group tears down the full tree (Python → Camoufox → plugin-containers). waitpid is used directly in Drop so teardown stays synchronous and deterministic even when callers drop BrowserManager outside a tokio context.
The refactor drops the ambient `pub client: Arc<CdpClient>` field on BrowserManager in favour of a `fn client()` accessor that pulls from the backend enum — actions.rs callers already go through `backend.require_cdp()?` for engine-aware paths, so the change narrows the "accidentally reach into CDP on a Camoufox backend" surface to a single clearly-named assertion point. Integration tests (cli/tests/camoufox_launch.rs) gated behind the `camoufox-integration` cargo feature cover open+close cleanup, a 10x loop smoke test, --stealth redundancy, and the two rust-only error paths that run unconditionally. All 696 existing tests stay green. --- cli/Cargo.lock | 20 + cli/Cargo.toml | 9 + cli/src/native/actions.rs | 173 ++--- cli/src/native/browser.rs | 317 ++++++++-- cli/src/native/camoufox_client.rs | 482 +++++++++++++- cli/src/native/camoufox_embed/mod.rs | 171 +++++ cli/src/native/cdp/camoufox.rs | 591 +++++++++++++++++- cli/src/native/e2e_tests.rs | 2 +- cli/src/native/mod.rs | 2 + cli/src/native/stream/chat.rs | 2 +- cli/src/output.rs | 4 +- cli/tests/backend_refactor_smoke.rs | 36 +- cli/tests/camoufox_launch.rs | 309 +++++++++ .../camoufox_sidecar/__main__.py | 5 + .../camoufox_sidecar/session.py | 41 ++ 15 files changed, 1974 insertions(+), 190 deletions(-) create mode 100644 cli/src/native/camoufox_embed/mod.rs create mode 100644 cli/tests/camoufox_launch.rs diff --git a/cli/Cargo.lock b/cli/Cargo.lock index 461663f2c..ebe215a54 100644 --- a/cli/Cargo.lock +++ b/cli/Cargo.lock @@ -57,6 +57,7 @@ dependencies = [ "hex", "hmac", "image", + "include_dir", "libc", "reqwest", "rust-embed", @@ -1047,6 +1048,25 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e7c5cedc30da3a610cac6b4ba17597bdf7152cf974e8aab3afb3d54455e371c8" +[[package]] +name = "include_dir" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "923d117408f1e49d914f1a379a309cffe4f18c05cf4e3d12e613a15fc81bd0dd" +dependencies
= [ + "include_dir_macros", +] + +[[package]] +name = "include_dir_macros" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cab85a7ed0bd5f0e76d93846e0147172bed2e2d3f859bcc33a8d9699cad1a75" +dependencies = [ + "proc-macro2", + "quote", +] + [[package]] name = "indexmap" version = "2.13.0" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 19262fd7f..ea025a1ba 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -35,6 +35,15 @@ hex = "0.4" chrono = "0.4" urlencoding = "2" rust-embed = "8" +include_dir = "0.7" + +[features] +# Enables the `cli/tests/camoufox_launch.rs` integration suite. Off by default +# because it requires a working Python 3 install with the `camoufox` package +# and the browser binary fetched. Turn on locally with +# `cargo test --features camoufox-integration` when testing the Camoufox +# engine end-to-end. +camoufox-integration = [] [target.'cfg(unix)'.dependencies] libc = "0.2" diff --git a/cli/src/native/actions.rs b/cli/src/native/actions.rs index 205fcf871..d4775c866 100644 --- a/cli/src/native/actions.rs +++ b/cli/src/native/actions.rs @@ -344,7 +344,13 @@ impl DaemonState { fn subscribe_to_browser_events(&mut self) { if let Some(ref browser) = self.browser { - self.event_rx = Some(browser.client.subscribe()); + // Camoufox events are surfaced through `CamoufoxClient::subscribe` + // rather than a CDP broadcast. Units 4/5 wire up a sidecar + // event bridge; Unit 3's open+close flow doesn't depend on it, + // so skip silently here on the Camoufox backend. + if let Ok(client) = browser.backend.require_cdp() { + self.event_rx = Some(client.subscribe()); + } } } @@ -362,8 +368,14 @@ impl DaemonState { return; }; - let client = browser.client.clone(); - let mut rx = browser.client.subscribe(); + // Fetch.* is a CDP-only surface; Camoufox routes requests through + // Playwright's Route API (Unit 4+). Leave the handler idle on + // non-CDP backends. 
+ let Ok(cdp) = browser.backend.require_cdp() else { + return; + }; + let client = cdp.clone(); + let mut rx = cdp.subscribe(); let domain_filter = self.domain_filter.clone(); let routes = self.routes.clone(); let origin_headers = self.origin_headers.clone(); @@ -477,8 +489,15 @@ impl DaemonState { return; }; - let client = browser.client.clone(); - let mut rx = browser.client.subscribe(); + // Dialog handling is wired up through CDP's Page.javascriptDialog + // events. Camoufox will eventually surface dialogs through the + // sidecar (Unit 4/5); for now Unit 3's open/close flow doesn't + // depend on this handler. + let Ok(cdp) = browser.backend.require_cdp() else { + return; + }; + let client = cdp.clone(); + let mut rx = cdp.subscribe(); self.dialog_handler_task = Some(tokio::spawn(async move { loop { @@ -524,7 +543,11 @@ impl DaemonState { pub async fn update_stream_client(&self) { if let Some(ref slot) = self.stream_client { let mut guard = slot.write().await; - *guard = self.browser.as_ref().map(|m| Arc::clone(&m.client)); + *guard = self + .browser + .as_ref() + .filter(|m| m.backend.is_cdp()) + .map(|m| Arc::clone(m.client())); } if let Some(ref server) = self.stream_server { // Update the CDP page session ID so screencast commands target the right page @@ -607,23 +630,23 @@ impl DaemonState { .insert(frame_id.clone(), iframe_sid.clone()); if let Some(ref mgr) = self.browser { let _ = mgr - .client + .client() .send_command_no_params( "Runtime.runIfWaitingForDebugger", Some(iframe_sid.as_str()), ) .await; let _ = mgr - .client + .client() .send_command_no_params("DOM.enable", Some(iframe_sid.as_str())) .await; let _ = mgr - .client + .client() .send_command_no_params("Accessibility.enable", Some(iframe_sid.as_str())) .await; if self.har_recording || self.request_tracking { let _ = mgr - .client + .client() .send_command_no_params("Network.enable", Some(iframe_sid.as_str())) .await; } @@ -637,7 +660,7 @@ impl DaemonState { for te in &drained.new_targets { if 
let Some(ref mut mgr) = self.browser { let attach_result: Result = mgr - .client + .client() .send_command_typed( "Target.attachToTarget", &AttachToTargetParams { @@ -1494,7 +1517,7 @@ async fn connect_auto_with_fresh_tab() -> Result { mgr.tab_new(None, None).await?; let session_id = mgr.active_session_id()?.to_string(); let _ = mgr - .client + .client() .send_command("Page.bringToFront", None, Some(&session_id)) .await; Ok(mgr) @@ -2211,7 +2234,7 @@ async fn handle_navigate(cmd: &Value, state: &mut DaemonState) -> Result Result Result { let mgr = state.browser.as_ref().ok_or("Browser not launched")?; let session_id = mgr.active_session_id()?.to_string(); - mgr.client + mgr.client() .send_command_no_params("Page.reload", Some(&session_id)) .await?; - let mut rx = mgr.client.subscribe(); + let mut rx = mgr.client().subscribe(); let _ = tokio::time::timeout(tokio::time::Duration::from_secs(10), async { loop { match rx.recv().await { @@ -3571,7 +3594,7 @@ async fn handle_mouse(cmd: &Value, state: &DaemonState) -> Result let button = cmd.get("button").and_then(|v| v.as_str()).unwrap_or("none"); let click_count = cmd.get("clickCount").and_then(|v| v.as_i64()).unwrap_or(0); - mgr.client + mgr.client() .send_command( "Input.dispatchMouseEvent", Some(json!({ @@ -3607,7 +3630,7 @@ async fn handle_keyboard(cmd: &Value, state: &DaemonState) -> Result Result Result Result< // Capture current cookies let cookies_result = mgr - .client + .client() .send_command_no_params("Network.getAllCookies", Some(&old_session_id)) .await .ok(); // Create new browser context let ctx_result = mgr - .client + .client() .send_command_no_params("Target.createBrowserContext", None) .await?; let context_id = ctx_result @@ -4018,7 +4041,7 @@ async fn handle_recording_start(cmd: &Value, state: &mut DaemonState) -> Result< // Create page in new context let create_result: CreateTargetResult = mgr - .client + .client() .send_command_typed( "Target.createTarget", &json!({ "url": "about:blank", 
"browserContextId": context_id }), @@ -4027,7 +4050,7 @@ async fn handle_recording_start(cmd: &Value, state: &mut DaemonState) -> Result< .await?; let attach_result: AttachToTargetResult = mgr - .client + .client() .send_command_typed( "Target.attachToTarget", &AttachToTargetParams { @@ -4046,7 +4069,7 @@ async fn handle_recording_start(cmd: &Value, state: &mut DaemonState) -> Result< // because Browser.setDownloadBehavior at launch only applies to the default context. if let Some(ref dl_path) = mgr.download_path { let _ = mgr - .client + .client() .send_command( "Browser.setDownloadBehavior", Some(json!({ @@ -4064,7 +4087,7 @@ async fn handle_recording_start(cmd: &Value, state: &mut DaemonState) -> Result< // Security.setIgnoreCertificateErrors at launch only applies to the session it was sent on. if mgr.ignore_https_errors { let _ = mgr - .client + .client() .send_command( "Security.setIgnoreCertificateErrors", Some(json!({ "ignore": true })), @@ -4078,7 +4101,7 @@ async fn handle_recording_start(cmd: &Value, state: &mut DaemonState) -> Result< if let Some(cookie_arr) = cr.get("cookies").and_then(|v| v.as_array()) { if !cookie_arr.is_empty() { let _ = mgr - .client + .client() .send_command( "Network.setCookies", Some(json!({ "cookies": cookie_arr })), @@ -4108,7 +4131,7 @@ async fn handle_recording_start(cmd: &Value, state: &mut DaemonState) -> Result< // Navigate to URL if nav_url != "about:blank" { let _ = mgr - .client + .client() .send_command( "Page.navigate", Some(json!({ "url": nav_url })), @@ -4118,7 +4141,7 @@ async fn handle_recording_start(cmd: &Value, state: &mut DaemonState) -> Result< tokio::time::sleep(tokio::time::Duration::from_millis(1000)).await; } - (mgr.client.clone(), new_session_id) + (mgr.client().clone(), new_session_id) }; let result = recording::recording_start(&mut state.recording_state, path)?; @@ -4154,7 +4177,7 @@ async fn handle_recording_restart(cmd: &Value, state: &mut DaemonState) -> Resul if let Some(ref browser) = 
state.browser { let session_id = browser.active_session_id()?.to_string(); state - .start_recording_task(browser.client.clone(), session_id) + .start_recording_task(browser.client().clone(), session_id) .await?; } @@ -4172,7 +4195,7 @@ async fn handle_pdf(cmd: &Value, state: &DaemonState) -> Result { }); let result = mgr - .client + .client() .send_command("Page.printToPDF", Some(params), Some(&session_id)) .await?; @@ -4764,7 +4787,7 @@ async fn handle_wheel(cmd: &Value, state: &DaemonState) -> Result let delta_x = cmd.get("deltaX").and_then(|v| v.as_f64()).unwrap_or(0.0); let delta_y = cmd.get("deltaY").and_then(|v| v.as_f64()).unwrap_or(0.0); - mgr.client + mgr.client() .send_command( "Input.dispatchMouseEvent", Some(json!({ @@ -5105,7 +5128,7 @@ async fn handle_waitforfunction(cmd: &Value, state: &DaemonState) -> Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result Result { async fn handle_har_start(state: &mut DaemonState) -> Result { let mgr = state.browser.as_ref().ok_or("Browser not launched")?; let session_id = mgr.active_session_id()?.to_string(); - mgr.client + mgr.client() .send_command_no_params("Network.enable", Some(&session_id)) .await?; // Also enable Network on cross-origin iframe sessions so their // requests are captured in the HAR output. 
for iframe_sid in state.iframe_sessions.values() { let _ = mgr - .client + .client() .send_command_no_params("Network.enable", Some(iframe_sid.as_str())) .await; } @@ -6493,7 +6516,7 @@ async fn har_browser_metadata(state: &DaemonState) -> Option { } let version = mgr - .client + .client() .send_command_no_params("Browser.getVersion", None) .await .ok()?; @@ -6774,7 +6797,7 @@ async fn handle_route(cmd: &Value, state: &mut DaemonState) -> Result Result Result Result Result Result Result Result Result Result Result("Input.dispatchMouseEvent", &params, Some(&session_id)) .await?; Ok(json!({ "dispatched": event_type })) @@ -7571,7 +7594,7 @@ async fn handle_input_keyboard(cmd: &Value, state: &DaemonState) -> Result Result Result Result .and_then(|v| v.as_str()) .ok_or("Missing 'key' parameter")?; - mgr.client + mgr.client() .send_command( "Input.dispatchKeyEvent", Some(json!({ "type": "keyUp", "key": key })), @@ -7642,7 +7665,7 @@ async fn handle_inserttext(cmd: &Value, state: &DaemonState) -> Result Result("Input.dispatchMouseEvent", &params, Some(&session_id)) .await?; Ok(json!({ "moved": true })) @@ -7693,7 +7716,7 @@ async fn handle_mousedown(cmd: &Value, state: &mut DaemonState) -> Result("Input.dispatchMouseEvent", &params, Some(&session_id)) .await?; Ok(json!({ "pressed": true })) @@ -7716,7 +7739,7 @@ async fn handle_mouseup(cmd: &Value, state: &mut DaemonState) -> Result("Input.dispatchMouseEvent", &params, Some(&session_id)) .await?; Ok(json!({ "released": true })) diff --git a/cli/src/native/browser.rs b/cli/src/native/browser.rs index 4de241f27..27d6535c0 100644 --- a/cli/src/native/browser.rs +++ b/cli/src/native/browser.rs @@ -5,7 +5,7 @@ use std::sync::Arc; use std::time::{Duration, Instant}; use tokio::sync::{broadcast, Mutex}; -use super::backend::{not_yet_implemented_error, BrowserBackend}; +use super::backend::BrowserBackend; use super::cdp::camoufox::CamoufoxProcess; use super::cdp::chrome::{auto_connect_cdp, launch_chrome, ChromeProcess, LaunchOptions};
use super::cdp::client::CdpClient; @@ -90,6 +90,38 @@ fn validate_lightpanda_options(options: &LaunchOptions) -> Result<(), String> { Ok(()) } +/// Mirrors `validate_lightpanda_options`: rejects options that have no +/// equivalent on the Camoufox path. The sidecar re-validates launch kwargs +/// against its own allowlist, but these are shaped at the Rust launch-option +/// level and are easier to reject up-front with a clear message. +fn validate_camoufox_options(options: &LaunchOptions) -> Result<(), String> { + if options + .extensions + .as_ref() + .map(|e| !e.is_empty()) + .unwrap_or(false) + { + return Err("Extensions are not supported with Camoufox".to_string()); + } + if options.profile.is_some() { + return Err("Profiles are not supported with Camoufox".to_string()); + } + if options.storage_state.is_some() { + return Err("Storage state is not supported with Camoufox".to_string()); + } + if options.allow_file_access { + return Err("File access is not supported with Camoufox".to_string()); + } + if !options.args.is_empty() { + return Err( + "Custom Chrome arguments (--args) are not supported with Camoufox; \ + pass engine-specific kwargs through the sidecar config instead." + .to_string(), + ); + } + Ok(()) +} + /// Returns true for Chrome internal targets that should not be selected /// during auto-connect (e.g. chrome://, chrome-extension://, devtools://). fn is_internal_chrome_target(url: &str) -> bool { @@ -295,23 +327,16 @@ impl BrowserProcess { match self { BrowserProcess::Chrome(p) => p.has_exited(), BrowserProcess::Lightpanda(_) => false, - BrowserProcess::Camoufox(_) => false, + BrowserProcess::Camoufox(p) => p.has_exited(), } } } pub struct BrowserManager { - /// Engine-tagged backend. In Unit 1 this is always `BrowserBackend::Cdp` - /// whenever a `BrowserManager` exists, since `launch` for engine=camoufox - /// returns a structured `not-yet-implemented` error before constructing - /// one. Unit 3 begins returning `BrowserBackend::Camoufox` here. 
+ /// Engine-tagged backend. Either a `Cdp` arm (Chrome / Lightpanda) or a + /// `Camoufox` arm (Python sidecar). All engine-specific transport lives + /// here; the rest of the struct is engine-agnostic. pub backend: BrowserBackend, - /// Direct handle to the CDP client. Kept as a convenience for the - /// Chrome-only code inside this module; always matches the `Cdp` arm of - /// `backend`. Once Camoufox launches land (Unit 3), methods that reach for - /// `self.client` on a non-CDP backend will move to `self.backend` dispatch - /// or early-return with `backend.require_cdp()?`. - pub client: Arc, browser_process: Option, ws_url: String, pages: Vec, @@ -334,6 +359,37 @@ const LIGHTPANDA_CDP_CONNECT_POLL_INTERVAL: Duration = Duration::from_millis(100 const LIGHTPANDA_TARGET_INIT_TIMEOUT: Duration = Duration::from_secs(10); impl BrowserManager { + /// CDP client accessor. Most internal `BrowserManager` methods still + /// assume a CDP backend (Chrome/Lightpanda); this helper centralises the + /// lookup so a future conversion of those methods to `backend`-dispatch + /// only has to touch one site. + /// + /// Panics if called on a Camoufox backend. Callers on mixed-engine paths + /// must guard with `self.backend.require_cdp()?` (or `.is_cdp()`) first; + /// reaching this accessor on Camoufox is a programmer bug, not a runtime + /// failure mode. + pub fn client(&self) -> &Arc { + match &self.backend { + BrowserBackend::Cdp(c) => c, + BrowserBackend::Camoufox(_) => panic!( + "BrowserManager::client() called on Camoufox backend; use self.backend.require_cdp()? first" + ), + } + } + + /// Camoufox sidecar client accessor. Symmetric with `client()` above; + /// panics on a CDP backend. Camoufox-specific code paths in this module + /// (e.g. navigate-via-sidecar) use this after a `backend.is_camoufox()` + /// check. 
+ fn camoufox_client(&self) -> &Arc { + match &self.backend { + BrowserBackend::Camoufox(c) => c, + BrowserBackend::Cdp(_) => panic!( + "BrowserManager::camoufox_client() called on CDP backend; check backend.is_camoufox() first" + ), + } + } + pub async fn launch(options: LaunchOptions, engine: Option<&str>) -> Result { let engine = engine.unwrap_or("chrome"); @@ -352,10 +408,7 @@ impl BrowserManager { validate_lightpanda_options(&options)?; } "camoufox" => { - // Unit 1: validation is deferred to Unit 3; we stop here with a - // structured error so `agent-browser --engine camoufox open ` - // reaches this arm, surfaces a clean failure, and does not panic. - return Err(not_yet_implemented_error(Some("launch"))); + validate_camoufox_options(&options)?; } _ => { return Err(format!( @@ -371,6 +424,14 @@ impl BrowserManager { let download_path = options.download_path.clone(); let stealth = options.stealth; + if engine == "camoufox" { + // Camoufox has its own process lifecycle (Python sidecar driving + // a Playwright/Camoufox browser) and doesn't share the CDP + // WebSocket path. Return early with a fully-constructed + // BrowserManager from the sidecar. 
+ return initialize_camoufox_manager(&options).await; + } + let (ws_url, process) = match engine { "lightpanda" => { let lp_options = LightpandaLaunchOptions { @@ -398,7 +459,6 @@ impl BrowserManager { let backend = BrowserBackend::Cdp(client.clone()); let mut manager = Self { backend, - client, browser_process: Some(process), ws_url, pages: Vec::new(), @@ -418,7 +478,7 @@ impl BrowserManager { if ignore_https_errors { let _ = manager - .client + .client() .send_command( "Security.setIgnoreCertificateErrors", Some(json!({ "ignore": true })), @@ -429,7 +489,7 @@ impl BrowserManager { if let Some(ref ua) = user_agent { let _ = manager - .client + .client() .send_command( "Emulation.setUserAgentOverride", Some(json!({ "userAgent": ua })), @@ -440,7 +500,7 @@ impl BrowserManager { if let Some(ref scheme) = color_scheme { let _ = manager - .client + .client() .send_command( "Emulation.setEmulatedMedia", Some(json!({ "features": [{ "name": "prefers-color-scheme", "value": scheme }] })), @@ -451,7 +511,7 @@ impl BrowserManager { if let Some(ref path) = download_path { let _ = manager - .client + .client() .send_command( "Browser.setDownloadBehavior", Some(json!({ "behavior": "allow", "downloadPath": path })), @@ -487,13 +547,12 @@ impl BrowserManager { ) -> Result { let ws_url = resolve_cdp_url(url).await?; let client = Arc::new(CdpClient::connect_with_headers(&ws_url, headers).await?); - let backend = BrowserBackend::Cdp(client.clone()); + let backend = BrowserBackend::Cdp(client); let stealth = std::env::var("AGENT_BROWSER_STEALTH") .map(|v| matches!(v.as_str(), "1" | "true" | "TRUE" | "True")) .unwrap_or(false); let mut manager = Self { backend, - client, browser_process: None, ws_url, pages: Vec::new(), @@ -531,7 +590,7 @@ impl BrowserManager { } async fn discover_and_attach_targets(&mut self) -> Result<(), String> { - self.client + self.client() .send_command_typed::<_, Value>( "Target.setDiscoverTargets", &SetDiscoverTargetsParams { discover: true }, @@ -540,7 
+599,7 @@ impl BrowserManager { .await?; let result: GetTargetsResult = self - .client + .client() .send_command_typed("Target.getTargets", &json!({}), None) .await?; @@ -553,7 +612,7 @@ impl BrowserManager { if page_targets.is_empty() { // Create a new tab let result: CreateTargetResult = self - .client + .client() .send_command_typed( "Target.createTarget", &CreateTargetParams { @@ -564,7 +623,7 @@ impl BrowserManager { .await?; let attach_result: AttachToTargetResult = self - .client + .client() .send_command_typed( "Target.attachToTarget", &AttachToTargetParams { @@ -591,7 +650,7 @@ impl BrowserManager { } else { for target in &page_targets { let attach_result: AttachToTargetResult = self - .client + .client() .send_command_typed( "Target.attachToTarget", &AttachToTargetParams { @@ -628,12 +687,12 @@ impl BrowserManager { } async fn enable_domains(&self, session_id: &str) -> Result<(), String> { - self.client + self.client() .send_command_no_params("Page.enable", Some(session_id)) .await?; if self.stealth { let _ = self - .client + .client() .send_command( "Page.addScriptToEvaluateOnNewDocument", Some(json!({ "source": super::stealth::STEALTH_INIT_SCRIPT })), @@ -641,24 +700,24 @@ impl BrowserManager { ) .await; } - self.client + self.client() .send_command_no_params("Runtime.enable", Some(session_id)) .await?; // Resume the target if it is paused waiting for the debugger. // This is needed for real browser sessions (Chrome 144+) where targets // are paused after attach until explicitly resumed. No-op otherwise. let _ = self - .client + .client() .send_command_no_params("Runtime.runIfWaitingForDebugger", Some(session_id)) .await; - self.client + self.client() .send_command_no_params("Network.enable", Some(session_id)) .await?; // Enable auto-attach for cross-origin iframe support. // flatten: true gives each iframe its own session_id. // Ignored on engines that don't support it (e.g. Lightpanda). 
let _ = self - .client + .client() .send_command( "Target.setAutoAttach", Some(json!({ @@ -674,12 +733,12 @@ impl BrowserManager { /// Enable domains on a direct page connection (no session_id needed). async fn enable_domains_direct(&self) -> Result<(), String> { - self.client + self.client() .send_command_no_params("Page.enable", None) .await?; if self.stealth { let _ = self - .client + .client() .send_command( "Page.addScriptToEvaluateOnNewDocument", Some(json!({ "source": super::stealth::STEALTH_INIT_SCRIPT })), @@ -687,14 +746,14 @@ impl BrowserManager { ) .await; } - self.client + self.client() .send_command_no_params("Runtime.enable", None) .await?; let _ = self - .client + .client() .send_command_no_params("Runtime.runIfWaitingForDebugger", None) .await; - self.client + self.client() .send_command_no_params("Network.enable", None) .await?; Ok(()) @@ -708,11 +767,14 @@ impl BrowserManager { } pub async fn navigate(&mut self, url: &str, wait_until: WaitUntil) -> Result { + if self.backend.is_camoufox() { + return self.camoufox_navigate(url, wait_until).await; + } let session_id = self.active_session_id()?.to_string(); - let mut lifecycle_rx = self.client.subscribe(); + let mut lifecycle_rx = self.client().subscribe(); let nav_result: PageNavigateResult = self - .client + .client() .send_command_typed( "Page.navigate", &PageNavigateParams { @@ -754,6 +816,45 @@ impl BrowserManager { Ok(json!({ "url": page_url, "title": title })) } + /// Camoufox path for `navigate`: delegates to the sidecar's `page.goto` + /// command. In Unit 3 the response carries `{url, title}` so the CLI + /// output shape matches the Chrome path; richer parity (wait_until + /// options, redirect tracking) lands in Unit 4. 
+ async fn camoufox_navigate( + &mut self, + url: &str, + wait_until: WaitUntil, + ) -> Result { + let wait_until_str = match wait_until { + WaitUntil::Load => "load", + WaitUntil::DomContentLoaded => "domcontentloaded", + WaitUntil::NetworkIdle => "networkidle", + WaitUntil::None => "none", + }; + let args = json!({ "url": url, "waitUntil": wait_until_str }); + let result = self.camoufox_client().call("page.goto", args).await?; + + let page_url = result + .get("url") + .and_then(|v| v.as_str()) + .unwrap_or(url) + .to_string(); + let title = result + .get("title") + .and_then(|v| v.as_str()) + .unwrap_or_default() + .to_string(); + + if let Ok(parsed) = url::Url::parse(&page_url) { + let origin = parsed.origin().ascii_serialization(); + if origin != "null" { + self.visited_origins.insert(origin); + } + } + + Ok(json!({ "url": page_url, "title": title })) + } + async fn wait_for_lifecycle( &self, wait_until: WaitUntil, @@ -819,7 +920,7 @@ impl BrowserManager { let session_id = self.active_session_id()?.to_string(); let result: EvaluateResult = self - .client + .client() .send_command_typed( "Runtime.evaluate", &EvaluateParams { @@ -852,18 +953,34 @@ impl BrowserManager { wait_until: WaitUntil, session_id: &str, ) -> Result<(), String> { - let mut rx = self.client.subscribe(); + let mut rx = self.client().subscribe(); self.wait_for_lifecycle(wait_until, session_id, &mut rx) .await } pub async fn close(&mut self) -> Result<(), String> { + if self.backend.is_camoufox() { + // Graceful path: tell the sidecar to close its browser and exit. + // The sidecar acknowledges the `close` command and then drops its + // shutdown event; we still need to wait on the child process + // separately (see CamoufoxProcess::wait_or_kill below). 
+ let _ = self.camoufox_client().close().await; + if let Some(mut process) = self.browser_process.take() { + let timeout = std::time::Duration::from_secs(5); + let _ = tokio::task::spawn_blocking(move || { + process.wait_or_kill(timeout); + }) + .await; + } + return Ok(()); + } + if self.browser_process.is_some() { // Only send Browser.close when we launched the browser ourselves. // For external connections (--auto-connect, --cdp) we just disconnect // without shutting down the user's browser. let _ = self - .client + .client() .send_command_no_params("Browser.close", None) .await; } @@ -890,10 +1007,17 @@ impl BrowserManager { /// Checks if the CDP connection is alive by sending a simple command. /// Returns false if the command times out or fails. pub async fn is_connection_alive(&self) -> bool { + // Camoufox has no CDP connection; we use the sidecar child liveness + // as the proxy for "connection alive". If the handshake completed + // and the child hasn't been reaped, consider the session healthy. + if self.backend.is_camoufox() { + return self.browser_process.is_some(); + } + let timeout = tokio::time::Duration::from_secs(3); let result = tokio::time::timeout( timeout, - self.client + self.client() .send_command_no_params("Browser.getVersion", None), ) .await; @@ -948,8 +1072,16 @@ impl BrowserManager { return Ok(()); } + // Camoufox manages its page set via Playwright inside the sidecar; + // tab ids sync in Unit 5. In Unit 3 we short-circuit so the open + // flow (which calls `ensure_page` before navigate) doesn't panic — + // `camoufox_navigate` creates the page lazily in the sidecar. 
+ if self.backend.is_camoufox() { + return Ok(()); + } + let result: CreateTargetResult = self - .client + .client() .send_command_typed( "Target.createTarget", &CreateTargetParams { @@ -960,7 +1092,7 @@ impl BrowserManager { .await?; let attach_result: AttachToTargetResult = self - .client + .client() .send_command_typed( "Target.attachToTarget", &AttachToTargetParams { @@ -1087,7 +1219,7 @@ impl BrowserManager { let target_url = url.unwrap_or("about:blank"); let result: CreateTargetResult = self - .client + .client() .send_command_typed( "Target.createTarget", &CreateTargetParams { @@ -1098,7 +1230,7 @@ impl BrowserManager { .await?; let attach: AttachToTargetResult = self - .client + .client() .send_command_typed( "Target.attachToTarget", &AttachToTargetParams { @@ -1149,7 +1281,7 @@ impl BrowserManager { // Bring tab to front let _ = self - .client + .client() .send_command("Page.bringToFront", None, Some(&session_id)) .await; @@ -1186,7 +1318,7 @@ impl BrowserManager { let closed_tab_id = page.tab_id; let closed_label = page.label.clone(); let _ = self - .client + .client() .send_command_typed::<_, Value>( "Target.closeTarget", &CloseTargetParams { @@ -1218,7 +1350,7 @@ impl BrowserManager { mobile: bool, ) -> Result<(), String> { let session_id = self.active_session_id()?; - self.client + self.client() .send_command( "Emulation.setDeviceMetricsOverride", Some(json!({ @@ -1235,7 +1367,7 @@ impl BrowserManager { // viewport, so resize the content area to match. 
if let Ok(target_id) = self.active_target_id() { if let Ok(window_info) = self - .client + .client() .send_command( "Browser.getWindowForTarget", Some(json!({ "targetId": target_id })), @@ -1245,7 +1377,7 @@ impl BrowserManager { { if let Some(window_id) = window_info.get("windowId").and_then(|v| v.as_i64()) { if let Err(e) = self - .client + .client() .send_command( "Browser.setContentsSize", Some(json!({ @@ -1268,7 +1400,7 @@ impl BrowserManager { pub async fn set_user_agent(&self, user_agent: &str) -> Result<(), String> { let session_id = self.active_session_id()?; - self.client + self.client() .send_command( "Emulation.setUserAgentOverride", Some(json!({ "userAgent": user_agent })), @@ -1295,7 +1427,7 @@ impl BrowserManager { .collect(); params["features"] = Value::Array(features_arr); } - self.client + self.client() .send_command("Emulation.setEmulatedMedia", Some(params), Some(session_id)) .await?; Ok(()) @@ -1303,7 +1435,7 @@ impl BrowserManager { pub async fn bring_to_front(&self) -> Result<(), String> { let session_id = self.active_session_id()?; - self.client + self.client() .send_command("Page.bringToFront", None, Some(session_id)) .await?; Ok(()) @@ -1311,7 +1443,7 @@ impl BrowserManager { pub async fn set_timezone(&self, timezone_id: &str) -> Result<(), String> { let session_id = self.active_session_id()?; - self.client + self.client() .send_command( "Emulation.setTimezoneOverride", Some(json!({ "timezoneId": timezone_id })), @@ -1323,7 +1455,7 @@ impl BrowserManager { pub async fn set_locale(&self, locale: &str) -> Result<(), String> { let session_id = self.active_session_id()?; - self.client + self.client() .send_command( "Emulation.setLocaleOverride", Some(json!({ "locale": locale })), @@ -1340,7 +1472,7 @@ impl BrowserManager { accuracy: Option, ) -> Result<(), String> { let session_id = self.active_session_id()?; - self.client + self.client() .send_command( "Emulation.setGeolocationOverride", Some(json!({ @@ -1355,7 +1487,7 @@ impl BrowserManager 
{ } pub async fn grant_permissions(&self, permissions: &[String]) -> Result<(), String> { - self.client + self.client() .send_command( "Browser.grantPermissions", Some(json!({ "permissions": permissions })), @@ -1375,7 +1507,7 @@ impl BrowserManager { if let Some(text) = prompt_text { params["promptText"] = Value::String(text.to_string()); } - self.client + self.client() .send_command( "Page.handleJavaScriptDialog", Some(params), @@ -1399,7 +1531,7 @@ impl BrowserManager { .await?; let describe: Value = self - .client + .client() .send_command( "DOM.describeNode", Some(json!({ "objectId": object_id })), @@ -1413,7 +1545,7 @@ impl BrowserManager { .and_then(|v| v.as_i64()) .ok_or("Could not get backendNodeId for file input")?; - self.client + self.client() .send_command( "DOM.setFileInputFiles", Some(json!({ @@ -1430,7 +1562,7 @@ impl BrowserManager { pub async fn add_script_to_evaluate(&self, source: &str) -> Result { let session_id = self.active_session_id()?; let result = self - .client + .client() .send_command( "Page.addScriptToEvaluateOnNewDocument", Some(json!({ "source": source })), @@ -1517,7 +1649,7 @@ impl BrowserManager { pub async fn set_download_behavior(&self, download_path: &str) -> Result<(), String> { let session_id = self.active_session_id()?; - self.client + self.client() .send_command( "Browser.setDownloadBehavior", Some(json!({ @@ -1627,6 +1759,56 @@ async fn connect_cdp_with_retry( } } +async fn initialize_camoufox_manager(options: &LaunchOptions) -> Result { + use super::cdp::camoufox::{launch_camoufox_sidecar, CamoufoxLaunchOptions}; + + let mut extra = serde_json::Map::new(); + if let Some(ref scheme) = options.color_scheme { + // Camoufox accepts `locale` / various knobs; colour-scheme is exposed + // as a kwarg-level hint that the sidecar forwards to Playwright's + // BrowserContext options. The sidecar validates the key against its + // allowlist, so the launch fails cleanly if the kwarg is not supported. 
+ extra.insert("color_scheme".to_string(), json!(scheme)); + } + if options.ignore_https_errors { + extra.insert("ignore_https_errors".to_string(), json!(true)); + } + + let cf_options = CamoufoxLaunchOptions { + headless: options.headless, + executable_path: options.executable_path.clone(), + proxy: options.proxy.as_ref().map(|s| json!({ "server": s })), + extra, + }; + + if options.stealth { + eprintln!( + "[agent-browser] warning: --stealth is redundant with --engine camoufox; \ + Camoufox's C++ stealth supersedes the JS init script — proceeding without \ + injected scripts." + ); + } + + let (process, client) = launch_camoufox_sidecar(&cf_options).await?; + let backend = BrowserBackend::Camoufox(client); + + Ok(BrowserManager { + backend, + browser_process: Some(BrowserProcess::Camoufox(process)), + ws_url: String::new(), + pages: Vec::new(), + active_page_index: 0, + default_timeout_ms: 25_000, + download_path: options.download_path.clone(), + ignore_https_errors: options.ignore_https_errors, + visited_origins: HashSet::new(), + next_tab_id: 1, + // Camoufox supersedes JS-injection stealth. Always false so + // `enable_domains` never tries to inject `STEALTH_INIT_SCRIPT`. + stealth: false, + }) +} + async fn initialize_lightpanda_manager( ws_url: String, process: BrowserProcess, @@ -1653,10 +1835,9 @@ async fn initialize_lightpanda_manager( }; let client = Arc::new(client); - let backend = BrowserBackend::Cdp(client.clone()); + let backend = BrowserBackend::Cdp(client); let mut manager = BrowserManager { backend, - client, browser_process: None, ws_url: ws_url.clone(), pages: Vec::new(), diff --git a/cli/src/native/camoufox_client.rs b/cli/src/native/camoufox_client.rs index 1261c8185..f74ab8287 100644 --- a/cli/src/native/camoufox_client.rs +++ b/cli/src/native/camoufox_client.rs @@ -1,24 +1,212 @@ -//! Placeholder client for the Camoufox backend. +//! Client for the Camoufox Python sidecar. //! -//! 
Unit 1 defines the type so that `BrowserBackend::Camoufox(Arc)` -//! compiles and action-layer dispatch can grow a `Camoufox` arm. The real -//! sidecar-driven implementation (reader/writer tasks, JSON-line protocol, -//! request/response demux) lands in Unit 3. - -/// Marker client for engine=camoufox. No state in Unit 1 — Unit 3 fills in -/// stdio handles to the Python sidecar, a pending-request map, and broadcast -/// channels for asynchronous `{"event": ...}` frames. +//! Speaks the JSON-line protocol documented in +//! `packages/camoufox-sidecar/camoufox_sidecar/protocol.py`: +//! +//! ```text +//! request: {"id": N, "cmd": "", "args": {...}} +//! response: {"id": N, "ok": true, "result": {...}} +//! {"id": N, "ok": false, "error": {"code": "...", "message": "..."}} +//! event: {"event": "", "data": {...}} +//! ``` +//! +//! The client owns the subprocess's stdin (writer) and stdout (reader task). +//! A monotonic request id plus a pending `HashMap` +//! demultiplexes responses back to the matching `call`. Asynchronous frames +//! (the `ready` event carries the sidecar pid; other events like +//! `page.console` are forwarded in later units) fan out on a broadcast +//! channel that callers can `subscribe` to. +//! +//! Errors from the sidecar arrive as `{code, message}` objects; we surface +//! them as `": "` strings to match the rest of the Rust +//! daemon's `Result<_, String>` convention. The error-code catalog the +//! sidecar may emit today is: +//! +//! - `invalid-frame` — malformed JSON on the wire +//! - `not-yet-supported` — unknown command (post-Unit 3 this is the +//! dominant failure while more commands are +//! ported in Units 4–5) +//! - `invalid-args` — well-formed frame with a bad args shape +//! - `unknown-launch-option` — launch kwarg not on the sidecar's allowlist +//! - `unsupported-launch-option` — explicitly rejected launch kwarg +//! (`persistent_context`, `user_data_dir`) +//! 
- `already-launched` — second `launch` without a `close` +//! - `camoufox-not-installed` — `import camoufox` failed or binary missing +//! - `launch-failed` — any other launch-time failure +//! - `internal-error` — uncaught exception inside a handler + +use std::collections::HashMap; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::{Arc, Mutex}; +use std::time::Duration; + +use serde_json::{json, Value}; +use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader}; +use tokio::process::{ChildStdin, ChildStdout}; +use tokio::sync::{broadcast, oneshot}; + +const EVENT_CHANNEL_CAPACITY: usize = 64; +const DEFAULT_CALL_TIMEOUT: Duration = Duration::from_secs(30); +const CLOSE_TIMEOUT: Duration = Duration::from_secs(5); + +/// A named event forwarded from the sidecar. Callers that don't care about +/// events can simply never subscribe; the broadcast channel is bounded, so a +/// slow consumer that lags drops old events rather than backpressuring the +/// reader. +#[derive(Debug, Clone)] +pub struct CamoufoxEvent { + pub name: String, + pub data: Value, +} + +type PendingMap = Arc>>>>; + +/// Sidecar client. Cheap to clone via `Arc`; construct once per session. pub struct CamoufoxClient { - _private: (), + writer: tokio::sync::Mutex, + pending: PendingMap, + next_id: AtomicU64, + events: broadcast::Sender, + /// Signals the reader loop to shut down on Drop. + shutdown: Arc, + _reader: std::sync::Mutex>>, } impl CamoufoxClient { - /// Construct a stub client. This is the only way to produce a - /// `CamoufoxClient` in Unit 1; action-layer code that matches on - /// `BrowserBackend::Camoufox` surfaces a structured - /// `not-yet-implemented` error rather than touching this value. - pub fn stub() -> Self { - Self { _private: () } + /// Consume the sidecar's stdin/stdout handles, wait for the `ready` + /// event (with the reported pid), and spawn the background reader task. + /// On success returns `(Arc, Option)`. 
The pid is + /// `None` only if the sidecar omits it — older sidecars may, but current + /// ones always attach it. + pub async fn start( + stdin: ChildStdin, + stdout: ChildStdout, + ready_timeout: Duration, + ) -> Result<(Arc, Option), String> { + let mut reader = BufReader::new(stdout); + + // Read the first frame: must be the `ready` event. Anything else is + // either a protocol bug on the sidecar side or a premature exit, + // both of which we treat as a readiness failure so the error is + // actionable. + let ready_line = + tokio::time::timeout(ready_timeout, read_one_nonblank_line(&mut reader)) + .await + .map_err(|_| { + format!( + "timed out after {}ms waiting for camoufox-sidecar `ready` event", + ready_timeout.as_millis() + ) + })? + .map_err(|e| format!("reading first sidecar frame: {}", e))?; + + let ready_frame: Value = serde_json::from_str(&ready_line).map_err(|e| { + format!( + "first sidecar frame was not valid JSON: {} (frame: {:?})", + e, ready_line + ) + })?; + let pid = parse_ready_frame(&ready_frame)?; + + let (events_tx, _) = broadcast::channel(EVENT_CHANNEL_CAPACITY); + let pending: PendingMap = Arc::new(Mutex::new(HashMap::new())); + let shutdown = Arc::new(tokio::sync::Notify::new()); + + let reader_task = spawn_reader(reader, pending.clone(), events_tx.clone(), shutdown.clone()); + + let client = Arc::new(Self { + writer: tokio::sync::Mutex::new(stdin), + pending, + next_id: AtomicU64::new(1), + events: events_tx, + shutdown, + _reader: std::sync::Mutex::new(Some(reader_task)), + }); + Ok((client, pid)) + } + + /// Send a request and await the response. Times out after + /// `DEFAULT_CALL_TIMEOUT` so a misbehaving sidecar cannot wedge a caller + /// indefinitely. 
+ pub async fn call(&self, cmd: &str, args: Value) -> Result { + self.call_with_timeout(cmd, args, DEFAULT_CALL_TIMEOUT).await + } + + pub async fn call_with_timeout( + &self, + cmd: &str, + args: Value, + timeout: Duration, + ) -> Result { + let id = self.next_id.fetch_add(1, Ordering::SeqCst); + + let (tx, rx) = oneshot::channel(); + { + let mut pending = self.pending.lock().expect("camoufox pending map poisoned"); + pending.insert(id, tx); + } + + let frame = json!({ "id": id, "cmd": cmd, "args": args }); + let serialized = serde_json::to_string(&frame).expect("frame serializes"); + { + let mut writer = self.writer.lock().await; + writer + .write_all(serialized.as_bytes()) + .await + .map_err(|e| format!("sending `{}` to camoufox-sidecar: {}", cmd, e))?; + writer + .write_all(b"\n") + .await + .map_err(|e| format!("sending newline to camoufox-sidecar: {}", e))?; + writer + .flush() + .await + .map_err(|e| format!("flushing camoufox-sidecar stdin: {}", e))?; + } + + match tokio::time::timeout(timeout, rx).await { + Ok(Ok(result)) => result, + Ok(Err(_canceled)) => { + self.drop_pending(id); + Err(format!( + "camoufox-sidecar dropped response for `{}` (reader task exited)", + cmd + )) + } + Err(_) => { + self.drop_pending(id); + Err(format!( + "camoufox-sidecar `{}` timed out after {}ms", + cmd, + timeout.as_millis() + )) + } + } + } + + fn drop_pending(&self, id: u64) { + if let Ok(mut pending) = self.pending.lock() { + pending.remove(&id); + } + } + + /// Subscribe to events emitted by the sidecar. New subscribers only see + /// events sent after they subscribe (consistent with `tokio::broadcast` + /// semantics); the `ready` event is consumed during `start()` and does + /// not reach subscribers. + pub fn subscribe(&self) -> broadcast::Receiver { + self.events.subscribe() + } + + /// Send the `close` command and wait for its response. 
Note that + /// receiving the response does NOT imply the sidecar process has + /// exited — the sidecar sets its shutdown event right after responding, + /// and the OS-level reap happens shortly after. Callers must wait on the + /// process handle separately if they need that guarantee (see + /// `CamoufoxProcess::wait_or_kill`). + pub async fn close(&self) -> Result { + self.call_with_timeout("close", json!({}), CLOSE_TIMEOUT) + .await } } @@ -27,3 +215,265 @@ impl std::fmt::Debug for CamoufoxClient { f.debug_struct("CamoufoxClient").finish_non_exhaustive() } } + +impl Drop for CamoufoxClient { + fn drop(&mut self) { + // Signal the reader task to exit and fail any in-flight calls so + // awaiters get a clean error instead of hanging forever. + self.shutdown.notify_waiters(); + if let Ok(mut pending) = self.pending.lock() { + for (_, tx) in pending.drain() { + let _ = tx.send(Err( + "camoufox-sidecar client dropped while request was in flight".to_string(), + )); + } + } + if let Ok(mut slot) = self._reader.lock() { + if let Some(handle) = slot.take() { + handle.abort(); + } + } + } +} + +async fn read_one_nonblank_line( + reader: &mut R, +) -> std::io::Result { + loop { + let mut buf = String::new(); + let n = reader.read_line(&mut buf).await?; + if n == 0 { + return Err(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "camoufox-sidecar stdout closed before first frame", + )); + } + let trimmed = buf.trim_end_matches(['\r', '\n']).to_string(); + if trimmed.trim().is_empty() { + continue; + } + return Ok(trimmed); + } +} + +fn parse_ready_frame(frame: &Value) -> Result, String> { + let event = frame + .get("event") + .and_then(|v| v.as_str()) + .ok_or_else(|| { + format!( + "expected first frame to be a ready event, got: {}", + frame + ) + })?; + if event != "ready" { + return Err(format!( + "expected first frame to be `ready`, got event `{}`", + event + )); + } + let pid = frame + .get("data") + .and_then(|d| d.get("pid")) + .and_then(|v| v.as_u64()) 
+ .and_then(|v| u32::try_from(v).ok()); + Ok(pid) +} + +fn spawn_reader( + mut reader: BufReader, + pending: PendingMap, + events: broadcast::Sender, + shutdown: Arc, +) -> tokio::task::JoinHandle<()> { + tokio::spawn(async move { + let mut line = String::new(); + loop { + line.clear(); + let read = tokio::select! { + biased; + _ = shutdown.notified() => { return; } + r = reader.read_line(&mut line) => r, + }; + match read { + Ok(0) => break, // stdout closed → sidecar exited + Ok(_) => { + let trimmed = line.trim_end_matches(['\r', '\n']).trim(); + if trimmed.is_empty() { + continue; + } + dispatch_frame(trimmed, &pending, &events); + } + Err(_) => break, + } + } + // Sidecar exited: fail every pending call so callers don't hang. + if let Ok(mut p) = pending.lock() { + for (_, tx) in p.drain() { + let _ = tx.send(Err( + "camoufox-sidecar closed stdout before responding".to_string() + )); + } + } + }) +} + +fn dispatch_frame(line: &str, pending: &PendingMap, events: &broadcast::Sender) { + let frame: Value = match serde_json::from_str(line) { + Ok(v) => v, + Err(e) => { + eprintln!( + "[agent-browser] camoufox-sidecar sent malformed JSON on stdout: {} ({:?})", + e, line + ); + return; + } + }; + + if let Some(event) = frame.get("event").and_then(|v| v.as_str()) { + let data = frame.get("data").cloned().unwrap_or(Value::Null); + // Ignore send errors: if no subscribers are attached we just drop the + // event, which is the intended behavior. + let _ = events.send(CamoufoxEvent { + name: event.to_string(), + data, + }); + return; + } + + let Some(id) = frame.get("id").and_then(|v| v.as_u64()) else { + // Responses must carry an id; if not, log and drop. + eprintln!( + "[agent-browser] camoufox-sidecar response had no id: {:?}", + line + ); + return; + }; + + let tx = { + let mut p = match pending.lock() { + Ok(g) => g, + Err(_) => return, + }; + p.remove(&id) + }; + + let Some(tx) = tx else { + // Late response for a request we already timed out on. 
+ return; + }; + + let ok = frame.get("ok").and_then(|v| v.as_bool()).unwrap_or(false); + let result = if ok { + let value = frame.get("result").cloned().unwrap_or(Value::Null); + Ok(value) + } else { + let err = frame.get("error"); + let code = err + .and_then(|e| e.get("code")) + .and_then(|v| v.as_str()) + .unwrap_or("unknown"); + let message = err + .and_then(|e| e.get("message")) + .and_then(|v| v.as_str()) + .unwrap_or("no message provided"); + Err(format!("{}: {}", code, message)) + }; + let _ = tx.send(result); +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_ready_frame_extracts_pid() { + let frame = json!({"event": "ready", "data": {"pid": 12345}}); + assert_eq!(parse_ready_frame(&frame).unwrap(), Some(12345)); + } + + #[test] + fn parse_ready_frame_rejects_wrong_event() { + let frame = json!({"event": "closed", "data": {}}); + let err = parse_ready_frame(&frame).unwrap_err(); + assert!(err.contains("expected first frame to be `ready`")); + } + + #[test] + fn parse_ready_frame_rejects_response_frame() { + let frame = json!({"id": 1, "ok": true, "result": {}}); + let err = parse_ready_frame(&frame).unwrap_err(); + assert!(err.contains("ready event")); + } + + #[test] + fn parse_ready_frame_tolerates_missing_pid() { + let frame = json!({"event": "ready", "data": {}}); + assert_eq!(parse_ready_frame(&frame).unwrap(), None); + } + + #[test] + fn dispatch_frame_routes_response_to_pending() { + let pending: PendingMap = Arc::new(Mutex::new(HashMap::new())); + let (events_tx, _) = broadcast::channel::(8); + + let (tx, rx) = oneshot::channel(); + pending.lock().unwrap().insert(7, tx); + + dispatch_frame( + r#"{"id":7,"ok":true,"result":{"hello":"world"}}"#, + &pending, + &events_tx, + ); + let got = rx.blocking_recv().unwrap().unwrap(); + assert_eq!(got["hello"], json!("world")); + assert!(pending.lock().unwrap().is_empty()); + } + + #[test] + fn dispatch_frame_surfaces_error_code() { + let pending: PendingMap = 
Arc::new(Mutex::new(HashMap::new())); + let (events_tx, _) = broadcast::channel::<CamoufoxEvent>(8); + + let (tx, rx) = oneshot::channel(); + pending.lock().unwrap().insert(9, tx); + + dispatch_frame( + r#"{"id":9,"ok":false,"error":{"code":"launch-failed","message":"boom"}}"#, + &pending, + &events_tx, + ); + let err = rx.blocking_recv().unwrap().unwrap_err(); + assert_eq!(err, "launch-failed: boom"); + } + + #[test] + fn dispatch_frame_fans_events() { + let pending: PendingMap = Arc::new(Mutex::new(HashMap::new())); + let (events_tx, mut events_rx) = broadcast::channel::<CamoufoxEvent>(8); + + dispatch_frame( + r#"{"event":"page.console","data":{"level":"warn","text":"hi"}}"#, + &pending, + &events_tx, + ); + + let evt = events_rx.try_recv().unwrap(); + assert_eq!(evt.name, "page.console"); + assert_eq!(evt.data["text"], json!("hi")); + } + + #[test] + fn dispatch_frame_ignores_unknown_id() { + let pending: PendingMap = Arc::new(Mutex::new(HashMap::new())); + let (events_tx, _) = broadcast::channel::<CamoufoxEvent>(8); + + // Should not panic or crash. + dispatch_frame( + r#"{"id":999,"ok":true,"result":{}}"#, + &pending, + &events_tx, + ); + assert!(pending.lock().unwrap().is_empty()); + } +} diff --git a/cli/src/native/camoufox_embed/mod.rs b/cli/src/native/camoufox_embed/mod.rs new file mode 100644 index 000000000..09aded3f5 --- /dev/null +++ b/cli/src/native/camoufox_embed/mod.rs @@ -0,0 +1,171 @@ +//! Embedded Camoufox sidecar package. +//! +//! The full `camoufox_sidecar` Python package (multi-file, sibling imports) +//! is baked into the agent-browser binary via `include_dir!` so users who +//! install only the Rust binary still get a working sidecar to spawn. On +//! first launch we extract the tree into a version-keyed cache directory and +//! spawn `python3 -m camoufox_sidecar` with `PYTHONPATH` pointed at the +//! extraction dir so sibling imports resolve. +//! +//! The extraction dir is keyed by the crate version so upgrades re-extract +//! deterministically.
A `.extracted` sentinel marks a completed extraction; +//! subsequent launches observing the sentinel skip re-extraction so process +//! startup stays fast and the files' mtimes are stable. +//! +//! In E2B (and other environments where the sidecar is `pip install`'d) we +//! prefer `python3 -m camoufox_sidecar` and only fall back to the extracted +//! tree if the module import fails — handled in `cdp/camoufox.rs`. + +use std::fs; +use std::io; +use std::path::{Path, PathBuf}; + +use include_dir::{include_dir, Dir}; + +/// Embedded Python package. Path is resolved at compile time by `include_dir!` +/// against `$CARGO_MANIFEST_DIR` (the `cli/` crate root). +static SIDECAR_PACKAGE: Dir<'_> = + include_dir!("$CARGO_MANIFEST_DIR/../packages/camoufox-sidecar/camoufox_sidecar"); + +/// Filename written inside the extracted tree once extraction has completed +/// successfully. Its presence is the signal that the tree is safe to use. +const EXTRACTED_SENTINEL: &str = ".extracted"; + +/// Root of the version-keyed extraction tree for this crate build. The +/// sidecar package itself lives in a `camoufox_sidecar/` subdirectory of +/// this root; callers point `PYTHONPATH` at the root and spawn +/// `python3 -m camoufox_sidecar`. +pub fn extraction_root() -> io::Result<PathBuf> { + let base = dirs::cache_dir().ok_or_else(|| { + io::Error::new( + io::ErrorKind::NotFound, + "no user cache directory available (dirs::cache_dir returned None)", + ) + })?; + Ok(base.join(format!( + "agent-browser/camoufox-sidecar-{}", + env!("CARGO_PKG_VERSION") + ))) +} + +/// Path to the extracted `camoufox_sidecar` Python package directory. This +/// is `extraction_root()/camoufox_sidecar/` — the name must stay in sync +/// with the Python import name (hence the underscore rather than the dash +/// the outer directory uses for the crate version).
+pub fn package_dir() -> io::Result { + Ok(extraction_root()?.join("camoufox_sidecar")) +} + +/// Ensure the embedded sidecar package is laid out on disk and return the +/// PYTHONPATH root (the directory that contains the `camoufox_sidecar` +/// package). If the sentinel file is already present we skip extraction so +/// mtimes stay stable (see the "running twice in a row" test scenario in +/// the Camoufox engine plan). +/// +/// Extraction is best-effort atomic: we extract into a staging directory +/// and rename into place, so a crash mid-extraction cannot leave a +/// half-populated tree that is then reused on the next launch. +pub fn ensure_extracted() -> io::Result { + let root = extraction_root()?; + if is_already_extracted(&root) { + return Ok(root); + } + + if let Some(parent) = root.parent() { + fs::create_dir_all(parent)?; + } + + let staging = staging_dir_for(&root); + let _ = fs::remove_dir_all(&staging); + fs::create_dir_all(&staging)?; + let package_in_staging = staging.join("camoufox_sidecar"); + fs::create_dir_all(&package_in_staging)?; + + SIDECAR_PACKAGE.extract(&package_in_staging)?; + fs::write( + staging.join(EXTRACTED_SENTINEL), + env!("CARGO_PKG_VERSION").as_bytes(), + )?; + + if root.exists() { + let _ = fs::remove_dir_all(&root); + } + fs::rename(&staging, &root)?; + + Ok(root) +} + +/// True if `path` already hosts a successfully-extracted package. We check +/// the sentinel specifically because `camoufox_sidecar/__main__.py` alone +/// could be the remnant of an interrupted extraction. 
+fn is_already_extracted(path: &Path) -> bool { + path.join(EXTRACTED_SENTINEL).is_file() + && path.join("camoufox_sidecar").join("__main__.py").is_file() +} + +fn staging_dir_for(target: &Path) -> PathBuf { + let mut staging = target.as_os_str().to_owned(); + staging.push(format!(".staging-{}", std::process::id())); + PathBuf::from(staging) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::Mutex; + + // `std::env::set_var` is process-global. Serialise the tests that touch + // $XDG_CACHE_HOME so parallel cargo test runs don't race. + static ENV_LOCK: Mutex<()> = Mutex::new(()); + + fn with_temp_cache(f: F) { + let guard = ENV_LOCK.lock().unwrap(); + let tmp = tempfile::tempdir().unwrap(); + let prev_xdg = std::env::var_os("XDG_CACHE_HOME"); + let prev_home = std::env::var_os("HOME"); + std::env::set_var("XDG_CACHE_HOME", tmp.path()); + std::env::set_var("HOME", tmp.path()); + f(tmp.path()); + if let Some(v) = prev_xdg { + std::env::set_var("XDG_CACHE_HOME", v); + } else { + std::env::remove_var("XDG_CACHE_HOME"); + } + if let Some(v) = prev_home { + std::env::set_var("HOME", v); + } else { + std::env::remove_var("HOME"); + } + drop(guard); + } + + #[test] + fn extracts_all_expected_files() { + with_temp_cache(|_| { + let root = ensure_extracted().unwrap(); + let pkg = root.join("camoufox_sidecar"); + assert!(pkg.join("__main__.py").is_file()); + assert!(pkg.join("protocol.py").is_file()); + assert!(pkg.join("session.py").is_file()); + assert!(root.join(EXTRACTED_SENTINEL).is_file()); + }); + } + + #[test] + fn second_call_is_idempotent_and_preserves_mtime() { + with_temp_cache(|_| { + let dir = ensure_extracted().unwrap(); + let marker = dir.join(EXTRACTED_SENTINEL); + let first = fs::metadata(&marker).unwrap().modified().unwrap(); + + // Small sleep so a re-extract would show up as a newer mtime on + // filesystems with second-level precision. 
+ std::thread::sleep(std::time::Duration::from_millis(1100)); + + let dir2 = ensure_extracted().unwrap(); + assert_eq!(dir, dir2); + let second = fs::metadata(&marker).unwrap().modified().unwrap(); + assert_eq!(first, second, "sentinel mtime should be unchanged"); + }); + } +} diff --git a/cli/src/native/cdp/camoufox.rs b/cli/src/native/cdp/camoufox.rs index fb63b416a..5b40eed1d 100644 --- a/cli/src/native/cdp/camoufox.rs +++ b/cli/src/native/cdp/camoufox.rs @@ -1,33 +1,594 @@ -//! Camoufox sidecar process lifecycle (stubbed in Unit 1). +//! Camoufox Python sidecar: process lifecycle + launch pipeline. //! -//! Unit 1 only defines the type so `BrowserProcess::Camoufox(CamoufoxProcess)` -//! compiles. `BrowserManager::launch` returns a structured -//! `not-yet-implemented` error before anything tries to construct this value. -//! Unit 3 fills in the real Python-sidecar child-process lifecycle, mirroring -//! `LightpandaProcess`. +//! The sidecar is a long-lived `python3` child that holds a +//! Playwright+Camoufox browser open and speaks the JSON-line protocol +//! documented in `packages/camoufox-sidecar/camoufox_sidecar/protocol.py`. +//! This module mirrors `lightpanda.rs` in shape (process ownership, bounded +//! log drainer, structured readiness error) and adds the Python-specific +//! dispatch logic: `python3 -m camoufox_sidecar` first, with a fallback to +//! the same command with `PYTHONPATH` set to the extracted embedded copy +//! when the package is not pip-installed. +//! +//! `CamoufoxProcess` owns the `Child` (and kills it on drop). The +//! `CamoufoxClient` that rides on top is constructed inside +//! `launch_camoufox_sidecar` from the child's stdio and returned alongside. +use std::collections::VecDeque; +use std::path::{Path, PathBuf}; +use std::process::Stdio; +use std::sync::{Arc, Mutex}; use std::time::Duration; -/// Placeholder for the Python sidecar subprocess.
In Unit 3 this gains -/// ownership of the `std::process::Child`, the stdio handles, and bounded -/// log-drainer threads (mirrors `LightpandaProcess`). +use serde_json::{json, Value}; +use tokio::io::{AsyncBufReadExt, BufReader}; +use tokio::process::{Child, Command}; + +use crate::native::camoufox_client::CamoufoxClient; +use crate::native::camoufox_embed; + +const READY_TIMEOUT: Duration = Duration::from_secs(15); +const LAUNCH_TIMEOUT: Duration = Duration::from_secs(60); +const MAX_LOG_LINES: usize = 40; +const GRACEFUL_EXIT_WAIT: Duration = Duration::from_millis(500); + +#[cfg(unix)] +const SIGNAL_TERMINATE: i32 = libc::SIGTERM; +#[cfg(unix)] +const SIGNAL_FORCE_KILL: i32 = libc::SIGKILL; +#[cfg(not(unix))] +const SIGNAL_TERMINATE: i32 = 15; +#[cfg(not(unix))] +const SIGNAL_FORCE_KILL: i32 = 9; + +/// Send `signal` to the process group led by `pid`. Because the sidecar is +/// spawned with `setpgid(0, 0)`, its pid and pgid are the same and this +/// hits every descendant (Python → Camoufox → plugin-container helpers). +#[cfg(unix)] +fn send_signal_to_group(pid: u32, signal: i32) { + unsafe { + libc::killpg(pid as libc::pid_t, signal); + } +} + +#[cfg(not(unix))] +fn send_signal_to_group(_pid: u32, _signal: i32) { + // Windows path not yet supported. The sidecar is not expected to run on + // Windows in Unit 3 (E2B is Linux, dev is Linux/macOS); when Windows + // support is added, use `TerminateProcess` + job objects here. +} + +/// Owns the Python sidecar subprocess and its stderr log drainer. +/// +/// `CamoufoxClient` owns the stdio half of the relationship (writer + +/// demultiplexing reader). `CamoufoxProcess` owns the OS-level child: it is +/// responsible for killing the process on drop so a panicking daemon cannot +/// leak a Python+Firefox grandchild tree. pub struct CamoufoxProcess { - _private: (), + child: Option, + /// PID reported by the sidecar's `ready` event. 
Mostly useful for + /// integration tests that assert the process tree is gone after close. + pub sidecar_pid: Option, + _stderr_drainer: Option>, + stderr_log: SharedLog, } impl CamoufoxProcess { + /// Best-effort terminate. Sends SIGTERM to the sidecar's process group + /// so the entire descendant tree (Python → Camoufox → plugin-container + /// helpers) shuts down together. Call `wait_or_kill` afterwards if you + /// need the OS-level reap to complete before returning. pub fn kill(&mut self) { - // No-op: Unit 1 cannot construct a live sidecar. Unit 3 replaces this - // with `child.kill()` + drainer teardown. + if let Some(pid) = self.child.as_ref().and_then(|c| c.id()) { + send_signal_to_group(pid, SIGNAL_TERMINATE); + } + } + + /// Graceful-then-forceful shutdown. Sends SIGTERM, waits up to + /// `timeout` for the sidecar (and its descendants) to exit, then + /// SIGKILLs the process group if anything is still alive. + /// + /// Purely synchronous — the caller is expected to invoke this from a + /// blocking context (e.g. `tokio::task::spawn_blocking`). We use raw + /// `libc::waitpid` rather than tokio's `Child::wait` because wiring + /// futures through a potentially-detached `tokio::spawn` was a + /// persistent source of racy teardown where the process wasn't + /// actually gone by the time `mgr.close()` returned. Synchronous + /// waitpid blocks on the kernel and returns deterministically. + pub fn wait_or_kill(&mut self, timeout: Duration) { + let Some(child) = self.child.take() else { + return; + }; + let Some(pid) = child.id() else { + // Already reaped; tokio may have taken the exit status. + return; + }; + // We own the Child here; dropping it at the end of this function + // is fine because we've already reaped the kernel-level process + // entry below. We don't need to hold it across the wait. 
+ drop(child); + + send_signal_to_group(pid, SIGNAL_TERMINATE); + + #[cfg(unix)] + { + const POLL: Duration = Duration::from_millis(100); + let start = std::time::Instant::now(); + while start.elapsed() < timeout { + let mut status: libc::c_int = 0; + let ret = unsafe { + libc::waitpid(pid as libc::pid_t, &mut status, libc::WNOHANG) + }; + if ret == pid as libc::pid_t || ret == -1 { + return; + } + std::thread::sleep(POLL); + } + + send_signal_to_group(pid, SIGNAL_FORCE_KILL); + let mut status: libc::c_int = 0; + unsafe { libc::waitpid(pid as libc::pid_t, &mut status, 0) }; + } + } + + /// Snapshot of the last few stderr lines — used to build a detailed + /// error message when readiness times out or the child exits early. + pub fn snapshot_stderr(&self) -> Vec { + self.stderr_log + .lock() + .expect("stderr log poisoned") + .iter() + .cloned() + .collect() } - pub fn wait_or_kill(&mut self, _timeout: Duration) { - // No-op for the same reason as `kill`. + /// Non-blocking probe: has the sidecar subprocess exited? Also reaps + /// the zombie if so, matching Chrome/Lightpanda semantics. + pub fn has_exited(&mut self) -> bool { + let Some(child) = self.child.as_mut() else { + return true; + }; + matches!(child.try_wait(), Ok(Some(_))) } } impl Drop for CamoufoxProcess { + /// Synchronous cleanup path for the ungraceful case (daemon panic, the + /// `BrowserManager` being dropped without a `close()` call). Sends + /// SIGTERM to the sidecar's process group, waits briefly for its + /// asyncio cleanup + Playwright Firefox teardown to complete, and + /// escalates to SIGKILL if that times out. We use `libc::waitpid` + /// directly rather than `Child::wait` so this stays cheap in Drop — + /// spinning up a fresh tokio runtime from a destructor has historically + /// been a source of subtle deadlocks. 
fn drop(&mut self) { - self.kill(); + let Some(pid) = self.child.as_ref().and_then(|c| c.id()) else { + return; + }; + + send_signal_to_group(pid, SIGNAL_TERMINATE); + + #[cfg(unix)] + { + const DROP_GRACEFUL_WAIT: Duration = Duration::from_secs(3); + const DROP_POLL: Duration = Duration::from_millis(100); + + let start = std::time::Instant::now(); + let mut reaped = false; + while start.elapsed() < DROP_GRACEFUL_WAIT { + let mut status: libc::c_int = 0; + let ret = unsafe { + libc::waitpid(pid as libc::pid_t, &mut status, libc::WNOHANG) + }; + if ret == pid as libc::pid_t { + reaped = true; + break; + } + if ret == -1 { + // ECHILD = already reaped (e.g. by tokio), which is fine. + reaped = true; + break; + } + std::thread::sleep(DROP_POLL); + } + + if !reaped { + send_signal_to_group(pid, SIGNAL_FORCE_KILL); + let mut status: libc::c_int = 0; + unsafe { libc::waitpid(pid as libc::pid_t, &mut status, 0) }; + } + } + } +} + +type SharedLog = Arc>>; + +fn empty_log() -> SharedLog { + Arc::new(Mutex::new(VecDeque::with_capacity(MAX_LOG_LINES))) +} + +fn push_bounded(log: &SharedLog, line: String) { + let mut g = log.lock().expect("stderr log poisoned"); + if g.len() >= MAX_LOG_LINES { + g.pop_front(); + } + g.push_back(line); +} + +/// Validated Camoufox launch kwargs passed through to the sidecar `launch` +/// command. `args` contains exactly the object the sidecar will feed into +/// `AsyncCamoufox(**kwargs)`; the Python side re-validates against its own +/// allowlist so new options can be rolled out from the sidecar without a +/// Rust release. +#[derive(Debug, Default, Clone)] +pub struct CamoufoxLaunchOptions { + pub headless: bool, + pub executable_path: Option, + pub proxy: Option, + /// Extra allowed kwargs forwarded verbatim to the sidecar. Left open + /// (instead of strongly typed) because the sidecar already enforces the + /// allowlist; adding fields here just duplicates validation. 
+ pub extra: serde_json::Map, +} + +impl CamoufoxLaunchOptions { + fn to_launch_args(&self) -> Value { + let mut args = serde_json::Map::new(); + args.insert("headless".to_string(), json!(self.headless)); + if let Some(path) = &self.executable_path { + args.insert("executable_path".to_string(), json!(path)); + } + if let Some(proxy) = &self.proxy { + args.insert("proxy".to_string(), proxy.clone()); + } + for (k, v) in &self.extra { + args.insert(k.clone(), v.clone()); + } + Value::Object(args) + } +} + +/// Launch the Python sidecar, wait for its `ready` event, then send the +/// `launch` command to bring up the Camoufox browser. Returns the owning +/// process handle paired with the client that the rest of the daemon drives. +/// +/// Failure cleans up the subprocess before returning; callers never receive +/// a `CamoufoxProcess` whose `ready` handshake did not complete. +pub async fn launch_camoufox_sidecar( + options: &CamoufoxLaunchOptions, +) -> Result<(CamoufoxProcess, Arc), String> { + let python = resolve_python_executable()?; + let extracted = camoufox_embed::ensure_extracted() + .map_err(|e| format!("Failed to extract embedded camoufox-sidecar: {}", e))?; + + let (mut child, dispatch) = spawn_sidecar(&python, &extracted).await?; + + let stdin = child + .stdin + .take() + .ok_or_else(|| "Failed to capture camoufox-sidecar stdin".to_string())?; + let stdout = child + .stdout + .take() + .ok_or_else(|| "Failed to capture camoufox-sidecar stdout".to_string())?; + let stderr = child + .stderr + .take() + .ok_or_else(|| "Failed to capture camoufox-sidecar stderr".to_string())?; + + let stderr_log = empty_log(); + let stderr_drainer = spawn_stderr_drainer(stderr, stderr_log.clone()); + + let (client, ready_pid) = match CamoufoxClient::start(stdin, stdout, READY_TIMEOUT).await { + Ok(c) => c, + Err(e) => { + let _ = child.start_kill(); + let _ = child.wait().await; + let stderr = snapshot(&stderr_log); + return Err(decorate_error( + format!("camoufox-sidecar 
failed readiness handshake: {}", e), + dispatch, + &stderr, + )); + } + }; + + let launch_args = options.to_launch_args(); + let launch_result = tokio::time::timeout(LAUNCH_TIMEOUT, client.call("launch", launch_args)) + .await + .map_err(|_| "Camoufox launch timed out after 60s".to_string()) + .and_then(|r| r); + + if let Err(err) = launch_result { + // Attempt a graceful close; if that fails, kill. + let _ = tokio::time::timeout(GRACEFUL_EXIT_WAIT, client.close()).await; + let _ = child.start_kill(); + let _ = child.wait().await; + let stderr = snapshot(&stderr_log); + return Err(decorate_error( + format!("Camoufox launch failed: {}", err), + dispatch, + &stderr, + )); + } + + Ok(( + CamoufoxProcess { + child: Some(child), + sidecar_pid: ready_pid, + _stderr_drainer: Some(stderr_drainer), + stderr_log, + }, + client, + )) +} + +/// Describes which invocation path the sidecar used. Retained only for the +/// error message — callers don't care beyond that. +#[derive(Debug, Clone)] +enum SidecarDispatch { + Module(String), + Script { script: PathBuf }, +} + +impl SidecarDispatch { + fn describe(&self) -> String { + match self { + SidecarDispatch::Module(m) => format!("python3 -m {}", m), + SidecarDispatch::Script { script } => { + format!("python3 {}", script.display()) + } + } + } +} + +/// Spawn the sidecar, trying `-m camoufox_sidecar` first (works when the +/// package is pip-installed, as in E2B) and falling back to the embedded +/// copy extracted to the user cache (works when only the Rust binary is +/// installed). +async fn spawn_sidecar( + python: &Path, + extracted: &Path, +) -> Result<(Child, SidecarDispatch), String> { + // Probe: can Python find `camoufox_sidecar` on its own? We do a cheap + // `-c "import camoufox_sidecar"` first so the fallback doesn't require + // swallowing a startup crash. 
+ let probe_ok = tokio::time::timeout( + Duration::from_secs(5), + Command::new(python) + .args(["-c", "import camoufox_sidecar"]) + .stdin(Stdio::null()) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status(), + ) + .await + .ok() + .and_then(|r| r.ok()) + .map(|s| s.success()) + .unwrap_or(false); + + if probe_ok { + let child = build_command(python) + .args(["-m", "camoufox_sidecar"]) + .spawn() + .map_err(|e| format!("Failed to spawn `python3 -m camoufox_sidecar`: {}", e))?; + return Ok(( + child, + SidecarDispatch::Module("camoufox_sidecar".to_string()), + )); + } + + // Fallback: `extracted` is the PYTHONPATH root — it contains a + // `camoufox_sidecar/` package directory. We set PYTHONPATH and invoke + // `python3 -m camoufox_sidecar` so Python loads the module as a proper + // package (relative imports like `from .protocol import ...` resolve). + let package_init = extracted.join("camoufox_sidecar").join("__main__.py"); + if !package_init.is_file() { + return Err(format!( + "Embedded camoufox-sidecar is missing __main__.py at {}", + package_init.display() + )); + } + let pythonpath = prepend_pythonpath(extracted); + + let child = build_command(python) + .args(["-m", "camoufox_sidecar"]) + .env("PYTHONPATH", pythonpath) + .spawn() + .map_err(|e| { + format!( + "Failed to spawn fallback `python3 -m camoufox_sidecar` (PYTHONPATH={}): {}", + extracted.display(), + e + ) + })?; + Ok(( + child, + SidecarDispatch::Script { + script: package_init, + }, + )) +} + +fn build_command(python: &Path) -> Command { + let mut cmd = Command::new(python); + cmd.stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + // Python must flush stdout after every frame — otherwise the sidecar + // protocol deadlocks on buffered output. The sidecar itself calls + // `sys.stdout.flush()` but we set this too as belt-and-braces. 
+ .env("PYTHONUNBUFFERED", "1"); + + // Make the sidecar the leader of its own process group so we can signal + // the entire descendant tree (Python → Camoufox → plugin-container + // helpers) with one kill. Without this, on macOS the Firefox + // grandchildren survive when we SIGKILL only the Python parent and + // leak across test runs. `kill_on_drop` is deliberately NOT set — it + // uses SIGKILL, which gives the sidecar no chance to run its asyncio + // cleanup (which is how Playwright closes Firefox cleanly). + #[cfg(unix)] + { + unsafe { + cmd.pre_exec(|| { + if libc::setpgid(0, 0) != 0 { + return Err(std::io::Error::last_os_error()); + } + Ok(()) + }); + } + } + cmd +} + +fn prepend_pythonpath(dir: &Path) -> std::ffi::OsString { + let existing = std::env::var_os("PYTHONPATH"); + let sep = if cfg!(windows) { ";" } else { ":" }; + let mut out = std::ffi::OsString::from(dir.as_os_str()); + if let Some(v) = existing { + if !v.is_empty() { + out.push(sep); + out.push(v); + } + } + out +} + +/// Discovery order per the plan: env var → `python3` (then `python`) on PATH → error.
+fn resolve_python_executable() -> Result { + if let Ok(v) = std::env::var("AGENT_BROWSER_CAMOUFOX_PYTHON") { + if !v.is_empty() { + let p = PathBuf::from(v); + if p.exists() { + return Ok(p); + } + return Err(format!( + "AGENT_BROWSER_CAMOUFOX_PYTHON points to a path that does not exist: {}", + p.display() + )); + } + } + + #[cfg(unix)] + { + for candidate in ["python3", "python"] { + if let Ok(output) = std::process::Command::new("which").arg(candidate).output() { + if output.status.success() { + let path = String::from_utf8_lossy(&output.stdout).trim().to_string(); + if !path.is_empty() { + return Ok(PathBuf::from(path)); + } + } + } + } + } + #[cfg(windows)] + { + for candidate in ["python3", "python"] { + if let Ok(output) = std::process::Command::new("where").arg(candidate).output() { + if output.status.success() { + let path = String::from_utf8_lossy(&output.stdout) + .lines() + .next() + .unwrap_or("") + .trim() + .to_string(); + if !path.is_empty() { + return Ok(PathBuf::from(path)); + } + } + } + } + } + + Err( + "Camoufox requires a Python 3 runtime with the `camoufox` package installed. \ + Set AGENT_BROWSER_CAMOUFOX_PYTHON to your python3 binary or install python3 on PATH. \ + See docs/engines/camoufox.md." 
+ .to_string(), + ) +} + +fn spawn_stderr_drainer( + stderr: tokio::process::ChildStderr, + log: SharedLog, +) -> tokio::task::JoinHandle<()> { + tokio::spawn(async move { + let mut reader = BufReader::new(stderr).lines(); + while let Ok(Some(line)) = reader.next_line().await { + push_bounded(&log, line); + } + }) +} + +fn snapshot(log: &SharedLog) -> Vec { + log.lock() + .expect("stderr log poisoned") + .iter() + .cloned() + .collect() +} + +fn decorate_error(message: String, dispatch: SidecarDispatch, stderr: &[String]) -> String { + let mut out = format!("{}\n dispatch: {}", message, dispatch.describe()); + if !stderr.is_empty() { + out.push_str(&format!( + "\n sidecar stderr (last {} lines):\n {}", + stderr.len(), + stderr.join("\n ") + )); + } + out +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn launch_options_marshals_headless() { + let opts = CamoufoxLaunchOptions { + headless: true, + executable_path: Some("/tmp/cf".into()), + proxy: None, + extra: serde_json::Map::new(), + }; + let args = opts.to_launch_args(); + assert_eq!(args["headless"], json!(true)); + assert_eq!(args["executable_path"], json!("/tmp/cf")); + } + + #[test] + fn launch_options_preserves_extra() { + let mut extra = serde_json::Map::new(); + extra.insert("humanize".into(), json!(true)); + let opts = CamoufoxLaunchOptions { + headless: false, + executable_path: None, + proxy: None, + extra, + }; + let args = opts.to_launch_args(); + assert_eq!(args["humanize"], json!(true)); + } + + #[test] + fn resolve_python_returns_env_var_when_set() { + let tmp = tempfile::NamedTempFile::new().unwrap(); + std::env::set_var("AGENT_BROWSER_CAMOUFOX_PYTHON", tmp.path()); + let got = resolve_python_executable().unwrap(); + assert_eq!(got, tmp.path()); + std::env::remove_var("AGENT_BROWSER_CAMOUFOX_PYTHON"); + } + + #[test] + fn resolve_python_rejects_missing_env_path() { + std::env::set_var( + "AGENT_BROWSER_CAMOUFOX_PYTHON", + "/nonexistent/python3-no-such-file", + ); + let err = 
resolve_python_executable().unwrap_err(); + assert!(err.contains("does not exist")); + std::env::remove_var("AGENT_BROWSER_CAMOUFOX_PYTHON"); } } diff --git a/cli/src/native/e2e_tests.rs b/cli/src/native/e2e_tests.rs index 21b0d7a5b..76db6521d 100644 --- a/cli/src/native/e2e_tests.rs +++ b/cli/src/native/e2e_tests.rs @@ -4127,7 +4127,7 @@ async fn e2e_externally_opened_tab_detected() { // opens a tab while agent-browser is connected via --cdp. let browser = state.browser.as_ref().expect("browser should be launched"); let _: Value = browser - .client + .client() .send_command( "Target.createTarget", Some(json!({ "url": "data:text/html,

External Tab

" })), diff --git a/cli/src/native/mod.rs b/cli/src/native/mod.rs index f32a649df..202edab79 100644 --- a/cli/src/native/mod.rs +++ b/cli/src/native/mod.rs @@ -9,6 +9,8 @@ pub mod browser; #[allow(dead_code)] pub mod camoufox_client; #[allow(dead_code)] +pub mod camoufox_embed; +#[allow(dead_code)] pub mod cdp; #[allow(dead_code)] pub mod cookies; diff --git a/cli/src/native/stream/chat.rs b/cli/src/native/stream/chat.rs index 884f22a8d..5336c16d5 100644 --- a/cli/src/native/stream/chat.rs +++ b/cli/src/native/stream/chat.rs @@ -145,7 +145,7 @@ RULES: - Keep responses concise. - For screenshots, omit the path argument so they save to the default location (which will be displayed inline). Screenshots from tool calls are ALREADY shown to the user. Do NOT re-display them with markdown image syntax in your text response. Never use `![...]()` to reference screenshots. - To create a new session: add `--session ` to any command (e.g. `agent-browser --session my-session open https://example.com`). If the session does not exist, it will be created automatically. -- To use a different browser engine: add `--engine ` (e.g. `agent-browser --session lp-session --engine lightpanda open https://example.com`). Supported engines: chrome (default), lightpanda. +- To use a different browser engine: add `--engine ` (e.g. `agent-browser --session lp-session --engine lightpanda open https://example.com`). Supported engines: chrome (default), lightpanda, camoufox. The following skill references describe agent-browser capabilities in detail. Use them when deciding which commands to run and how to approach tasks. 
{sections}"#, diff --git a/cli/src/output.rs b/cli/src/output.rs index 743638d85..db5f02d6f 100644 --- a/cli/src/output.rs +++ b/cli/src/output.rs @@ -3069,7 +3069,7 @@ Options: --action-policy Action policy JSON file (or AGENT_BROWSER_ACTION_POLICY) --confirm-actions Categories requiring confirmation (or AGENT_BROWSER_CONFIRM_ACTIONS) --confirm-interactive Interactive confirmation prompts; auto-denies if stdin is not a TTY (or AGENT_BROWSER_CONFIRM_INTERACTIVE) - --engine Browser engine: chrome (default), lightpanda (or AGENT_BROWSER_ENGINE) + --engine Browser engine: chrome (default), lightpanda, camoufox (or AGENT_BROWSER_ENGINE) --no-auto-dialog Disable automatic dismissal of alert/beforeunload dialogs (or AGENT_BROWSER_NO_AUTO_DIALOG) --stealth Mask common bot-detection signals (webdriver, chrome.runtime, plugins, languages, WebGL) (or AGENT_BROWSER_STEALTH) @@ -3133,7 +3133,7 @@ Environment: AGENT_BROWSER_CONFIRM_ACTIONS Action categories requiring confirmation AGENT_BROWSER_CONFIRM_INTERACTIVE Enable interactive confirmation prompts AGENT_BROWSER_NO_AUTO_DIALOG Disable automatic dismissal of alert/beforeunload dialogs - AGENT_BROWSER_ENGINE Browser engine: chrome (default), lightpanda + AGENT_BROWSER_ENGINE Browser engine: chrome (default), lightpanda, camoufox HTTP_PROXY / HTTPS_PROXY Standard proxy env vars (fallback if AGENT_BROWSER_PROXY not set) ALL_PROXY SOCKS proxy (fallback for proxy) NO_PROXY Bypass proxy for hosts (fallback for proxy-bypass) diff --git a/cli/tests/backend_refactor_smoke.rs b/cli/tests/backend_refactor_smoke.rs index 342ecf437..00678fdcd 100644 --- a/cli/tests/backend_refactor_smoke.rs +++ b/cli/tests/backend_refactor_smoke.rs @@ -1,12 +1,13 @@ -//! Smoke + characterization tests for the `BrowserBackend` refactor (Unit 1 -//! of the Camoufox engine plan). +//! Smoke + characterization tests for the `BrowserBackend` refactor (Units 1 +//! and 3 of the Camoufox engine plan). //! -//! 
These tests cover two things Unit 1 must guarantee: +//! These tests cover two things the refactor must guarantee: //! -//! 1. `agent-browser --engine camoufox open ` reaches the stub and -//! returns a **structured** `not-yet-implemented` error, not a panic. -//! This is the plan's exit criterion for Unit 1 and also the characterization -//! snapshot: every subsequent commit must keep this error shape stable. +//! 1. `agent-browser --engine camoufox open ` does **not** panic. It +//! must exit cleanly with a structured JSON error whose message +//! mentions Camoufox — either the Unit 1 `not-yet-implemented` stub or +//! the Unit 3 launch-failure message (Python missing / sidecar failed +//! readiness) depending on how much of the plan has landed. //! //! 2. Unknown engines are rejected with a message that enumerates //! `chrome, lightpanda, camoufox` — proves the launch dispatch table @@ -45,13 +46,20 @@ fn build_cmd(tmp: &TempDir, args: &[&str]) -> Command { } #[test] -fn camoufox_engine_returns_structured_not_yet_implemented_error() { +fn camoufox_engine_returns_structured_error_without_panic() { let tmp = TempDir::new().unwrap(); + // Point the sidecar at a non-existent python so Unit 3's launch path + // exits cleanly on environments that don't have Camoufox installed — + // CI will otherwise spend minutes in the Python probe. let output = build_cmd( &tmp, &["--engine", "camoufox", "--json", "open", "https://example.com"], ) + .env( + "AGENT_BROWSER_CAMOUFOX_PYTHON", + "/definitely/not/a/real/python3", + ) .output() .expect("failed to invoke agent-browser"); @@ -72,14 +80,14 @@ fn camoufox_engine_returns_structured_not_yet_implemented_error() { let stdout = String::from_utf8(output.stdout).expect("stdout should be utf8"); - // JSON output must parse and carry the not-yet-implemented marker. + // JSON output must parse and carry a failure payload. 
let payload: serde_json::Value = serde_json::from_str(&stdout) .unwrap_or_else(|e| panic!("stdout was not JSON: {}\n---\n{}", e, stdout)); assert_eq!( payload.get("success").and_then(|v| v.as_bool()), Some(false), - "expected success:false for camoufox stub, got payload:\n{}", + "expected success:false for camoufox launch failure, got payload:\n{}", stdout ); @@ -87,9 +95,13 @@ fn camoufox_engine_returns_structured_not_yet_implemented_error() { .get("error") .and_then(|v| v.as_str()) .expect("payload must contain an error string"); + // Accept either the Unit 1 stub shape or the Unit 3 "python missing" + // shape; both are characterised by a mention of camoufox or the python + // env var we set above. assert!( - error.contains("not-yet-implemented") && error.contains("camoufox"), - "error message did not mention not-yet-implemented/camoufox: {:?}", + error.to_lowercase().contains("camoufox") + || error.contains("AGENT_BROWSER_CAMOUFOX_PYTHON"), + "error message did not mention camoufox/python: {:?}", error ); } diff --git a/cli/tests/camoufox_launch.rs b/cli/tests/camoufox_launch.rs new file mode 100644 index 000000000..c22d477ef --- /dev/null +++ b/cli/tests/camoufox_launch.rs @@ -0,0 +1,309 @@ +//! Camoufox engine integration tests (Unit 3 of the engine plan). +//! +//! Feature-gated: requires `--features camoufox-integration` to run, since +//! they spawn a real Python sidecar + Camoufox browser. On a development +//! machine set `AGENT_BROWSER_CAMOUFOX_PYTHON` to the venv under +//! `packages/camoufox-sidecar/.venv/bin/python3` so the tests don't depend +//! on the system Python. +//! +//! The non-gated tests in this file (error/validation paths that don't need +//! Camoufox installed) always run so regressions in Rust-side wiring surface +//! in CI. 
+ +#![cfg_attr( + not(feature = "camoufox-integration"), + allow(dead_code, unused_imports) +)] + +use std::process::Command; +use tempfile::TempDir; + +const BIN: &str = env!("CARGO_BIN_EXE_agent-browser"); + +fn build_cmd(tmp: &TempDir, args: &[&str]) -> Command { + let socket_dir = tmp.path().join("sockets"); + let home = tmp.path().join("home"); + std::fs::create_dir_all(&socket_dir).unwrap(); + std::fs::create_dir_all(&home).unwrap(); + + let mut cmd = Command::new(BIN); + cmd.args(args) + .env("AGENT_BROWSER_SOCKET_DIR", &socket_dir) + .env("HOME", &home) + .env("USERPROFILE", &home) + .env_remove("AGENT_BROWSER_PROVIDER") + .env_remove("AGENT_BROWSER_CDP") + .env_remove("AGENT_BROWSER_AUTO_CONNECT") + .env_remove("AGENT_BROWSER_ENGINE") + .env("NO_COLOR", "1"); + cmd +} + +/// These tests run unconditionally — they exercise error paths that don't +/// depend on Camoufox being installed, so they catch plumbing regressions +/// without the integration harness. +mod rust_only { + use super::*; + + /// `--engine camoufox --extension foo.crx` must be rejected by + /// `validate_camoufox_options` with a clear message. This is the + /// "Error path" R4-parity test from the plan. + #[test] + fn rejects_extensions_with_camoufox() { + let tmp = TempDir::new().unwrap(); + let output = build_cmd( + &tmp, + &[ + "--engine", + "camoufox", + "--extension", + "/nonexistent/ext", + "--json", + "open", + "https://example.com", + ], + ) + // Pointing at a missing python short-circuits the launch path on + // test environments that don't have Camoufox installed, so the + // error comes from `validate_camoufox_options` rather than the + // sidecar spawn probe. 
+ .env( + "AGENT_BROWSER_CAMOUFOX_PYTHON", + "/definitely/not/a/real/python3", + ) + .output() + .expect("invoke agent-browser"); + + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + stdout.contains("Extensions are not supported with Camoufox"), + "expected extensions-rejection error message, got: {}", + stdout + ); + } + + /// `AGENT_BROWSER_CAMOUFOX_PYTHON=/nonexistent` must surface an + /// actionable error and not partially start any process. + #[test] + fn missing_python_surfaces_actionable_error() { + let tmp = TempDir::new().unwrap(); + let output = build_cmd( + &tmp, + &["--engine", "camoufox", "--json", "open", "https://example.com"], + ) + .env("AGENT_BROWSER_CAMOUFOX_PYTHON", "/nonexistent/python3-xyz") + .output() + .expect("invoke agent-browser"); + + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + stdout.contains("does not exist") + || stdout.contains("AGENT_BROWSER_CAMOUFOX_PYTHON"), + "expected python-not-found error, got: {}", + stdout + ); + // Must return a structured error (non-panic, non-signal exit). + assert_ne!(output.status.code(), Some(101), "should not panic"); + } +} + +// ----------------------------------------------------------------------------- +// Feature-gated integration tests. These require a real Camoufox install. +// ----------------------------------------------------------------------------- + +#[cfg(feature = "camoufox-integration")] +mod integration { + use super::*; + use std::sync::Mutex; + use std::thread::sleep; + use std::time::Duration; + + /// Integration tests share the Camoufox browser binary cache and can + /// each leak stray sidecar / Firefox processes if they run concurrently. + /// Cargo's default parallel runner would also make "no process leaked" + /// assertions non-deterministic because each test's ps snapshot would + /// see other tests' in-flight sidecars. Serialise the whole integration + /// suite behind this mutex so each test sees a clean slate. 
+ static INTEGRATION_LOCK: Mutex<()> = Mutex::new(()); + + fn acquire() -> std::sync::MutexGuard<'static, ()> { + match INTEGRATION_LOCK.lock() { + Ok(g) => g, + Err(poisoned) => poisoned.into_inner(), + } + } + + fn fixture_python() -> Option<std::path::PathBuf> { + // Prefer the package's dev venv if it exists — faster than spinning up + // a new environment per run. + let crate_root = env!("CARGO_MANIFEST_DIR"); + let repo_root = std::path::Path::new(crate_root).parent()?; + let venv_python = repo_root + .join("packages/camoufox-sidecar/.venv/bin/python3"); + if venv_python.is_file() { + return Some(venv_python); + } + std::env::var("AGENT_BROWSER_CAMOUFOX_PYTHON") + .ok() + .map(std::path::PathBuf::from) + } + + fn cmd_with_python(tmp: &TempDir, args: &[&str]) -> Command { + let mut cmd = build_cmd(tmp, args); + if let Some(py) = fixture_python() { + cmd.env("AGENT_BROWSER_CAMOUFOX_PYTHON", py); + } + cmd + } + + /// Happy path: open + close completes, and the child Python/Firefox + /// processes belonging to our daemon are gone afterwards. + #[test] + fn open_and_close_cleans_up_children() { + let _guard = acquire(); + let tmp = TempDir::new().unwrap(); + + let open = cmd_with_python( + &tmp, + &[ + "--engine", + "camoufox", + "--session", + "ce_open", + "--json", + "open", + "https://example.com", + ], + ) + .output() + .expect("open"); + assert!( + open.status.success(), + "open failed: stdout={} stderr={}", + String::from_utf8_lossy(&open.stdout), + String::from_utf8_lossy(&open.stderr) + ); + let out = String::from_utf8_lossy(&open.stdout); + assert!( + out.contains("\"success\":true") || out.contains("\"success\": true"), + "open output did not indicate success: {}", + out + ); + + let close = cmd_with_python(&tmp, &["--session", "ce_open", "close"]) + .output() + .expect("close"); + assert!(close.status.success(), "close failed"); + + // Give the OS a moment to reap the grandchildren. 
+ sleep(Duration::from_secs(2)); + let daemon_pids = pgrep_contains("agent-browser --daemon"); + assert!( + daemon_pids.is_empty(), + "daemon process survived close: {:?}", + daemon_pids + ); + let sidecar_pids = pgrep_contains("camoufox_sidecar"); + assert!( + sidecar_pids.is_empty(), + "camoufox_sidecar process survived close: {:?}", + sidecar_pids + ); + } + + /// Loop smoke test from the plan: open → close → reopen 10× with no + /// process leak between iterations. + #[test] + fn loop_smoke_no_process_leaks() { + let _guard = acquire(); + let tmp = TempDir::new().unwrap(); + + for iteration in 0..10 { + let open = cmd_with_python( + &tmp, + &[ + "--engine", + "camoufox", + "--session", + "ce_loop", + "--json", + "open", + "about:blank", + ], + ) + .output() + .unwrap_or_else(|e| panic!("iter {}: open failed: {}", iteration, e)); + assert!( + open.status.success(), + "iter {}: open non-zero: {}", + iteration, + String::from_utf8_lossy(&open.stdout) + ); + + let close = cmd_with_python(&tmp, &["--session", "ce_loop", "close"]) + .output() + .unwrap_or_else(|e| panic!("iter {}: close failed: {}", iteration, e)); + assert!(close.status.success(), "iter {}: close non-zero", iteration); + + sleep(Duration::from_secs(2)); + let sidecar_pids = pgrep_contains("camoufox_sidecar"); + assert!( + sidecar_pids.is_empty(), + "iter {}: camoufox_sidecar survived close: {:?}", + iteration, + sidecar_pids + ); + } + } + + /// `--stealth --engine camoufox` should still succeed. The warning + /// itself is emitted from the daemon process (not the CLI client), so + /// asserting on its text would require parsing the daemon debug log — + /// we leave the warning's string contents locked in by the unit tests + /// on `initialize_camoufox_manager` and limit this integration check + /// to the observable outcome: the combination does not fail. 
+ #[test] + fn stealth_plus_camoufox_still_succeeds() { + let _guard = acquire(); + let tmp = TempDir::new().unwrap(); + let out = cmd_with_python( + &tmp, + &[ + "--engine", + "camoufox", + "--stealth", + "--session", + "ce_stealth", + "--json", + "open", + "about:blank", + ], + ) + .output() + .expect("open"); + assert!( + out.status.success(), + "open with --stealth failed: {}", + String::from_utf8_lossy(&out.stdout) + ); + let _ = cmd_with_python(&tmp, &["--session", "ce_stealth", "close"]).output(); + } +} + +/// `pgrep -f <needle>` returning the matching PIDs as strings. We prefer +/// `pgrep` over parsing `ps -A` output because `pgrep`'s exit code is +/// unambiguous (0 = found, 1 = none) and its matching scope is the full +/// command line, which is what we need to pick up `python -m camoufox_sidecar`. +#[cfg(feature = "camoufox-integration")] +fn pgrep_contains(needle: &str) -> Vec<String> { + let output = Command::new("pgrep") + .args(["-f", needle]) + .output() + .expect("pgrep"); + String::from_utf8_lossy(&output.stdout) + .lines() + .map(str::trim) + .filter(|line| !line.is_empty()) + .map(str::to_string) + .collect() +} diff --git a/packages/camoufox-sidecar/camoufox_sidecar/__main__.py b/packages/camoufox-sidecar/camoufox_sidecar/__main__.py index e2556b03b..8abc6d7be 100644 --- a/packages/camoufox-sidecar/camoufox_sidecar/__main__.py +++ b/packages/camoufox-sidecar/camoufox_sidecar/__main__.py @@ -120,8 +120,13 @@ async def _cmd_launch(sidecar: "Sidecar", args: dict) -> dict: return await sidecar.session.launch(args) +async def _cmd_page_goto(sidecar: "Sidecar", args: dict) -> dict: + return await sidecar.session.goto(args) + + _HANDLERS: dict[str, Handler] = { "launch": _cmd_launch, + "page.goto": _cmd_page_goto, } diff --git a/packages/camoufox-sidecar/camoufox_sidecar/session.py b/packages/camoufox-sidecar/camoufox_sidecar/session.py index 54b882254..186fc8b70 100644 --- a/packages/camoufox-sidecar/camoufox_sidecar/session.py +++ 
b/packages/camoufox-sidecar/camoufox_sidecar/session.py @@ -70,6 +70,7 @@ class Session: def __init__(self) -> None: self._camoufox_cm: Optional[Any] = None # AsyncCamoufox context manager self._browser: Optional[Any] = None + self._page: Optional[Any] = None # lazily created on first goto self._launched: bool = False @property @@ -138,6 +139,7 @@ async def close(self) -> dict: cm = self._camoufox_cm self._camoufox_cm = None self._browser = None + self._page = None self._launched = False if cm is None: return {"closed": False} @@ -149,6 +151,45 @@ async def close(self) -> dict: # leaving a half-closed state just masks the root cause. return {"closed": True} + async def goto(self, args: Optional[dict] = None) -> dict: + """Navigate the single session page to ``args['url']``. + + Unit 3 covers only single-tab open+close+goto as the smoke flow for + `agent-browser --engine camoufox open `. Multi-tab routing and + ref-aware snapshot/click ride on top of this in Units 4 and 5. + """ + if not self._launched or self._browser is None: + raise LaunchError( + "not-launched", + "Camoufox browser is not launched; send `launch` first", + ) + args = args or {} + url = args.get("url") + if not isinstance(url, str) or not url: + raise LaunchError( + "invalid-args", + "`page.goto` requires a non-empty `url` string", + ) + wait_until = args.get("waitUntil", "load") + if wait_until == "none": + wait_until = "commit" + + if self._page is None: + self._page = await self._browser.new_page() + + try: + response = await self._page.goto(url, wait_until=wait_until) + except Exception as exc: # noqa: BLE001 + raise LaunchError("navigation-failed", str(exc)) from exc + + try: + title = await self._page.title() + except Exception: # noqa: BLE001 + title = "" + final_url = self._page.url + status = response.status if response is not None else None + return {"url": final_url, "title": title, "status": status} + def _validate_launch_args(args: dict) -> dict: if not isinstance(args, dict): From 
921311d47a36ce8598a3cb14ec0e7b8c5ba67afb Mon Sep 17 00:00:00 2001 From: davide Date: Mon, 20 Apr 2026 22:15:06 -0400 Subject: [PATCH 4/9] =?UTF-8?q?feat(camoufox):=20core=20command=20surface?= =?UTF-8?q?=20=E2=80=94=20navigate,=20snapshot,=20click,=20fill,=20gettext?= =?UTF-8?q?=20(Unit=204)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ports the five command families the celeria coding sandbox relies on onto the Camoufox sidecar path. Both @eN-ref and CSS-selector invocations work for click/fill/gettext, and navigation invalidates the sidecar-owned ElementHandle cache so stale refs surface as ``{"code": "ref-stale"}`` instead of silently rebinding to reloaded elements. Python sidecar: - refs.py owns a ``RefCache`` keyed by @eN with frame.navigated invalidation - snapshot.py walks the DOM in a single ``page.evaluate``, tags ref-worthy elements with ``data-__ab-ref``, and re-resolves each ref to an ElementHandle on the Python side - session.py grows ``snapshot``, ``click``, ``fill``, ``get_text`` handlers with Playwright-error → structured-code translation (ambiguous-selector, selector-not-found, element-detached, timeout) - __main__.py wires the new commands and a ``page.navigate`` alias Rust action layer: - handle_snapshot, handle_click, handle_fill, handle_gettext each grow a Camoufox arm that forwards to the sidecar via ``BrowserManager::camoufox_client().call(...)``; the Rust RefMap is mirrored from the sidecar response so screenshot-annotation / diff paths keep working - handle_navigate rejects per-request --headers on Camoufox (Fetch.* is Chrome-only) rather than panicking through ``mgr.client()`` Tests: - cli/tests/fixtures/form.html + form-chrome-golden.json — shared Chrome↔Camoufox parity fixture - cli/tests/camoufox_parity.rs (gated on camoufox-integration feature): structural role/name set parity against the Chrome golden, ref-based click/fill/get-text roundtrip, CSS-selector path without snapshot, ref-stale 
after navigation - packages/camoufox-sidecar/tests/test_commands.py — all 9 Unit 4 scenarios from the plan (snapshot, click-by-ref, fill-by-ref, click-by-selector, gettext, ref-stale, ambiguous-selector, selector-not-found, cross-nav integration) --- cli/src/native/actions.rs | 75 +++- cli/src/native/browser.rs | 6 +- cli/tests/camoufox_parity.rs | 313 ++++++++++++++++ cli/tests/fixtures/form-chrome-golden.json | 34 ++ cli/tests/fixtures/form.html | 24 ++ .../camoufox_sidecar/__main__.py | 21 ++ .../camoufox-sidecar/camoufox_sidecar/refs.py | 110 ++++++ .../camoufox_sidecar/session.py | 268 +++++++++++++- .../camoufox_sidecar/snapshot.py | 334 ++++++++++++++++++ .../camoufox-sidecar/tests/test_commands.py | 264 ++++++++++++++ 10 files changed, 1432 insertions(+), 17 deletions(-) create mode 100644 cli/tests/camoufox_parity.rs create mode 100644 cli/tests/fixtures/form-chrome-golden.json create mode 100644 cli/tests/fixtures/form.html create mode 100644 packages/camoufox-sidecar/camoufox_sidecar/refs.py create mode 100644 packages/camoufox-sidecar/camoufox_sidecar/snapshot.py create mode 100644 packages/camoufox-sidecar/tests/test_commands.py diff --git a/cli/src/native/actions.rs b/cli/src/native/actions.rs index d4775c866..dbd41a059 100644 --- a/cli/src/native/actions.rs +++ b/cli/src/native/actions.rs @@ -2208,6 +2208,11 @@ async fn handle_navigate(cmd: &Value, state: &mut DaemonState) -> Result Result { async fn handle_snapshot(cmd: &Value, state: &mut DaemonState) -> Result { let mgr = state.browser.as_ref().ok_or("Browser not launched")?; + + // Camoufox path: the sidecar owns snapshot + ref assignment. The Rust + // side mirrors the returned `refs` map into `state.ref_map` so that + // anything on the Rust side that introspects ref metadata (diffing, + // screenshot annotation, etc.) keeps working — even though click/fill + // themselves don't use the Rust ref_map on this engine. 
+ if mgr.backend.is_camoufox() { + state.ref_map.clear(); + let args = json!({ + "interactive": cmd.get("interactive").and_then(|v| v.as_bool()).unwrap_or(false), + "selector": cmd.get("selector").and_then(|v| v.as_str()), + }); + let result = mgr.camoufox_client().call("page.snapshot", args).await?; + mirror_camoufox_refs_into(&result, &mut state.ref_map); + return Ok(result); + } + let session_id = mgr.active_session_id()?.to_string(); let options = SnapshotOptions { @@ -2469,6 +2491,27 @@ async fn handle_snapshot(cmd: &Value, state: &mut DaemonState) -> Result Result { let annotate = cmd .get("annotate") @@ -2591,6 +2634,25 @@ async fn handle_click(cmd: &Value, state: &mut DaemonState) -> Result Result Result Result { let mgr = state.browser.as_ref().ok_or("Browser not launched")?; - let session_id = mgr.active_session_id()?.to_string(); let selector = cmd .get("selector") .and_then(|v| v.as_str()) .ok_or("Missing 'selector' parameter")?; + if mgr.backend.is_camoufox() { + let args = json!({ "selector": selector }); + return mgr.camoufox_client().call("page.getText", args).await; + } + + let session_id = mgr.active_session_id()?.to_string(); let text = super::element::get_element_text( &mgr.backend, &session_id, diff --git a/cli/src/native/browser.rs b/cli/src/native/browser.rs index 27d6535c0..0ed142a29 100644 --- a/cli/src/native/browser.rs +++ b/cli/src/native/browser.rs @@ -379,9 +379,9 @@ impl BrowserManager { /// Camoufox sidecar client accessor. Symmetric with `client()` above; /// panics on a CDP backend. Camoufox-specific code paths in this module - /// (e.g. navigate-via-sidecar) use this after a `backend.is_camoufox()` - /// check. - fn camoufox_client(&self) -> &Arc { + /// and in `actions.rs` (e.g. handle_snapshot's Camoufox arm) use this + /// after a `backend.is_camoufox()` check. 
+ pub fn camoufox_client(&self) -> &Arc { match &self.backend { BrowserBackend::Camoufox(c) => c, BrowserBackend::Cdp(_) => panic!( diff --git a/cli/tests/camoufox_parity.rs b/cli/tests/camoufox_parity.rs new file mode 100644 index 000000000..0a89faf28 --- /dev/null +++ b/cli/tests/camoufox_parity.rs @@ -0,0 +1,313 @@ +//! Rust-level parity + command-surface tests for the Camoufox engine (Unit 4 +//! of the engine plan). +//! +//! The *happy-path* tests drive an actual Camoufox browser via the `--engine +//! camoufox` CLI surface and compare the snapshot output against the Chrome +//! golden at `cli/tests/fixtures/form-chrome-golden.json`. They're gated on +//! `--features camoufox-integration` to match the existing Unit 3 suite at +//! `cli/tests/camoufox_launch.rs`. +//! +//! Structural parity is the contract we care about: +//! +//! - same number of `@eN` refs, +//! - same set of `(role, name)` pairs, +//! +//! **not** identical ref ordering. Chrome's accessibility tree walk ends up +//! visiting cursor-interactive elements after AX-native ones, so the Submit +//! button lands at `e3` on Chrome and `e6` on Camoufox for this fixture. +//! Comparing anything finer than "did both engines see the same set of +//! interactive things?" is a recipe for flakes on engine upgrades. 
+ +#![cfg_attr( + not(feature = "camoufox-integration"), + allow(dead_code, unused_imports) +)] + +use std::process::Command; +use tempfile::TempDir; + +const BIN: &str = env!("CARGO_BIN_EXE_agent-browser"); + +fn build_cmd(tmp: &TempDir, args: &[&str]) -> Command { + let socket_dir = tmp.path().join("sockets"); + let home = tmp.path().join("home"); + std::fs::create_dir_all(&socket_dir).unwrap(); + std::fs::create_dir_all(&home).unwrap(); + + let mut cmd = Command::new(BIN); + cmd.args(args) + .env("AGENT_BROWSER_SOCKET_DIR", &socket_dir) + .env("HOME", &home) + .env("USERPROFILE", &home) + .env_remove("AGENT_BROWSER_PROVIDER") + .env_remove("AGENT_BROWSER_CDP") + .env_remove("AGENT_BROWSER_AUTO_CONNECT") + .env_remove("AGENT_BROWSER_ENGINE") + .env("NO_COLOR", "1"); + cmd +} + +#[cfg(feature = "camoufox-integration")] +mod integration { + use super::*; + use serde_json::Value; + use std::collections::BTreeSet; + use std::path::PathBuf; + use std::sync::Mutex; + use std::thread::sleep; + use std::time::Duration; + + /// Camoufox integration tests cannot run in parallel — they share the + /// same Camoufox browser cache and any overlapping ``ps`` probes (the + /// ``leak`` assertions in Unit 3) would race. Mirror the + /// ``camoufox_launch.rs`` INTEGRATION_LOCK so Cargo's default parallel + /// runner doesn't wedge the suite. 
+ static INTEGRATION_LOCK: Mutex<()> = Mutex::new(()); + + fn acquire() -> std::sync::MutexGuard<'static, ()> { + match INTEGRATION_LOCK.lock() { + Ok(g) => g, + Err(poisoned) => poisoned.into_inner(), + } + } + + fn fixture_python() -> Option<PathBuf> { + let crate_root = env!("CARGO_MANIFEST_DIR"); + let repo_root = std::path::Path::new(crate_root).parent()?; + let venv_python = repo_root.join("packages/camoufox-sidecar/.venv/bin/python3"); + if venv_python.is_file() { + return Some(venv_python); + } + std::env::var("AGENT_BROWSER_CAMOUFOX_PYTHON") + .ok() + .map(PathBuf::from) + } + + fn cmd_with_python(tmp: &TempDir, args: &[&str]) -> Command { + let mut cmd = build_cmd(tmp, args); + if let Some(py) = fixture_python() { + cmd.env("AGENT_BROWSER_CAMOUFOX_PYTHON", py); + } + cmd + } + + fn fixture_url() -> String { + let crate_root = env!("CARGO_MANIFEST_DIR"); + let p = std::path::Path::new(crate_root).join("tests/fixtures/form.html"); + format!("file://{}", p.display()) + } + + fn chrome_golden() -> Value { + let crate_root = env!("CARGO_MANIFEST_DIR"); + let p = std::path::Path::new(crate_root).join("tests/fixtures/form-chrome-golden.json"); + let raw = std::fs::read_to_string(p).expect("read chrome golden"); + serde_json::from_str(&raw).expect("parse chrome golden") + } + + fn role_name_set(refs: &Value) -> BTreeSet<(String, String)> { + let obj = refs.as_object().expect("refs is object"); + obj.values() + .map(|entry| { + let role = entry + .get("role") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let name = entry + .get("name") + .and_then(|v| v.as_str()) + .unwrap_or("") + .trim() + .to_string(); + (role, name) + }) + .collect() + } + + fn session_args<'a>(session: &'a str, extras: &'a [&'a str]) -> Vec<&'a str> { + let mut v: Vec<&str> = vec!["--engine", "camoufox", "--session", session, "--json"]; + v.extend(extras); + v + } + + fn open_fixture(tmp: &TempDir, session: &str) { + let url = fixture_url(); + let open_args = ["open", url.as_str()]; + 
let args = session_args(session, &open_args); + let out = cmd_with_python(tmp, &args).output().expect("open"); + assert!( + out.status.success(), + "open failed: stdout={} stderr={}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr), + ); + } + + fn run_json(tmp: &TempDir, session: &str, extras: &[&str]) -> Value { + let args: Vec<&str> = { + let mut v: Vec<&str> = vec!["--session", session, "--json"]; + v.extend(extras); + v + }; + let out = cmd_with_python(tmp, &args).output().expect("run_json"); + assert!( + out.status.success(), + "cmd {:?} failed: stdout={} stderr={}", + extras, + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr), + ); + let stdout = String::from_utf8_lossy(&out.stdout).to_string(); + serde_json::from_str(&stdout) + .unwrap_or_else(|e| panic!("invalid JSON response: {} — body: {}", e, stdout)) + } + + fn close(tmp: &TempDir, session: &str) { + let _ = cmd_with_python(tmp, &["--session", session, "close"]).output(); + sleep(Duration::from_secs(1)); + } + + /// Parity: snapshot role/name set matches Chrome golden on the form fixture. + #[test] + fn snapshot_refs_match_chrome_golden_on_fixture() { + let _guard = acquire(); + let tmp = TempDir::new().unwrap(); + let session = "cam_parity_refs"; + open_fixture(&tmp, session); + + let snap = run_json(&tmp, session, &["snapshot"]); + close(&tmp, session); + + let refs = snap + .get("data") + .and_then(|d| d.get("refs")) + .expect("response has data.refs"); + let got = role_name_set(refs); + + let golden = chrome_golden(); + let golden_refs = golden + .get("data") + .and_then(|d| d.get("refs")) + .expect("golden has data.refs"); + let expected = role_name_set(golden_refs); + + assert_eq!( + got, expected, + "Camoufox snapshot refs diverge from Chrome golden (set-level parity)", + ); + } + + /// Ref-based click+fill+gettext pipeline exercises the sidecar's ref cache + /// via the CLI surface. 
+ #[test] + fn click_fill_gettext_by_ref_roundtrip() { + let _guard = acquire(); + let tmp = TempDir::new().unwrap(); + let session = "cam_parity_click"; + open_fixture(&tmp, session); + + let snap = run_json(&tmp, session, &["snapshot"]); + let refs = snap + .get("data") + .and_then(|d| d.get("refs")) + .and_then(|v| v.as_object()) + .expect("refs"); + let email_ref = refs + .iter() + .find(|(_, v)| { + v.get("role").and_then(|r| r.as_str()) == Some("textbox") + && v.get("name").and_then(|n| n.as_str()).map(str::trim) == Some("Email") + }) + .map(|(k, _)| k.clone()) + .expect("email textbox ref"); + let submit_ref = refs + .iter() + .find(|(_, v)| { + v.get("role").and_then(|r| r.as_str()) == Some("button") + && v.get("name").and_then(|n| n.as_str()).map(str::trim) == Some("Submit") + }) + .map(|(k, _)| k.clone()) + .expect("submit button ref"); + + let email_token = format!("@{}", email_ref); + let submit_token = format!("@{}", submit_ref); + + let _ = run_json( + &tmp, + session, + &["fill", &email_token, "test@example.com"], + ); + let _ = run_json(&tmp, session, &["click", &submit_token]); + + let status = run_json(&tmp, session, &["get", "text", "#status"]); + close(&tmp, session); + + let text = status + .get("data") + .and_then(|d| d.get("text")) + .and_then(|v| v.as_str()) + .unwrap_or_default(); + assert_eq!(text, "Submitted", "status didn't update after ref-click"); + } + + /// CSS-selector path: ``click "#submit"`` must work without a prior + /// snapshot. 
+ #[test] + fn click_by_css_selector_without_snapshot() { + let _guard = acquire(); + let tmp = TempDir::new().unwrap(); + let session = "cam_parity_css"; + open_fixture(&tmp, session); + + let _ = run_json(&tmp, session, &["click", "#submit"]); + let status = run_json(&tmp, session, &["get", "text", "#status"]); + close(&tmp, session); + + let text = status + .get("data") + .and_then(|d| d.get("text")) + .and_then(|v| v.as_str()) + .unwrap_or_default(); + assert_eq!(text, "Submitted"); + } + + /// Stale-ref contract: refs from before a navigation must surface + /// ``ref-stale`` rather than silently acting on a reloaded element. + #[test] + fn ref_stale_after_navigation() { + let _guard = acquire(); + let tmp = TempDir::new().unwrap(); + let session = "cam_parity_stale"; + open_fixture(&tmp, session); + + let snap = run_json(&tmp, session, &["snapshot"]); + let refs = snap + .get("data") + .and_then(|d| d.get("refs")) + .and_then(|v| v.as_object()) + .expect("refs"); + let any_ref = refs.keys().next().cloned().expect("at least one ref"); + let token = format!("@{}", any_ref); + + // data: URL dodges the "navigating to about:blank from about:blank" + // Playwright interruption. + let _ = run_json( + &tmp, + session, + &["navigate", "data:text/html,after"], + ); + + // The CLI maps `success:false` responses to a non-zero exit status, + // and `run_json` asserts a successful exit, so use a direct command. 
+ let out = cmd_with_python(&tmp, &["--session", session, "--json", "click", &token]) + .output() + .expect("click after nav"); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("ref-stale"), + "expected ref-stale error, got: {}", + stdout + ); + close(&tmp, session); + } +} diff --git a/cli/tests/fixtures/form-chrome-golden.json b/cli/tests/fixtures/form-chrome-golden.json new file mode 100644 index 000000000..426511d73 --- /dev/null +++ b/cli/tests/fixtures/form-chrome-golden.json @@ -0,0 +1,34 @@ +{ + "success": true, + "data": { + "origin": "file:///Users/davide/git/agent-browser/cli/tests/fixtures/form.html", + "refs": { + "e1": { + "name": "Contact Form", + "role": "heading" + }, + "e2": { + "name": " Subscribe to updates", + "role": "checkbox" + }, + "e3": { + "name": "Submit", + "role": "button" + }, + "e4": { + "name": "Name", + "role": "textbox" + }, + "e5": { + "name": "Email", + "role": "textbox" + }, + "e6": { + "name": "Message", + "role": "textbox" + } + }, + "snapshot": "- heading \"Contact Form\" [level=1, ref=e1]\n- paragraph\n - StaticText \"Fill out the form below.\"\n- generic\n - LabelText\n - StaticText \"Name\"\n - textbox \"Name\" [ref=e4]\n - LabelText\n - StaticText \"Email\"\n - textbox \"Email\" [ref=e5]\n - LabelText\n - StaticText \"Message\"\n - textbox \"Message\" [ref=e6]\n - checkbox \" Subscribe to updates\" [checked=false, ref=e2]\n - button \"Submit\" [ref=e3]\n- paragraph\n - StaticText \"Idle\"" + }, + "error": null +} diff --git a/cli/tests/fixtures/form.html b/cli/tests/fixtures/form.html new file mode 100644 index 000000000..314bd84a9 --- /dev/null +++ b/cli/tests/fixtures/form.html @@ -0,0 +1,24 @@ + + + + + Form Parity Fixture + + +

Contact Form

+

Fill out the form below.

+
+ + + + + +
+

Idle

+ + + diff --git a/packages/camoufox-sidecar/camoufox_sidecar/__main__.py b/packages/camoufox-sidecar/camoufox_sidecar/__main__.py index 8abc6d7be..fcdc53356 100644 --- a/packages/camoufox-sidecar/camoufox_sidecar/__main__.py +++ b/packages/camoufox-sidecar/camoufox_sidecar/__main__.py @@ -124,9 +124,30 @@ async def _cmd_page_goto(sidecar: "Sidecar", args: dict) -> dict: return await sidecar.session.goto(args) +async def _cmd_page_snapshot(sidecar: "Sidecar", args: dict) -> dict: + return await sidecar.session.snapshot(args) + + +async def _cmd_page_click(sidecar: "Sidecar", args: dict) -> dict: + return await sidecar.session.click(args) + + +async def _cmd_page_fill(sidecar: "Sidecar", args: dict) -> dict: + return await sidecar.session.fill(args) + + +async def _cmd_page_get_text(sidecar: "Sidecar", args: dict) -> dict: + return await sidecar.session.get_text(args) + + _HANDLERS: dict[str, Handler] = { "launch": _cmd_launch, "page.goto": _cmd_page_goto, + "page.navigate": _cmd_page_goto, # alias for CDP-side naming parity + "page.snapshot": _cmd_page_snapshot, + "page.click": _cmd_page_click, + "page.fill": _cmd_page_fill, + "page.getText": _cmd_page_get_text, } diff --git a/packages/camoufox-sidecar/camoufox_sidecar/refs.py b/packages/camoufox-sidecar/camoufox_sidecar/refs.py new file mode 100644 index 000000000..fbfa83f48 --- /dev/null +++ b/packages/camoufox-sidecar/camoufox_sidecar/refs.py @@ -0,0 +1,110 @@ +"""`@eN` ref cache for the Camoufox sidecar. + +The CDP path in agent-browser hands agents ``@e1``, ``@e2`` … tokens that +survive beyond the snapshot that created them, backed by Chrome's cross-tree +``backend_node_id`` identity. Playwright exposes no equivalent — an +``ElementHandle`` is the closest thing, and it has narrower semantics: handles +only remain valid while their element stays attached to the same document. 
+ +Per the plan's Key Technical Decisions, the sidecar therefore: + + * caches an ``ElementHandle`` per ``@eN`` during ``page.snapshot``; + * clears the cache on ``frame.navigated`` so cross-navigation refs become + structurally unavailable rather than silently pointing at a new element; + * surfaces ``{"code": "ref-stale"}`` when a caller reaches for a ref that + is either missing from the cache or whose handle Playwright reports as + detached. + +This is a narrower semantic than Chrome's ``backend_node_id``. The narrower +shape is documented in ``docs/engines/camoufox.md`` (planned Unit 8) and +surfaces as ``ref-stale`` rather than a silent cross-navigation mismatch. +""" + +from __future__ import annotations + +import re +from typing import Any, Optional + + +# Recognise ``@e1``, ``e1``, or ``ref=e1`` — mirrors ``parse_ref`` on the +# Rust side (``cli/src/native/element.rs``) so both engines accept the same +# agent-facing token shapes. +_REF_RE = re.compile(r"^(?:@|ref=)?(e[0-9]+)$") + + +def parse_ref(selector_or_ref: str) -> Optional[str]: + """Return ``"eN"`` if the input looks like an agent-browser ref, else ``None``.""" + if not isinstance(selector_or_ref, str): + return None + match = _REF_RE.match(selector_or_ref.strip()) + return match.group(1) if match else None + + +class RefStale(Exception): + """Raised by ``RefCache.require`` when a ref is missing or detached.""" + + def __init__(self, message: str) -> None: + super().__init__(message) + self.message = message + + +class RefCache: + """ElementHandle cache keyed by ``@eN``. + + Cheap to construct; no background tasks. The owner is expected to call + :meth:`invalidate` whenever the browser navigates so callers see an honest + ``ref-stale`` error instead of a silently-rebound handle. 
+ """ + + def __init__(self) -> None: + self._handles: dict[str, Any] = {} + self._metadata: dict[str, dict[str, Any]] = {} + self._next_id: int = 1 + + def __contains__(self, ref_id: str) -> bool: + return ref_id in self._handles + + def invalidate(self) -> None: + """Drop all cached handles. + + We don't ``await handle.dispose()`` here because callers hit this on + the sync ``framenavigated`` event path; Playwright cleans up detached + handles on its own. This method is cheap to call more than once. + """ + self._handles.clear() + self._metadata.clear() + self._next_id = 1 + + def next_ref_id(self) -> str: + ref_id = f"e{self._next_id}" + self._next_id += 1 + return ref_id + + def put(self, ref_id: str, handle: Any, *, role: str, name: str) -> None: + self._handles[ref_id] = handle + self._metadata[ref_id] = {"role": role, "name": name} + + def get(self, ref_id: str) -> Optional[Any]: + return self._handles.get(ref_id) + + def metadata(self, ref_id: str) -> Optional[dict[str, Any]]: + return self._metadata.get(ref_id) + + def entries(self) -> dict[str, dict[str, Any]]: + """Return a ``{ref_id: {role, name}}`` view suitable for the ``refs`` response field.""" + return {k: dict(v) for k, v in self._metadata.items()} + + def require(self, ref_id: str) -> Any: + """Return the handle for ``ref_id`` or raise :class:`RefStale`. + + Callers should catch Playwright errors when *using* the returned handle + and translate them into ``RefStale`` as well — this method only covers + the "not in cache" failure mode. 
+ """ + handle = self._handles.get(ref_id) + if handle is None: + raise RefStale( + f"ref {ref_id!r} is not in the snapshot cache " + "(may have been invalidated by a navigation; re-snapshot)" + ) + return handle diff --git a/packages/camoufox-sidecar/camoufox_sidecar/session.py b/packages/camoufox-sidecar/camoufox_sidecar/session.py index 186fc8b70..5d3e12dec 100644 --- a/packages/camoufox-sidecar/camoufox_sidecar/session.py +++ b/packages/camoufox-sidecar/camoufox_sidecar/session.py @@ -1,8 +1,9 @@ """Session holds the AsyncCamoufox browser for the sidecar's lifetime. -Unit 2 owns the lifecycle (launch / close / cleanup); later units add the -command handlers (navigate, snapshot, click, ...). The launch-kwarg allowlist -lives here because it's the public contract with the Rust side. +Unit 2 owned lifecycle (launch / close); Unit 4 grows the per-page command +surface: snapshot, click, fill, get_text, navigate. The single +``self._page`` held here is a stopgap — Unit 5 (tabs) will replace it with a +per-tab map. """ from __future__ import annotations @@ -10,6 +11,8 @@ from typing import Any, Optional from .protocol import log +from .refs import RefCache, RefStale, parse_ref +from .snapshot import SnapshotError, take_snapshot # Allowlist derived from https://camoufox.com/python/usage/ — keep in sync with # the plan's Unit 2 Approach. New kwargs must be added deliberately so the @@ -49,6 +52,10 @@ } ) +# Default timeout for per-element actions (ms). Matches agent-browser's +# default_timeout_ms on the Rust side (see BrowserManager::launch). 
+DEFAULT_ACTION_TIMEOUT_MS: int = 25_000 + class LaunchError(Exception): """Structured error surfaced as a {"ok": false, "error": {...}} response.""" @@ -72,6 +79,7 @@ def __init__(self) -> None: self._browser: Optional[Any] = None self._page: Optional[Any] = None # lazily created on first goto self._launched: bool = False + self._ref_cache: RefCache = RefCache() @property def is_launched(self) -> bool: @@ -141,6 +149,7 @@ async def close(self) -> dict: self._browser = None self._page = None self._launched = False + self._ref_cache.invalidate() if cm is None: return {"closed": False} try: @@ -151,6 +160,37 @@ async def close(self) -> dict: # leaving a half-closed state just masks the root cause. return {"closed": True} + async def _ensure_page(self) -> Any: + if not self._launched or self._browser is None: + raise LaunchError( + "not-launched", + "Camoufox browser is not launched; send `launch` first", + ) + if self._page is None: + self._page = await self._browser.new_page() + self._wire_page_events(self._page) + return self._page + + def _wire_page_events(self, page: Any) -> None: + """Invalidate the ref cache on navigation and forward lifecycle events. + + Playwright's ``framenavigated`` fires for every frame, including + subframes, so we scope invalidation to main-frame navigations only. + Unit 4/5 adds ``page.console``/``page.crashed`` forwarding. + """ + + def _on_framenavigated(frame: Any) -> None: + try: + if frame == page.main_frame: + self._ref_cache.invalidate() + except Exception as exc: # noqa: BLE001 + log(f"framenavigated handler: {exc}") + + try: + page.on("framenavigated", _on_framenavigated) + except Exception as exc: # noqa: BLE001 + log(f"could not attach framenavigated handler: {exc}") + async def goto(self, args: Optional[dict] = None) -> dict: """Navigate the single session page to ``args['url']``. @@ -158,11 +198,6 @@ async def goto(self, args: Optional[dict] = None) -> dict: `agent-browser --engine camoufox open `. 
Multi-tab routing and ref-aware snapshot/click ride on top of this in Units 4 and 5. """ - if not self._launched or self._browser is None: - raise LaunchError( - "not-launched", - "Camoufox browser is not launched; send `launch` first", - ) args = args or {} url = args.get("url") if not isinstance(url, str) or not url: @@ -174,22 +209,229 @@ async def goto(self, args: Optional[dict] = None) -> dict: if wait_until == "none": wait_until = "commit" - if self._page is None: - self._page = await self._browser.new_page() + page = await self._ensure_page() + # Any navigation request invalidates prior refs, even before + # ``framenavigated`` fires; clearing here closes the window in which + # an agent could click on a stale ref after issuing ``navigate``. + self._ref_cache.invalidate() try: - response = await self._page.goto(url, wait_until=wait_until) + response = await page.goto(url, wait_until=wait_until) except Exception as exc: # noqa: BLE001 raise LaunchError("navigation-failed", str(exc)) from exc try: - title = await self._page.title() + title = await page.title() except Exception: # noqa: BLE001 title = "" - final_url = self._page.url + final_url = page.url status = response.status if response is not None else None return {"url": final_url, "title": title, "status": status} + # ------------------------------------------------------------------ + # Unit 4: core command surface + # ------------------------------------------------------------------ + + async def snapshot(self, args: Optional[dict] = None) -> dict: + args = args or {} + page = await self._ensure_page() + try: + return await take_snapshot( + page, + self._ref_cache, + interactive_only=bool(args.get("interactive", False)), + selector=args.get("selector"), + ) + except SnapshotError as exc: + raise LaunchError(exc.code, exc.message) from exc + + async def click(self, args: Optional[dict] = None) -> dict: + args = args or {} + selector_or_ref = _require_str(args, "selector") + button = args.get("button", 
"left") + click_count = int(args.get("clickCount", 1) or 1) + timeout = int(args.get("timeoutMs") or DEFAULT_ACTION_TIMEOUT_MS) + + page = await self._ensure_page() + ref_id = parse_ref(selector_or_ref) + if ref_id is not None: + handle = self._require_ref(ref_id) + await _try_click_handle(handle, button, click_count, timeout) + else: + await _try_click_locator(page.locator(selector_or_ref), selector_or_ref, button, click_count, timeout) + return {"clicked": selector_or_ref} + + async def fill(self, args: Optional[dict] = None) -> dict: + args = args or {} + selector_or_ref = _require_str(args, "selector") + value = args.get("value") + if not isinstance(value, str): + raise LaunchError("invalid-args", "`fill` requires a string `value` argument") + timeout = int(args.get("timeoutMs") or DEFAULT_ACTION_TIMEOUT_MS) + + page = await self._ensure_page() + ref_id = parse_ref(selector_or_ref) + if ref_id is not None: + handle = self._require_ref(ref_id) + await _try_fill_handle(handle, value, timeout) + else: + await _try_fill_locator(page.locator(selector_or_ref), selector_or_ref, value, timeout) + return {"filled": selector_or_ref} + + async def get_text(self, args: Optional[dict] = None) -> dict: + args = args or {} + selector_or_ref = _require_str(args, "selector") + timeout = int(args.get("timeoutMs") or DEFAULT_ACTION_TIMEOUT_MS) + + page = await self._ensure_page() + ref_id = parse_ref(selector_or_ref) + if ref_id is not None: + handle = self._require_ref(ref_id) + text = await _handle_text(handle, timeout) + else: + text = await _locator_text(page.locator(selector_or_ref), selector_or_ref, timeout) + return {"text": text, "origin": page.url} + + def _require_ref(self, ref_id: str) -> Any: + try: + return self._ref_cache.require(ref_id) + except RefStale as exc: + raise LaunchError("ref-stale", exc.message) from exc + + +# --------------------------------------------------------------------------- +# Internal helpers — kept module-level so Session stays focused on 
lifecycle +# and command dispatch, not Playwright error translation. +# --------------------------------------------------------------------------- + + +def _require_str(args: dict, key: str) -> str: + value = args.get(key) + if not isinstance(value, str) or not value: + raise LaunchError("invalid-args", f"missing required `{key}` string argument") + return value + + +def _classify_playwright_error(exc: Exception, selector_or_ref: str) -> LaunchError: + """Translate Playwright errors into agent-browser error codes. + + Keeping this logic in one place means new error codes (e.g. ``timeout``) + pick up the same behaviour across click/fill/get_text without each handler + reimplementing the pattern match. + """ + msg = str(exc) + lowered = msg.lower() + if "strict mode violation" in lowered or "resolved to" in lowered and "elements" in lowered: + # Try to parse the match count from the message ("resolved to N elements"). + import re + + match = re.search(r"resolved to\s+(\d+)\s+elements", msg) + count = int(match.group(1)) if match else 0 + return LaunchError( + "ambiguous-selector", + f"Selector {selector_or_ref!r} matched {count} elements; refine it or use a ref", + ) + if "element is not attached" in lowered or "node is detached" in lowered or "detached" in lowered: + return LaunchError("element-detached", msg) + if "timeout" in lowered and "exceeded" in lowered: + return LaunchError("timeout", msg) + if "no element matches" in lowered or "no elements match" in lowered: + return LaunchError("selector-not-found", msg) + return LaunchError("action-failed", msg) + + +async def _try_click_handle(handle: Any, button: str, click_count: int, timeout: int) -> None: + try: + await handle.click(button=button, click_count=click_count, timeout=timeout) + except Exception as exc: # noqa: BLE001 + raise _classify_playwright_error(exc, "") from exc + + +async def _try_click_locator( + locator: Any, selector: str, button: str, click_count: int, timeout: int +) -> None: + try: + 
count = await locator.count() + except Exception as exc: # noqa: BLE001 + raise _classify_playwright_error(exc, selector) from exc + if count == 0: + raise LaunchError( + "selector-not-found", + f"Selector {selector!r} did not match any element", + ) + if count > 1: + raise LaunchError( + "ambiguous-selector", + f"Selector {selector!r} matched {count} elements; refine it or use a ref", + ) + try: + await locator.click(button=button, click_count=click_count, timeout=timeout) + except Exception as exc: # noqa: BLE001 + raise _classify_playwright_error(exc, selector) from exc + + +async def _try_fill_handle(handle: Any, value: str, timeout: int) -> None: + try: + await handle.fill(value, timeout=timeout) + except Exception as exc: # noqa: BLE001 + raise _classify_playwright_error(exc, "") from exc + + +async def _try_fill_locator(locator: Any, selector: str, value: str, timeout: int) -> None: + try: + count = await locator.count() + except Exception as exc: # noqa: BLE001 + raise _classify_playwright_error(exc, selector) from exc + if count == 0: + raise LaunchError( + "selector-not-found", + f"Selector {selector!r} did not match any element", + ) + if count > 1: + raise LaunchError( + "ambiguous-selector", + f"Selector {selector!r} matched {count} elements; refine it or use a ref", + ) + try: + await locator.fill(value, timeout=timeout) + except Exception as exc: # noqa: BLE001 + raise _classify_playwright_error(exc, selector) from exc + + +async def _handle_text(handle: Any, timeout: int) -> str: + # ElementHandle.text_content does not accept a ``timeout`` kwarg (only the + # Locator variant does) — if the handle is still live in the cache we + # already know the element is attached, so no additional timeout gymnastics + # are needed. 
+ _ = timeout + try: + raw = await handle.text_content() + except Exception as exc: # noqa: BLE001 + raise _classify_playwright_error(exc, "") from exc + return (raw or "").strip() + + +async def _locator_text(locator: Any, selector: str, timeout: int) -> str: + try: + count = await locator.count() + except Exception as exc: # noqa: BLE001 + raise _classify_playwright_error(exc, selector) from exc + if count == 0: + raise LaunchError( + "selector-not-found", + f"Selector {selector!r} did not match any element", + ) + if count > 1: + raise LaunchError( + "ambiguous-selector", + f"Selector {selector!r} matched {count} elements; refine it or use a ref", + ) + try: + raw = await locator.text_content(timeout=timeout) + except Exception as exc: # noqa: BLE001 + raise _classify_playwright_error(exc, selector) from exc + return (raw or "").strip() + def _validate_launch_args(args: dict) -> dict: if not isinstance(args, dict): diff --git a/packages/camoufox-sidecar/camoufox_sidecar/snapshot.py b/packages/camoufox-sidecar/camoufox_sidecar/snapshot.py new file mode 100644 index 000000000..e4eb8fbde --- /dev/null +++ b/packages/camoufox-sidecar/camoufox_sidecar/snapshot.py @@ -0,0 +1,334 @@ +"""Accessibility snapshot for the Camoufox sidecar. + +Playwright's ``page.accessibility.snapshot()`` gives us a Firefox-side AX tree, +but it doesn't return ``ElementHandle`` instances, so we can't use it to drive +subsequent ``click``/``fill``/``gettext`` commands. + +Instead we run a single ``page.evaluate`` that: + + * walks the DOM; + * classifies each element using a minimal ARIA role mapping that mirrors + what Chrome's AX tree produces for the same markup; + * tags every ref-worthy element with ``data-__ab-ref="eN"``; + * returns one metadata row per tagged element. + +The Python side then resolves each row back to an ``ElementHandle`` via +``page.query_selector("[data-__ab-ref='eN']")`` and populates the +:class:`~camoufox_sidecar.refs.RefCache`. 
Future ``click``/``fill`` calls by +``@eN`` pull the handle out of the cache; by CSS selector they hit +``page.locator(selector)`` directly. + +The public contract is the same JSON shape the Chrome path emits on +``{"success": true, "data": { snapshot, origin, refs }}``: a preformatted text +tree, a navigation origin, and a ``{ref: {role, name}}`` map for agent +consumption. Parity is measured at the ``refs`` level — exact text matches +aren't expected because Firefox's AX tree differs structurally from Chrome's. +""" + +from __future__ import annotations + +from typing import Any, Optional + +from .refs import RefCache + + +# Interactive roles that always get a ref. Mirrors the Chrome-path +# ``INTERACTIVE_ROLES`` list in ``cli/src/native/snapshot.rs`` so the sidecar +# emits the same agent-facing role names Chrome does. +INTERACTIVE_ROLES: frozenset[str] = frozenset( + { + "button", + "link", + "textbox", + "checkbox", + "radio", + "combobox", + "listbox", + "menuitem", + "menuitemcheckbox", + "menuitemradio", + "option", + "searchbox", + "slider", + "spinbutton", + "switch", + "tab", + "treeitem", + } +) + +# Content roles that get a ref only when they carry a non-empty accessible name. +# Chrome includes ``heading`` + several landmarks here; v1 keeps the list small +# so parity against Firefox is tractable. The list intentionally does *not* +# include ``generic`` / ``group`` — those produce noise without names. +CONTENT_ROLES_WITH_NAMES: frozenset[str] = frozenset( + { + "heading", + "cell", + "gridcell", + "columnheader", + "rowheader", + "listitem", + "article", + "region", + "main", + "navigation", + } +) + + +# Executed inside the browser context. Returns a list of metadata dicts, one +# per ref-worthy element. The element retains a ``data-__ab-ref`` attribute so +# Python can re-resolve an ``ElementHandle`` for each ref via +# ``page.query_selector("[data-__ab-ref='eN']")``. 
The attribute is deliberately +# left in place until the next snapshot — Playwright ``Locator`` objects built +# from a ref selector stay valid as long as the page doesn't mutate the +# attribute away, and any mutation (navigation, innerHTML overwrite) is covered +# by the ``framenavigated`` invalidation. +_SNAPSHOT_JS = r""" +(({ interactiveRoles, contentRolesWithNames }) => { + const IMPLICIT_ROLES = { + 'a': 'link', + 'button': 'button', + 'select': 'combobox', + 'textarea': 'textbox', + 'h1': 'heading', 'h2': 'heading', 'h3': 'heading', 'h4': 'heading', + 'h5': 'heading', 'h6': 'heading', + 'nav': 'navigation', + 'main': 'main', + 'article': 'article', + 'li': 'listitem', + }; + const INTERACTIVE = new Set(interactiveRoles); + const CONTENT_WITH_NAMES = new Set(contentRolesWithNames); + + const roleFor = (el) => { + const explicit = el.getAttribute('role'); + if (explicit) return explicit.trim().toLowerCase(); + const tag = el.tagName.toLowerCase(); + if (tag === 'a') return el.hasAttribute('href') ? 'link' : null; + if (tag === 'input') { + const t = (el.getAttribute('type') || 'text').toLowerCase(); + if (t === 'checkbox') return 'checkbox'; + if (t === 'radio') return 'radio'; + if (t === 'button' || t === 'submit' || t === 'reset') return 'button'; + if (t === 'range') return 'slider'; + if (t === 'search') return 'searchbox'; + if (t === 'number') return 'spinbutton'; + if (t === 'hidden' || t === 'file') return null; + return 'textbox'; + } + return IMPLICIT_ROLES[tag] || null; + }; + + const stripRefAttr = (node) => { + // Clone and strip our own marker attribute plus any nested inputs so the + // wrapping