From 81433823914a91425441e592ac0f106ec215253f Mon Sep 17 00:00:00 2001 From: Yeti Paw <22755327+ForkedInTime@users.noreply.github.com> Date: Wed, 15 Apr 2026 21:13:17 -0700 Subject: [PATCH 01/16] feat(browser): ToolMiddleware extension point in ToolContext Co-Authored-By: Arch Linux --- src/browser/middleware.rs | 33 +++++++++ src/browser/mod.rs | 5 +- src/query_engine.rs | 60 +++++++++++++--- src/tools/mod.rs | 5 ++ tests/middleware_smoke.rs | 140 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 231 insertions(+), 12 deletions(-) create mode 100644 src/browser/middleware.rs create mode 100644 tests/middleware_smoke.rs diff --git a/src/browser/middleware.rs b/src/browser/middleware.rs new file mode 100644 index 0000000..7805214 --- /dev/null +++ b/src/browser/middleware.rs @@ -0,0 +1,33 @@ +//! ToolMiddleware — pluggable pre/post hook trait around every tool call. +//! +//! Default empty chain (no middlewares = existing behavior unchanged). +//! The approval gate and loop detector implement this trait. + +use async_trait::async_trait; +use serde_json::Value; +use std::sync::Arc; + +/// Result of a middleware's `before_tool` check. +#[derive(Debug, Clone)] +pub enum MiddlewareVerdict { + /// Allow the tool to proceed. + Allow, + /// Block the tool with an error reason. + Deny { reason: String }, + /// Request confirmation before proceeding. + /// Treated as Deny until the approval gate resolves it internally. + RequireConfirmation { reason: String, detail: String }, +} + +/// Extension point invoked before and after every tool execution. +#[async_trait] +pub trait ToolMiddleware: Send + Sync { + /// Called before a tool runs. Return `Deny` or `RequireConfirmation` to block. + async fn before_tool(&self, tool_name: &str, input: &Value) -> MiddlewareVerdict; + + /// Called after a tool runs with its output text. + async fn after_tool(&self, tool_name: &str, output: &str); +} + +/// Ordered list of middlewares applied to every tool call. 
+pub type MiddlewareChain = Vec>; diff --git a/src/browser/mod.rs b/src/browser/mod.rs index 21ee48b..6bc8889 100644 --- a/src/browser/mod.rs +++ b/src/browser/mod.rs @@ -1,9 +1,10 @@ //! Browser automation via Chrome DevTools Protocol. -pub mod cdp; -pub mod snapshot; pub mod actions; +pub mod cdp; pub mod element; pub mod extraction; +pub mod middleware; +pub mod snapshot; use anyhow::{Result, bail}; use cdp::CdpClient; diff --git a/src/query_engine.rs b/src/query_engine.rs index 66abad9..5732a31 100644 --- a/src/query_engine.rs +++ b/src/query_engine.rs @@ -5,6 +5,7 @@ use crate::api::{ApiBackend, MessagesRequest}; use crate::compact::{CompactNeeded, compact_needed, snip_compact, summarize_compact}; use crate::config::Config; use crate::rag; +use crate::browser::middleware::MiddlewareVerdict; use crate::tools::{DynTool, ToolContext}; use anyhow::{Context, Result}; use colored::Colorize; @@ -413,6 +414,40 @@ impl QueryEngine { } } + // ── Middleware before_tool check ────────────────────── + let mut middleware_denied = false; + for mw in &ctx.middlewares { + match mw.before_tool(name, input).await { + MiddlewareVerdict::Allow => {} + MiddlewareVerdict::Deny { reason } => { + results.push(ContentBlock::ToolResult { + tool_use_id: id.clone(), + content: vec![ToolResultContent::text(format!( + "Middleware denied: {reason}" + ))], + is_error: Some(true), + }); + middleware_denied = true; + break; + } + MiddlewareVerdict::RequireConfirmation { reason, .. } => { + // Treat as Deny until the approval gate resolves internally. 
+ results.push(ContentBlock::ToolResult { + tool_use_id: id.clone(), + content: vec![ToolResultContent::text(format!( + "Middleware requires confirmation: {reason}" + ))], + is_error: Some(true), + }); + middleware_denied = true; + break; + } + } + } + if middleware_denied { + continue; + } + let tool = self.tools.iter().find(|t| t.name() == name); let output = match tool { @@ -423,6 +458,20 @@ impl QueryEngine { None => crate::tools::ToolOutput::error(format!("Unknown tool: {name}")), }; + // ── Middleware after_tool ───────────────────────────── + let output_text: String = output + .content + .iter() + .map(|c| { + let ToolResultContent::Text { text } = c; + text.as_str() + }) + .collect::>() + .join("\n"); + for mw in &ctx.middlewares { + mw.after_tool(name, &output_text).await; + } + if output.is_error && !self.stream_json_output { eprintln!( "{} {}", @@ -441,20 +490,11 @@ impl QueryEngine { // Emit tool_result event for stream-json + hook-events mode if self.include_hook_events && self.stream_json_output { - let result_text = output - .content - .iter() - .map(|c| { - let ToolResultContent::Text { text } = c; - text.as_str() - }) - .collect::>() - .join("\n"); let event = serde_json::json!({ "type": "tool_result", "name": name, "is_error": output.is_error, - "content": result_text, + "content": output_text, }); println!("{}", event); } diff --git a/src/tools/mod.rs b/src/tools/mod.rs index 530a2c6..dc8de14 100644 --- a/src/tools/mod.rs +++ b/src/tools/mod.rs @@ -97,6 +97,10 @@ pub struct ToolContext { pub live_model: Option, pub live_api_key: Option, pub live_ollama_host: Option, + + /// Middleware chain: pre/post hooks around every tool call. + /// Default empty = no-op (existing behavior unchanged). 
+ pub middlewares: crate::browser::middleware::MiddlewareChain, } impl std::fmt::Debug for ToolContext { @@ -126,6 +130,7 @@ impl ToolContext { live_model: None, live_api_key: None, live_ollama_host: None, + middlewares: Vec::new(), } } diff --git a/tests/middleware_smoke.rs b/tests/middleware_smoke.rs new file mode 100644 index 0000000..f640b1c --- /dev/null +++ b/tests/middleware_smoke.rs @@ -0,0 +1,140 @@ +//! Smoke tests for the ToolMiddleware trait. + +use async_trait::async_trait; +use rustyclaw::browser::middleware::{MiddlewareVerdict, ToolMiddleware}; +use serde_json::Value; +use std::sync::{Arc, Mutex}; + +// ── RecordingMiddleware ────────────────────────────────────────────── + +/// Records all before/after calls for assertion. +struct RecordingMiddleware { + log: Arc>>, +} + +#[async_trait] +impl ToolMiddleware for RecordingMiddleware { + async fn before_tool(&self, tool_name: &str, _input: &Value) -> MiddlewareVerdict { + self.log + .lock() + .unwrap() + .push(format!("before:{tool_name}")); + MiddlewareVerdict::Allow + } + + async fn after_tool(&self, tool_name: &str, output: &str) { + self.log + .lock() + .unwrap() + .push(format!("after:{tool_name}:{output}")); + } +} + +#[tokio::test] +async fn recording_middleware_fires_before_and_after() { + let log = Arc::new(Mutex::new(Vec::::new())); + let mw = RecordingMiddleware { log: log.clone() }; + + let verdict = mw.before_tool("bash", &serde_json::json!({"cmd": "ls"})).await; + assert!(matches!(verdict, MiddlewareVerdict::Allow)); + + mw.after_tool("bash", "file1.rs\nfile2.rs").await; + + let entries = log.lock().unwrap(); + assert_eq!(entries.len(), 2); + assert_eq!(entries[0], "before:bash"); + assert_eq!(entries[1], "after:bash:file1.rs\nfile2.rs"); +} + +// ── Denier ─────────────────────────────────────────────────────────── + +/// Always denies tool execution. 
+struct Denier { + reason: String, +} + +#[async_trait] +impl ToolMiddleware for Denier { + async fn before_tool(&self, _tool_name: &str, _input: &Value) -> MiddlewareVerdict { + MiddlewareVerdict::Deny { + reason: self.reason.clone(), + } + } + + async fn after_tool(&self, _tool_name: &str, _output: &str) { + // Never called when denied. + } +} + +#[tokio::test] +async fn denier_middleware_returns_deny() { + let mw = Denier { + reason: "not allowed".into(), + }; + + let verdict = mw.before_tool("file_write", &serde_json::json!({})).await; + match verdict { + MiddlewareVerdict::Deny { reason } => { + assert_eq!(reason, "not allowed"); + } + other => panic!("Expected Deny, got {other:?}"), + } +} + +// ── RequireConfirmation ────────────────────────────────────────────── + +struct ConfirmationRequester; + +#[async_trait] +impl ToolMiddleware for ConfirmationRequester { + async fn before_tool(&self, tool_name: &str, _input: &Value) -> MiddlewareVerdict { + MiddlewareVerdict::RequireConfirmation { + reason: format!("{tool_name} needs approval"), + detail: "destructive action".into(), + } + } + + async fn after_tool(&self, _tool_name: &str, _output: &str) {} +} + +#[tokio::test] +async fn require_confirmation_variant() { + let mw = ConfirmationRequester; + let verdict = mw.before_tool("bash", &serde_json::json!({})).await; + match verdict { + MiddlewareVerdict::RequireConfirmation { reason, detail } => { + assert_eq!(reason, "bash needs approval"); + assert_eq!(detail, "destructive action"); + } + other => panic!("Expected RequireConfirmation, got {other:?}"), + } +} + +// ── Chain ordering ─────────────────────────────────────────────────── + +#[tokio::test] +async fn middleware_chain_short_circuits_on_deny() { + let log = Arc::new(Mutex::new(Vec::::new())); + let recorder = Arc::new(RecordingMiddleware { log: log.clone() }); + let denier = Arc::new(Denier { + reason: "blocked".into(), + }); + + // Denier first, recorder second — recorder should never fire. 
+ let chain: Vec> = vec![denier, recorder]; + + let input = serde_json::json!({}); + let mut denied = false; + for mw in &chain { + match mw.before_tool("bash", &input).await { + MiddlewareVerdict::Allow => {} + MiddlewareVerdict::Deny { .. } | MiddlewareVerdict::RequireConfirmation { .. } => { + denied = true; + break; + } + } + } + + assert!(denied); + assert!(log.lock().unwrap().is_empty(), "recorder should not have fired"); +} From 338758998bcfe3b7e87bfad274d7473dd54aa835 Mon Sep 17 00:00:00 2001 From: Yeti Paw <22755327+ForkedInTime@users.noreply.github.com> Date: Wed, 15 Apr 2026 21:18:23 -0700 Subject: [PATCH 02/16] feat(browser): loop detector for autonomous mode stagnation Co-Authored-By: Arch Linux --- src/browser/loop_detector.rs | 95 ++++++++++++++++++++++++++++++++ src/browser/mod.rs | 1 + tests/loop_detector_tests.rs | 103 +++++++++++++++++++++++++++++++++++ 3 files changed, 199 insertions(+) create mode 100644 src/browser/loop_detector.rs create mode 100644 tests/loop_detector_tests.rs diff --git a/src/browser/loop_detector.rs b/src/browser/loop_detector.rs new file mode 100644 index 0000000..41437f2 --- /dev/null +++ b/src/browser/loop_detector.rs @@ -0,0 +1,95 @@ +use sha2::{Digest, Sha256}; +use std::collections::VecDeque; + +const WINDOW_SIZE: usize = 10; +const REPEAT_THRESHOLD: usize = 3; + +const NUDGES: [&str; 3] = [ + "You seem to be repeating the same action with no effect. Try a different approach — \ + perhaps a different element, a different selector, or navigate to a different section.", + "This action has failed multiple times on the same page state. Consider: \ + (1) the element may be disabled or overlaid, (2) you may need to scroll first, \ + (3) try using JavaScript evaluation as a fallback, (4) the page may require authentication.", + "Stopping — the browser agent has repeated the same action 3 times with no progress. 
\ + The page may be stuck, require a CAPTCHA, or the target element may not be interactable.", +]; + +#[derive(Clone, Debug, PartialEq, Eq)] +struct ActionFingerprint { + action_hash: String, + page_hash: String, +} + +pub struct LoopDetector { + window: VecDeque, + nudge_level: usize, +} + +impl LoopDetector { + pub fn new() -> Self { + Self { + window: VecDeque::new(), + nudge_level: 0, + } + } + + /// Record an action. Hashes action_type+target and page_text, appends to window, + /// pruning to WINDOW_SIZE. + pub fn record_action(&mut self, action_type: &str, target: &str, page_text: &str) { + let fp = ActionFingerprint { + action_hash: hash_string(&format!("{action_type}:{target}")), + page_hash: hash_string(page_text), + }; + self.window.push_back(fp); + if self.window.len() > WINDOW_SIZE { + self.window.pop_front(); + } + } + + /// Check whether the last REPEAT_THRESHOLD entries are identical. + /// Returns an escalating nudge string if stagnation is detected, None otherwise. + /// Nudge level advances each time stagnation is confirmed (capped at NUDGES.len()-1). + pub fn check_stagnation(&mut self) -> Option { + if self.window.len() < REPEAT_THRESHOLD { + return None; + } + let tail_start = self.window.len() - REPEAT_THRESHOLD; + let tail: Vec<_> = self.window.range(tail_start..).collect(); + let first = &tail[0]; + let all_same = tail.iter().all(|fp| fp == first); + if !all_same { + return None; + } + let level = self.nudge_level.min(NUDGES.len() - 1); + let nudge = NUDGES[level].to_string(); + self.nudge_level = (self.nudge_level + 1).min(NUDGES.len() - 1); + Some(nudge) + } + + /// Clear the window and reset escalation state. + pub fn reset(&mut self) { + self.window.clear(); + self.nudge_level = 0; + } + + pub fn window_len(&self) -> usize { + self.window.len() + } +} + +impl Default for LoopDetector { + fn default() -> Self { + Self::new() + } +} + +/// Public helper: SHA-256 of "{action_type}:{target}". 
+pub fn fingerprint_action(action_type: &str, target: &str, _extra: &str) -> String { + hash_string(&format!("{action_type}:{target}")) +} + +fn hash_string(s: &str) -> String { + let mut hasher = Sha256::new(); + hasher.update(s.as_bytes()); + format!("{:x}", hasher.finalize()) +} diff --git a/src/browser/mod.rs b/src/browser/mod.rs index 6bc8889..3faa5ee 100644 --- a/src/browser/mod.rs +++ b/src/browser/mod.rs @@ -3,6 +3,7 @@ pub mod actions; pub mod cdp; pub mod element; pub mod extraction; +pub mod loop_detector; pub mod middleware; pub mod snapshot; diff --git a/tests/loop_detector_tests.rs b/tests/loop_detector_tests.rs new file mode 100644 index 0000000..e72ab6d --- /dev/null +++ b/tests/loop_detector_tests.rs @@ -0,0 +1,103 @@ +use rustyclaw::browser::loop_detector::{fingerprint_action, LoopDetector}; + +// 1. Same inputs → same hash +#[test] +fn fingerprint_equality_same_inputs() { + let h1 = fingerprint_action("click", "#submit", ""); + let h2 = fingerprint_action("click", "#submit", ""); + assert_eq!(h1, h2); +} + +// 2. Different target → different hash +#[test] +fn fingerprint_differs_on_target() { + let h1 = fingerprint_action("click", "#submit", ""); + let h2 = fingerprint_action("click", "#cancel", ""); + assert_ne!(h1, h2); +} + +// 3. 11 records → window capped at 10 +#[test] +fn window_prunes_oldest_at_eleven() { + let mut d = LoopDetector::new(); + for i in 0..11 { + d.record_action("click", &format!("#{i}"), "page"); + } + assert_eq!(d.window_len(), 10); +} + +// 4. Three identical actions → level-1 nudge ("different approach") +#[test] +fn three_identical_trips_level_one_nudge() { + let mut d = LoopDetector::new(); + for _ in 0..3 { + d.record_action("click", "#btn", "same page text"); + } + let nudge = d.check_stagnation().expect("expected a nudge"); + assert!(nudge.contains("different approach"), "got: {nudge}"); +} + +// 5. 
After level 1, three more identical → level-2 nudge ("multiple times") +#[test] +fn three_more_identical_escalate_to_level_two() { + let mut d = LoopDetector::new(); + // Trigger level 1 + for _ in 0..3 { + d.record_action("click", "#btn", "same page text"); + } + d.check_stagnation(); + // Trigger level 2 — window already has 3 identical, check_stagnation consumed level 0. + // We need to push 3 more identical entries so tail is still identical. + for _ in 0..3 { + d.record_action("click", "#btn", "same page text"); + } + let nudge = d.check_stagnation().expect("expected escalated nudge"); + assert!(nudge.contains("multiple times"), "got: {nudge}"); +} + +// 6. Third check → terminal "Stopping" nudge +#[test] +fn level_three_is_terminal_stop() { + let mut d = LoopDetector::new(); + for _ in 0..3 { + d.record_action("click", "#btn", "same page text"); + } + d.check_stagnation(); // level 0 → emit NUDGES[0], nudge_level becomes 1 + for _ in 0..3 { + d.record_action("click", "#btn", "same page text"); + } + d.check_stagnation(); // level 1 → emit NUDGES[1], nudge_level becomes 2 + for _ in 0..3 { + d.record_action("click", "#btn", "same page text"); + } + let nudge = d.check_stagnation().expect("expected terminal nudge"); + assert!(nudge.contains("Stopping"), "got: {nudge}"); +} + +// 7. reset() clears window and resets nudge level +#[test] +fn reset_clears_window_and_nudge_level() { + let mut d = LoopDetector::new(); + for _ in 0..3 { + d.record_action("click", "#btn", "same page text"); + } + d.check_stagnation(); // consumes level 0 + d.reset(); + assert_eq!(d.window_len(), 0); + // After reset, 3 identical actions should produce level-1 nudge again + for _ in 0..3 { + d.record_action("click", "#btn", "same page text"); + } + let nudge = d.check_stagnation().expect("expected nudge after reset"); + assert!(nudge.contains("different approach"), "got: {nudge}"); +} + +// 8. 
Same action but different page_text each time → no stagnation +#[test] +fn page_change_breaks_stagnation() { + let mut d = LoopDetector::new(); + for i in 0..5 { + d.record_action("click", "#btn", &format!("page content {i}")); + } + assert!(d.check_stagnation().is_none()); +} From 89171351176ea130da72400de2e383546e5c8a03 Mon Sep 17 00:00:00 2001 From: Yeti Paw <22755327+ForkedInTime@users.noreply.github.com> Date: Wed, 15 Apr 2026 21:23:32 -0700 Subject: [PATCH 03/16] feat(browser): destructive-action approval gate Co-Authored-By: Arch Linux --- src/browser/approval_gate.rs | 203 +++++++++++++++++++++++++++++++++++ src/browser/mod.rs | 1 + tests/approval_gate_tests.rs | 191 ++++++++++++++++++++++++++++++++ 3 files changed, 395 insertions(+) create mode 100644 src/browser/approval_gate.rs create mode 100644 tests/approval_gate_tests.rs diff --git a/src/browser/approval_gate.rs b/src/browser/approval_gate.rs new file mode 100644 index 0000000..34d248c --- /dev/null +++ b/src/browser/approval_gate.rs @@ -0,0 +1,203 @@ +//! Destructive-action approval gate for autonomous browser agent. +//! +//! Pattern-matches tool calls against URL paths, button text, form-field +//! signals, visible prices, and user-defined extension patterns. Read-only +//! tools always pass; everything else is checked against compiled RegexSets. + +use regex::{Regex, RegexSet}; + +/// Read-only tools that never require approval. +const READ_ONLY_TOOLS: &[&str] = &[ + "browser_navigate", + "browser_snapshot", + "browser_screenshot", + "browser_get_text", + "browser_wait", + "browse_done", +]; + +/// Verdict returned by the approval gate. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum GateVerdict { + Allow, + RequireConfirmation { reason: String, detail: String }, +} + +/// Context passed to the gate for each tool invocation. +#[derive(Debug, Clone, Default)] +pub struct GateContext { + pub tool_name: String, + pub url: String, + pub target_text: String, + /// e.g. 
"input:type=password", "input:autocomplete=cc-number" + pub form_field_signals: Vec, + /// e.g. "$12.99", "€0.00" + pub visible_prices: Vec, +} + +/// Compiled approval gate — all regexes are built once at construction. +pub struct ApprovalGate { + url_set: RegexSet, + button_set: RegexSet, + form_set: RegexSet, + price_re: Regex, + extra_patterns: Vec, +} + +// --- Built-in pattern strings --------------------------------------------------- + +fn url_patterns() -> Vec { + vec![ + r"/pay(ments?)?([/?#]|$)".into(), // path segment must end at '/', '?', '#' or end of URL + r"/checkout([/?#]|$)".into(), + r"/purchase([/?#]|$)".into(), + r"/order-review".into(), + r"/billing/add-card".into(), + r"/wallet/transfer".into(), + r"/oauth/authorize".into(), + r"/consent".into(), + r"/authorize/grant".into(), + ] +} + +fn button_patterns() -> Vec { + vec![ + // Category 2 — payment / purchase + r"(?i)confirm (purchase|order|payment)".into(), + r"(?i)submit payment".into(), + r"(?i)place (my )?order".into(), + r"(?i)complete (purchase|order)".into(), + r"(?i)buy now".into(), + r"(?i)pay (now|\$)".into(), + r"(?i)start (free )?trial".into(), + r"(?i)try free for \d+ days?".into(), + r"(?i)upgrade (to premium|plan|account)".into(), + // Category 3 — account destruction + r"(?i)delete (account|repository|organization|workspace|project)".into(), + r"(?i)remove (account|user)".into(), + r"(?i)revoke (access|permissions|api key)".into(), + r"(?i)permanently delete".into(), + r"(?i)empty trash".into(), + r"(?i)cancel subscription".into(), + r"(?i)close account".into(), + r"(?i)deactivate".into(), + // Category 4 — publication / blast radius + r"(?i)post (tweet|publicly|to public)".into(), + r"(?i)publish (article|page|post)".into(), + r"(?i)go live".into(), + r"(?i)^tweet$".into(), + r"(?i)share publicly".into(), + r"(?i)send (email|message|invitation)".into(), + r"(?i)reply all".into(), + // Category 5 — OAuth / permission grants + r"(?i)(authorize|allow) (this )?(app|application|access)".into(), + r"(?i)grant (access|permissions)".into(), 
+ r"(?i)i authorize".into(), + // Category 6 — contracts / legal + r"(?i)(accept|agree to) (terms|contract|agreement)".into(), + r"(?i)sign contract".into(), + r"(?i)sign electronically".into(), + r"(?i)i agree (and|to) (continue|proceed)".into(), + ] +} + +fn form_field_patterns() -> Vec { + vec![ + r"^input:type=password$".into(), + r"^input:autocomplete=cc-(number|exp|csc|name)$".into(), + r"^input:name=(card|cc|cvv|cvc|pin|ssn)$".into(), + r"^input:id=(card|cc|cvv|cvc|pin|ssn)$".into(), + ] +} + +const PRICE_PATTERN: &str = r"[\$€£]\s*(\d+(?:[.,]\d{1,2})?)"; // fraction optional so whole-unit prices ("$20") match too + +// --- Implementation ------------------------------------------------------------ + +impl Default for ApprovalGate { + fn default() -> Self { + Self::with_user_patterns(Vec::new()) + } +} + +impl ApprovalGate { + /// Create a gate, appending user-supplied regex patterns. + /// Invalid user patterns are logged to stderr and skipped. + pub fn with_user_patterns(user_patterns: Vec) -> Self { + let mut extras = Vec::new(); + for pat in &user_patterns { + match Regex::new(pat) { + Ok(re) => extras.push(re), + Err(e) => eprintln!("approval_gate: skipping invalid user pattern {pat:?}: {e}"), + } + } + + Self { + url_set: RegexSet::new(url_patterns()).expect("built-in URL patterns must compile"), + button_set: RegexSet::new(button_patterns()) + .expect("built-in button patterns must compile"), + form_set: RegexSet::new(form_field_patterns()) + .expect("built-in form patterns must compile"), + price_re: Regex::new(PRICE_PATTERN).expect("price pattern must compile"), + extra_patterns: extras, + } + } + + /// Evaluate a tool call context. Returns `Allow` or `RequireConfirmation`. + pub fn check(&self, c: &GateContext) -> GateVerdict { + // 1. Read-only tools always pass. + if READ_ONLY_TOOLS.contains(&c.tool_name.as_str()) { + return GateVerdict::Allow; + } + + let mut reasons: Vec = Vec::new(); + + // 2. 
URL patterns + if self.url_set.is_match(&c.url) { + reasons.push(format!("url_pattern: {}", c.url)); + } + + // 3. Button / text patterns (categories 2-6) + if self.button_set.is_match(&c.target_text) { + reasons.push(format!("button_text: {}", c.target_text)); + } + + // 4. Form field signals + for sig in &c.form_field_signals { + if self.form_set.is_match(sig) { + reasons.push(format!("form_field: {sig}")); + } + } + + // 5. Visible prices — skip zero amounts + for price in &c.visible_prices { + // One entry may carry several amounts ("$0 today, then $9.99/mo"), so + // every match is inspected; any non-zero amount trips the gate. + let has_charge = self.price_re.captures_iter(price).any(|caps| { + // Decimal comma ("12,99") is normalised to a dot before parsing. + let amount = caps[1].replace(',', "."); + amount.parse().is_ok_and(|val: f64| val >= 0.01) + }); + if has_charge { + reasons.push(format!("visible_price: {price}")); + } + } + + // 6. Extra (user-defined) patterns — checked against url + target_text + for re in &self.extra_patterns { + if re.is_match(&c.url) || re.is_match(&c.target_text) { + reasons.push(format!("user_pattern: {}", re.as_str())); + } + } + + // 7. Verdict + if reasons.is_empty() { + GateVerdict::Allow + } else { + GateVerdict::RequireConfirmation { + reason: reasons[0].clone(), + detail: reasons.join("; "), + } + } + } +} diff --git a/src/browser/mod.rs b/src/browser/mod.rs index 3faa5ee..e12e1c8 100644 --- a/src/browser/mod.rs +++ b/src/browser/mod.rs @@ -1,5 +1,6 @@ //! Browser automation via Chrome DevTools Protocol. 
pub mod actions; +pub mod approval_gate; pub mod cdp; pub mod element; pub mod extraction; diff --git a/tests/approval_gate_tests.rs b/tests/approval_gate_tests.rs new file mode 100644 index 0000000..b22252e --- /dev/null +++ b/tests/approval_gate_tests.rs @@ -0,0 +1,191 @@ +use rustyclaw::browser::approval_gate::{ApprovalGate, GateContext, GateVerdict}; + +fn gate() -> ApprovalGate { + ApprovalGate::default() +} + +fn ctx(tool: &str) -> GateContext { + GateContext { + tool_name: tool.into(), + ..Default::default() + } +} + +// 1. Read-only tools always pass.
+#[test] +fn allows_plain_read_tools() { + let g = gate(); + for tool in &[ + "browser_navigate", + "browser_snapshot", + "browser_screenshot", + "browser_get_text", + "browser_wait", + "browse_done", + ] { + let c = ctx(tool); + assert_eq!(g.check(&c), GateVerdict::Allow, "tool {tool} should Allow"); + } +} + +// 2. /checkout → RequireConfirmation +#[test] +fn trips_on_checkout_url() { + let g = gate(); + let c = GateContext { + tool_name: "browser_click".into(), + url: "https://shop.example.com/checkout".into(), + ..Default::default() + }; + assert!(matches!(g.check(&c), GateVerdict::RequireConfirmation { .. })); +} + +// 3. Article about checkout should not trip (path is /articles/checkout-guide). +#[test] +fn does_not_trip_on_article_about_checkout() { + let g = gate(); + let c = GateContext { + tool_name: "browser_click".into(), + url: "https://blog.example.com/articles/checkout-guide".into(), + target_text: "Read more".into(), + ..Default::default() + }; + assert_eq!(g.check(&c), GateVerdict::Allow); +} + +// 4. "Confirm Purchase" → trips +#[test] +fn trips_on_confirm_purchase_button() { + let g = gate(); + let c = GateContext { + tool_name: "browser_click".into(), + target_text: "Confirm Purchase".into(), + ..Default::default() + }; + assert!(matches!(g.check(&c), GateVerdict::RequireConfirmation { .. })); +} + +// 5. "Start Free Trial" → trips +#[test] +fn trips_on_start_free_trial_autobill() { + let g = gate(); + let c = GateContext { + tool_name: "browser_click".into(), + target_text: "Start Free Trial".into(), + ..Default::default() + }; + assert!(matches!(g.check(&c), GateVerdict::RequireConfirmation { .. })); +} + +// 6. Bare "Submit" does NOT trip. +#[test] +fn does_not_trip_on_bare_submit() { + let g = gate(); + let c = GateContext { + tool_name: "browser_click".into(), + target_text: "Submit".into(), + ..Default::default() + }; + assert_eq!(g.check(&c), GateVerdict::Allow); +} + +// 7. 
/oauth/authorize → trips +#[test] +fn trips_on_oauth_authorize_url() { + let g = gate(); + let c = GateContext { + tool_name: "browser_click".into(), + url: "https://accounts.google.com/oauth/authorize?client_id=abc".into(), + ..Default::default() + }; + assert!(matches!(g.check(&c), GateVerdict::RequireConfirmation { .. })); +} + +// 8. "Delete Account" → trips +#[test] +fn trips_on_delete_account() { + let g = gate(); + let c = GateContext { + tool_name: "browser_click".into(), + target_text: "Delete Account".into(), + ..Default::default() + }; + assert!(matches!(g.check(&c), GateVerdict::RequireConfirmation { .. })); +} + +// 9. "$12.99" → trips, reason contains "visible_price" +#[test] +fn trips_on_visible_price() { + let g = gate(); + let c = GateContext { + tool_name: "browser_click".into(), + visible_prices: vec!["$12.99".into()], + ..Default::default() + }; + match g.check(&c) { + GateVerdict::RequireConfirmation { reason, .. } => { + assert!(reason.contains("visible_price"), "reason should mention visible_price, got: {reason}"); + } + GateVerdict::Allow => panic!("expected RequireConfirmation for $12.99"), + } +} + +// 10. "$0.00" → Allow (zero price) +#[test] +fn zero_price_does_not_trip() { + let g = gate(); + let c = GateContext { + tool_name: "browser_click".into(), + visible_prices: vec!["$0.00".into()], + ..Default::default() + }; + assert_eq!(g.check(&c), GateVerdict::Allow); +} + +// 11. Password field → trips +#[test] +fn trips_on_password_field() { + let g = gate(); + let c = GateContext { + tool_name: "browser_fill".into(), + form_field_signals: vec!["input:type=password".into()], + ..Default::default() + }; + assert!(matches!(g.check(&c), GateVerdict::RequireConfirmation { .. })); +} + +// 12. 
CC autocomplete → trips +#[test] +fn trips_on_cc_autocomplete() { + let g = gate(); + let c = GateContext { + tool_name: "browser_fill".into(), + form_field_signals: vec!["input:autocomplete=cc-number".into()], + ..Default::default() + }; + assert!(matches!(g.check(&c), GateVerdict::RequireConfirmation { .. })); +} + +// 13. User extension pattern matches → trips +#[test] +fn user_extension_patterns_append() { + let g = ApprovalGate::with_user_patterns(vec!["force-merge".into()]); + let c = GateContext { + tool_name: "browser_click".into(), + target_text: "force-merge into main".into(), + ..Default::default() + }; + assert!(matches!(g.check(&c), GateVerdict::RequireConfirmation { .. })); +} + +// 14. Invalid user regex is skipped — no panic, still Allow for benign context. +#[test] +fn invalid_user_pattern_is_skipped_not_crash() { + let g = ApprovalGate::with_user_patterns(vec!["[".into()]); + let c = GateContext { + tool_name: "browser_click".into(), + target_text: "OK".into(), + ..Default::default() + }; + assert_eq!(g.check(&c), GateVerdict::Allow); +} From 0e1a153b4633f5f1698d4335bba2af4631f29d17 Mon Sep 17 00:00:00 2001 From: Yeti Paw <22755327+ForkedInTime@users.noreply.github.com> Date: Wed, 15 Apr 2026 21:29:38 -0700 Subject: [PATCH 04/16] =?UTF-8?q?feat(config):=20browse=20settings=20?= =?UTF-8?q?=E2=80=94=20max=20steps,=20approval=20patterns,=20policy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Arch Linux --- src/config.rs | 21 +++++++++++++++++++++ src/settings.rs | 14 ++++++++++++++ tests/browse_config_tests.rs | 17 +++++++++++++++++ 3 files changed, 52 insertions(+) create mode 100644 tests/browse_config_tests.rs diff --git a/src/config.rs b/src/config.rs index 1db6364..dd22a03 100644 --- a/src/config.rs +++ b/src/config.rs @@ -316,6 +316,13 @@ pub struct Config { /// Default browser action timeout in milliseconds. pub browser_timeout_ms: u64, + /// Default max steps for /browse runs. 
Configurable per-run. + pub browse_max_steps: u32, + /// User-appended destructive-action patterns (regex). + pub browse_approval_patterns: Vec, + /// Default policy: "pattern" (default), "ask", "yolo" (not honored from settings — per-run only). + pub browse_default_policy: String, + /// Watch debounce (ms) — coalesces rapid filesystem events. pub watch_debounce_ms: u64, /// Minimum gap between watch triggers (ms). @@ -433,6 +440,9 @@ impl Default for Config { browser_chrome_path: None, browser_cdp_endpoint: None, browser_timeout_ms: 30_000, + browse_max_steps: 50, + browse_approval_patterns: Vec::new(), + browse_default_policy: "pattern".to_string(), watch_debounce_ms: 500, watch_rate_limit_ms: 10_000, watch_markers: vec!["AI:".into(), "AGENT:".into()], @@ -623,6 +633,17 @@ impl Config { } } + // Browse agent settings + if let Some(s) = settings.browse_max_steps { + cfg.browse_max_steps = s; + } + if let Some(p) = settings.browse_approval_patterns { + cfg.browse_approval_patterns = p; + } + if let Some(p) = settings.browse_default_policy.filter(|p| p != "yolo") { + cfg.browse_default_policy = p; // "yolo" is per-run only; never taken from settings + } + // Resolve output style: load name from settings, look up prompt if let Some(ref style_name) = settings.output_style && style_name != "default" diff --git a/src/settings.rs b/src/settings.rs index 307d32c..0a6af70 100644 --- a/src/settings.rs +++ b/src/settings.rs @@ -253,6 +253,15 @@ pub struct Settings { /// (`refs/rustyclaw/sessions/`) navigable via `/undo` and `/redo`. #[serde(rename = "autoCommit")] pub auto_commit: Option, + + #[serde(rename = "browseMaxSteps")] + pub browse_max_steps: Option, + + #[serde(rename = "browseApprovalPatterns")] + pub browse_approval_patterns: Option>, + + #[serde(rename = "browseDefaultPolicy")] + pub browse_default_policy: Option, } /// Settings for phase-declarative model routing.
@@ -489,6 +498,11 @@ impl Settings { phase_router: other.phase_router.or(self.phase_router), auto_fix: other.auto_fix.or(self.auto_fix), auto_commit: other.auto_commit.or(self.auto_commit), + browse_max_steps: other.browse_max_steps.or(self.browse_max_steps), + browse_approval_patterns: other + .browse_approval_patterns + .or(self.browse_approval_patterns), + browse_default_policy: other.browse_default_policy.or(self.browse_default_policy), permissions: PermissionsConfig { // Union both lists — project additions stack on top of global allow: { diff --git a/tests/browse_config_tests.rs b/tests/browse_config_tests.rs new file mode 100644 index 0000000..788a789 --- /dev/null +++ b/tests/browse_config_tests.rs @@ -0,0 +1,17 @@ +#[test] +fn parses_browse_settings_from_json() { + let json = r#"{"browseMaxSteps": 75, "browseApprovalPatterns": ["force-merge"], "browseDefaultPolicy": "ask"}"#; + let s: rustyclaw::settings::Settings = serde_json::from_str(json).unwrap(); + assert_eq!(s.browse_max_steps, Some(75)); + assert_eq!( + s.browse_approval_patterns, + Some(vec!["force-merge".to_string()]) + ); + assert_eq!(s.browse_default_policy.as_deref(), Some("ask")); +} + +#[test] +fn config_default_max_steps_is_fifty() { + let cfg = rustyclaw::config::Config::default(); + assert_eq!(cfg.browse_max_steps, 50); +} From a6ad44e633d057943df5ecb2b310729486c58747 Mon Sep 17 00:00:00 2001 From: Yeti Paw <22755327+ForkedInTime@users.noreply.github.com> Date: Wed, 15 Apr 2026 21:33:03 -0700 Subject: [PATCH 05/16] feat(browser): browse_done tool for autonomous mode termination Co-Authored-By: Arch Linux --- src/tools/browser_tools.rs | 49 +++++++++++++++++++++++++++++++++ tests/browse_done_tool_tests.rs | 37 +++++++++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 tests/browse_done_tool_tests.rs diff --git a/src/tools/browser_tools.rs b/src/tools/browser_tools.rs index 41e4198..5d47086 100644 --- a/src/tools/browser_tools.rs +++ b/src/tools/browser_tools.rs @@ 
-432,3 +432,52 @@ impl Tool for BrowserWaitTool { Ok(ToolOutput::success(result)) } } + +// ── browse_done ────────────────────────────────────────────────────────────── + +/// Sentinel tool the model calls to signal the end of an autonomous `/browse` +/// run. Returns a `BROWSE_DONE` string the orchestrator parses to exit the loop. +pub struct BrowseDoneTool; + +impl BrowseDoneTool { + pub fn new() -> Self { + Self + } +} + +impl Default for BrowseDoneTool { + fn default() -> Self { + Self::new() + } +} + +#[async_trait] +impl Tool for BrowseDoneTool { + fn name(&self) -> &str { + "browse_done" + } + fn description(&self) -> &str { + "Signal that the browser goal is achieved or that you're stuck." + } + fn input_schema(&self) -> serde_json::Value { + json!({ + "type": "object", + "properties": { + "summary": { "type": "string" }, + "achieved": { "type": "boolean" } + }, + "required": ["summary", "achieved"] + }) + } + async fn execute(&self, input: serde_json::Value, _ctx: &ToolContext) -> Result { + let summary = input["summary"] + .as_str() + .ok_or_else(|| anyhow!("missing required field: summary"))?; + let achieved = input["achieved"] + .as_bool() + .ok_or_else(|| anyhow!("missing required field: achieved"))?; + Ok(ToolOutput::success(format!( + "BROWSE_DONE achieved={achieved} summary={summary}" + ))) + } +} diff --git a/tests/browse_done_tool_tests.rs b/tests/browse_done_tool_tests.rs new file mode 100644 index 0000000..bda8b81 --- /dev/null +++ b/tests/browse_done_tool_tests.rs @@ -0,0 +1,37 @@ +use rustyclaw::api::types::ToolResultContent; +use rustyclaw::tools::{browser_tools::BrowseDoneTool, Tool, ToolContext}; +use serde_json::json; + +fn extract_text(output: &rustyclaw::tools::ToolOutput) -> String { + output + .content + .iter() + .map(|c| { + let ToolResultContent::Text { text } = c; + text.as_str() + }) + .collect::>() + .join("") +} + +#[tokio::test] +async fn browse_done_records_summary_and_achieved() { + let tool = BrowseDoneTool::new(); + let 
ctx = ToolContext::new(std::env::current_dir().unwrap()); + let input = json!({ "summary": "Found flight", "achieved": true }); + let result = tool.execute(input, &ctx).await.unwrap(); + let text = extract_text(&result); + assert!(text.contains("BROWSE_DONE"), "expected BROWSE_DONE sentinel in: {text}"); + assert!(text.contains("achieved=true"), "expected achieved=true in: {text}"); + assert!(text.contains("Found flight"), "expected summary in: {text}"); +} + +#[tokio::test] +async fn browse_done_handles_not_achieved() { + let tool = BrowseDoneTool::new(); + let ctx = ToolContext::new(std::env::current_dir().unwrap()); + let input = json!({ "summary": "Stuck", "achieved": false }); + let result = tool.execute(input, &ctx).await.unwrap(); + let text = extract_text(&result); + assert!(text.contains("achieved=false"), "expected achieved=false in: {text}"); +} From d56a48a0f7434dbfa86248a1f72dd5095b3d5ddc Mon Sep 17 00:00:00 2001 From: Yeti Paw <22755327+ForkedInTime@users.noreply.github.com> Date: Wed, 15 Apr 2026 21:34:32 -0700 Subject: [PATCH 06/16] feat(browser): browse_loop types + run_browse stub Co-Authored-By: Arch Linux --- src/browser/browse_loop.rs | 68 ++++++++++++++++++++++++++++++++++++++ src/browser/mod.rs | 1 + 2 files changed, 69 insertions(+) create mode 100644 src/browser/browse_loop.rs diff --git a/src/browser/browse_loop.rs b/src/browser/browse_loop.rs new file mode 100644 index 0000000..5c4cf60 --- /dev/null +++ b/src/browser/browse_loop.rs @@ -0,0 +1,68 @@ +use anyhow::Result; +use serde::{Deserialize, Serialize}; +use tokio::sync::mpsc; + +/// Policy for the approval gate during this run. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum BrowsePolicy { + Pattern, + Ask, + Yolo, +} + +impl Default for BrowsePolicy { + fn default() -> Self { Self::Pattern } +} + +/// A single browse-run configuration. 
+#[derive(Debug, Clone)] +pub struct BrowseRequest { + pub goal: String, + pub policy: BrowsePolicy, + pub max_steps: u32, + pub voice: bool, +} + +/// Progress events streamed to the caller during a run. +#[derive(Debug, Clone, Serialize)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum BrowseProgress { + Started { goal: String, max_steps: u32 }, + Step { n: u32, action: String, target: String }, + Nudge { level: u8, text: String }, + ApprovalNeeded { step: u32, action: String, target_text: String, url: String, reason: String }, + Completed(BrowseResult), +} + +/// Final result of a browse run. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct BrowseResult { + pub achieved: bool, + pub summary: String, + pub reason: BrowseReason, + pub steps_used: u32, + pub final_url: Option, +} + +/// Why the loop terminated. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum BrowseReason { + Done, + Bailed, + StepCap, + Stagnation, + Budget, + BrowserCrashed, + UserDenied, + Cancelled, +} + +/// Stub — wiring happens in Task 7. +pub async fn run_browse( + _req: BrowseRequest, + _progress_tx: mpsc::Sender, +) -> Result { + anyhow::bail!("run_browse not yet implemented (Task 7)") +} diff --git a/src/browser/mod.rs b/src/browser/mod.rs index e12e1c8..cbb9588 100644 --- a/src/browser/mod.rs +++ b/src/browser/mod.rs @@ -1,5 +1,6 @@ //! Browser automation via Chrome DevTools Protocol. 
pub mod actions; +pub mod browse_loop; pub mod approval_gate; pub mod cdp; pub mod element; From 887d735824087f86f681dfcd19fd4b12a662fdd4 Mon Sep 17 00:00:00 2001 From: Yeti Paw <22755327+ForkedInTime@users.noreply.github.com> Date: Wed, 15 Apr 2026 21:41:28 -0700 Subject: [PATCH 07/16] feat(browser): run_browse orchestrator + middleware bridges Co-Authored-By: Arch Linux --- src/browser/approval_gate.rs | 163 +++++++++++++++++++++++ src/browser/browse_loop.rs | 242 ++++++++++++++++++++++++++++++++++- src/browser/loop_detector.rs | 63 +++++++++ src/query_engine.rs | 77 +++++++++++ 4 files changed, 541 insertions(+), 4 deletions(-) diff --git a/src/browser/approval_gate.rs b/src/browser/approval_gate.rs index 34d248c..9768666 100644 --- a/src/browser/approval_gate.rs +++ b/src/browser/approval_gate.rs @@ -4,7 +4,13 @@ //! signals, visible prices, and user-defined extension patterns. Read-only //! tools always pass; everything else is checked against compiled RegexSets. +use crate::browser::middleware::{MiddlewareVerdict, ToolMiddleware}; +use async_trait::async_trait; use regex::{Regex, RegexSet}; +use std::collections::HashMap; +use std::sync::atomic::{AtomicBool, AtomicU32, Ordering}; +use std::sync::{Arc, Mutex}; +use tokio::sync::{mpsc, oneshot}; /// Read-only tools that never require approval. const READ_ONLY_TOOLS: &[&str] = &[ @@ -201,3 +207,160 @@ impl ApprovalGate { } } } + +// ── Middleware bridge ───────────────────────────────────────────────────────── + +/// Approval prompt sent to the host (TUI/SDK/voice). +pub struct ApprovalPrompt { + pub step: u32, + pub tool_name: String, + pub target_text: String, + pub url: String, + pub reason: String, + pub reply: oneshot::Sender, +} + +pub struct ApprovalGateMiddleware { + gate: ApprovalGate, + policy: crate::browser::browse_loop::BrowsePolicy, + current_url: Arc>, + approval_tx: mpsc::Sender, + step_counter: Arc, + /// Tracks consecutive denial count per action key ("{tool_name}:{target_text}"). 
+ denial_counts: Mutex>, + /// Set when the same action is denied twice — triggers session termination. + user_denied: AtomicBool, +} + +impl ApprovalGateMiddleware { + pub fn new( + gate: ApprovalGate, + policy: crate::browser::browse_loop::BrowsePolicy, + current_url: Arc>, + approval_tx: mpsc::Sender, + step_counter: Arc, + ) -> Self { + Self { + gate, + policy, + current_url, + approval_tx, + step_counter, + denial_counts: Mutex::new(HashMap::new()), + user_denied: AtomicBool::new(false), + } + } + + /// Returns true if the user denied the same action twice, triggering termination. + pub fn is_user_denied(&self) -> bool { + self.user_denied.load(Ordering::SeqCst) + } +} + +#[async_trait] +impl ToolMiddleware for ApprovalGateMiddleware { + async fn before_tool(&self, tool_name: &str, input: &serde_json::Value) -> MiddlewareVerdict { + use crate::browser::browse_loop::BrowsePolicy; + + // If user already denied twice, block everything. + if self.user_denied.load(Ordering::SeqCst) { + return MiddlewareVerdict::Deny { + reason: "User denied this action twice. Terminating browse session.".into(), + }; + } + + // Yolo policy: always allow. + if self.policy == BrowsePolicy::Yolo { + return MiddlewareVerdict::Allow; + } + + // Read-only tools always pass, regardless of policy. + if READ_ONLY_TOOLS.contains(&tool_name) { + return MiddlewareVerdict::Allow; + } + + let url = self.current_url.lock().await.clone(); + let target_text = input["ref"] + .as_str() + .or_else(|| input["selector"].as_str()) + .unwrap_or("") + .to_string(); + + let gate_ctx = GateContext { + tool_name: tool_name.to_string(), + url: url.clone(), + target_text: target_text.clone(), + form_field_signals: Vec::new(), + visible_prices: Vec::new(), + }; + + // Ask policy: force confirmation for every non-read-only tool. 
+ let verdict = if self.policy == BrowsePolicy::Ask { + GateVerdict::RequireConfirmation { + reason: "ask policy".to_string(), + detail: format!("{tool_name} on {url}"), + } + } else { + // Pattern policy: delegate to the compiled gate. + self.gate.check(&gate_ctx) + }; + + match verdict { + GateVerdict::Allow => { + // Clear denial counter on approval for this action key. + let key = format!("{tool_name}:{target_text}"); + self.denial_counts.lock().unwrap().remove(&key); + MiddlewareVerdict::Allow + } + GateVerdict::RequireConfirmation { reason, .. } => { + let step = self.step_counter.load(Ordering::Relaxed); + let (tx, rx) = oneshot::channel(); + let prompt = ApprovalPrompt { + step, + tool_name: tool_name.to_string(), + target_text: target_text.clone(), + url, + reason: reason.clone(), + reply: tx, + }; + // If the host receiver is gone, deny by default. + if self.approval_tx.send(prompt).await.is_err() { + return MiddlewareVerdict::Deny { + reason: "approval channel closed".to_string(), + }; + } + match rx.await { + Ok(true) => { + // Approved — clear denial counter for this action. + let key = format!("{tool_name}:{target_text}"); + self.denial_counts.lock().unwrap().remove(&key); + MiddlewareVerdict::Allow + } + _ => { + // Denied — increment counter; terminate after 2 denials on same action. + let key = format!("{tool_name}:{target_text}"); + let count = { + let mut counts = self.denial_counts.lock().unwrap(); + let entry = counts.entry(key).or_insert(0); + *entry += 1; + *entry + }; + if count >= 2 { + self.user_denied.store(true, Ordering::SeqCst); + return MiddlewareVerdict::Deny { + reason: "User denied this action twice. Terminating browse session." + .into(), + }; + } + MiddlewareVerdict::Deny { reason } + } + } + } + } + } + + async fn after_tool(&self, _tool_name: &str, _output: &str) { + // Increment step counter after each tool execution. 
+ self.step_counter.fetch_add(1, Ordering::Relaxed); + } +} diff --git a/src/browser/browse_loop.rs b/src/browser/browse_loop.rs index 5c4cf60..e099147 100644 --- a/src/browser/browse_loop.rs +++ b/src/browser/browse_loop.rs @@ -1,7 +1,15 @@ use anyhow::Result; use serde::{Deserialize, Serialize}; +use std::sync::atomic::AtomicU32; +use std::sync::Arc; use tokio::sync::mpsc; +use crate::browser::approval_gate::{ApprovalGate, ApprovalGateMiddleware, ApprovalPrompt}; +use crate::browser::loop_detector::LoopDetectorMiddleware; +use crate::config::Config; +use crate::query_engine::QueryEngine; +use crate::tools::DynTool; + /// Policy for the approval gate during this run. #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "lowercase")] @@ -59,10 +67,236 @@ pub enum BrowseReason { Cancelled, } -/// Stub — wiring happens in Task 7. +/// Build the browse-agent system prompt. +fn build_browse_system_prompt(goal: &str, max_steps: u32) -> String { + format!( + "You are an autonomous browser agent.\n\ + \n\ + Goal: {goal}\n\ + \n\ + Instructions:\n\ + - Use the browser_* tools to navigate, inspect, and act on web pages.\n\ + - Take exactly one action per turn. Observe the result before planning the next step.\n\ + - When you believe the goal is achieved (or you're stuck), call browse_done(summary, achieved).\n\ + - Keep summaries under 2 sentences — they may be spoken aloud.\n\ + - You have {max_steps} steps total.\n\ + - If approval is denied, try a different approach or call browse_done(achieved=false)." + ) +} + +/// Filter the tool list down to browser_* + browse_done tools. +fn filter_browser_tools(tools: &[DynTool]) -> Vec { + tools + .iter() + .filter(|t| { + let name = t.name(); + name.starts_with("browser_") || name == "browse_done" + }) + .cloned() + .collect() +} + +/// Parse the BROWSE_DONE sentinel from assistant text. +/// Returns (achieved, summary) if the sentinel is found. 
+fn parse_browse_done(text: &str) -> Option<(bool, String)> { + if !text.contains("BROWSE_DONE") { + return None; + } + let achieved = text.contains("achieved=true"); + let summary = text + .find("summary=") + .map(|i| text[i + 8..].trim().to_string()) + .unwrap_or_default(); + Some((achieved, summary)) +} + +/// Orchestrate an autonomous browser agent run. pub async fn run_browse( - _req: BrowseRequest, - _progress_tx: mpsc::Sender, + req: BrowseRequest, + config: &Config, + tools: Vec, + current_url: Arc>, + progress_tx: mpsc::Sender, + approval_tx: mpsc::Sender, ) -> Result { - anyhow::bail!("run_browse not yet implemented (Task 7)") + // 1. Emit Started event. + let _ = progress_tx + .send(BrowseProgress::Started { + goal: req.goal.clone(), + max_steps: req.max_steps, + }) + .await; + + // 2. Shared step counter for the approval prompt. + let step_counter = Arc::new(AtomicU32::new(0)); + + // 3. Build the approval gate from config patterns. + let gate = ApprovalGate::with_user_patterns(config.browse_approval_patterns.clone()); + let gate_mw = Arc::new(ApprovalGateMiddleware::new( + gate, + req.policy, + current_url.clone(), + approval_tx, + step_counter.clone(), + )); + + // 4. Build the loop detector middleware. + let (nudge_tx, mut nudge_rx) = mpsc::channel::(16); + let loop_mw = Arc::new(LoopDetectorMiddleware::new(nudge_tx)); + + // 5. Assemble middleware chain (keep Arc refs for post-run inspection). + let middlewares: crate::browser::middleware::MiddlewareChain = vec![ + gate_mw.clone() as Arc, + loop_mw.clone() as Arc, + ]; + + // 6. Build browse-specific system prompt. + let system_prompt = build_browse_system_prompt(&req.goal, req.max_steps); + + // 7. Filter tools to browser_* + browse_done only. + let browser_tools = filter_browser_tools(&tools); + + // 8. Override config's max_turns to the browse step cap. + let mut browse_config = config.clone(); + browse_config.max_turns = req.max_steps; + + // 9. Create the browse-mode query engine. 
+ let mut engine = + QueryEngine::new_for_browse(browse_config, browser_tools, system_prompt, middlewares)?; + + // 10. Spawn a task to forward nudges as BrowseProgress events. + let progress_tx_nudge = progress_tx.clone(); + let step_counter_nudge = step_counter.clone(); + let nudge_handle = tokio::spawn(async move { + let mut level: u8 = 0; + while let Some(text) = nudge_rx.recv().await { + level = level.saturating_add(1); + // Update step counter from the nudge level for progress reporting. + let _ = step_counter_nudge.load(std::sync::atomic::Ordering::Relaxed); + let _ = progress_tx_nudge + .send(BrowseProgress::Nudge { level, text }) + .await; + } + }); + + // 11. Run the agentic loop. + let query_result = engine.query(&req.goal).await; + + // 12. Shut down the nudge forwarder. + nudge_handle.abort(); + + // 13. Determine the result. + let steps_used = engine.turns_used(); + let final_url = { + let url = current_url.lock().await; + if url.is_empty() { None } else { Some(url.clone()) } + }; + + // Check middleware termination flags first — they override sentinel parsing. + let middleware_reason = if loop_mw.is_stopped() { + Some(BrowseReason::Stagnation) + } else if gate_mw.is_user_denied() { + Some(BrowseReason::UserDenied) + } else { + None + }; + + let result = match query_result { + Ok(()) => { + // Bug 1 fix: BROWSE_DONE sentinel is emitted by BrowseDoneTool::execute() + // which returns it as a tool result (user-role ContentBlock::ToolResult), + // NOT as assistant text. Check tool results first, then assistant text as fallback. 
+ let sentinel_text = engine + .last_tool_result_text() + .and_then(|t| parse_browse_done(&t).map(|r| (t, r))) + .or_else(|| { + engine + .last_assistant_text() + .and_then(|t| parse_browse_done(&t).map(|r| (t, r))) + }); + + if let Some((_raw, (achieved, summary))) = sentinel_text { + let mw_active = middleware_reason.is_some(); + let reason = middleware_reason.unwrap_or(if achieved { + BrowseReason::Done + } else { + BrowseReason::Bailed + }); + BrowseResult { + achieved: achieved && !mw_active, + summary, + reason, + steps_used, + final_url, + } + } else if let Some(reason) = middleware_reason { + // Middleware stopped the loop but no sentinel was found. + BrowseResult { + achieved: false, + summary: match reason { + BrowseReason::Stagnation => { + "Agent terminated: repeated same action with no progress".to_string() + } + BrowseReason::UserDenied => { + "Agent terminated: user denied the action twice".to_string() + } + _ => "Agent terminated by middleware".to_string(), + }, + reason, + steps_used, + final_url, + } + } else if let Some(text) = engine.last_assistant_text() { + // No sentinel, no middleware stop — engine stopped for other reasons. + let reason = if steps_used >= req.max_steps { + BrowseReason::StepCap + } else { + BrowseReason::Done + }; + BrowseResult { + achieved: false, + summary: text.chars().take(200).collect(), + reason, + steps_used, + final_url, + } + } else { + // No assistant messages at all. 
+ BrowseResult { + achieved: false, + summary: "No response from browse agent".to_string(), + reason: BrowseReason::Bailed, + steps_used, + final_url, + } + } + } + Err(e) => { + let msg = e.to_string(); + let reason = middleware_reason.unwrap_or_else(|| { + if msg.contains("budget") || msg.contains("Budget") { + BrowseReason::Budget + } else if msg.contains("browser") || msg.contains("CDP") || msg.contains("Chrome") + { + BrowseReason::BrowserCrashed + } else { + BrowseReason::Bailed + } + }); + BrowseResult { + achieved: false, + summary: format!("Browse agent error: {msg}"), + reason, + steps_used, + final_url, + } + } + }; + + // 14. Emit Completed event. + let _ = progress_tx + .send(BrowseProgress::Completed(result.clone())) + .await; + + Ok(result) } diff --git a/src/browser/loop_detector.rs b/src/browser/loop_detector.rs index 41437f2..dba9465 100644 --- a/src/browser/loop_detector.rs +++ b/src/browser/loop_detector.rs @@ -1,5 +1,10 @@ +use crate::browser::middleware::{MiddlewareVerdict, ToolMiddleware}; +use async_trait::async_trait; use sha2::{Digest, Sha256}; use std::collections::VecDeque; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Mutex; +use tokio::sync::mpsc; const WINDOW_SIZE: usize = 10; const REPEAT_THRESHOLD: usize = 3; @@ -93,3 +98,61 @@ fn hash_string(s: &str) -> String { hasher.update(s.as_bytes()); format!("{:x}", hasher.finalize()) } + +// ── Middleware bridge ───────────────────────────────────────────────────────── + +pub struct LoopDetectorMiddleware { + inner: Mutex, + nudge_tx: mpsc::Sender, + stopped: AtomicBool, +} + +impl LoopDetectorMiddleware { + pub fn new(nudge_tx: mpsc::Sender) -> Self { + Self { + inner: Mutex::new(LoopDetector::new()), + nudge_tx, + stopped: AtomicBool::new(false), + } + } + + /// Returns true if the loop detector reached terminal stagnation (level 3). 
+ pub fn is_stopped(&self) -> bool { + self.stopped.load(Ordering::SeqCst) + } +} + +#[async_trait] +impl ToolMiddleware for LoopDetectorMiddleware { + async fn before_tool(&self, _tool_name: &str, _input: &serde_json::Value) -> MiddlewareVerdict { + if self.stopped.load(Ordering::SeqCst) { + return MiddlewareVerdict::Deny { + reason: "Stagnation detected: the browser agent has been stopped after \ + repeating the same action with no effect." + .into(), + }; + } + MiddlewareVerdict::Allow + } + + async fn after_tool(&self, tool_name: &str, output: &str) { + // browser_navigate resets the detector (new page = fresh state). + if tool_name == "browser_navigate" { + self.inner.lock().unwrap().reset(); + return; + } + let nudge = { + let mut ld = self.inner.lock().unwrap(); + ld.record_action(tool_name, "", output); + ld.check_stagnation() + }; + if let Some(nudge) = nudge { + // If this is the terminal nudge (level 3 — contains "Stopping"), + // set the stopped flag so before_tool denies subsequent calls. + if nudge.contains("Stopping") { + self.stopped.store(true, Ordering::SeqCst); + } + let _ = self.nudge_tx.send(nudge).await; + } + } +} diff --git a/src/query_engine.rs b/src/query_engine.rs index 5732a31..7c49abb 100644 --- a/src/query_engine.rs +++ b/src/query_engine.rs @@ -28,6 +28,10 @@ pub struct QueryEngine { session_id: Option, /// Shared Read-tool cache for deduplicating unchanged re-reads (v2.1.86). read_cache: crate::tools::ReadCache, + /// In-process tool middleware chain (empty by default). + middlewares: crate::browser::middleware::MiddlewareChain, + /// Turn counter. 
+ turns: u32, } impl QueryEngine { @@ -64,6 +68,8 @@ impl QueryEngine { cumulative_cost_usd: 0.0, session_id: Some(uuid::Uuid::new_v4().to_string()), read_cache: crate::tools::new_read_cache(), + middlewares: Vec::new(), + turns: 0, }) } @@ -117,6 +123,7 @@ impl QueryEngine { loop { turn += 1; + self.turns = turn; if turn > max_turns { eprintln!("{}", format!("Stopped after {max_turns} turns.").yellow()); break; @@ -358,6 +365,7 @@ impl QueryEngine { ctx.live_model = Some(self.config.model.clone()); ctx.live_api_key = Some(self.config.api_key.clone()); ctx.live_ollama_host = Some(self.config.ollama_host.clone()); + ctx.middlewares = self.middlewares.clone(); let mut results = Vec::new(); for block in content { @@ -593,6 +601,75 @@ fn truncate_json(v: &serde_json::Value, max_len: usize) -> String { } impl QueryEngine { + /// Create a QueryEngine preconfigured for autonomous browse mode. + /// Overrides the system prompt and injects the middleware chain. + pub fn new_for_browse( + config: Config, + tools: Vec, + system_prompt: String, + middlewares: crate::browser::middleware::MiddlewareChain, + ) -> Result { + let mut engine = Self::new(config, tools)?; + engine.system_prompt = system_prompt; + engine.middlewares = middlewares; + Ok(engine) + } + + /// How many turns the engine has executed since the last `query()` call. + pub fn turns_used(&self) -> u32 { + self.turns + } + + /// Extract the text content from the last assistant message, if any. + pub fn last_assistant_text(&self) -> Option { + self.messages.iter().rev().find_map(|m| { + if m.role == Role::Assistant { + let texts: Vec<&str> = m + .content + .iter() + .filter_map(|b| match b { + ContentBlock::Text { text } => Some(text.as_str()), + _ => None, + }) + .collect(); + if texts.is_empty() { + None + } else { + Some(texts.join("")) + } + } else { + None + } + }) + } + + /// Extract the text content from the last tool result (user-role message + /// containing `ContentBlock::ToolResult`). 
Tool results are always sent + /// in user-role messages per the Anthropic API contract. + pub fn last_tool_result_text(&self) -> Option { + for msg in self.messages.iter().rev() { + if msg.role != Role::User { + continue; + } + for block in msg.content.iter().rev() { + if let ContentBlock::ToolResult { content, .. } = block { + let text: String = content + .iter() + .map(|c| { + let ToolResultContent::Text { text } = c; + text.as_str() + }) + .collect::>() + .join("\n"); + if !text.is_empty() { + return Some(text); + } + } + } + } + None + } + fn replay_user_messages(&self) -> bool { self.config.replay_user_messages } From 1b1e9c23fc46bb1d5f4ff6e2d460e03a889489dd Mon Sep 17 00:00:00 2001 From: Yeti Paw <22755327+ForkedInTime@users.noreply.github.com> Date: Wed, 15 Apr 2026 21:54:16 -0700 Subject: [PATCH 08/16] feat(commands): /browse slash command + CommandAction::Browse Co-Authored-By: Arch Linux --- src/commands/mod.rs | 60 ++++++++++++++++++++++++++++++++++++- src/tui/run.rs | 59 ++++++++++++++++++++++++++++++++++++ tests/browse_parse_tests.rs | 48 +++++++++++++++++++++++++++++ 3 files changed, 166 insertions(+), 1 deletion(-) create mode 100644 tests/browse_parse_tests.rs diff --git a/src/commands/mod.rs b/src/commands/mod.rs index e8b8112..a0421e7 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -294,6 +294,12 @@ pub enum CommandAction { Redo { n: Option }, /// `/autocommit [status]` — print auto-commit state to the chat. v1 only supports `status`. AutoCommitStatus, + /// Start an autonomous browser run. 
+ Browse { + goal: String, + policy: crate::browser::browse_loop::BrowsePolicy, + max_steps: Option, + }, /// Launch browser and optionally navigate to URL BrowseUrl(String), /// Take a screenshot of the current browser page @@ -386,7 +392,7 @@ pub fn dispatch(input: &str, ctx: &CommandContext) -> CommandAction { "undo" => cmd_undo(args), "redo" => cmd_redo(args), "autocommit" => cmd_autocommit(args), - "browser" | "browse" => { + "browser" => { let url = args.trim().to_string(); if url == "close" { CommandAction::BrowserClose @@ -396,6 +402,7 @@ pub fn dispatch(input: &str, ctx: &CommandContext) -> CommandAction { CommandAction::BrowseUrl(url) } } + "browse" => parse_browse_command(args.trim()), "screenshot" => CommandAction::BrowserScreenshot, "branch" => cmd_branch(ctx), "summary" => CommandAction::SendPrompt( @@ -527,6 +534,45 @@ fn split_first_word(s: &str) -> (&str, &str) { } } +/// Parse `/browse` arguments into a `CommandAction::Browse` variant. +/// +/// Supported flags (all optional, may appear in any order before the goal): +/// - `--yolo` → `BrowsePolicy::Yolo` +/// - `--ask` → `BrowsePolicy::Ask` +/// - `--max-steps ` → `max_steps = Some(N)` +/// +/// Remaining tokens after flag removal form the `goal` string. 
+pub fn parse_browse_command(input: &str) -> CommandAction { + use crate::browser::browse_loop::BrowsePolicy; + let mut policy = BrowsePolicy::Pattern; + let mut max_steps: Option = None; + let mut tokens: Vec<&str> = input.split_whitespace().collect(); + let mut i = 0; + while i < tokens.len() { + match tokens[i] { + "--yolo" => { + policy = BrowsePolicy::Yolo; + tokens.remove(i); + } + "--ask" => { + policy = BrowsePolicy::Ask; + tokens.remove(i); + } + "--max-steps" if i + 1 < tokens.len() => { + if let Ok(n) = tokens[i + 1].parse() { + max_steps = Some(n); + } + tokens.drain(i..=i + 1); + } + _ => { + i += 1; + } + } + } + let goal = tokens.join(" ").trim().to_string(); + CommandAction::Browse { goal, policy, max_steps } +} + // ── Individual commands ─────────────────────────────────────────────────────── fn cmd_banner(args: &str) -> CommandAction { @@ -2188,6 +2234,18 @@ pub const HELP_CATEGORIES: &[(&str, &str, &[HelpCommand])] = &[ ("/spawn discard ", "discard agent's worktree"), ], ), + ( + "Browser", + "Autonomous browser agent", + &[ + ("/browse ", "run autonomous browser agent towards a goal"), + ("/browse --yolo ", "yolo mode: no approval prompts"), + ("/browse --ask ", "ask before every action"), + ("/browse --max-steps N ", "cap the run at N steps"), + ("/browser ", "open URL in managed browser session"), + ("/screenshot", "take a screenshot of the current page"), + ], + ), ( "Plugins & tools", "MCP servers, plugins, viz", diff --git a/src/tui/run.rs b/src/tui/run.rs index 471ed58..ff1de3f 100644 --- a/src/tui/run.rs +++ b/src/tui/run.rs @@ -3864,6 +3864,65 @@ async fn handle_key(ctx: KeyCtx<'_>) -> Result<()> { ); app.entries.push(ChatEntry::system(msg)); } + CommandAction::Browse { goal, policy, max_steps } => { + use crate::browser::browse_loop::BrowsePolicy; + let policy_label = match policy { + BrowsePolicy::Yolo => " [yolo]", + BrowsePolicy::Ask => " [ask]", + BrowsePolicy::Pattern => "", + }; + let steps_note = match max_steps { + Some(n) => 
format!("Limit yourself to at most {n} browser steps.\n"), + None => String::new(), + }; + let steps_label = match max_steps { + Some(n) => format!(", max {n} steps"), + None => String::new(), + }; + app.entries.push(ChatEntry::system(format!( + "Browser agent{policy_label} starting{steps_label}: {goal}" + ))); + app.entries.push(ChatEntry::user(input.clone())); + app.scroll_to_bottom(); + app.start_loading(); + let prompt = format!( + "You are an autonomous browser agent.\n\ + Goal: {goal}\n\ + Policy:{policy_label}\n\ + {steps_note}\ + Use browser_navigate, browser_click, browser_fill_form, \ + browser_snapshot, and related browser tools to accomplish the goal. \ + Report your progress and final result." + ); + let mut snapshot = messages.clone(); + snapshot.push(Message { + role: Role::User, + content: vec![ContentBlock::Text { text: prompt }], + }); + let c2 = client.clone(); + let tvec = tools.to_vec(); + let cfg = config.clone(); + let tx2 = tx.clone(); + let sp = system_prompt.clone(); + let ps = perm_state.clone(); + let pm = app.plan_mode; + let sid3 = session.id.clone(); + let handle = tokio::spawn(async move { + run_api_task(ApiTask { + client: c2, + tools: tvec, + messages: snapshot, + config: cfg, + perm_state: ps, + system_prompt: sp, + tx: tx2, + plan_mode: pm, + session_id: sid3, + }) + .await; + }); + app.api_task = Some(handle.abort_handle()); + } CommandAction::BrowseUrl(url) => { // Ship a prompt to the agent loop — it will invoke the shared // browser_navigate / browser_snapshot tools (which drive the same diff --git a/tests/browse_parse_tests.rs b/tests/browse_parse_tests.rs new file mode 100644 index 0000000..879cda9 --- /dev/null +++ b/tests/browse_parse_tests.rs @@ -0,0 +1,48 @@ +use rustyclaw::commands::{parse_browse_command, CommandAction}; +use rustyclaw::browser::browse_loop::BrowsePolicy; + +#[test] +fn parses_plain_browse() { + match parse_browse_command("find the cheapest flight") { + CommandAction::Browse { goal, policy, max_steps } 
=> { + assert_eq!(goal, "find the cheapest flight"); + assert_eq!(policy, BrowsePolicy::Pattern); + assert_eq!(max_steps, None); + } + _ => panic!("wrong variant"), + } +} + +#[test] +fn parses_yolo_flag() { + match parse_browse_command("--yolo book the flight") { + CommandAction::Browse { policy, goal, .. } => { + assert_eq!(policy, BrowsePolicy::Yolo); + assert_eq!(goal, "book the flight"); + } + _ => panic!(), + } +} + +#[test] +fn parses_max_steps() { + match parse_browse_command("--max-steps 100 research X") { + CommandAction::Browse { max_steps, goal, .. } => { + assert_eq!(max_steps, Some(100)); + assert_eq!(goal, "research X"); + } + _ => panic!(), + } +} + +#[test] +fn parses_ask_and_max_steps_combined() { + match parse_browse_command("--ask --max-steps 25 quick check") { + CommandAction::Browse { policy, max_steps, goal } => { + assert_eq!(policy, BrowsePolicy::Ask); + assert_eq!(max_steps, Some(25)); + assert_eq!(goal, "quick check"); + } + _ => panic!(), + } +} From c07068e0b6955c8f2d4d14354f34eb4ea7cde081 Mon Sep 17 00:00:00 2001 From: Yeti Paw <22755327+ForkedInTime@users.noreply.github.com> Date: Wed, 15 Apr 2026 22:00:36 -0700 Subject: [PATCH 09/16] feat(tui): /browse dispatch via run_browse + approval modal Replace the CommandAction::Browse handler that incorrectly routed through run_api_task with a proper run_browse() call that uses the full orchestrator: middleware chain, approval gate, loop detector, step cap, and progress events. 
- Add browse_approval, browse_progress_rx, browse_approval_rx fields to App - Spawn run_browse() with progress/approval channels on /browse dispatch - Poll progress channel each frame: step updates, nudges, completion - Poll approval channel: surface ApprovalPrompt as a modal - Approval modal key handling: A=approve, D=deny (blocks other input) - Centered approval modal in render.rs matching permission dialog style Co-Authored-By: Arch Linux --- src/tui/app.rs | 10 +++ src/tui/render.rs | 83 ++++++++++++++++++++++++ src/tui/run.rs | 160 +++++++++++++++++++++++++++++++--------------- 3 files changed, 202 insertions(+), 51 deletions(-) diff --git a/src/tui/app.rs b/src/tui/app.rs index 4c37cdb..3491bfb 100644 --- a/src/tui/app.rs +++ b/src/tui/app.rs @@ -678,6 +678,13 @@ pub struct App { /// Active file-watch state. `None` when no watcher is running. /// Dropping this stops watching. pub watcher: Option, + + /// Active browse approval prompt awaiting user input (A=approve, D=deny). + pub browse_approval: Option, + /// Receiver for browse progress events (active during a /browse run). + pub browse_progress_rx: Option>, + /// Receiver for browse approval prompts. + pub browse_approval_rx: Option>, } /// Format a raw model ID into a human-readable name like "Sonnet 4.6". 
@@ -808,6 +815,9 @@ impl App { cost_tracker: crate::cost::CostTracker::new(), browser_session: None, watcher: None, + browse_approval: None, + browse_progress_rx: None, + browse_approval_rx: None, } } diff --git a/src/tui/render.rs b/src/tui/render.rs index b5ef945..13ce70d 100644 --- a/src/tui/render.rs +++ b/src/tui/render.rs @@ -139,6 +139,8 @@ pub fn draw(f: &mut Frame, app: &mut App) { if app.overlay.is_some() { draw_overlay(f, area, app, tc); + } else if app.browse_approval.is_some() { + draw_browse_approval(f, area, app, tc); } else if app.pending_permission.is_some() { draw_permission(f, area, app, tc); } else if app.pending_user_question.is_some() { @@ -879,6 +881,87 @@ fn context_window_for_model(model: &str) -> u64 { } } +// ── Browse approval dialog ──────────────────────────────────────────────────── + +fn draw_browse_approval(f: &mut Frame, area: Rect, app: &App, tc: ThemeColors) { + let Some(prompt) = &app.browse_approval else { + return; + }; + + let popup_w = (area.width * 6 / 10).max(50).min(area.width); + let popup_h = 12_u16.max(8).min(area.height); + let x = area.x + (area.width.saturating_sub(popup_w)) / 2; + let y = area.y + (area.height.saturating_sub(popup_h)) / 2; + let popup = Rect { + x, + y, + width: popup_w, + height: popup_h, + }; + + f.render_widget(Clear, popup); + + let block = Block::default() + .borders(Borders::ALL) + .border_style(Style::default().fg(Color::Yellow)) + .title(Span::styled( + " Browse Approval ", + Style::default() + .fg(Color::Yellow) + .add_modifier(Modifier::BOLD), + )); + let inner = block.inner(popup); + f.render_widget(block, popup); + + let mut lines = vec![ + Line::raw(""), + Line::from(Span::styled( + " ⚠ Browse approval required", + Style::default() + .fg(Color::Yellow) + .add_modifier(Modifier::BOLD), + )), + Line::raw(""), + Line::from(Span::styled( + format!(" Action: {}", prompt.tool_name), + Style::default().fg(Color::White), + )), + Line::from(Span::styled( + format!(" Target: {}", 
prompt.target_text), + Style::default().fg(Color::White), + )), + Line::from(Span::styled( + format!(" URL: {}", prompt.url), + Style::default().fg(Color::White), + )), + Line::from(Span::styled( + format!(" Reason: {}", prompt.reason), + Style::default().fg(Color::White), + )), + Line::raw(""), + ]; + lines.push(Line::from(vec![ + Span::styled(" [", Style::default().fg(Color::DarkGray)), + Span::styled( + "A", + Style::default() + .fg(tc.assistant) + .add_modifier(Modifier::BOLD), + ), + Span::styled("]pprove [", Style::default().fg(Color::DarkGray)), + Span::styled( + "D", + Style::default().fg(Color::Red).add_modifier(Modifier::BOLD), + ), + Span::styled("]eny", Style::default().fg(Color::DarkGray)), + ])); + + f.render_widget( + Paragraph::new(Text::from(lines)).wrap(Wrap { trim: true }), + inner, + ); +} + // ── Permission dialog ───────────────────────────────────────────────────────── fn draw_permission(f: &mut Frame, area: Rect, app: &App, tc: ThemeColors) { diff --git a/src/tui/run.rs b/src/tui/run.rs index ff1de3f..a054b28 100644 --- a/src/tui/run.rs +++ b/src/tui/run.rs @@ -871,6 +871,61 @@ async fn run_loop(mut config: Config, resume_id: Option) -> Result<()> { }); } + // ── Poll browse progress events ────────────────────────────────────── + if let Some(mut rx) = app.browse_progress_rx.take() { + let mut done = false; + while let Ok(event) = rx.try_recv() { + use crate::browser::browse_loop::BrowseProgress; + match event { + BrowseProgress::Started { .. } => { + // Already shown at dispatch time + } + BrowseProgress::Step { n, action, target } => { + app.entries.push(ChatEntry::system(format!( + " Step {n}: {action} {target}" + ))); + app.scroll_to_bottom(); + } + BrowseProgress::Nudge { level, text } => { + app.entries.push(ChatEntry::system(format!( + " ⚠ Nudge L{level}: {text}" + ))); + app.scroll_to_bottom(); + } + BrowseProgress::ApprovalNeeded { .. 
} => { + // Handled via approval_rx below + } + BrowseProgress::Completed(result) => { + let icon = if result.achieved { "✅" } else { "⚠" }; + app.entries.push(ChatEntry::system(format!( + "{icon} /browse done ({:?}): {}", result.reason, result.summary + ))); + app.scroll_to_bottom(); + app.finish_loading(); + // Clean up approval channel too + app.browse_approval_rx = None; + done = true; + break; + } + } + } + if !done { + // Put the receiver back — run is still in progress + app.browse_progress_rx = Some(rx); + } + } + + // Poll browse approval prompts + if let Some(mut rx) = app.browse_approval_rx.take() { + if let Ok(prompt) = rx.try_recv() { + app.browse_approval = Some(prompt); + } + // Put back if browse is still running + if app.browse_progress_rx.is_some() || app.browse_approval.is_some() { + app.browse_approval_rx = Some(rx); + } + } + // Uses cached term size — no syscall per frame; updated on Resize events. { let needed = viewport_height(&app, last_term_cols, last_term_rows); @@ -1524,6 +1579,28 @@ async fn handle_key(ctx: KeyCtx<'_>) -> Result<()> { return Ok(()); } + // Browse approval dialog takes priority after permission dialog + if app.browse_approval.is_some() { + match key.code { + Char('a') | Char('A') => { + if let Some(prompt) = app.browse_approval.take() { + let _ = prompt.reply.send(true); + app.entries.push(ChatEntry::system(" ✓ Approved")); + app.scroll_to_bottom(); + } + } + Char('d') | Char('D') => { + if let Some(prompt) = app.browse_approval.take() { + let _ = prompt.reply.send(false); + app.entries.push(ChatEntry::system(" ✗ Denied")); + app.scroll_to_bottom(); + } + } + _ => {} // ignore other keys while prompt is active + } + return Ok(()); + } + // AskUser dialog takes priority after permission dialog if let Some(ref mut q) = app.pending_user_question { match key.code { @@ -3865,63 +3942,44 @@ async fn handle_key(ctx: KeyCtx<'_>) -> Result<()> { app.entries.push(ChatEntry::system(msg)); } CommandAction::Browse { goal, policy, 
max_steps } => { - use crate::browser::browse_loop::BrowsePolicy; - let policy_label = match policy { - BrowsePolicy::Yolo => " [yolo]", - BrowsePolicy::Ask => " [ask]", - BrowsePolicy::Pattern => "", - }; - let steps_note = match max_steps { - Some(n) => format!("Limit yourself to at most {n} browser steps.\n"), - None => String::new(), - }; - let steps_label = match max_steps { - Some(n) => format!(", max {n} steps"), - None => String::new(), - }; + let max = max_steps.unwrap_or(config.browse_max_steps); app.entries.push(ChatEntry::system(format!( - "Browser agent{policy_label} starting{steps_label}: {goal}" + "🌐 /browse started — goal: {goal} (max {max} steps, policy: {policy:?})" ))); - app.entries.push(ChatEntry::user(input.clone())); app.scroll_to_bottom(); app.start_loading(); - let prompt = format!( - "You are an autonomous browser agent.\n\ - Goal: {goal}\n\ - Policy:{policy_label}\n\ - {steps_note}\ - Use browser_navigate, browser_click, browser_fill_form, \ - browser_snapshot, and related browser tools to accomplish the goal. \ - Report your progress and final result." 
- ); - let mut snapshot = messages.clone(); - snapshot.push(Message { - role: Role::User, - content: vec![ContentBlock::Text { text: prompt }], - }); - let c2 = client.clone(); - let tvec = tools.to_vec(); + + // Create channels for progress events and approval prompts + let (progress_tx, progress_rx) = tokio::sync::mpsc::channel(64); + let (approval_tx, approval_rx) = tokio::sync::mpsc::channel(4); + app.browse_progress_rx = Some(progress_rx); + app.browse_approval_rx = Some(approval_rx); + + // Shared current-URL state + let current_url = std::sync::Arc::new(tokio::sync::Mutex::new(String::new())); + let cfg = config.clone(); - let tx2 = tx.clone(); - let sp = system_prompt.clone(); - let ps = perm_state.clone(); - let pm = app.plan_mode; - let sid3 = session.id.clone(); - let handle = tokio::spawn(async move { - run_api_task(ApiTask { - client: c2, - tools: tvec, - messages: snapshot, - config: cfg, - perm_state: ps, - system_prompt: sp, - tx: tx2, - plan_mode: pm, - session_id: sid3, - }) - .await; + let all_tools = tools.to_vec(); + + let browse_req = crate::browser::browse_loop::BrowseRequest { + goal, + policy, + max_steps: max, + voice: false, + }; + tokio::spawn(async move { + let result = crate::browser::browse_loop::run_browse( + browse_req, + &cfg, + all_tools, + current_url, + progress_tx, + approval_tx, + ).await; + if let Err(e) = result { + eprintln!("Browse error: {e}"); + } }); - app.api_task = Some(handle.abort_handle()); } CommandAction::BrowseUrl(url) => { // Ship a prompt to the agent loop — it will invoke the shared From 76af64381cdaf81f020a3102dce3f5ce2dcc6fd4 Mon Sep 17 00:00:00 2001 From: Yeti Paw <22755327+ForkedInTime@users.noreply.github.com> Date: Wed, 15 Apr 2026 22:01:54 -0700 Subject: [PATCH 10/16] feat(browser): --yolo first-run acknowledgment Co-Authored-By: Arch Linux --- src/browser/mod.rs | 1 + src/browser/yolo_ack.rs | 43 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 
src/browser/yolo_ack.rs diff --git a/src/browser/mod.rs b/src/browser/mod.rs index cbb9588..c3dfc7a 100644 --- a/src/browser/mod.rs +++ b/src/browser/mod.rs @@ -8,6 +8,7 @@ pub mod extraction; pub mod loop_detector; pub mod middleware; pub mod snapshot; +pub mod yolo_ack; use anyhow::{Result, bail}; use cdp::CdpClient; diff --git a/src/browser/yolo_ack.rs b/src/browser/yolo_ack.rs new file mode 100644 index 0000000..5d40797 --- /dev/null +++ b/src/browser/yolo_ack.rs @@ -0,0 +1,43 @@ +//! First-run --yolo acknowledgment. +//! +//! Writes a timestamp+version file to $XDG_STATE_HOME/rustyclaw/yolo-ack +//! on first --yolo use. Subsequent runs are silent. + +use std::fs; +use std::path::PathBuf; +use std::time::{SystemTime, UNIX_EPOCH}; + +const VERSION: &str = env!("CARGO_PKG_VERSION"); + +fn ack_path() -> PathBuf { + let state_home = std::env::var("XDG_STATE_HOME") + .map(PathBuf::from) + .unwrap_or_else(|_| { + dirs::home_dir() + .unwrap_or_else(|| { + std::env::var("HOME") + .map(PathBuf::from) + .unwrap_or_default() + }) + .join(".local/state") + }); + state_home.join("rustyclaw").join("yolo-ack") +} + +pub fn is_acknowledged() -> bool { + ack_path().exists() +} + +pub fn acknowledge() -> std::io::Result<()> { + let p = ack_path(); + if let Some(parent) = p.parent() { + fs::create_dir_all(parent)?; + } + // Format: raw Unix epoch seconds followed by the crate version (not ISO-8601; avoids a chrono dep) + let secs = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0); + let contents = format!("{secs} rustyclaw v{VERSION}\n"); + fs::write(p, contents) +} From 84b135233b3a2fa49d132278ae0c0a4c80f61eb9 Mon Sep 17 00:00:00 2001 From: Yeti Paw <22755327+ForkedInTime@users.noreply.github.com> Date: Wed, 15 Apr 2026 22:03:30 -0700 Subject: [PATCH 11/16] feat(sdk): browse/start request + progress/approval/completed notifications Co-Authored-By: Arch Linux --- src/sdk/mod.rs | 8 +++++++- src/sdk/protocol.rs | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files
changed, 50 insertions(+), 1 deletion(-) diff --git a/src/sdk/mod.rs b/src/sdk/mod.rs index 2c08c89..c7fd545 100644 --- a/src/sdk/mod.rs +++ b/src/sdk/mod.rs @@ -293,7 +293,8 @@ impl SdkServer { | SdkRequest::TurnInterrupt { id, .. } | SdkRequest::SessionResume { id, .. } | SdkRequest::SessionExport { id, .. } - | SdkRequest::CostReport { id, .. } => { + | SdkRequest::CostReport { id, .. } + | SdkRequest::BrowseStart { id, .. } => { transport .send_response(SdkResponse::Error { id, @@ -302,6 +303,11 @@ impl SdkServer { }) .await?; } + + // BrowseApprovalReply has no request id — it's a fire-and-forget host reply. + SdkRequest::BrowseApprovalReply { .. } => { + // Phase B: route to the waiting browse session's approval channel. + } } Ok(()) diff --git a/src/sdk/protocol.rs b/src/sdk/protocol.rs index ba0de80..c729f38 100644 --- a/src/sdk/protocol.rs +++ b/src/sdk/protocol.rs @@ -200,6 +200,25 @@ pub enum SdkRequest { #[serde(rename = "health/check")] HealthCheck { id: String }, + + #[serde(rename = "browse/start")] + BrowseStart { + id: String, + goal: String, + #[serde(default)] + policy: crate::browser::browse_loop::BrowsePolicy, + #[serde(default)] + max_steps: Option, + #[serde(default)] + yolo_ack: bool, + }, + + #[serde(rename = "browse/approval_reply")] + BrowseApprovalReply { + session_id: String, + step: u32, + approved: bool, + }, } // ── Responses (RustyClaw → Host, correlated by ID) ────────────────────────── @@ -355,4 +374,28 @@ pub enum SdkNotification { code: String, message: String, }, + + #[serde(rename = "browse/progress")] + BrowseProgress { + session_id: String, + step: u32, + action: String, + target: String, + }, + + #[serde(rename = "browse/approval_needed")] + BrowseApprovalNeeded { + session_id: String, + step: u32, + tool_name: String, + target_text: String, + url: String, + reason: String, + }, + + #[serde(rename = "browse/completed")] + BrowseCompleted { + session_id: String, + result: crate::browser::browse_loop::BrowseResult, + }, } 
From b9ed421dfc076fd08fabdfee3d9cc83139a73ae9 Mon Sep 17 00:00:00 2001 From: Yeti Paw <22755327+ForkedInTime@users.noreply.github.com> Date: Wed, 15 Apr 2026 22:05:37 -0700 Subject: [PATCH 12/16] feat(sdk): browse/start dispatcher Wire BrowseStart SDK request to run_browse(): yolo_ack validation, progress/approval NDJSON forwarding, active session tracking. Co-Authored-By: Arch Linux --- src/sdk/mod.rs | 130 +++++++++++++++++++++++++++++++++++++++++++- src/sdk/protocol.rs | 6 ++ 2 files changed, 134 insertions(+), 2 deletions(-) diff --git a/src/sdk/mod.rs b/src/sdk/mod.rs index c7fd545..7b0b802 100644 --- a/src/sdk/mod.rs +++ b/src/sdk/mod.rs @@ -11,6 +11,7 @@ pub mod transport; pub use protocol::*; +use crate::browser::browse_loop::{BrowsePolicy, BrowseProgress, BrowseRequest, run_browse}; use crate::config::Config; use crate::tools::all_tools; use anyhow::Result; @@ -288,13 +289,138 @@ impl SdkServer { } } + // ── Browse Start ──────────────────────────────────────── + SdkRequest::BrowseStart { + id, + goal, + policy, + max_steps, + yolo_ack, + } => { + // Validate yolo_ack requirement + if policy == BrowsePolicy::Yolo && !yolo_ack { + transport + .send_response(SdkResponse::Error { + id, + code: "yolo_ack_required".into(), + message: "browse/start with policy=yolo requires yolo_ack=true".into(), + }) + .await?; + return Ok(()); + } + + // Generate a session ID for this browse run + let session_id = format!( + "browse-{}", + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_millis() + ); + + // Clone config and build tools + let cfg = config.clone(); + let all_tools_list = all_tools(&cfg); + + // Channels: progress events from browse loop → notif forwarding task + let (progress_tx, mut progress_rx) = tokio::sync::mpsc::channel::(64); + // Channel: approval prompts from browse loop → host + let (approval_tx, mut approval_rx) = + tokio::sync::mpsc::channel::(4); + + let current_url = 
Arc::new(tokio::sync::Mutex::new(String::new())); + + // Respond immediately with session_id + transport + .send_response(SdkResponse::BrowseStarted { + id, + session_id: session_id.clone(), + }) + .await?; + + active_sessions.fetch_add(1, Ordering::Relaxed); + + // Forward BrowseProgress events as SdkNotification NDJSON + let fwd_notif_tx = notif_tx.clone(); + let fwd_sid = session_id.clone(); + tokio::spawn(async move { + while let Some(event) = progress_rx.recv().await { + let notif = match event { + BrowseProgress::Step { n, action, target } => { + SdkNotification::BrowseProgress { + session_id: fwd_sid.clone(), + step: n, + action, + target, + } + } + BrowseProgress::Completed(result) => { + SdkNotification::BrowseCompleted { + session_id: fwd_sid.clone(), + result, + } + } + BrowseProgress::Nudge { .. } | BrowseProgress::Started { .. } => { + // Not surfaced as SDK notifications + continue; + } + BrowseProgress::ApprovalNeeded { .. } => { + // Handled via approval_rx below + continue; + } + }; + let _ = fwd_notif_tx.send(notif); + } + }); + + // Forward ApprovalPrompt events as BrowseApprovalNeeded notifications + let appr_notif_tx = approval_out_tx.clone(); + let appr_sid = session_id.clone(); + tokio::spawn(async move { + while let Some(prompt) = approval_rx.recv().await { + let notif = SdkNotification::BrowseApprovalNeeded { + session_id: appr_sid.clone(), + step: prompt.step, + tool_name: prompt.tool_name, + target_text: prompt.target_text, + url: prompt.url, + reason: prompt.reason, + }; + let _ = appr_notif_tx.send(notif); + // Note: prompt.reply is dropped here — the gate will treat + // an unreceived reply as a deny in Phase A. Phase B will + // wire BrowseApprovalReply to fulfill this oneshot. 
+ } + }); + + // Spawn the browse run + let browse_req = BrowseRequest { + goal, + policy, + max_steps: max_steps.unwrap_or(cfg.browse_max_steps), + voice: false, + }; + let session_counter = Arc::clone(active_sessions); + tokio::spawn(async move { + let _ = run_browse( + browse_req, + &cfg, + all_tools_list, + current_url, + progress_tx, + approval_tx, + ) + .await; + session_counter.fetch_sub(1, Ordering::Relaxed); + }); + } + // ── Not yet implemented (Phase A stubs) ───────────────── SdkRequest::TurnStart { id, .. } | SdkRequest::TurnInterrupt { id, .. } | SdkRequest::SessionResume { id, .. } | SdkRequest::SessionExport { id, .. } - | SdkRequest::CostReport { id, .. } - | SdkRequest::BrowseStart { id, .. } => { + | SdkRequest::CostReport { id, .. } => { transport .send_response(SdkResponse::Error { id, diff --git a/src/sdk/protocol.rs b/src/sdk/protocol.rs index c729f38..3cb01b2 100644 --- a/src/sdk/protocol.rs +++ b/src/sdk/protocol.rs @@ -268,6 +268,12 @@ pub enum SdkResponse { uptime_seconds: u64, }, + #[serde(rename = "browse/started")] + BrowseStarted { + id: String, + session_id: String, + }, + #[serde(rename = "error")] Error { id: String, From 0968dbf2831766d4776d9b47f37918bc5a56a664 Mon Sep 17 00:00:00 2001 From: Yeti Paw <22755327+ForkedInTime@users.noreply.github.com> Date: Wed, 15 Apr 2026 22:07:53 -0700 Subject: [PATCH 13/16] feat(cli): rustyclaw browse subcommand Co-Authored-By: Arch Linux --- src/main.rs | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/src/main.rs b/src/main.rs index e5c0cfe..1a9a0a7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -308,6 +308,20 @@ enum Commands { Doctor, /// Self-update to the latest release from GitHub Update, + /// Run the autonomous browser agent + Browse { + /// Goal for the browser agent + goal: Vec, + /// Skip all approval prompts (requires prior acknowledgment) + #[arg(long)] + yolo: bool, + /// Prompt for approval on every destructive action + 
#[arg(long)] + ask: bool, + /// Maximum number of steps (default: 50) + #[arg(long, default_value = "50")] + max_steps: u32, + }, } #[derive(Subcommand)] @@ -607,6 +621,86 @@ async fn main() -> Result<()> { Commands::Update => { return self_update().await; } + Commands::Browse { goal, yolo, ask, max_steps } => { + use crate::browser::browse_loop::{BrowsePolicy, BrowseRequest, BrowseProgress, run_browse}; + use tokio::sync::mpsc; + + let goal_str = goal.join(" "); + if goal_str.trim().is_empty() { + eprintln!("Error: browse requires a goal argument"); + std::process::exit(1); + } + + // Determine policy: --yolo > --ask > default (pattern-match) + let policy = if *yolo { + // First-time --yolo: write acknowledgment file if not yet present + if !crate::browser::yolo_ack::is_acknowledged() { + eprintln!( + "Warning: --yolo disables all approval prompts. \ + The browser agent will execute destructive actions without confirmation.\n\ + To proceed, this acknowledgment is recorded in your XDG state directory." 
+ ); + if let Err(e) = crate::browser::yolo_ack::acknowledge() { + eprintln!("Warning: could not write yolo-ack file: {e}"); + } + } + BrowsePolicy::Yolo + } else if *ask { + BrowsePolicy::Ask + } else { + BrowsePolicy::Pattern + }; + + let req = BrowseRequest { + goal: goal_str.clone(), + policy, + max_steps: *max_steps, + voice: false, + }; + + let config = Config::load()?; + let tools = all_tools(&config); + let current_url = std::sync::Arc::new(tokio::sync::Mutex::new(String::new())); + + let (progress_tx, mut progress_rx) = mpsc::channel::(64); + // Approval channel: in CLI mode each prompt is answered interactively on stdin; anything other than y/yes denies + let (approval_tx, mut approval_rx) = mpsc::channel::(8); + + // Spawn task to handle approval prompts: prompt on stderr, read from stdin + let _approval_task = tokio::spawn(async move { + use std::io::Write; + while let Some(prompt) = approval_rx.recv().await { + eprint!( + "Approval needed [step {}]: {} on '{}' at {}\n Reason: {}\nAllow? [y/N] ", + prompt.step, prompt.tool_name, prompt.target_text, prompt.url, prompt.reason + ); + let _ = std::io::stderr().flush(); + let mut line = String::new(); + let allowed = if std::io::stdin().read_line(&mut line).is_ok() { + matches!(line.trim().to_lowercase().as_str(), "y" | "yes") + } else { + false + }; + let _ = prompt.reply.send(allowed); + } + }); + + // Spawn task to print progress as NDJSON + let progress_task = tokio::spawn(async move { + while let Some(event) = progress_rx.recv().await { + if let Ok(json) = serde_json::to_string(&event) { + println!("{json}"); + } + } + }); + + let result = run_browse(req, &config, tools, current_url, progress_tx, approval_tx).await?; + progress_task.await.ok(); + + // Print final result as JSON + println!("{}", serde_json::to_string_pretty(&result)?); + return Ok(()); + } } } From 9ded378d1770f142bdfdf8cd4beac52b0b6c2ff5 Mon Sep 17 00:00:00 2001 From: Yeti Paw <22755327+ForkedInTime@users.noreply.github.com> Date: Wed, 15 Apr 2026 22:12:49
-0700 Subject: [PATCH 14/16] feat(voice): prefix routing + milestone TTS + approval listening Co-Authored-By: Arch Linux --- src/tui/app.rs | 6 ++ src/tui/events.rs | 2 + src/tui/run.rs | 40 ++++++++++++- src/voice.rs | 107 +++++++++++++++++++++++++++++++++++ tests/voice_routing_tests.rs | 27 +++++++++ 5 files changed, 181 insertions(+), 1 deletion(-) create mode 100644 tests/voice_routing_tests.rs diff --git a/src/tui/app.rs b/src/tui/app.rs index 3491bfb..4391721 100644 --- a/src/tui/app.rs +++ b/src/tui/app.rs @@ -1378,6 +1378,12 @@ impl App { self.voice_task = None; self.voice_stop_tx = None; } + AppEvent::VoiceBrowse(_goal) => { + // Recording state cleared here; run.rs handles the Browse dispatch. + self.voice_recording = false; + self.voice_task = None; + self.voice_stop_tx = None; + } AppEvent::PluginInstallDone { success, message } => { if success { self.entries.push(ChatEntry::system(message)); diff --git a/src/tui/events.rs b/src/tui/events.rs index a69e491..1640a6b 100644 --- a/src/tui/events.rs +++ b/src/tui/events.rs @@ -50,6 +50,8 @@ pub enum AppEvent { // through the AppEvent channel. /// Voice transcription completed — insert text into input buffer VoiceTranscription(String), + /// Voice transcription matched a browse prefix — dispatch as /browse + VoiceBrowse(String), /// Plugin install completed (success or failure) PluginInstallDone { success: bool, message: String }, /// GitHub upgrade check completed diff --git a/src/tui/run.rs b/src/tui/run.rs index a054b28..dd6d61e 100644 --- a/src/tui/run.rs +++ b/src/tui/run.rs @@ -1107,6 +1107,39 @@ async fn run_loop(mut config: Config, resume_id: Option) -> Result<()> { summary_len, }); } + AppEvent::VoiceBrowse(ref goal) => { + // Voice always uses Pattern policy — never Yolo (too easy + // to mis-transcribe destructive commands). 
+ let goal_str = goal.clone(); + app.apply(ev); + let max = config.browse_max_steps; + app.entries.push(ChatEntry::system(format!( + "🌐 /browse (voice) — goal: {goal_str} (max {max} steps, policy: Pattern)" + ))); + app.scroll_to_bottom(); + app.start_loading(); + let (progress_tx, progress_rx) = tokio::sync::mpsc::channel(64); + let (approval_tx, approval_rx) = tokio::sync::mpsc::channel(4); + app.browse_progress_rx = Some(progress_rx); + app.browse_approval_rx = Some(approval_rx); + let current_url = std::sync::Arc::new(tokio::sync::Mutex::new(String::new())); + let cfg = config.clone(); + let all_tools = tools.to_vec(); + let browse_req = crate::browser::browse_loop::BrowseRequest { + goal: goal_str, + policy: crate::browser::browse_loop::BrowsePolicy::Pattern, + max_steps: max, + voice: true, + }; + tokio::spawn(async move { + let result = crate::browser::browse_loop::run_browse( + browse_req, &cfg, all_tools, current_url, progress_tx, approval_tx, + ).await; + if let Err(e) = result { + eprintln!("Voice browse error: {e}"); + } + }); + } other => app.apply(other), } match rx.try_recv() { @@ -1719,7 +1752,12 @@ async fn handle_key(ctx: KeyCtx<'_>) -> Result<()> { )); } Ok(text) => { - let _ = tx2.send(AppEvent::VoiceTranscription(text)); + if crate::voice::voice_routes_to_browse(&text) { + let goal = crate::voice::strip_browse_prefix(&text); + let _ = tx2.send(AppEvent::VoiceBrowse(goal)); + } else { + let _ = tx2.send(AppEvent::VoiceTranscription(text)); + } } Err(e) => { let _ = diff --git a/src/voice.rs b/src/voice.rs index d288291..76db13d 100644 --- a/src/voice.rs +++ b/src/voice.rs @@ -770,6 +770,113 @@ async fn play_wav( Ok(()) } +// ── Browse prefix routing ───────────────────────────────────────────────────── + +/// Decide whether a voice transcript should enter autonomous browse mode. +/// Only unambiguous prefixes route to /browse; "find" is deliberately excluded +/// to avoid collisions with codebase/chat "find" intent. 
+pub fn voice_routes_to_browse(transcript: &str) -> bool { + let t = transcript.trim().to_lowercase(); + t.starts_with("browse ") + || t.starts_with("browser ") + || t.starts_with("web ") + || t.starts_with("go to ") + || t.starts_with("open ") + || t.starts_with("shop for ") + || t.starts_with("book ") + || t.starts_with("order ") +} + +/// Strip the browse prefix, leaving the goal text. +pub fn strip_browse_prefix(transcript: &str) -> String { + let t = transcript.trim(); + let lower = t.to_lowercase(); + for prefix in &[ + "browse ", "browser ", "web ", "go to ", "open ", "shop for ", "book ", "order ", + ] { + if lower.starts_with(prefix) { + return t[prefix.len()..].to_string(); + } + } + t.to_string() +} + +// ── Browse milestone TTS ────────────────────────────────────────────────────── + +pub enum BrowseMilestone { + Start, + GateTrip, + End, +} + +/// Speak one of the three browse milestones via TTS. +/// Only called when voice == true. Fire-and-forget: errors are silently ignored +/// so a missing TTS engine never blocks the browse loop. +pub async fn speak_browse_milestone(milestone: BrowseMilestone, text: &str) { + let phrase = match milestone { + BrowseMilestone::Start => format!("Searching for {text}"), + BrowseMilestone::GateTrip => text.to_string(), + BrowseMilestone::End => text.to_string(), + }; + // Create a dummy stop channel — milestone phrases are short; we never need + // to cancel them mid-word. + let (_stop_tx, stop_rx) = tokio::sync::oneshot::channel::<()>(); + let _ = speak(&phrase, None, stop_rx).await; +} + +// ── Voice approval listener ─────────────────────────────────────────────────── + +/// Listen for a voice approve/deny reply during an approval prompt. +/// Returns true if the transcript contains "confirm", "yes", "approve" or "ok" as a substring (so words like "book" or "eyes" also match), false otherwise. +/// Always records for the full `timeout_secs` before transcribing; returns false if no recorder is found or transcription fails. +pub async fn await_voice_approval(timeout_secs: u64) -> bool { + // Requires a recorder to be available; return deny if none found.
+ let backend = match find_recorder() { + Some(b) => b, + None => return false, + }; + + let (stop_tx, stop_rx) = tokio::sync::oneshot::channel::<()>(); + + // Record for at most timeout_secs then stop automatically. + let record_task = tokio::spawn(async move { + match start_recording(&backend).await { + Ok(mut child) => { + tokio::select! { + _ = stop_rx => { + if let Some(pid) = child.id() { + let _ = tokio::process::Command::new("kill") + .args(["-2", &pid.to_string()]) + .status() + .await; + } + let _ = child.wait().await; + } + _ = child.wait() => {} + } + } + Err(_) => {} + } + }); + + // Wait for the timeout then signal the recorder to stop. + tokio::time::sleep(std::time::Duration::from_secs(timeout_secs)).await; + let _ = stop_tx.send(()); + let _ = record_task.await; + + // Transcribe and check for affirmative keywords. + match transcribe(None, None).await { + Ok(text) => { + let lower = text.trim().to_lowercase(); + lower.contains("confirm") + || lower.contains("yes") + || lower.contains("approve") + || lower.contains("ok") + } + Err(_) => false, + } +} + // ── Status display ──────────────────────────────────────────────────────────── pub fn voice_status(enabled: bool, tts_enabled: bool) -> String { diff --git a/tests/voice_routing_tests.rs b/tests/voice_routing_tests.rs new file mode 100644 index 0000000..74a5dd5 --- /dev/null +++ b/tests/voice_routing_tests.rs @@ -0,0 +1,27 @@ +use rustyclaw::voice::{voice_routes_to_browse, strip_browse_prefix}; + +#[test] +fn routes_browse_prefix() { + assert!(voice_routes_to_browse("browse find the flight")); + assert!(voice_routes_to_browse("Browser open flights")); + assert!(voice_routes_to_browse("web find something")); + assert!(voice_routes_to_browse("go to amazon.com")); + assert!(voice_routes_to_browse("book a flight")); + assert!(voice_routes_to_browse("shop for coffee")); + assert!(voice_routes_to_browse("order pizza")); +} + +#[test] +fn does_not_route_plain_find() { + 
assert!(!voice_routes_to_browse("find the bug in my code")); + assert!(!voice_routes_to_browse("what's the capital of france")); + assert!(!voice_routes_to_browse("search for todo items")); + assert!(!voice_routes_to_browse("look up documentation")); +} + +#[test] +fn strip_preserves_case() { + assert_eq!(strip_browse_prefix("browse Find Tokyo flights"), "Find Tokyo flights"); + assert_eq!(strip_browse_prefix("Book a Hotel in Paris"), "a Hotel in Paris"); + assert_eq!(strip_browse_prefix("GO TO example.com"), "example.com"); +} From 38775a49e8e6caf09d4abbab42a0349908e69521 Mon Sep 17 00:00:00 2001 From: Yeti Paw <22755327+ForkedInTime@users.noreply.github.com> Date: Wed, 15 Apr 2026 22:22:12 -0700 Subject: [PATCH 15/16] test(browser): integration suite for browse agent middleware + types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 13 tests covering: approval gate (standalone + middleware), loop detector (nudge firing, L3 stop, navigate reset), middleware composition (yolo bypass, channel-closed denial), type serialization (BrowseResult, BrowsePolicy, BrowseReason roundtrip), voice prefix routing. No network, no Chrome — all in-process with channels. Co-Authored-By: Arch Linux --- tests/browse_integration.rs | 199 ++++++++++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100644 tests/browse_integration.rs diff --git a/tests/browse_integration.rs b/tests/browse_integration.rs new file mode 100644 index 0000000..1b62d3f --- /dev/null +++ b/tests/browse_integration.rs @@ -0,0 +1,199 @@ +//! Integration tests for the autonomous browser agent. +//! +//! Tests middleware composition, sentinel parsing, type serialization, +//! stagnation termination, and denial-counter termination. +//! No network, no Chrome — all in-process with channels. 
+
+use rustyclaw::browser::approval_gate::{ApprovalGate, ApprovalGateMiddleware, GateContext, GateVerdict};
+use rustyclaw::browser::browse_loop::{BrowsePolicy, BrowseReason, BrowseResult};
+use rustyclaw::browser::loop_detector::LoopDetectorMiddleware;
+use rustyclaw::browser::middleware::{MiddlewareVerdict, ToolMiddleware};
+use serde_json::json;
+use std::sync::atomic::AtomicU32;
+use std::sync::Arc;
+
+// ── Approval gate (standalone) ──────────────────────────────────────────────
+
+#[test]
+fn gate_allows_read_only_tools() {
+    let gate = ApprovalGate::default();
+    let ctx = GateContext {
+        tool_name: "browser_navigate".into(),
+        url: "https://evil.com/checkout".into(),
+        target_text: "Go".into(),
+        ..Default::default()
+    };
+    assert!(matches!(gate.check(&ctx), GateVerdict::Allow));
+}
+
+#[test]
+fn gate_trips_on_checkout_url() {
+    let gate = ApprovalGate::default();
+    let ctx = GateContext {
+        tool_name: "browser_click".into(),
+        url: "https://shop.example.com/checkout".into(),
+        target_text: "Continue".into(),
+        ..Default::default()
+    };
+    assert!(matches!(gate.check(&ctx), GateVerdict::RequireConfirmation { .. }));
+}
+
+#[test]
+fn gate_ignores_article_about_checkout() {
+    let gate = ApprovalGate::default();
+    let ctx = GateContext {
+        tool_name: "browser_click".into(),
+        url: "https://blog.example.com/articles/checkout-guide".into(),
+        target_text: "Read More".into(),
+        ..Default::default()
+    };
+    assert!(matches!(gate.check(&ctx), GateVerdict::Allow));
+}
+
+// ── Loop detector middleware ────────────────────────────────────────────────
+
+#[tokio::test]
+async fn loop_detector_middleware_fires_nudge() {
+    let (nudge_tx, mut nudge_rx) = tokio::sync::mpsc::channel(32);
+    let mw = LoopDetectorMiddleware::new(nudge_tx);
+
+    // 3 identical after_tool calls → stagnation
+    for _ in 0..3 {
+        mw.after_tool("browser_click", "same page content").await;
+    }
+
+    let nudge = nudge_rx.try_recv().expect("should have received a nudge");
+    assert!(nudge.contains("different approach"));
+}
+
+#[tokio::test]
+async fn loop_detector_stops_at_level_three() {
+    let (nudge_tx, mut nudge_rx) = tokio::sync::mpsc::channel(32);
+    let mw = LoopDetectorMiddleware::new(nudge_tx);
+
+    // 9 identical results escalate through all 3 nudge levels: the first nudge fires after
+    // 3 identical after_tool calls, and check_stagnation keeps firing on every call after that
+    for _ in 0..9 {
+        mw.after_tool("browser_click", "same").await;
+    }
+
+    // Drain every nudge queued so far; the most recent one must be the terminal message
+    let mut nudges = Vec::new();
+    while let Ok(n) = nudge_rx.try_recv() {
+        nudges.push(n);
+    }
+    assert!(!nudges.is_empty(), "should have received nudges");
+    assert!(nudges.last().unwrap().contains("Stopping"), "last nudge should be terminal");
+
+    // After L3, before_tool should return Deny
+    let verdict = mw.before_tool("browser_click", &json!({})).await;
+    assert!(matches!(verdict, MiddlewareVerdict::Deny { .. }));
+    assert!(mw.is_stopped());
+}
+
+#[tokio::test]
+async fn loop_detector_resets_on_navigate() {
+    let (nudge_tx, mut nudge_rx) = tokio::sync::mpsc::channel(32);
+    let mw = LoopDetectorMiddleware::new(nudge_tx);
+
+    mw.after_tool("browser_click", "same").await;
+    mw.after_tool("browser_click", "same").await;
+    // Navigate resets the stagnation history
+    mw.after_tool("browser_navigate", "new page").await;
+    mw.after_tool("browser_click", "same").await;
+    // Only 1 click after reset — not enough for stagnation, so no nudge and no stop
+    assert!(nudge_rx.try_recv().is_err() && !mw.is_stopped(), "navigate should reset stagnation tracking");
+}
+
+// ── Approval gate middleware ────────────────────────────────────────────────
+
+#[tokio::test]
+async fn approval_middleware_allows_read_tools() {
+    let gate = ApprovalGate::default();
+    let current_url = Arc::new(tokio::sync::Mutex::new("https://example.com".into()));
+    let (approval_tx, _) = tokio::sync::mpsc::channel(4);
+    let step = Arc::new(AtomicU32::new(0));
+
+    let mw = ApprovalGateMiddleware::new(gate, BrowsePolicy::Pattern, current_url, approval_tx, step);
+
+    let verdict = mw.before_tool("browser_navigate", &json!({"url": "https://example.com"})).await;
+    assert!(matches!(verdict, MiddlewareVerdict::Allow));
+}
+
+#[tokio::test]
+async fn approval_middleware_yolo_allows_everything() {
+    let gate = ApprovalGate::default();
+    let current_url = Arc::new(tokio::sync::Mutex::new("https://shop.com/checkout".into()));
+    let (approval_tx, _) = tokio::sync::mpsc::channel(4);
+    let step = Arc::new(AtomicU32::new(0));
+
+    let mw = ApprovalGateMiddleware::new(gate, BrowsePolicy::Yolo, current_url, approval_tx, step);
+
+    let verdict = mw.before_tool("browser_click", &json!({"ref": "@e1"})).await;
+    assert!(matches!(verdict, MiddlewareVerdict::Allow));
+}
+
+#[tokio::test]
+async fn approval_middleware_denies_on_dropped_channel() {
+    let gate = ApprovalGate::default();
+    let current_url = Arc::new(tokio::sync::Mutex::new("https://shop.com/checkout".into()));
+    let (approval_tx, approval_rx) = tokio::sync::mpsc::channel(4);
+    let step = Arc::new(AtomicU32::new(0));
+
+    drop(approval_rx);
+
+    let mw = ApprovalGateMiddleware::new(gate, BrowsePolicy::Pattern, current_url, approval_tx, step);
+
+    let verdict = mw.before_tool("browser_click", &json!({"ref": "@e1"})).await;
+    assert!(matches!(verdict, MiddlewareVerdict::Deny { .. }));
+}
+
+// ── Type serialization ──────────────────────────────────────────────────────
+
+#[test]
+fn browse_result_roundtrip() {
+    let result = BrowseResult {
+        achieved: true,
+        summary: "Found cheapest flight: $847 United".into(),
+        reason: BrowseReason::Done,
+        steps_used: 12,
+        final_url: Some("https://flights.example.com/results".into()),
+    };
+    let json = serde_json::to_string(&result).unwrap();
+    let parsed: BrowseResult = serde_json::from_str(&json).unwrap();
+    assert!(parsed.achieved);
+    assert_eq!(parsed.reason, BrowseReason::Done);
+    assert_eq!(parsed.steps_used, 12);
+    assert!(parsed.summary.contains("$847"));
+}
+
+#[test]
+fn browse_policy_serializes_lowercase() {
+    assert_eq!(serde_json::to_string(&BrowsePolicy::Pattern).unwrap(), r#""pattern""#);
+    assert_eq!(serde_json::to_string(&BrowsePolicy::Yolo).unwrap(), r#""yolo""#);
+    assert_eq!(serde_json::to_string(&BrowsePolicy::Ask).unwrap(), r#""ask""#);
+}
+
+#[test]
+fn browse_reason_serializes_snake_case() {
+    assert_eq!(serde_json::to_string(&BrowseReason::StepCap).unwrap(), r#""step_cap""#);
+    assert_eq!(serde_json::to_string(&BrowseReason::BrowserCrashed).unwrap(), r#""browser_crashed""#);
+    assert_eq!(serde_json::to_string(&BrowseReason::UserDenied).unwrap(), r#""user_denied""#);
+}
+
+// ── Voice routing ───────────────────────────────────────────────────────────
+
+#[test]
+fn voice_routes_browse_not_find() {
+    use rustyclaw::voice::{voice_routes_to_browse, strip_browse_prefix};
+
+    assert!(voice_routes_to_browse("browse find flights to Tokyo"));
+    assert!(voice_routes_to_browse("book a hotel in Paris"));
+    assert!(voice_routes_to_browse("go to flights.google.com"));
+    assert!(!voice_routes_to_browse("find the bug in my code"));
+    assert!(!voice_routes_to_browse("search for todo items"));
+    assert!(!voice_routes_to_browse("what time is it"));
+
+    assert_eq!(strip_browse_prefix("browse Find flights"), "Find flights");
+    assert_eq!(strip_browse_prefix("Book a Hotel"), "a Hotel");
+}
From 3b2630bfc295b1db3f89ace94ab9b46523ea7cca Mon Sep 17 00:00:00 2001 From: Yeti Paw <22755327+ForkedInTime@users.noreply.github.com> Date: Wed, 15 Apr 2026 22:23:46 -0700 Subject: [PATCH 16/16] docs: ship autonomous browser agent in README + CLAUDE.md Co-Authored-By: Arch Linux --- CLAUDE.md | 4 +--- README.md | 7 ++++++- scripts/readme-lint.sh | 8 ++++++++ 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 5e46018..430fdb3 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -35,14 +35,12 @@ You are a 0.1% expert in computer science, systems programming, infrastructure, ### PHASE 2 (shipping now) - **Auto-fix loop (2026-04-10)** — Post-edit lint + tests + feedback-driven retries replace the old rollback revert. Anti-cheat protected. `autoFixLoop` in settings.json, `autoRollback` alias kept for backward compat. - **Auto git commits + /undo + /redo (2026-04-10)** — Per-turn working-tree snapshots on private shadow refs (`refs/rustyclaw/sessions/`). New `/undo`, `/redo`, `/autocommit` slash commands. Keeps 10 newest session refs with startup prune. Other tools with undo pollute history; RustyClaw's shadow refs are invisible to `git log`/`branch`/`status`. No competitor has `/redo`. +8. **Autonomous browser agent (2026-04-15)** — `/browse `, `rustyclaw browse`, `/voice` prefix routing. Goal-driven loop reuses the query_engine tool-use pipeline. 50-step cap, approval gate on destructive actions, loop_detector stagnation guard, milestone TTS for voice. SDK exposes `browse/start` + progress + approval + completed notifications. ### NEXT UP 6. **SDK/headless sidecar** — NDJSON stdio binary for editor embedding. Uncontested. 7. 
**Phase 2 robustness** — Diff review, self-update, shell completions. -### DEFERRED -- **Autonomous browser agent** — `/browse ` plan-act-evaluate loop with stagnation detection. Manual `browser_*` CDP tools ship today and cover the common cases. Previous scaffolding (`src/browser/loop_detector.rs`, `src/browser/planner.rs`) was removed 2026-04-12 to keep the codebase honest; re-design from scratch when ready. - ### THE PITCH "A single 19 MB static Rust binary that indexes your codebase, routes tasks to the cheapest model, runs parallel agents in worktrees, speaks in your voice, shows you every token spent, and works offline via Ollama. Sub-50ms startup. Zero dependencies. Zero flickering. XDG-compliant. AGENTS.md + CLAUDE.md." diff --git a/README.md b/README.md index edc0e95..fc05cd5 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,8 @@ rustyclaw | Model router | No | No | **Auto-route by task complexity** | | Parallel agents | No | No | **Git-worktree isolation** | | Voice I/O | No | No | **Whisper + XTTS v2 cloning** | -| Browser automation | External MCP server | No | **8 CDP tools, in the binary** | +| Browser automation | External MCP server | No | **9 CDP tools, in the binary** | +| Autonomous browser agent | No | No | **Goal-driven, 50-step cap, safety-gated** | | Auto-fix loop | No | No | **Post-edit lint + tests + retry** | | `/undo` · `/redo` | No | Partial (pollutes git log) | **Invisible shadow refs** | | OpenAI-compat providers | No | Partial | **9 providers, working tools** | @@ -141,6 +142,10 @@ Full tool use over Ollama's native format. Other Rust ports have had this broken Eight CDP-driven tools — `browser_navigate`, `browser_snapshot`, `browser_click`, `browser_fill`, `browser_screenshot`, `browser_get_text`, `browser_press_key`, `browser_wait` — shipped in the binary and enabled by default. Snapshots return a text tree with stable `@eN` element refs you can pass to click/fill. 
Works against any Chromium-based browser (Chrome, Chromium, Brave, Edge) you already have installed. No external automation server, no separate install. +### 🤖   Autonomous browser mode — `/browse <goal>` + +Give it a goal, it drives. `/browse find the cheapest flight SF to Tokyo on July 7` navigates, fills forms, scrolls, reads results, and speaks the answer. 50-step hard cap (configurable), destructive-action approval gate (pauses at payment / delete / OAuth / free-trial-autobill), stagnation detector (escalating nudges when the model is stuck). `rustyclaw browse "<goal>" --json` runs the same loop headless from scripts or CI. `/voice` with prefixes `browse | browser | web | go to | open | shop for | book | order` drives it hands-free with milestone TTS at start, gate trip, and end. + ### 🦀   Single 19 MB static binary No runtime. No dependencies. No post-install scripts. `scp` it to a server and run. Cross-compiled for `x86_64-linux-gnu`, `aarch64-linux-gnu`, and `x86_64-linux-musl` on every release. diff --git a/scripts/readme-lint.sh b/scripts/readme-lint.sh index 83347be..ecaeaba 100755 --- a/scripts/readme-lint.sh +++ b/scripts/readme-lint.sh @@ -49,6 +49,14 @@ else err " fix: edit README.md (table row + /model line) to say \"${provider_count} providers\"" fi +# ── 4. Autonomous browser claim ────────────────────────────────────────────── +if grep -qF "Autonomous browser agent" README.md; then + ok "README.md has 'Autonomous browser agent' row" +else + err "README.md missing 'Autonomous browser agent' row (expected after /browse ship)" + err " fix: add '| Autonomous browser agent | No | No | ...' to the comparison table" +fi + # ── Result ─────────────────────────────────────────────────────────────────── if [ "$fail" -eq 0 ]; then echo ""