diff --git a/CLAUDE.md b/CLAUDE.md index 5e46018..430fdb3 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -35,14 +35,12 @@ You are a 0.1% expert in computer science, systems programming, infrastructure, ### PHASE 2 (shipping now) - **Auto-fix loop (2026-04-10)** — Post-edit lint + tests + feedback-driven retries replace the old rollback revert. Anti-cheat protected. `autoFixLoop` in settings.json, `autoRollback` alias kept for backward compat. - **Auto git commits + /undo + /redo (2026-04-10)** — Per-turn working-tree snapshots on private shadow refs (`refs/rustyclaw/sessions/`). New `/undo`, `/redo`, `/autocommit` slash commands. Keeps 10 newest session refs with startup prune. Other tools with undo pollute history; RustyClaw's shadow refs are invisible to `git log`/`branch`/`status`. No competitor has `/redo`. +8. **Autonomous browser agent (2026-04-15)** — `/browse `, `rustyclaw browse`, `/voice` prefix routing. Goal-driven loop reuses the query_engine tool-use pipeline. 50-step cap, approval gate on destructive actions, loop_detector stagnation guard, milestone TTS for voice. SDK exposes `browse/start` + progress + approval + completed notifications. ### NEXT UP 6. **SDK/headless sidecar** — NDJSON stdio binary for editor embedding. Uncontested. 7. **Phase 2 robustness** — Diff review, self-update, shell completions. -### DEFERRED -- **Autonomous browser agent** — `/browse ` plan-act-evaluate loop with stagnation detection. Manual `browser_*` CDP tools ship today and cover the common cases. Previous scaffolding (`src/browser/loop_detector.rs`, `src/browser/planner.rs`) was removed 2026-04-12 to keep the codebase honest; re-design from scratch when ready. - ### THE PITCH "A single 19 MB static Rust binary that indexes your codebase, routes tasks to the cheapest model, runs parallel agents in worktrees, speaks in your voice, shows you every token spent, and works offline via Ollama. Sub-50ms startup. Zero dependencies. Zero flickering. XDG-compliant. 
AGENTS.md + CLAUDE.md." diff --git a/README.md b/README.md index edc0e95..fc05cd5 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,8 @@ rustyclaw | Model router | No | No | **Auto-route by task complexity** | | Parallel agents | No | No | **Git-worktree isolation** | | Voice I/O | No | No | **Whisper + XTTS v2 cloning** | -| Browser automation | External MCP server | No | **8 CDP tools, in the binary** | +| Browser automation | External MCP server | No | **9 CDP tools, in the binary** | +| Autonomous browser agent | No | No | **Goal-driven, 50-step cap, safety-gated** | | Auto-fix loop | No | No | **Post-edit lint + tests + retry** | | `/undo` · `/redo` | No | Partial (pollutes git log) | **Invisible shadow refs** | | OpenAI-compat providers | No | Partial | **9 providers, working tools** | @@ -141,6 +142,10 @@ Full tool use over Ollama's native format. Other Rust ports have had this broken Eight CDP-driven tools — `browser_navigate`, `browser_snapshot`, `browser_click`, `browser_fill`, `browser_screenshot`, `browser_get_text`, `browser_press_key`, `browser_wait` — shipped in the binary and enabled by default. Snapshots return a text tree with stable `@eN` element refs you can pass to click/fill. Works against any Chromium-based browser (Chrome, Chromium, Brave, Edge) you already have installed. No external automation server, no separate install. +### 🤖   Autonomous browser mode — `/browse ` + +Give it a goal, it drives. `/browse find the cheapest flight SF to Tokyo on July 7` navigates, fills forms, scrolls, reads results, and speaks the answer. 50-step hard cap (configurable), destructive-action approval gate (pauses at payment / delete / OAuth / free-trial-autobill), stagnation detector (escalating nudges when the model is stuck). `rustyclaw browse "" --json` runs the same loop headless from scripts or CI. 
`/voice` with prefixes `browse | browser | web | go to | open | shop for | book | order` drives it hands-free with milestone TTS at start, gate trip, and end. + ### 🦀   Single 19 MB static binary No runtime. No dependencies. No post-install scripts. `scp` it to a server and run. Cross-compiled for `x86_64-linux-gnu`, `aarch64-linux-gnu`, and `x86_64-linux-musl` on every release. diff --git a/scripts/readme-lint.sh b/scripts/readme-lint.sh index 83347be..ecaeaba 100755 --- a/scripts/readme-lint.sh +++ b/scripts/readme-lint.sh @@ -49,6 +49,14 @@ else err " fix: edit README.md (table row + /model line) to say \"${provider_count} providers\"" fi +# ── 4. Autonomous browser claim ────────────────────────────────────────────── +if grep -qF "Autonomous browser agent" README.md; then + ok "README.md has 'Autonomous browser agent' row" +else + err "README.md missing 'Autonomous browser agent' row (expected after /browse ship)" + err " fix: add '| Autonomous browser agent | No | No | ...' to the comparison table" +fi + # ── Result ─────────────────────────────────────────────────────────────────── if [ "$fail" -eq 0 ]; then echo "" diff --git a/src/browser/approval_gate.rs b/src/browser/approval_gate.rs new file mode 100644 index 0000000..9768666 --- /dev/null +++ b/src/browser/approval_gate.rs @@ -0,0 +1,366 @@ +//! Destructive-action approval gate for autonomous browser agent. +//! +//! Pattern-matches tool calls against URL paths, button text, form-field +//! signals, visible prices, and user-defined extension patterns. Read-only +//! tools always pass; everything else is checked against compiled RegexSets. + +use crate::browser::middleware::{MiddlewareVerdict, ToolMiddleware}; +use async_trait::async_trait; +use regex::{Regex, RegexSet}; +use std::collections::HashMap; +use std::sync::atomic::{AtomicBool, AtomicU32, Ordering}; +use std::sync::{Arc, Mutex}; +use tokio::sync::{mpsc, oneshot}; + +/// Read-only tools that never require approval. 
+const READ_ONLY_TOOLS: &[&str] = &[ + "browser_navigate", + "browser_snapshot", + "browser_screenshot", + "browser_get_text", + "browser_wait", + "browse_done", +]; + +/// Verdict returned by the approval gate. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum GateVerdict { + Allow, + RequireConfirmation { reason: String, detail: String }, +} + +/// Context passed to the gate for each tool invocation. +#[derive(Debug, Clone, Default)] +pub struct GateContext { + pub tool_name: String, + pub url: String, + pub target_text: String, + /// e.g. "input:type=password", "input:autocomplete=cc-number" + pub form_field_signals: Vec, + /// e.g. "$12.99", "€0.00" + pub visible_prices: Vec, +} + +/// Compiled approval gate — all regexes are built once at construction. +pub struct ApprovalGate { + url_set: RegexSet, + button_set: RegexSet, + form_set: RegexSet, + price_re: Regex, + extra_patterns: Vec, +} + +// --- Built-in pattern strings --------------------------------------------------- + +fn url_patterns() -> Vec { + vec![ + r"/pay(ments?)?(/|\?|$)".into(), + r"/checkout(/|\?|$)".into(), + r"/purchase(/|\?|$)".into(), + r"/order-review".into(), + r"/billing/add-card".into(), + r"/wallet/transfer".into(), + r"/oauth/authorize".into(), + r"/consent".into(), + r"/authorize/grant".into(), + ] +} + +fn button_patterns() -> Vec { + vec![ + // Category 2 — payment / purchase + r"(?i)confirm (purchase|order|payment)".into(), + r"(?i)submit payment".into(), + r"(?i)place (my )?order".into(), + r"(?i)complete (purchase|order)".into(), + r"(?i)buy now".into(), + r"(?i)pay (now|\$)".into(), + r"(?i)start (free )?trial".into(), + r"(?i)try free for \d+ days?".into(), + r"(?i)upgrade (to premium|plan|account)".into(), + // Category 3 — account destruction + r"(?i)delete (account|repository|organization|workspace|project)".into(), + r"(?i)remove (account|user)".into(), + r"(?i)revoke (access|permissions|api key)".into(), + r"(?i)permanently delete".into(), + r"(?i)empty trash".into(), + 
r"(?i)cancel subscription".into(), + r"(?i)close account".into(), + r"(?i)deactivate".into(), + // Category 4 — publication / blast radius + r"(?i)post (tweet|publicly|to public)".into(), + r"(?i)publish (article|page|post)".into(), + r"(?i)go live".into(), + r"(?i)^tweet$".into(), + r"(?i)share publicly".into(), + r"(?i)send (email|message|invitation)".into(), + r"(?i)reply all".into(), + // Category 5 — OAuth / permission grants + r"(?i)(authorize|allow) (this )?(app|application|access)".into(), + r"(?i)grant (access|permissions)".into(), + r"(?i)i authorize".into(), + // Category 6 — contracts / legal + r"(?i)(accept|agree to) (terms|contract|agreement)".into(), + r"(?i)sign contract".into(), + r"(?i)sign electronically".into(), + r"(?i)i agree (and|to) (continue|proceed)".into(), + ] +} + +fn form_field_patterns() -> Vec { + vec![ + r"^input:type=password$".into(), + r"^input:autocomplete=cc-(number|exp|csc|name)$".into(), + r"^input:name=(card|cc|cvv|cvc|pin|ssn)$".into(), + r"^input:id=(card|cc|cvv|cvc|pin|ssn)$".into(), + ] +} + +const PRICE_PATTERN: &str = r"[\$€£]\s*(\d+\.\d{2}|\d+,\d{2})"; + +// --- Implementation ------------------------------------------------------------ + +impl Default for ApprovalGate { + fn default() -> Self { + Self::with_user_patterns(Vec::new()) + } +} + +impl ApprovalGate { + /// Create a gate, appending user-supplied regex patterns. + /// Invalid user patterns are logged to stderr and skipped. 
+ pub fn with_user_patterns(user_patterns: Vec) -> Self { + let mut extras = Vec::new(); + for pat in &user_patterns { + match Regex::new(pat) { + Ok(re) => extras.push(re), + Err(e) => eprintln!("approval_gate: skipping invalid user pattern {pat:?}: {e}"), + } + } + + Self { + url_set: RegexSet::new(url_patterns()).expect("built-in URL patterns must compile"), + button_set: RegexSet::new(button_patterns()) + .expect("built-in button patterns must compile"), + form_set: RegexSet::new(form_field_patterns()) + .expect("built-in form patterns must compile"), + price_re: Regex::new(PRICE_PATTERN).expect("price pattern must compile"), + extra_patterns: extras, + } + } + + /// Evaluate a tool call context. Returns `Allow` or `RequireConfirmation`. + pub fn check(&self, c: &GateContext) -> GateVerdict { + // 1. Read-only tools always pass. + if READ_ONLY_TOOLS.contains(&c.tool_name.as_str()) { + return GateVerdict::Allow; + } + + let mut reasons: Vec = Vec::new(); + + // 2. URL patterns + if self.url_set.is_match(&c.url) { + reasons.push(format!("url_pattern: {}", c.url)); + } + + // 3. Button / text patterns (categories 2-6) + if self.button_set.is_match(&c.target_text) { + reasons.push(format!("button_text: {}", c.target_text)); + } + + // 4. Form field signals + for sig in &c.form_field_signals { + if self.form_set.is_match(sig) { + reasons.push(format!("form_field: {sig}")); + } + } + + // 5. Visible prices — skip zero amounts + for price in &c.visible_prices { + if let Some(caps) = self.price_re.captures(price) { + if let Some(amount_str) = caps.get(1) { + let normalized = amount_str.as_str().replace(',', "."); + if let Ok(val) = normalized.parse::() { + if val >= 0.01 { + reasons.push(format!("visible_price: {price}")); + } + } + } + } + } + + // 6. 
Extra (user-defined) patterns — checked against url + target_text + for re in &self.extra_patterns { + if re.is_match(&c.url) || re.is_match(&c.target_text) { + reasons.push(format!("user_pattern: {}", re.as_str())); + } + } + + // 7. Verdict + if reasons.is_empty() { + GateVerdict::Allow + } else { + GateVerdict::RequireConfirmation { + reason: reasons[0].clone(), + detail: reasons.join("; "), + } + } + } +} + +// ── Middleware bridge ───────────────────────────────────────────────────────── + +/// Approval prompt sent to the host (TUI/SDK/voice). +pub struct ApprovalPrompt { + pub step: u32, + pub tool_name: String, + pub target_text: String, + pub url: String, + pub reason: String, + pub reply: oneshot::Sender, +} + +pub struct ApprovalGateMiddleware { + gate: ApprovalGate, + policy: crate::browser::browse_loop::BrowsePolicy, + current_url: Arc>, + approval_tx: mpsc::Sender, + step_counter: Arc, + /// Tracks consecutive denial count per action key ("{tool_name}:{target_text}"). + denial_counts: Mutex>, + /// Set when the same action is denied twice — triggers session termination. + user_denied: AtomicBool, +} + +impl ApprovalGateMiddleware { + pub fn new( + gate: ApprovalGate, + policy: crate::browser::browse_loop::BrowsePolicy, + current_url: Arc>, + approval_tx: mpsc::Sender, + step_counter: Arc, + ) -> Self { + Self { + gate, + policy, + current_url, + approval_tx, + step_counter, + denial_counts: Mutex::new(HashMap::new()), + user_denied: AtomicBool::new(false), + } + } + + /// Returns true if the user denied the same action twice, triggering termination. + pub fn is_user_denied(&self) -> bool { + self.user_denied.load(Ordering::SeqCst) + } +} + +#[async_trait] +impl ToolMiddleware for ApprovalGateMiddleware { + async fn before_tool(&self, tool_name: &str, input: &serde_json::Value) -> MiddlewareVerdict { + use crate::browser::browse_loop::BrowsePolicy; + + // If user already denied twice, block everything. 
+ if self.user_denied.load(Ordering::SeqCst) { + return MiddlewareVerdict::Deny { + reason: "User denied this action twice. Terminating browse session.".into(), + }; + } + + // Yolo policy: always allow. + if self.policy == BrowsePolicy::Yolo { + return MiddlewareVerdict::Allow; + } + + // Read-only tools always pass, regardless of policy. + if READ_ONLY_TOOLS.contains(&tool_name) { + return MiddlewareVerdict::Allow; + } + + let url = self.current_url.lock().await.clone(); + let target_text = input["ref"] + .as_str() + .or_else(|| input["selector"].as_str()) + .unwrap_or("") + .to_string(); + + let gate_ctx = GateContext { + tool_name: tool_name.to_string(), + url: url.clone(), + target_text: target_text.clone(), + form_field_signals: Vec::new(), + visible_prices: Vec::new(), + }; + + // Ask policy: force confirmation for every non-read-only tool. + let verdict = if self.policy == BrowsePolicy::Ask { + GateVerdict::RequireConfirmation { + reason: "ask policy".to_string(), + detail: format!("{tool_name} on {url}"), + } + } else { + // Pattern policy: delegate to the compiled gate. + self.gate.check(&gate_ctx) + }; + + match verdict { + GateVerdict::Allow => { + // Clear denial counter on approval for this action key. + let key = format!("{tool_name}:{target_text}"); + self.denial_counts.lock().unwrap().remove(&key); + MiddlewareVerdict::Allow + } + GateVerdict::RequireConfirmation { reason, .. } => { + let step = self.step_counter.load(Ordering::Relaxed); + let (tx, rx) = oneshot::channel(); + let prompt = ApprovalPrompt { + step, + tool_name: tool_name.to_string(), + target_text: target_text.clone(), + url, + reason: reason.clone(), + reply: tx, + }; + // If the host receiver is gone, deny by default. + if self.approval_tx.send(prompt).await.is_err() { + return MiddlewareVerdict::Deny { + reason: "approval channel closed".to_string(), + }; + } + match rx.await { + Ok(true) => { + // Approved — clear denial counter for this action. 
+ let key = format!("{tool_name}:{target_text}"); + self.denial_counts.lock().unwrap().remove(&key); + MiddlewareVerdict::Allow + } + _ => { + // Denied — increment counter; terminate after 2 denials on same action. + let key = format!("{tool_name}:{target_text}"); + let count = { + let mut counts = self.denial_counts.lock().unwrap(); + let entry = counts.entry(key).or_insert(0); + *entry += 1; + *entry + }; + if count >= 2 { + self.user_denied.store(true, Ordering::SeqCst); + return MiddlewareVerdict::Deny { + reason: "User denied this action twice. Terminating browse session." + .into(), + }; + } + MiddlewareVerdict::Deny { reason } + } + } + } + } + } + + async fn after_tool(&self, _tool_name: &str, _output: &str) { + // Increment step counter after each tool execution. + self.step_counter.fetch_add(1, Ordering::Relaxed); + } +} diff --git a/src/browser/browse_loop.rs b/src/browser/browse_loop.rs new file mode 100644 index 0000000..e099147 --- /dev/null +++ b/src/browser/browse_loop.rs @@ -0,0 +1,302 @@ +use anyhow::Result; +use serde::{Deserialize, Serialize}; +use std::sync::atomic::AtomicU32; +use std::sync::Arc; +use tokio::sync::mpsc; + +use crate::browser::approval_gate::{ApprovalGate, ApprovalGateMiddleware, ApprovalPrompt}; +use crate::browser::loop_detector::LoopDetectorMiddleware; +use crate::config::Config; +use crate::query_engine::QueryEngine; +use crate::tools::DynTool; + +/// Policy for the approval gate during this run. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum BrowsePolicy { + Pattern, + Ask, + Yolo, +} + +impl Default for BrowsePolicy { + fn default() -> Self { Self::Pattern } +} + +/// A single browse-run configuration. +#[derive(Debug, Clone)] +pub struct BrowseRequest { + pub goal: String, + pub policy: BrowsePolicy, + pub max_steps: u32, + pub voice: bool, +} + +/// Progress events streamed to the caller during a run. 
+#[derive(Debug, Clone, Serialize)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum BrowseProgress { + Started { goal: String, max_steps: u32 }, + Step { n: u32, action: String, target: String }, + Nudge { level: u8, text: String }, + ApprovalNeeded { step: u32, action: String, target_text: String, url: String, reason: String }, + Completed(BrowseResult), +} + +/// Final result of a browse run. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct BrowseResult { + pub achieved: bool, + pub summary: String, + pub reason: BrowseReason, + pub steps_used: u32, + pub final_url: Option, +} + +/// Why the loop terminated. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +pub enum BrowseReason { + Done, + Bailed, + StepCap, + Stagnation, + Budget, + BrowserCrashed, + UserDenied, + Cancelled, +} + +/// Build the browse-agent system prompt. +fn build_browse_system_prompt(goal: &str, max_steps: u32) -> String { + format!( + "You are an autonomous browser agent.\n\ + \n\ + Goal: {goal}\n\ + \n\ + Instructions:\n\ + - Use the browser_* tools to navigate, inspect, and act on web pages.\n\ + - Take exactly one action per turn. Observe the result before planning the next step.\n\ + - When you believe the goal is achieved (or you're stuck), call browse_done(summary, achieved).\n\ + - Keep summaries under 2 sentences — they may be spoken aloud.\n\ + - You have {max_steps} steps total.\n\ + - If approval is denied, try a different approach or call browse_done(achieved=false)." + ) +} + +/// Filter the tool list down to browser_* + browse_done tools. +fn filter_browser_tools(tools: &[DynTool]) -> Vec { + tools + .iter() + .filter(|t| { + let name = t.name(); + name.starts_with("browser_") || name == "browse_done" + }) + .cloned() + .collect() +} + +/// Parse the BROWSE_DONE sentinel from assistant text. +/// Returns (achieved, summary) if the sentinel is found. 
+fn parse_browse_done(text: &str) -> Option<(bool, String)> { + if !text.contains("BROWSE_DONE") { + return None; + } + let achieved = text.contains("achieved=true"); + let summary = text + .find("summary=") + .map(|i| text[i + 8..].trim().to_string()) + .unwrap_or_default(); + Some((achieved, summary)) +} + +/// Orchestrate an autonomous browser agent run. +pub async fn run_browse( + req: BrowseRequest, + config: &Config, + tools: Vec, + current_url: Arc>, + progress_tx: mpsc::Sender, + approval_tx: mpsc::Sender, +) -> Result { + // 1. Emit Started event. + let _ = progress_tx + .send(BrowseProgress::Started { + goal: req.goal.clone(), + max_steps: req.max_steps, + }) + .await; + + // 2. Shared step counter for the approval prompt. + let step_counter = Arc::new(AtomicU32::new(0)); + + // 3. Build the approval gate from config patterns. + let gate = ApprovalGate::with_user_patterns(config.browse_approval_patterns.clone()); + let gate_mw = Arc::new(ApprovalGateMiddleware::new( + gate, + req.policy, + current_url.clone(), + approval_tx, + step_counter.clone(), + )); + + // 4. Build the loop detector middleware. + let (nudge_tx, mut nudge_rx) = mpsc::channel::(16); + let loop_mw = Arc::new(LoopDetectorMiddleware::new(nudge_tx)); + + // 5. Assemble middleware chain (keep Arc refs for post-run inspection). + let middlewares: crate::browser::middleware::MiddlewareChain = vec![ + gate_mw.clone() as Arc, + loop_mw.clone() as Arc, + ]; + + // 6. Build browse-specific system prompt. + let system_prompt = build_browse_system_prompt(&req.goal, req.max_steps); + + // 7. Filter tools to browser_* + browse_done only. + let browser_tools = filter_browser_tools(&tools); + + // 8. Override config's max_turns to the browse step cap. + let mut browse_config = config.clone(); + browse_config.max_turns = req.max_steps; + + // 9. Create the browse-mode query engine. + let mut engine = + QueryEngine::new_for_browse(browse_config, browser_tools, system_prompt, middlewares)?; + + // 10. 
Spawn a task to forward nudges as BrowseProgress events. + let progress_tx_nudge = progress_tx.clone(); + let step_counter_nudge = step_counter.clone(); + let nudge_handle = tokio::spawn(async move { + let mut level: u8 = 0; + while let Some(text) = nudge_rx.recv().await { + level = level.saturating_add(1); + // Update step counter from the nudge level for progress reporting. + let _ = step_counter_nudge.load(std::sync::atomic::Ordering::Relaxed); + let _ = progress_tx_nudge + .send(BrowseProgress::Nudge { level, text }) + .await; + } + }); + + // 11. Run the agentic loop. + let query_result = engine.query(&req.goal).await; + + // 12. Shut down the nudge forwarder. + nudge_handle.abort(); + + // 13. Determine the result. + let steps_used = engine.turns_used(); + let final_url = { + let url = current_url.lock().await; + if url.is_empty() { None } else { Some(url.clone()) } + }; + + // Check middleware termination flags first — they override sentinel parsing. + let middleware_reason = if loop_mw.is_stopped() { + Some(BrowseReason::Stagnation) + } else if gate_mw.is_user_denied() { + Some(BrowseReason::UserDenied) + } else { + None + }; + + let result = match query_result { + Ok(()) => { + // Bug 1 fix: BROWSE_DONE sentinel is emitted by BrowseDoneTool::execute() + // which returns it as a tool result (user-role ContentBlock::ToolResult), + // NOT as assistant text. Check tool results first, then assistant text as fallback. 
+ let sentinel_text = engine + .last_tool_result_text() + .and_then(|t| parse_browse_done(&t).map(|r| (t, r))) + .or_else(|| { + engine + .last_assistant_text() + .and_then(|t| parse_browse_done(&t).map(|r| (t, r))) + }); + + if let Some((_raw, (achieved, summary))) = sentinel_text { + let mw_active = middleware_reason.is_some(); + let reason = middleware_reason.unwrap_or(if achieved { + BrowseReason::Done + } else { + BrowseReason::Bailed + }); + BrowseResult { + achieved: achieved && !mw_active, + summary, + reason, + steps_used, + final_url, + } + } else if let Some(reason) = middleware_reason { + // Middleware stopped the loop but no sentinel was found. + BrowseResult { + achieved: false, + summary: match reason { + BrowseReason::Stagnation => { + "Agent terminated: repeated same action with no progress".to_string() + } + BrowseReason::UserDenied => { + "Agent terminated: user denied the action twice".to_string() + } + _ => "Agent terminated by middleware".to_string(), + }, + reason, + steps_used, + final_url, + } + } else if let Some(text) = engine.last_assistant_text() { + // No sentinel, no middleware stop — engine stopped for other reasons. + let reason = if steps_used >= req.max_steps { + BrowseReason::StepCap + } else { + BrowseReason::Done + }; + BrowseResult { + achieved: false, + summary: text.chars().take(200).collect(), + reason, + steps_used, + final_url, + } + } else { + // No assistant messages at all. 
+ BrowseResult { + achieved: false, + summary: "No response from browse agent".to_string(), + reason: BrowseReason::Bailed, + steps_used, + final_url, + } + } + } + Err(e) => { + let msg = e.to_string(); + let reason = middleware_reason.unwrap_or_else(|| { + if msg.contains("budget") || msg.contains("Budget") { + BrowseReason::Budget + } else if msg.contains("browser") || msg.contains("CDP") || msg.contains("Chrome") + { + BrowseReason::BrowserCrashed + } else { + BrowseReason::Bailed + } + }); + BrowseResult { + achieved: false, + summary: format!("Browse agent error: {msg}"), + reason, + steps_used, + final_url, + } + } + }; + + // 14. Emit Completed event. + let _ = progress_tx + .send(BrowseProgress::Completed(result.clone())) + .await; + + Ok(result) +} diff --git a/src/browser/loop_detector.rs b/src/browser/loop_detector.rs new file mode 100644 index 0000000..dba9465 --- /dev/null +++ b/src/browser/loop_detector.rs @@ -0,0 +1,158 @@ +use crate::browser::middleware::{MiddlewareVerdict, ToolMiddleware}; +use async_trait::async_trait; +use sha2::{Digest, Sha256}; +use std::collections::VecDeque; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Mutex; +use tokio::sync::mpsc; + +const WINDOW_SIZE: usize = 10; +const REPEAT_THRESHOLD: usize = 3; + +const NUDGES: [&str; 3] = [ + "You seem to be repeating the same action with no effect. Try a different approach — \ + perhaps a different element, a different selector, or navigate to a different section.", + "This action has failed multiple times on the same page state. Consider: \ + (1) the element may be disabled or overlaid, (2) you may need to scroll first, \ + (3) try using JavaScript evaluation as a fallback, (4) the page may require authentication.", + "Stopping — the browser agent has repeated the same action 3 times with no progress. 
\ + The page may be stuck, require a CAPTCHA, or the target element may not be interactable.", +]; + +#[derive(Clone, Debug, PartialEq, Eq)] +struct ActionFingerprint { + action_hash: String, + page_hash: String, +} + +pub struct LoopDetector { + window: VecDeque, + nudge_level: usize, +} + +impl LoopDetector { + pub fn new() -> Self { + Self { + window: VecDeque::new(), + nudge_level: 0, + } + } + + /// Record an action. Hashes action_type+target and page_text, appends to window, + /// pruning to WINDOW_SIZE. + pub fn record_action(&mut self, action_type: &str, target: &str, page_text: &str) { + let fp = ActionFingerprint { + action_hash: hash_string(&format!("{action_type}:{target}")), + page_hash: hash_string(page_text), + }; + self.window.push_back(fp); + if self.window.len() > WINDOW_SIZE { + self.window.pop_front(); + } + } + + /// Check whether the last REPEAT_THRESHOLD entries are identical. + /// Returns an escalating nudge string if stagnation is detected, None otherwise. + /// Nudge level advances each time stagnation is confirmed (capped at NUDGES.len()-1). + pub fn check_stagnation(&mut self) -> Option { + if self.window.len() < REPEAT_THRESHOLD { + return None; + } + let tail_start = self.window.len() - REPEAT_THRESHOLD; + let tail: Vec<_> = self.window.range(tail_start..).collect(); + let first = &tail[0]; + let all_same = tail.iter().all(|fp| fp == first); + if !all_same { + return None; + } + let level = self.nudge_level.min(NUDGES.len() - 1); + let nudge = NUDGES[level].to_string(); + self.nudge_level = (self.nudge_level + 1).min(NUDGES.len() - 1); + Some(nudge) + } + + /// Clear the window and reset escalation state. + pub fn reset(&mut self) { + self.window.clear(); + self.nudge_level = 0; + } + + pub fn window_len(&self) -> usize { + self.window.len() + } +} + +impl Default for LoopDetector { + fn default() -> Self { + Self::new() + } +} + +/// Public helper: SHA-256 of "{action_type}:{target}". 
+pub fn fingerprint_action(action_type: &str, target: &str, _extra: &str) -> String { + hash_string(&format!("{action_type}:{target}")) +} + +fn hash_string(s: &str) -> String { + let mut hasher = Sha256::new(); + hasher.update(s.as_bytes()); + format!("{:x}", hasher.finalize()) +} + +// ── Middleware bridge ───────────────────────────────────────────────────────── + +pub struct LoopDetectorMiddleware { + inner: Mutex, + nudge_tx: mpsc::Sender, + stopped: AtomicBool, +} + +impl LoopDetectorMiddleware { + pub fn new(nudge_tx: mpsc::Sender) -> Self { + Self { + inner: Mutex::new(LoopDetector::new()), + nudge_tx, + stopped: AtomicBool::new(false), + } + } + + /// Returns true if the loop detector reached terminal stagnation (level 3). + pub fn is_stopped(&self) -> bool { + self.stopped.load(Ordering::SeqCst) + } +} + +#[async_trait] +impl ToolMiddleware for LoopDetectorMiddleware { + async fn before_tool(&self, _tool_name: &str, _input: &serde_json::Value) -> MiddlewareVerdict { + if self.stopped.load(Ordering::SeqCst) { + return MiddlewareVerdict::Deny { + reason: "Stagnation detected: the browser agent has been stopped after \ + repeating the same action with no effect." + .into(), + }; + } + MiddlewareVerdict::Allow + } + + async fn after_tool(&self, tool_name: &str, output: &str) { + // browser_navigate resets the detector (new page = fresh state). + if tool_name == "browser_navigate" { + self.inner.lock().unwrap().reset(); + return; + } + let nudge = { + let mut ld = self.inner.lock().unwrap(); + ld.record_action(tool_name, "", output); + ld.check_stagnation() + }; + if let Some(nudge) = nudge { + // If this is the terminal nudge (level 3 — contains "Stopping"), + // set the stopped flag so before_tool denies subsequent calls. 
+ if nudge.contains("Stopping") { + self.stopped.store(true, Ordering::SeqCst); + } + let _ = self.nudge_tx.send(nudge).await; + } + } +} diff --git a/src/browser/middleware.rs b/src/browser/middleware.rs new file mode 100644 index 0000000..7805214 --- /dev/null +++ b/src/browser/middleware.rs @@ -0,0 +1,33 @@ +//! ToolMiddleware — pluggable pre/post hook trait around every tool call. +//! +//! Default empty chain (no middlewares = existing behavior unchanged). +//! The approval gate and loop detector implement this trait. + +use async_trait::async_trait; +use serde_json::Value; +use std::sync::Arc; + +/// Result of a middleware's `before_tool` check. +#[derive(Debug, Clone)] +pub enum MiddlewareVerdict { + /// Allow the tool to proceed. + Allow, + /// Block the tool with an error reason. + Deny { reason: String }, + /// Request confirmation before proceeding. + /// Treated as Deny until the approval gate resolves it internally. + RequireConfirmation { reason: String, detail: String }, +} + +/// Extension point invoked before and after every tool execution. +#[async_trait] +pub trait ToolMiddleware: Send + Sync { + /// Called before a tool runs. Return `Deny` or `RequireConfirmation` to block. + async fn before_tool(&self, tool_name: &str, input: &Value) -> MiddlewareVerdict; + + /// Called after a tool runs with its output text. + async fn after_tool(&self, tool_name: &str, output: &str); +} + +/// Ordered list of middlewares applied to every tool call. +pub type MiddlewareChain = Vec>; diff --git a/src/browser/mod.rs b/src/browser/mod.rs index 21ee48b..c3dfc7a 100644 --- a/src/browser/mod.rs +++ b/src/browser/mod.rs @@ -1,9 +1,14 @@ //! Browser automation via Chrome DevTools Protocol. 
-pub mod cdp; -pub mod snapshot; pub mod actions; +pub mod browse_loop; +pub mod approval_gate; +pub mod cdp; pub mod element; pub mod extraction; +pub mod loop_detector; +pub mod middleware; +pub mod snapshot; +pub mod yolo_ack; use anyhow::{Result, bail}; use cdp::CdpClient; diff --git a/src/browser/yolo_ack.rs b/src/browser/yolo_ack.rs new file mode 100644 index 0000000..5d40797 --- /dev/null +++ b/src/browser/yolo_ack.rs @@ -0,0 +1,43 @@ +//! First-run --yolo acknowledgment. +//! +//! Writes a timestamp+version file to $XDG_STATE_HOME/rustyclaw/yolo-ack +//! on first --yolo use. Subsequent runs are silent. + +use std::fs; +use std::path::PathBuf; +use std::time::{SystemTime, UNIX_EPOCH}; + +const VERSION: &str = env!("CARGO_PKG_VERSION"); + +fn ack_path() -> PathBuf { + let state_home = std::env::var("XDG_STATE_HOME") + .map(PathBuf::from) + .unwrap_or_else(|_| { + dirs::home_dir() + .unwrap_or_else(|| { + std::env::var("HOME") + .map(PathBuf::from) + .unwrap_or_default() + }) + .join(".local/state") + }); + state_home.join("rustyclaw").join("yolo-ack") +} + +pub fn is_acknowledged() -> bool { + ack_path().exists() +} + +pub fn acknowledge() -> std::io::Result<()> { + let p = ack_path(); + if let Some(parent) = p.parent() { + fs::create_dir_all(parent)?; + } + // Format: seconds since epoch as ISO-8601-ish timestamp (no chrono dep) + let secs = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0); + let contents = format!("{secs} rustyclaw v{VERSION}\n"); + fs::write(p, contents) +} diff --git a/src/commands/mod.rs b/src/commands/mod.rs index e8b8112..a0421e7 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -294,6 +294,12 @@ pub enum CommandAction { Redo { n: Option }, /// `/autocommit [status]` — print auto-commit state to the chat. v1 only supports `status`. AutoCommitStatus, + /// Start an autonomous browser run. 
+ Browse { + goal: String, + policy: crate::browser::browse_loop::BrowsePolicy, + max_steps: Option, + }, /// Launch browser and optionally navigate to URL BrowseUrl(String), /// Take a screenshot of the current browser page @@ -386,7 +392,7 @@ pub fn dispatch(input: &str, ctx: &CommandContext) -> CommandAction { "undo" => cmd_undo(args), "redo" => cmd_redo(args), "autocommit" => cmd_autocommit(args), - "browser" | "browse" => { + "browser" => { let url = args.trim().to_string(); if url == "close" { CommandAction::BrowserClose @@ -396,6 +402,7 @@ pub fn dispatch(input: &str, ctx: &CommandContext) -> CommandAction { CommandAction::BrowseUrl(url) } } + "browse" => parse_browse_command(args.trim()), "screenshot" => CommandAction::BrowserScreenshot, "branch" => cmd_branch(ctx), "summary" => CommandAction::SendPrompt( @@ -527,6 +534,45 @@ fn split_first_word(s: &str) -> (&str, &str) { } } +/// Parse `/browse` arguments into a `CommandAction::Browse` variant. +/// +/// Supported flags (all optional, may appear in any order before the goal): +/// - `--yolo` → `BrowsePolicy::Yolo` +/// - `--ask` → `BrowsePolicy::Ask` +/// - `--max-steps ` → `max_steps = Some(N)` +/// +/// Remaining tokens after flag removal form the `goal` string. 
+pub fn parse_browse_command(input: &str) -> CommandAction { + use crate::browser::browse_loop::BrowsePolicy; + let mut policy = BrowsePolicy::Pattern; + let mut max_steps: Option = None; + let mut tokens: Vec<&str> = input.split_whitespace().collect(); + let mut i = 0; + while i < tokens.len() { + match tokens[i] { + "--yolo" => { + policy = BrowsePolicy::Yolo; + tokens.remove(i); + } + "--ask" => { + policy = BrowsePolicy::Ask; + tokens.remove(i); + } + "--max-steps" if i + 1 < tokens.len() => { + if let Ok(n) = tokens[i + 1].parse() { + max_steps = Some(n); + } + tokens.drain(i..=i + 1); + } + _ => { + i += 1; + } + } + } + let goal = tokens.join(" ").trim().to_string(); + CommandAction::Browse { goal, policy, max_steps } +} + // ── Individual commands ─────────────────────────────────────────────────────── fn cmd_banner(args: &str) -> CommandAction { @@ -2188,6 +2234,18 @@ pub const HELP_CATEGORIES: &[(&str, &str, &[HelpCommand])] = &[ ("/spawn discard ", "discard agent's worktree"), ], ), + ( + "Browser", + "Autonomous browser agent", + &[ + ("/browse ", "run autonomous browser agent towards a goal"), + ("/browse --yolo ", "yolo mode: no approval prompts"), + ("/browse --ask ", "ask before every action"), + ("/browse --max-steps N ", "cap the run at N steps"), + ("/browser ", "open URL in managed browser session"), + ("/screenshot", "take a screenshot of the current page"), + ], + ), ( "Plugins & tools", "MCP servers, plugins, viz", diff --git a/src/config.rs b/src/config.rs index 1db6364..dd22a03 100644 --- a/src/config.rs +++ b/src/config.rs @@ -316,6 +316,13 @@ pub struct Config { /// Default browser action timeout in milliseconds. pub browser_timeout_ms: u64, + /// Default max steps for /browse runs. Configurable per-run. + pub browse_max_steps: u32, + /// User-appended destructive-action patterns (regex). + pub browse_approval_patterns: Vec, + /// Default policy: "pattern" (default), "ask", "yolo" (not honored from settings — per-run only). 
+ pub browse_default_policy: String, + /// Watch debounce (ms) — coalesces rapid filesystem events. pub watch_debounce_ms: u64, /// Minimum gap between watch triggers (ms). @@ -433,6 +440,9 @@ impl Default for Config { browser_chrome_path: None, browser_cdp_endpoint: None, browser_timeout_ms: 30_000, + browse_max_steps: 50, + browse_approval_patterns: Vec::new(), + browse_default_policy: "pattern".to_string(), watch_debounce_ms: 500, watch_rate_limit_ms: 10_000, watch_markers: vec!["AI:".into(), "AGENT:".into()], @@ -623,6 +633,17 @@ impl Config { } } + // Browse agent settings + if let Some(s) = settings.browse_max_steps { + cfg.browse_max_steps = s; + } + if let Some(p) = settings.browse_approval_patterns { + cfg.browse_approval_patterns = p; + } + cfg.browse_default_policy = settings + .browse_default_policy + .unwrap_or_else(|| cfg.browse_default_policy.clone()); + // Resolve output style: load name from settings, look up prompt if let Some(ref style_name) = settings.output_style && style_name != "default" diff --git a/src/main.rs b/src/main.rs index e5c0cfe..1a9a0a7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -308,6 +308,20 @@ enum Commands { Doctor, /// Self-update to the latest release from GitHub Update, + /// Run the autonomous browser agent + Browse { + /// Goal for the browser agent + goal: Vec, + /// Skip all approval prompts (requires prior acknowledgment) + #[arg(long)] + yolo: bool, + /// Prompt for approval on every destructive action + #[arg(long)] + ask: bool, + /// Maximum number of steps (default: 50) + #[arg(long, default_value = "50")] + max_steps: u32, + }, } #[derive(Subcommand)] @@ -607,6 +621,86 @@ async fn main() -> Result<()> { Commands::Update => { return self_update().await; } + Commands::Browse { goal, yolo, ask, max_steps } => { + use crate::browser::browse_loop::{BrowsePolicy, BrowseRequest, BrowseProgress, run_browse}; + use tokio::sync::mpsc; + + let goal_str = goal.join(" "); + if goal_str.trim().is_empty() { + 
eprintln!("Error: browse requires a goal argument"); + std::process::exit(1); + } + + // Determine policy: --yolo > --ask > default (pattern-match) + let policy = if *yolo { + // First-time --yolo: write acknowledgment file if not yet present + if !crate::browser::yolo_ack::is_acknowledged() { + eprintln!( + "Warning: --yolo disables all approval prompts. \ + The browser agent will execute destructive actions without confirmation.\n\ + To proceed, this acknowledgment is recorded in your XDG state directory." + ); + if let Err(e) = crate::browser::yolo_ack::acknowledge() { + eprintln!("Warning: could not write yolo-ack file: {e}"); + } + } + BrowsePolicy::Yolo + } else if *ask { + BrowsePolicy::Ask + } else { + BrowsePolicy::Pattern + }; + + let req = BrowseRequest { + goal: goal_str.clone(), + policy, + max_steps: *max_steps, + voice: false, + }; + + let config = Config::load()?; + let tools = all_tools(&config); + let current_url = std::sync::Arc::new(tokio::sync::Mutex::new(String::new())); + + let (progress_tx, mut progress_rx) = mpsc::channel::(64); + // Approval channel: in CLI mode auto-deny (user must use --yolo or --ask interactively) + let (approval_tx, mut approval_rx) = mpsc::channel::(8); + + // Spawn task to handle approval prompts: prompt on stderr, read from stdin + let _approval_task = tokio::spawn(async move { + use std::io::Write; + while let Some(prompt) = approval_rx.recv().await { + eprint!( + "Approval needed [step {}]: {} on '{}' at {}\n Reason: {}\nAllow? 
[y/N] ", + prompt.step, prompt.tool_name, prompt.target_text, prompt.url, prompt.reason + ); + let _ = std::io::stderr().flush(); + let mut line = String::new(); + let allowed = if std::io::stdin().read_line(&mut line).is_ok() { + matches!(line.trim().to_lowercase().as_str(), "y" | "yes") + } else { + false + }; + let _ = prompt.reply.send(allowed); + } + }); + + // Spawn task to print progress as NDJSON + let progress_task = tokio::spawn(async move { + while let Some(event) = progress_rx.recv().await { + if let Ok(json) = serde_json::to_string(&event) { + println!("{json}"); + } + } + }); + + let result = run_browse(req, &config, tools, current_url, progress_tx, approval_tx).await?; + progress_task.await.ok(); + + // Print final result as JSON + println!("{}", serde_json::to_string_pretty(&result)?); + return Ok(()); + } } } diff --git a/src/query_engine.rs b/src/query_engine.rs index 66abad9..7c49abb 100644 --- a/src/query_engine.rs +++ b/src/query_engine.rs @@ -5,6 +5,7 @@ use crate::api::{ApiBackend, MessagesRequest}; use crate::compact::{CompactNeeded, compact_needed, snip_compact, summarize_compact}; use crate::config::Config; use crate::rag; +use crate::browser::middleware::MiddlewareVerdict; use crate::tools::{DynTool, ToolContext}; use anyhow::{Context, Result}; use colored::Colorize; @@ -27,6 +28,10 @@ pub struct QueryEngine { session_id: Option, /// Shared Read-tool cache for deduplicating unchanged re-reads (v2.1.86). read_cache: crate::tools::ReadCache, + /// In-process tool middleware chain (empty by default). + middlewares: crate::browser::middleware::MiddlewareChain, + /// Turn counter. 
+ turns: u32, } impl QueryEngine { @@ -63,6 +68,8 @@ impl QueryEngine { cumulative_cost_usd: 0.0, session_id: Some(uuid::Uuid::new_v4().to_string()), read_cache: crate::tools::new_read_cache(), + middlewares: Vec::new(), + turns: 0, }) } @@ -116,6 +123,7 @@ impl QueryEngine { loop { turn += 1; + self.turns = turn; if turn > max_turns { eprintln!("{}", format!("Stopped after {max_turns} turns.").yellow()); break; @@ -357,6 +365,7 @@ impl QueryEngine { ctx.live_model = Some(self.config.model.clone()); ctx.live_api_key = Some(self.config.api_key.clone()); ctx.live_ollama_host = Some(self.config.ollama_host.clone()); + ctx.middlewares = self.middlewares.clone(); let mut results = Vec::new(); for block in content { @@ -413,6 +422,40 @@ impl QueryEngine { } } + // ── Middleware before_tool check ────────────────────── + let mut middleware_denied = false; + for mw in &ctx.middlewares { + match mw.before_tool(name, input).await { + MiddlewareVerdict::Allow => {} + MiddlewareVerdict::Deny { reason } => { + results.push(ContentBlock::ToolResult { + tool_use_id: id.clone(), + content: vec![ToolResultContent::text(format!( + "Middleware denied: {reason}" + ))], + is_error: Some(true), + }); + middleware_denied = true; + break; + } + MiddlewareVerdict::RequireConfirmation { reason, .. } => { + // Treat as Deny until the approval gate resolves internally. 
+ results.push(ContentBlock::ToolResult { + tool_use_id: id.clone(), + content: vec![ToolResultContent::text(format!( + "Middleware requires confirmation: {reason}" + ))], + is_error: Some(true), + }); + middleware_denied = true; + break; + } + } + } + if middleware_denied { + continue; + } + let tool = self.tools.iter().find(|t| t.name() == name); let output = match tool { @@ -423,6 +466,20 @@ impl QueryEngine { None => crate::tools::ToolOutput::error(format!("Unknown tool: {name}")), }; + // ── Middleware after_tool ───────────────────────────── + let output_text: String = output + .content + .iter() + .map(|c| { + let ToolResultContent::Text { text } = c; + text.as_str() + }) + .collect::>() + .join("\n"); + for mw in &ctx.middlewares { + mw.after_tool(name, &output_text).await; + } + if output.is_error && !self.stream_json_output { eprintln!( "{} {}", @@ -441,20 +498,11 @@ impl QueryEngine { // Emit tool_result event for stream-json + hook-events mode if self.include_hook_events && self.stream_json_output { - let result_text = output - .content - .iter() - .map(|c| { - let ToolResultContent::Text { text } = c; - text.as_str() - }) - .collect::>() - .join("\n"); let event = serde_json::json!({ "type": "tool_result", "name": name, "is_error": output.is_error, - "content": result_text, + "content": output_text, }); println!("{}", event); } @@ -553,6 +601,75 @@ fn truncate_json(v: &serde_json::Value, max_len: usize) -> String { } impl QueryEngine { + /// Create a QueryEngine preconfigured for autonomous browse mode. + /// Overrides the system prompt and injects the middleware chain. + pub fn new_for_browse( + config: Config, + tools: Vec, + system_prompt: String, + middlewares: crate::browser::middleware::MiddlewareChain, + ) -> Result { + let mut engine = Self::new(config, tools)?; + engine.system_prompt = system_prompt; + engine.middlewares = middlewares; + Ok(engine) + } + + /// How many turns the engine has executed since the last `query()` call. 
+ pub fn turns_used(&self) -> u32 { + self.turns + } + + /// Extract the text content from the last assistant message, if any. + pub fn last_assistant_text(&self) -> Option { + self.messages.iter().rev().find_map(|m| { + if m.role == Role::Assistant { + let texts: Vec<&str> = m + .content + .iter() + .filter_map(|b| match b { + ContentBlock::Text { text } => Some(text.as_str()), + _ => None, + }) + .collect(); + if texts.is_empty() { + None + } else { + Some(texts.join("")) + } + } else { + None + } + }) + } + + /// Extract the text content from the last tool result (user-role message + /// containing `ContentBlock::ToolResult`). Tool results are always sent + /// in user-role messages per the Anthropic API contract. + pub fn last_tool_result_text(&self) -> Option { + for msg in self.messages.iter().rev() { + if msg.role != Role::User { + continue; + } + for block in msg.content.iter().rev() { + if let ContentBlock::ToolResult { content, .. } = block { + let text: String = content + .iter() + .map(|c| { + let ToolResultContent::Text { text } = c; + text.as_str() + }) + .collect::>() + .join("\n"); + if !text.is_empty() { + return Some(text); + } + } + } + } + None + } + fn replay_user_messages(&self) -> bool { self.config.replay_user_messages } diff --git a/src/sdk/mod.rs b/src/sdk/mod.rs index 2c08c89..7b0b802 100644 --- a/src/sdk/mod.rs +++ b/src/sdk/mod.rs @@ -11,6 +11,7 @@ pub mod transport; pub use protocol::*; +use crate::browser::browse_loop::{BrowsePolicy, BrowseProgress, BrowseRequest, run_browse}; use crate::config::Config; use crate::tools::all_tools; use anyhow::Result; @@ -288,6 +289,132 @@ impl SdkServer { } } + // ── Browse Start ──────────────────────────────────────── + SdkRequest::BrowseStart { + id, + goal, + policy, + max_steps, + yolo_ack, + } => { + // Validate yolo_ack requirement + if policy == BrowsePolicy::Yolo && !yolo_ack { + transport + .send_response(SdkResponse::Error { + id, + code: "yolo_ack_required".into(), + message: 
"browse/start with policy=yolo requires yolo_ack=true".into(), + }) + .await?; + return Ok(()); + } + + // Generate a session ID for this browse run + let session_id = format!( + "browse-{}", + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_millis() + ); + + // Clone config and build tools + let cfg = config.clone(); + let all_tools_list = all_tools(&cfg); + + // Channels: progress events from browse loop → notif forwarding task + let (progress_tx, mut progress_rx) = tokio::sync::mpsc::channel::(64); + // Channel: approval prompts from browse loop → host + let (approval_tx, mut approval_rx) = + tokio::sync::mpsc::channel::(4); + + let current_url = Arc::new(tokio::sync::Mutex::new(String::new())); + + // Respond immediately with session_id + transport + .send_response(SdkResponse::BrowseStarted { + id, + session_id: session_id.clone(), + }) + .await?; + + active_sessions.fetch_add(1, Ordering::Relaxed); + + // Forward BrowseProgress events as SdkNotification NDJSON + let fwd_notif_tx = notif_tx.clone(); + let fwd_sid = session_id.clone(); + tokio::spawn(async move { + while let Some(event) = progress_rx.recv().await { + let notif = match event { + BrowseProgress::Step { n, action, target } => { + SdkNotification::BrowseProgress { + session_id: fwd_sid.clone(), + step: n, + action, + target, + } + } + BrowseProgress::Completed(result) => { + SdkNotification::BrowseCompleted { + session_id: fwd_sid.clone(), + result, + } + } + BrowseProgress::Nudge { .. } | BrowseProgress::Started { .. } => { + // Not surfaced as SDK notifications + continue; + } + BrowseProgress::ApprovalNeeded { .. 
} => { + // Handled via approval_rx below + continue; + } + }; + let _ = fwd_notif_tx.send(notif); + } + }); + + // Forward ApprovalPrompt events as BrowseApprovalNeeded notifications + let appr_notif_tx = approval_out_tx.clone(); + let appr_sid = session_id.clone(); + tokio::spawn(async move { + while let Some(prompt) = approval_rx.recv().await { + let notif = SdkNotification::BrowseApprovalNeeded { + session_id: appr_sid.clone(), + step: prompt.step, + tool_name: prompt.tool_name, + target_text: prompt.target_text, + url: prompt.url, + reason: prompt.reason, + }; + let _ = appr_notif_tx.send(notif); + // Note: prompt.reply is dropped here — the gate will treat + // an unreceived reply as a deny in Phase A. Phase B will + // wire BrowseApprovalReply to fulfill this oneshot. + } + }); + + // Spawn the browse run + let browse_req = BrowseRequest { + goal, + policy, + max_steps: max_steps.unwrap_or(cfg.browse_max_steps), + voice: false, + }; + let session_counter = Arc::clone(active_sessions); + tokio::spawn(async move { + let _ = run_browse( + browse_req, + &cfg, + all_tools_list, + current_url, + progress_tx, + approval_tx, + ) + .await; + session_counter.fetch_sub(1, Ordering::Relaxed); + }); + } + // ── Not yet implemented (Phase A stubs) ───────────────── SdkRequest::TurnStart { id, .. } | SdkRequest::TurnInterrupt { id, .. } @@ -302,6 +429,11 @@ impl SdkServer { }) .await?; } + + // BrowseApprovalReply has no request id — it's a fire-and-forget host reply. + SdkRequest::BrowseApprovalReply { .. } => { + // Phase B: route to the waiting browse session's approval channel. 
+ } } Ok(()) diff --git a/src/sdk/protocol.rs b/src/sdk/protocol.rs index ba0de80..3cb01b2 100644 --- a/src/sdk/protocol.rs +++ b/src/sdk/protocol.rs @@ -200,6 +200,25 @@ pub enum SdkRequest { #[serde(rename = "health/check")] HealthCheck { id: String }, + + #[serde(rename = "browse/start")] + BrowseStart { + id: String, + goal: String, + #[serde(default)] + policy: crate::browser::browse_loop::BrowsePolicy, + #[serde(default)] + max_steps: Option, + #[serde(default)] + yolo_ack: bool, + }, + + #[serde(rename = "browse/approval_reply")] + BrowseApprovalReply { + session_id: String, + step: u32, + approved: bool, + }, } // ── Responses (RustyClaw → Host, correlated by ID) ────────────────────────── @@ -249,6 +268,12 @@ pub enum SdkResponse { uptime_seconds: u64, }, + #[serde(rename = "browse/started")] + BrowseStarted { + id: String, + session_id: String, + }, + #[serde(rename = "error")] Error { id: String, @@ -355,4 +380,28 @@ pub enum SdkNotification { code: String, message: String, }, + + #[serde(rename = "browse/progress")] + BrowseProgress { + session_id: String, + step: u32, + action: String, + target: String, + }, + + #[serde(rename = "browse/approval_needed")] + BrowseApprovalNeeded { + session_id: String, + step: u32, + tool_name: String, + target_text: String, + url: String, + reason: String, + }, + + #[serde(rename = "browse/completed")] + BrowseCompleted { + session_id: String, + result: crate::browser::browse_loop::BrowseResult, + }, } diff --git a/src/settings.rs b/src/settings.rs index 307d32c..0a6af70 100644 --- a/src/settings.rs +++ b/src/settings.rs @@ -253,6 +253,15 @@ pub struct Settings { /// (`refs/rustyclaw/sessions/`) navigable via `/undo` and `/redo`. 
#[serde(rename = "autoCommit")] pub auto_commit: Option, + + #[serde(rename = "browseMaxSteps")] + pub browse_max_steps: Option, + + #[serde(rename = "browseApprovalPatterns")] + pub browse_approval_patterns: Option>, + + #[serde(rename = "browseDefaultPolicy")] + pub browse_default_policy: Option, } /// Settings for phase-declarative model routing. @@ -489,6 +498,11 @@ impl Settings { phase_router: other.phase_router.or(self.phase_router), auto_fix: other.auto_fix.or(self.auto_fix), auto_commit: other.auto_commit.or(self.auto_commit), + browse_max_steps: other.browse_max_steps.or(self.browse_max_steps), + browse_approval_patterns: other + .browse_approval_patterns + .or(self.browse_approval_patterns), + browse_default_policy: other.browse_default_policy.or(self.browse_default_policy), permissions: PermissionsConfig { // Union both lists — project additions stack on top of global allow: { diff --git a/src/tools/browser_tools.rs b/src/tools/browser_tools.rs index 41e4198..5d47086 100644 --- a/src/tools/browser_tools.rs +++ b/src/tools/browser_tools.rs @@ -432,3 +432,52 @@ impl Tool for BrowserWaitTool { Ok(ToolOutput::success(result)) } } + +// ── browse_done ────────────────────────────────────────────────────────────── + +/// Sentinel tool the model calls to signal the end of an autonomous `/browse` +/// run. Returns a `BROWSE_DONE` string the orchestrator parses to exit the loop. +pub struct BrowseDoneTool; + +impl BrowseDoneTool { + pub fn new() -> Self { + Self + } +} + +impl Default for BrowseDoneTool { + fn default() -> Self { + Self::new() + } +} + +#[async_trait] +impl Tool for BrowseDoneTool { + fn name(&self) -> &str { + "browse_done" + } + fn description(&self) -> &str { + "Signal that the browser goal is achieved or that you're stuck." 
+ } + fn input_schema(&self) -> serde_json::Value { + json!({ + "type": "object", + "properties": { + "summary": { "type": "string" }, + "achieved": { "type": "boolean" } + }, + "required": ["summary", "achieved"] + }) + } + async fn execute(&self, input: serde_json::Value, _ctx: &ToolContext) -> Result { + let summary = input["summary"] + .as_str() + .ok_or_else(|| anyhow!("missing required field: summary"))?; + let achieved = input["achieved"] + .as_bool() + .ok_or_else(|| anyhow!("missing required field: achieved"))?; + Ok(ToolOutput::success(format!( + "BROWSE_DONE achieved={achieved} summary={summary}" + ))) + } +} diff --git a/src/tools/mod.rs b/src/tools/mod.rs index 530a2c6..dc8de14 100644 --- a/src/tools/mod.rs +++ b/src/tools/mod.rs @@ -97,6 +97,10 @@ pub struct ToolContext { pub live_model: Option, pub live_api_key: Option, pub live_ollama_host: Option, + + /// Middleware chain: pre/post hooks around every tool call. + /// Default empty = no-op (existing behavior unchanged). + pub middlewares: crate::browser::middleware::MiddlewareChain, } impl std::fmt::Debug for ToolContext { @@ -126,6 +130,7 @@ impl ToolContext { live_model: None, live_api_key: None, live_ollama_host: None, + middlewares: Vec::new(), } } diff --git a/src/tui/app.rs b/src/tui/app.rs index 4c37cdb..4391721 100644 --- a/src/tui/app.rs +++ b/src/tui/app.rs @@ -678,6 +678,13 @@ pub struct App { /// Active file-watch state. `None` when no watcher is running. /// Dropping this stops watching. pub watcher: Option, + + /// Active browse approval prompt awaiting user input (A=approve, D=deny). + pub browse_approval: Option, + /// Receiver for browse progress events (active during a /browse run). + pub browse_progress_rx: Option>, + /// Receiver for browse approval prompts. + pub browse_approval_rx: Option>, } /// Format a raw model ID into a human-readable name like "Sonnet 4.6". 
@@ -808,6 +815,9 @@ impl App { cost_tracker: crate::cost::CostTracker::new(), browser_session: None, watcher: None, + browse_approval: None, + browse_progress_rx: None, + browse_approval_rx: None, } } @@ -1368,6 +1378,12 @@ impl App { self.voice_task = None; self.voice_stop_tx = None; } + AppEvent::VoiceBrowse(_goal) => { + // Recording state cleared here; run.rs handles the Browse dispatch. + self.voice_recording = false; + self.voice_task = None; + self.voice_stop_tx = None; + } AppEvent::PluginInstallDone { success, message } => { if success { self.entries.push(ChatEntry::system(message)); diff --git a/src/tui/events.rs b/src/tui/events.rs index a69e491..1640a6b 100644 --- a/src/tui/events.rs +++ b/src/tui/events.rs @@ -50,6 +50,8 @@ pub enum AppEvent { // through the AppEvent channel. /// Voice transcription completed — insert text into input buffer VoiceTranscription(String), + /// Voice transcription matched a browse prefix — dispatch as /browse + VoiceBrowse(String), /// Plugin install completed (success or failure) PluginInstallDone { success: bool, message: String }, /// GitHub upgrade check completed diff --git a/src/tui/render.rs b/src/tui/render.rs index b5ef945..13ce70d 100644 --- a/src/tui/render.rs +++ b/src/tui/render.rs @@ -139,6 +139,8 @@ pub fn draw(f: &mut Frame, app: &mut App) { if app.overlay.is_some() { draw_overlay(f, area, app, tc); + } else if app.browse_approval.is_some() { + draw_browse_approval(f, area, app, tc); } else if app.pending_permission.is_some() { draw_permission(f, area, app, tc); } else if app.pending_user_question.is_some() { @@ -879,6 +881,87 @@ fn context_window_for_model(model: &str) -> u64 { } } +// ── Browse approval dialog ──────────────────────────────────────────────────── + +fn draw_browse_approval(f: &mut Frame, area: Rect, app: &App, tc: ThemeColors) { + let Some(prompt) = &app.browse_approval else { + return; + }; + + let popup_w = (area.width * 6 / 10).max(50).min(area.width); + let popup_h = 
12_u16.max(8).min(area.height); + let x = area.x + (area.width.saturating_sub(popup_w)) / 2; + let y = area.y + (area.height.saturating_sub(popup_h)) / 2; + let popup = Rect { + x, + y, + width: popup_w, + height: popup_h, + }; + + f.render_widget(Clear, popup); + + let block = Block::default() + .borders(Borders::ALL) + .border_style(Style::default().fg(Color::Yellow)) + .title(Span::styled( + " Browse Approval ", + Style::default() + .fg(Color::Yellow) + .add_modifier(Modifier::BOLD), + )); + let inner = block.inner(popup); + f.render_widget(block, popup); + + let mut lines = vec![ + Line::raw(""), + Line::from(Span::styled( + " ⚠ Browse approval required", + Style::default() + .fg(Color::Yellow) + .add_modifier(Modifier::BOLD), + )), + Line::raw(""), + Line::from(Span::styled( + format!(" Action: {}", prompt.tool_name), + Style::default().fg(Color::White), + )), + Line::from(Span::styled( + format!(" Target: {}", prompt.target_text), + Style::default().fg(Color::White), + )), + Line::from(Span::styled( + format!(" URL: {}", prompt.url), + Style::default().fg(Color::White), + )), + Line::from(Span::styled( + format!(" Reason: {}", prompt.reason), + Style::default().fg(Color::White), + )), + Line::raw(""), + ]; + lines.push(Line::from(vec![ + Span::styled(" [", Style::default().fg(Color::DarkGray)), + Span::styled( + "A", + Style::default() + .fg(tc.assistant) + .add_modifier(Modifier::BOLD), + ), + Span::styled("]pprove [", Style::default().fg(Color::DarkGray)), + Span::styled( + "D", + Style::default().fg(Color::Red).add_modifier(Modifier::BOLD), + ), + Span::styled("]eny", Style::default().fg(Color::DarkGray)), + ])); + + f.render_widget( + Paragraph::new(Text::from(lines)).wrap(Wrap { trim: true }), + inner, + ); +} + // ── Permission dialog ───────────────────────────────────────────────────────── fn draw_permission(f: &mut Frame, area: Rect, app: &App, tc: ThemeColors) { diff --git a/src/tui/run.rs b/src/tui/run.rs index 471ed58..dd6d61e 100644 --- 
a/src/tui/run.rs +++ b/src/tui/run.rs @@ -871,6 +871,61 @@ async fn run_loop(mut config: Config, resume_id: Option) -> Result<()> { }); } + // ── Poll browse progress events ────────────────────────────────────── + if let Some(mut rx) = app.browse_progress_rx.take() { + let mut done = false; + while let Ok(event) = rx.try_recv() { + use crate::browser::browse_loop::BrowseProgress; + match event { + BrowseProgress::Started { .. } => { + // Already shown at dispatch time + } + BrowseProgress::Step { n, action, target } => { + app.entries.push(ChatEntry::system(format!( + " Step {n}: {action} {target}" + ))); + app.scroll_to_bottom(); + } + BrowseProgress::Nudge { level, text } => { + app.entries.push(ChatEntry::system(format!( + " ⚠ Nudge L{level}: {text}" + ))); + app.scroll_to_bottom(); + } + BrowseProgress::ApprovalNeeded { .. } => { + // Handled via approval_rx below + } + BrowseProgress::Completed(result) => { + let icon = if result.achieved { "✅" } else { "⚠" }; + app.entries.push(ChatEntry::system(format!( + "{icon} /browse done ({:?}): {}", result.reason, result.summary + ))); + app.scroll_to_bottom(); + app.finish_loading(); + // Clean up approval channel too + app.browse_approval_rx = None; + done = true; + break; + } + } + } + if !done { + // Put the receiver back — run is still in progress + app.browse_progress_rx = Some(rx); + } + } + + // Poll browse approval prompts + if let Some(mut rx) = app.browse_approval_rx.take() { + if let Ok(prompt) = rx.try_recv() { + app.browse_approval = Some(prompt); + } + // Put back if browse is still running + if app.browse_progress_rx.is_some() || app.browse_approval.is_some() { + app.browse_approval_rx = Some(rx); + } + } + // Uses cached term size — no syscall per frame; updated on Resize events. 
{ let needed = viewport_height(&app, last_term_cols, last_term_rows); @@ -1052,6 +1107,39 @@ async fn run_loop(mut config: Config, resume_id: Option) -> Result<()> { summary_len, }); } + AppEvent::VoiceBrowse(ref goal) => { + // Voice always uses Pattern policy — never Yolo (too easy + // to mis-transcribe destructive commands). + let goal_str = goal.clone(); + app.apply(ev); + let max = config.browse_max_steps; + app.entries.push(ChatEntry::system(format!( + "🌐 /browse (voice) — goal: {goal_str} (max {max} steps, policy: Pattern)" + ))); + app.scroll_to_bottom(); + app.start_loading(); + let (progress_tx, progress_rx) = tokio::sync::mpsc::channel(64); + let (approval_tx, approval_rx) = tokio::sync::mpsc::channel(4); + app.browse_progress_rx = Some(progress_rx); + app.browse_approval_rx = Some(approval_rx); + let current_url = std::sync::Arc::new(tokio::sync::Mutex::new(String::new())); + let cfg = config.clone(); + let all_tools = tools.to_vec(); + let browse_req = crate::browser::browse_loop::BrowseRequest { + goal: goal_str, + policy: crate::browser::browse_loop::BrowsePolicy::Pattern, + max_steps: max, + voice: true, + }; + tokio::spawn(async move { + let result = crate::browser::browse_loop::run_browse( + browse_req, &cfg, all_tools, current_url, progress_tx, approval_tx, + ).await; + if let Err(e) = result { + eprintln!("Voice browse error: {e}"); + } + }); + } other => app.apply(other), } match rx.try_recv() { @@ -1524,6 +1612,28 @@ async fn handle_key(ctx: KeyCtx<'_>) -> Result<()> { return Ok(()); } + // Browse approval dialog takes priority after permission dialog + if app.browse_approval.is_some() { + match key.code { + Char('a') | Char('A') => { + if let Some(prompt) = app.browse_approval.take() { + let _ = prompt.reply.send(true); + app.entries.push(ChatEntry::system(" ✓ Approved")); + app.scroll_to_bottom(); + } + } + Char('d') | Char('D') => { + if let Some(prompt) = app.browse_approval.take() { + let _ = prompt.reply.send(false); + 
app.entries.push(ChatEntry::system(" ✗ Denied")); + app.scroll_to_bottom(); + } + } + _ => {} // ignore other keys while prompt is active + } + return Ok(()); + } + // AskUser dialog takes priority after permission dialog if let Some(ref mut q) = app.pending_user_question { match key.code { @@ -1642,7 +1752,12 @@ async fn handle_key(ctx: KeyCtx<'_>) -> Result<()> { )); } Ok(text) => { - let _ = tx2.send(AppEvent::VoiceTranscription(text)); + if crate::voice::voice_routes_to_browse(&text) { + let goal = crate::voice::strip_browse_prefix(&text); + let _ = tx2.send(AppEvent::VoiceBrowse(goal)); + } else { + let _ = tx2.send(AppEvent::VoiceTranscription(text)); + } } Err(e) => { let _ = @@ -3864,6 +3979,46 @@ async fn handle_key(ctx: KeyCtx<'_>) -> Result<()> { ); app.entries.push(ChatEntry::system(msg)); } + CommandAction::Browse { goal, policy, max_steps } => { + let max = max_steps.unwrap_or(config.browse_max_steps); + app.entries.push(ChatEntry::system(format!( + "🌐 /browse started — goal: {goal} (max {max} steps, policy: {policy:?})" + ))); + app.scroll_to_bottom(); + app.start_loading(); + + // Create channels for progress events and approval prompts + let (progress_tx, progress_rx) = tokio::sync::mpsc::channel(64); + let (approval_tx, approval_rx) = tokio::sync::mpsc::channel(4); + app.browse_progress_rx = Some(progress_rx); + app.browse_approval_rx = Some(approval_rx); + + // Shared current-URL state + let current_url = std::sync::Arc::new(tokio::sync::Mutex::new(String::new())); + + let cfg = config.clone(); + let all_tools = tools.to_vec(); + + let browse_req = crate::browser::browse_loop::BrowseRequest { + goal, + policy, + max_steps: max, + voice: false, + }; + tokio::spawn(async move { + let result = crate::browser::browse_loop::run_browse( + browse_req, + &cfg, + all_tools, + current_url, + progress_tx, + approval_tx, + ).await; + if let Err(e) = result { + eprintln!("Browse error: {e}"); + } + }); + } CommandAction::BrowseUrl(url) => { // Ship a prompt 
// ── Browse prefix routing ─────────────────────────────────────────────────────

/// Spoken prefixes that send a voice transcript to autonomous browse mode.
///
/// Shared by [`voice_routes_to_browse`] and [`strip_browse_prefix`] so the two
/// can never disagree. Every entry is lowercase ASCII and ends with a space, so
/// a prefix only matches as leading word(s) followed by goal text. "find" is
/// deliberately excluded to avoid collisions with codebase/chat "find" intent.
// NOTE(review): "open " also matches requests such as "open the config file";
// confirm that routing those to the browser is intended.
const BROWSE_PREFIXES: [&str; 8] = [
    "browse ", "browser ", "web ", "go to ", "open ", "shop for ", "book ", "order ",
];

/// Byte length of the browse prefix that `trimmed` starts with, if any.
///
/// The comparison is ASCII-case-insensitive and done on the caller's own
/// string (never on a lowercased copy), so the returned length is always a
/// valid char boundary of `trimmed` and safe to slice at.
fn browse_prefix_len(trimmed: &str) -> Option<usize> {
    BROWSE_PREFIXES.iter().find_map(|prefix| {
        trimmed
            // `get` yields `None` instead of panicking when the cut would fall
            // inside a multi-byte character or past the end of the string.
            .get(..prefix.len())
            .filter(|head| head.eq_ignore_ascii_case(prefix))
            .map(str::len)
    })
}

/// Decide whether a voice transcript should enter autonomous browse mode.
///
/// Returns `true` when the trimmed transcript starts with one of
/// [`BROWSE_PREFIXES`], ignoring ASCII case. A bare prefix with no goal text
/// (e.g. `"browse"`) does not route, because the trailing space is required.
pub fn voice_routes_to_browse(transcript: &str) -> bool {
    browse_prefix_len(transcript.trim()).is_some()
}

/// Strip the browse prefix, leaving the goal text.
///
/// The goal keeps its original casing; extra whitespace between the prefix and
/// the goal is dropped. A transcript without a browse prefix is returned
/// trimmed but otherwise unchanged.
pub fn strip_browse_prefix(transcript: &str) -> String {
    let t = transcript.trim();
    match browse_prefix_len(t) {
        Some(len) => t[len..].trim_start().to_string(),
        None => t.to_string(),
    }
}

// ── Browse milestone TTS ──────────────────────────────────────────────────────

/// The milestones of a voice-driven browse run that are spoken aloud.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BrowseMilestone {
    /// Announced as "Searching for <text>".
    Start,
    /// The supplied text is spoken verbatim (presumably when the approval gate
    /// pauses the run; confirm against the browse loop).
    GateTrip,
    /// The supplied text is spoken verbatim.
    End,
}
+pub async fn speak_browse_milestone(milestone: BrowseMilestone, text: &str) { + let phrase = match milestone { + BrowseMilestone::Start => format!("Searching for {text}"), + BrowseMilestone::GateTrip => text.to_string(), + BrowseMilestone::End => text.to_string(), + }; + // Create a dummy stop channel — milestone phrases are short; we never need + // to cancel them mid-word. + let (_stop_tx, stop_rx) = tokio::sync::oneshot::channel::<()>(); + let _ = speak(&phrase, None, stop_rx).await; +} + +// ── Voice approval listener ─────────────────────────────────────────────────── + +/// Listen for a voice approve/deny reply during an approval prompt. +/// Returns true if the user said "confirm"/"yes"/"approve"/"ok", false otherwise. +/// Times out after `timeout_secs`, returning false on timeout. +pub async fn await_voice_approval(timeout_secs: u64) -> bool { + // Requires a recorder to be available; return deny if none found. + let backend = match find_recorder() { + Some(b) => b, + None => return false, + }; + + let (stop_tx, stop_rx) = tokio::sync::oneshot::channel::<()>(); + + // Record for at most timeout_secs then stop automatically. + let record_task = tokio::spawn(async move { + match start_recording(&backend).await { + Ok(mut child) => { + tokio::select! { + _ = stop_rx => { + if let Some(pid) = child.id() { + let _ = tokio::process::Command::new("kill") + .args(["-2", &pid.to_string()]) + .status() + .await; + } + let _ = child.wait().await; + } + _ = child.wait() => {} + } + } + Err(_) => {} + } + }); + + // Wait for the timeout then signal the recorder to stop. + tokio::time::sleep(std::time::Duration::from_secs(timeout_secs)).await; + let _ = stop_tx.send(()); + let _ = record_task.await; + + // Transcribe and check for affirmative keywords. 
+ match transcribe(None, None).await { + Ok(text) => { + let lower = text.trim().to_lowercase(); + lower.contains("confirm") + || lower.contains("yes") + || lower.contains("approve") + || lower.contains("ok") + } + Err(_) => false, + } +} + // ── Status display ──────────────────────────────────────────────────────────── pub fn voice_status(enabled: bool, tts_enabled: bool) -> String { diff --git a/tests/approval_gate_tests.rs b/tests/approval_gate_tests.rs new file mode 100644 index 0000000..b22252e --- /dev/null +++ b/tests/approval_gate_tests.rs @@ -0,0 +1,191 @@ +use rustyclaw::browser::approval_gate::{ApprovalGate, GateContext, GateVerdict}; + +fn gate() -> ApprovalGate { + ApprovalGate::default() +} + +fn ctx(tool: &str) -> GateContext { + GateContext { + tool_name: tool.into(), + ..Default::default() + } +} + +// 1. Read-only tools always pass. +#[test] +fn allows_plain_read_tools() { + let g = gate(); + for tool in &[ + "browser_navigate", + "browser_snapshot", + "browser_screenshot", + "browser_get_text", + "browser_wait", + "browse_done", + ] { + let c = ctx(tool); + assert_eq!(g.check(&c), GateVerdict::Allow, "tool {tool} should Allow"); + } +} + +// 2. /checkout → RequireConfirmation +#[test] +fn trips_on_checkout_url() { + let g = gate(); + let c = GateContext { + tool_name: "browser_click".into(), + url: "https://shop.example.com/checkout".into(), + ..Default::default() + }; + assert!(matches!(g.check(&c), GateVerdict::RequireConfirmation { .. })); +} + +// 3. Article about checkout should not trip (path is /articles/checkout-guide). +#[test] +fn does_not_trip_on_article_about_checkout() { + let g = gate(); + let c = GateContext { + tool_name: "browser_click".into(), + url: "https://blog.example.com/articles/checkout-guide".into(), + target_text: "Read more".into(), + ..Default::default() + }; + assert_eq!(g.check(&c), GateVerdict::Allow); +} + +// 4. 
// 4. "Confirm Purchase" → trips
#[test]
fn trips_on_confirm_purchase_button() {
    let context = GateContext {
        target_text: "Confirm Purchase".into(),
        ..ctx("browser_click")
    };
    let verdict = gate().check(&context);
    assert!(matches!(verdict, GateVerdict::RequireConfirmation { .. }));
}

// 5. "Start Free Trial" → trips
#[test]
fn trips_on_start_free_trial_autobill() {
    let context = GateContext {
        target_text: "Start Free Trial".into(),
        ..ctx("browser_click")
    };
    let verdict = gate().check(&context);
    assert!(matches!(verdict, GateVerdict::RequireConfirmation { .. }));
}

// 6. Bare "Submit" does NOT trip.
#[test]
fn does_not_trip_on_bare_submit() {
    let context = GateContext {
        target_text: "Submit".into(),
        ..ctx("browser_click")
    };
    assert_eq!(gate().check(&context), GateVerdict::Allow);
}

// 7. /oauth/authorize → trips
#[test]
fn trips_on_oauth_authorize_url() {
    let context = GateContext {
        url: "https://accounts.google.com/oauth/authorize?client_id=abc".into(),
        ..ctx("browser_click")
    };
    let verdict = gate().check(&context);
    assert!(matches!(verdict, GateVerdict::RequireConfirmation { .. }));
}

// 8. "Delete Account" → trips
#[test]
fn trips_on_delete_account() {
    let context = GateContext {
        target_text: "Delete Account".into(),
        ..ctx("browser_click")
    };
    let verdict = gate().check(&context);
    assert!(matches!(verdict, GateVerdict::RequireConfirmation { .. }));
}

// 9. "$12.99" → trips, reason contains "visible_price"
#[test]
fn trips_on_visible_price() {
    let context = GateContext {
        visible_prices: vec!["$12.99".into()],
        ..ctx("browser_click")
    };
    let verdict = gate().check(&context);
    match verdict {
        GateVerdict::Allow => panic!("expected RequireConfirmation for $12.99"),
        GateVerdict::RequireConfirmation { reason, .. } => {
            assert!(
                reason.contains("visible_price"),
                "reason should mention visible_price, got: {reason}"
            );
        }
    }
}

// 10. "$0.00" → Allow (zero price)
#[test]
fn zero_price_does_not_trip() {
    let context = GateContext {
        visible_prices: vec!["$0.00".into()],
        ..ctx("browser_click")
    };
    assert_eq!(gate().check(&context), GateVerdict::Allow);
}

// 11. Password field → trips
#[test]
fn trips_on_password_field() {
    let context = GateContext {
        form_field_signals: vec!["input:type=password".into()],
        ..ctx("browser_fill")
    };
    let verdict = gate().check(&context);
    assert!(matches!(verdict, GateVerdict::RequireConfirmation { .. }));
}

// 12. CC autocomplete → trips
#[test]
fn trips_on_cc_autocomplete() {
    let context = GateContext {
        form_field_signals: vec!["input:autocomplete=cc-number".into()],
        ..ctx("browser_fill")
    };
    let verdict = gate().check(&context);
    assert!(matches!(verdict, GateVerdict::RequireConfirmation { .. }));
}

// 13. User extension pattern matches → trips
#[test]
fn user_extension_patterns_append() {
    let extended_gate = ApprovalGate::with_user_patterns(vec!["force-merge".into()]);
    let context = GateContext {
        target_text: "force-merge into main".into(),
        ..ctx("browser_click")
    };
    let verdict = extended_gate.check(&context);
    assert!(matches!(verdict, GateVerdict::RequireConfirmation { .. }));
}

// 14. Invalid user regex is skipped — no panic, still Allow for benign context.
+#[test] +fn invalid_user_pattern_is_skipped_not_crash() { + let g = ApprovalGate::with_user_patterns(vec!["[".into()]); + let c = GateContext { + tool_name: "browser_click".into(), + target_text: "OK".into(), + ..Default::default() + }; + assert_eq!(g.check(&c), GateVerdict::Allow); +} diff --git a/tests/browse_config_tests.rs b/tests/browse_config_tests.rs new file mode 100644 index 0000000..788a789 --- /dev/null +++ b/tests/browse_config_tests.rs @@ -0,0 +1,17 @@ +#[test] +fn parses_browse_settings_from_json() { + let json = r#"{"browseMaxSteps": 75, "browseApprovalPatterns": ["force-merge"], "browseDefaultPolicy": "ask"}"#; + let s: rustyclaw::settings::Settings = serde_json::from_str(json).unwrap(); + assert_eq!(s.browse_max_steps, Some(75)); + assert_eq!( + s.browse_approval_patterns, + Some(vec!["force-merge".to_string()]) + ); + assert_eq!(s.browse_default_policy.as_deref(), Some("ask")); +} + +#[test] +fn config_default_max_steps_is_fifty() { + let cfg = rustyclaw::config::Config::default(); + assert_eq!(cfg.browse_max_steps, 50); +} diff --git a/tests/browse_done_tool_tests.rs b/tests/browse_done_tool_tests.rs new file mode 100644 index 0000000..bda8b81 --- /dev/null +++ b/tests/browse_done_tool_tests.rs @@ -0,0 +1,37 @@ +use rustyclaw::api::types::ToolResultContent; +use rustyclaw::tools::{browser_tools::BrowseDoneTool, Tool, ToolContext}; +use serde_json::json; + +fn extract_text(output: &rustyclaw::tools::ToolOutput) -> String { + output + .content + .iter() + .map(|c| { + let ToolResultContent::Text { text } = c; + text.as_str() + }) + .collect::>() + .join("") +} + +#[tokio::test] +async fn browse_done_records_summary_and_achieved() { + let tool = BrowseDoneTool::new(); + let ctx = ToolContext::new(std::env::current_dir().unwrap()); + let input = json!({ "summary": "Found flight", "achieved": true }); + let result = tool.execute(input, &ctx).await.unwrap(); + let text = extract_text(&result); + assert!(text.contains("BROWSE_DONE"), "expected 
BROWSE_DONE sentinel in: {text}"); + assert!(text.contains("achieved=true"), "expected achieved=true in: {text}"); + assert!(text.contains("Found flight"), "expected summary in: {text}"); +} + +#[tokio::test] +async fn browse_done_handles_not_achieved() { + let tool = BrowseDoneTool::new(); + let ctx = ToolContext::new(std::env::current_dir().unwrap()); + let input = json!({ "summary": "Stuck", "achieved": false }); + let result = tool.execute(input, &ctx).await.unwrap(); + let text = extract_text(&result); + assert!(text.contains("achieved=false"), "expected achieved=false in: {text}"); +} diff --git a/tests/browse_integration.rs b/tests/browse_integration.rs new file mode 100644 index 0000000..1b62d3f --- /dev/null +++ b/tests/browse_integration.rs @@ -0,0 +1,199 @@ +//! Integration tests for the autonomous browser agent. +//! +//! Tests middleware composition, sentinel parsing, type serialization, +//! stagnation termination, and denial-counter termination. +//! No network, no Chrome — all in-process with channels. 
+ +use rustyclaw::browser::approval_gate::{ApprovalGate, ApprovalGateMiddleware, GateContext, GateVerdict}; +use rustyclaw::browser::browse_loop::{BrowsePolicy, BrowseReason, BrowseResult}; +use rustyclaw::browser::loop_detector::LoopDetectorMiddleware; +use rustyclaw::browser::middleware::{MiddlewareVerdict, ToolMiddleware}; +use serde_json::json; +use std::sync::atomic::AtomicU32; +use std::sync::Arc; + +// ── Approval gate (standalone) ────────────────────────────────────────────── + +#[test] +fn gate_allows_read_only_tools() { + let gate = ApprovalGate::default(); + let ctx = GateContext { + tool_name: "browser_navigate".into(), + url: "https://evil.com/checkout".into(), + target_text: "Go".into(), + ..Default::default() + }; + assert!(matches!(gate.check(&ctx), GateVerdict::Allow)); +} + +#[test] +fn gate_trips_on_checkout_url() { + let gate = ApprovalGate::default(); + let ctx = GateContext { + tool_name: "browser_click".into(), + url: "https://shop.example.com/checkout".into(), + target_text: "Continue".into(), + ..Default::default() + }; + assert!(matches!(gate.check(&ctx), GateVerdict::RequireConfirmation { .. 
})); +} + +#[test] +fn gate_ignores_article_about_checkout() { + let gate = ApprovalGate::default(); + let ctx = GateContext { + tool_name: "browser_click".into(), + url: "https://blog.example.com/articles/checkout-guide".into(), + target_text: "Read More".into(), + ..Default::default() + }; + assert!(matches!(gate.check(&ctx), GateVerdict::Allow)); +} + +// ── Loop detector middleware ──────────────────────────────────────────────── + +#[tokio::test] +async fn loop_detector_middleware_fires_nudge() { + let (nudge_tx, mut nudge_rx) = tokio::sync::mpsc::channel(32); + let mw = LoopDetectorMiddleware::new(nudge_tx); + + // 3 identical after_tool calls → stagnation + for _ in 0..3 { + mw.after_tool("browser_click", "same page content").await; + } + + let nudge = nudge_rx.try_recv().expect("should have received a nudge"); + assert!(nudge.contains("different approach")); +} + +#[tokio::test] +async fn loop_detector_stops_at_level_three() { + let (nudge_tx, mut nudge_rx) = tokio::sync::mpsc::channel(32); + let mw = LoopDetectorMiddleware::new(nudge_tx); + + // Fire 3 levels of nudges — each fires after 3 identical after_tool calls + // But check_stagnation fires every call once threshold is met + for _ in 0..9 { + mw.after_tool("browser_click", "same").await; + } + + // Drain nudges + let mut nudges = Vec::new(); + while let Ok(n) = nudge_rx.try_recv() { + nudges.push(n); + } + assert!(!nudges.is_empty(), "should have received nudges"); + assert!(nudges.last().unwrap().contains("Stopping"), "last nudge should be terminal"); + + // After L3, before_tool should return Deny + let verdict = mw.before_tool("browser_click", &json!({})).await; + assert!(matches!(verdict, MiddlewareVerdict::Deny { .. 
})); + assert!(mw.is_stopped()); +} + +#[tokio::test] +async fn loop_detector_resets_on_navigate() { + let (nudge_tx, _nudge_rx) = tokio::sync::mpsc::channel(32); + let mw = LoopDetectorMiddleware::new(nudge_tx); + + mw.after_tool("browser_click", "same").await; + mw.after_tool("browser_click", "same").await; + // Navigate resets + mw.after_tool("browser_navigate", "new page").await; + mw.after_tool("browser_click", "same").await; + // Only 1 click after reset — not enough for stagnation + assert!(!mw.is_stopped()); +} + +// ── Approval gate middleware ──────────────────────────────────────────────── + +#[tokio::test] +async fn approval_middleware_allows_read_tools() { + let gate = ApprovalGate::default(); + let current_url = Arc::new(tokio::sync::Mutex::new("https://example.com".into())); + let (approval_tx, _) = tokio::sync::mpsc::channel(4); + let step = Arc::new(AtomicU32::new(0)); + + let mw = ApprovalGateMiddleware::new(gate, BrowsePolicy::Pattern, current_url, approval_tx, step); + + let verdict = mw.before_tool("browser_navigate", &json!({"url": "https://example.com"})).await; + assert!(matches!(verdict, MiddlewareVerdict::Allow)); +} + +#[tokio::test] +async fn approval_middleware_yolo_allows_everything() { + let gate = ApprovalGate::default(); + let current_url = Arc::new(tokio::sync::Mutex::new("https://shop.com/checkout".into())); + let (approval_tx, _) = tokio::sync::mpsc::channel(4); + let step = Arc::new(AtomicU32::new(0)); + + let mw = ApprovalGateMiddleware::new(gate, BrowsePolicy::Yolo, current_url, approval_tx, step); + + let verdict = mw.before_tool("browser_click", &json!({"ref": "@e1"})).await; + assert!(matches!(verdict, MiddlewareVerdict::Allow)); +} + +#[tokio::test] +async fn approval_middleware_denies_on_dropped_channel() { + let gate = ApprovalGate::default(); + let current_url = Arc::new(tokio::sync::Mutex::new("https://shop.com/checkout".into())); + let (approval_tx, approval_rx) = tokio::sync::mpsc::channel(4); + let step = 
Arc::new(AtomicU32::new(0)); + + drop(approval_rx); + + let mw = ApprovalGateMiddleware::new(gate, BrowsePolicy::Pattern, current_url, approval_tx, step); + + let verdict = mw.before_tool("browser_click", &json!({"ref": "@e1"})).await; + assert!(matches!(verdict, MiddlewareVerdict::Deny { .. })); +} + +// ── Type serialization ────────────────────────────────────────────────────── + +#[test] +fn browse_result_roundtrip() { + let result = BrowseResult { + achieved: true, + summary: "Found cheapest flight: $847 United".into(), + reason: BrowseReason::Done, + steps_used: 12, + final_url: Some("https://flights.example.com/results".into()), + }; + let json = serde_json::to_string(&result).unwrap(); + let parsed: BrowseResult = serde_json::from_str(&json).unwrap(); + assert!(parsed.achieved); + assert_eq!(parsed.reason, BrowseReason::Done); + assert_eq!(parsed.steps_used, 12); + assert!(parsed.summary.contains("$847")); +} + +#[test] +fn browse_policy_serializes_lowercase() { + assert_eq!(serde_json::to_string(&BrowsePolicy::Pattern).unwrap(), r#""pattern""#); + assert_eq!(serde_json::to_string(&BrowsePolicy::Yolo).unwrap(), r#""yolo""#); + assert_eq!(serde_json::to_string(&BrowsePolicy::Ask).unwrap(), r#""ask""#); +} + +#[test] +fn browse_reason_serializes_snake_case() { + assert_eq!(serde_json::to_string(&BrowseReason::StepCap).unwrap(), r#""step_cap""#); + assert_eq!(serde_json::to_string(&BrowseReason::BrowserCrashed).unwrap(), r#""browser_crashed""#); + assert_eq!(serde_json::to_string(&BrowseReason::UserDenied).unwrap(), r#""user_denied""#); +} + +// ── Voice routing ─────────────────────────────────────────────────────────── + +#[test] +fn voice_routes_browse_not_find() { + use rustyclaw::voice::{voice_routes_to_browse, strip_browse_prefix}; + + assert!(voice_routes_to_browse("browse find flights to Tokyo")); + assert!(voice_routes_to_browse("book a hotel in Paris")); + assert!(voice_routes_to_browse("go to flights.google.com")); + 
assert!(!voice_routes_to_browse("find the bug in my code")); + assert!(!voice_routes_to_browse("search for todo items")); + assert!(!voice_routes_to_browse("what time is it")); + + assert_eq!(strip_browse_prefix("browse Find flights"), "Find flights"); + assert_eq!(strip_browse_prefix("Book a Hotel"), "a Hotel"); +} diff --git a/tests/browse_parse_tests.rs b/tests/browse_parse_tests.rs new file mode 100644 index 0000000..879cda9 --- /dev/null +++ b/tests/browse_parse_tests.rs @@ -0,0 +1,48 @@ +use rustyclaw::commands::{parse_browse_command, CommandAction}; +use rustyclaw::browser::browse_loop::BrowsePolicy; + +#[test] +fn parses_plain_browse() { + match parse_browse_command("find the cheapest flight") { + CommandAction::Browse { goal, policy, max_steps } => { + assert_eq!(goal, "find the cheapest flight"); + assert_eq!(policy, BrowsePolicy::Pattern); + assert_eq!(max_steps, None); + } + _ => panic!("wrong variant"), + } +} + +#[test] +fn parses_yolo_flag() { + match parse_browse_command("--yolo book the flight") { + CommandAction::Browse { policy, goal, .. } => { + assert_eq!(policy, BrowsePolicy::Yolo); + assert_eq!(goal, "book the flight"); + } + _ => panic!(), + } +} + +#[test] +fn parses_max_steps() { + match parse_browse_command("--max-steps 100 research X") { + CommandAction::Browse { max_steps, goal, .. 
} => { + assert_eq!(max_steps, Some(100)); + assert_eq!(goal, "research X"); + } + _ => panic!(), + } +} + +#[test] +fn parses_ask_and_max_steps_combined() { + match parse_browse_command("--ask --max-steps 25 quick check") { + CommandAction::Browse { policy, max_steps, goal } => { + assert_eq!(policy, BrowsePolicy::Ask); + assert_eq!(max_steps, Some(25)); + assert_eq!(goal, "quick check"); + } + _ => panic!(), + } +} diff --git a/tests/loop_detector_tests.rs b/tests/loop_detector_tests.rs new file mode 100644 index 0000000..e72ab6d --- /dev/null +++ b/tests/loop_detector_tests.rs @@ -0,0 +1,103 @@ +use rustyclaw::browser::loop_detector::{fingerprint_action, LoopDetector}; + +// 1. Same inputs → same hash +#[test] +fn fingerprint_equality_same_inputs() { + let h1 = fingerprint_action("click", "#submit", ""); + let h2 = fingerprint_action("click", "#submit", ""); + assert_eq!(h1, h2); +} + +// 2. Different target → different hash +#[test] +fn fingerprint_differs_on_target() { + let h1 = fingerprint_action("click", "#submit", ""); + let h2 = fingerprint_action("click", "#cancel", ""); + assert_ne!(h1, h2); +} + +// 3. 11 records → window capped at 10 +#[test] +fn window_prunes_oldest_at_eleven() { + let mut d = LoopDetector::new(); + for i in 0..11 { + d.record_action("click", &format!("#{i}"), "page"); + } + assert_eq!(d.window_len(), 10); +} + +// 4. Three identical actions → level-1 nudge ("different approach") +#[test] +fn three_identical_trips_level_one_nudge() { + let mut d = LoopDetector::new(); + for _ in 0..3 { + d.record_action("click", "#btn", "same page text"); + } + let nudge = d.check_stagnation().expect("expected a nudge"); + assert!(nudge.contains("different approach"), "got: {nudge}"); +} + +// 5. 
After level 1, three more identical → level-2 nudge ("multiple times") +#[test] +fn three_more_identical_escalate_to_level_two() { + let mut d = LoopDetector::new(); + // Trigger level 1 + for _ in 0..3 { + d.record_action("click", "#btn", "same page text"); + } + d.check_stagnation(); + // Trigger level 2 — window already has 3 identical, check_stagnation consumed level 0. + // We need to push 3 more identical entries so tail is still identical. + for _ in 0..3 { + d.record_action("click", "#btn", "same page text"); + } + let nudge = d.check_stagnation().expect("expected escalated nudge"); + assert!(nudge.contains("multiple times"), "got: {nudge}"); +} + +// 6. Third check → terminal "Stopping" nudge +#[test] +fn level_three_is_terminal_stop() { + let mut d = LoopDetector::new(); + for _ in 0..3 { + d.record_action("click", "#btn", "same page text"); + } + d.check_stagnation(); // level 0 → emit NUDGES[0], nudge_level becomes 1 + for _ in 0..3 { + d.record_action("click", "#btn", "same page text"); + } + d.check_stagnation(); // level 1 → emit NUDGES[1], nudge_level becomes 2 + for _ in 0..3 { + d.record_action("click", "#btn", "same page text"); + } + let nudge = d.check_stagnation().expect("expected terminal nudge"); + assert!(nudge.contains("Stopping"), "got: {nudge}"); +} + +// 7. reset() clears window and resets nudge level +#[test] +fn reset_clears_window_and_nudge_level() { + let mut d = LoopDetector::new(); + for _ in 0..3 { + d.record_action("click", "#btn", "same page text"); + } + d.check_stagnation(); // consumes level 0 + d.reset(); + assert_eq!(d.window_len(), 0); + // After reset, 3 identical actions should produce level-1 nudge again + for _ in 0..3 { + d.record_action("click", "#btn", "same page text"); + } + let nudge = d.check_stagnation().expect("expected nudge after reset"); + assert!(nudge.contains("different approach"), "got: {nudge}"); +} + +// 8. 
Same action but different page_text each time → no stagnation +#[test] +fn page_change_breaks_stagnation() { + let mut d = LoopDetector::new(); + for i in 0..5 { + d.record_action("click", "#btn", &format!("page content {i}")); + } + assert!(d.check_stagnation().is_none()); +} diff --git a/tests/middleware_smoke.rs b/tests/middleware_smoke.rs new file mode 100644 index 0000000..f640b1c --- /dev/null +++ b/tests/middleware_smoke.rs @@ -0,0 +1,140 @@ +//! Smoke tests for the ToolMiddleware trait. + +use async_trait::async_trait; +use rustyclaw::browser::middleware::{MiddlewareVerdict, ToolMiddleware}; +use serde_json::Value; +use std::sync::{Arc, Mutex}; + +// ── RecordingMiddleware ────────────────────────────────────────────── + +/// Records all before/after calls for assertion. +struct RecordingMiddleware { + log: Arc>>, +} + +#[async_trait] +impl ToolMiddleware for RecordingMiddleware { + async fn before_tool(&self, tool_name: &str, _input: &Value) -> MiddlewareVerdict { + self.log + .lock() + .unwrap() + .push(format!("before:{tool_name}")); + MiddlewareVerdict::Allow + } + + async fn after_tool(&self, tool_name: &str, output: &str) { + self.log + .lock() + .unwrap() + .push(format!("after:{tool_name}:{output}")); + } +} + +#[tokio::test] +async fn recording_middleware_fires_before_and_after() { + let log = Arc::new(Mutex::new(Vec::::new())); + let mw = RecordingMiddleware { log: log.clone() }; + + let verdict = mw.before_tool("bash", &serde_json::json!({"cmd": "ls"})).await; + assert!(matches!(verdict, MiddlewareVerdict::Allow)); + + mw.after_tool("bash", "file1.rs\nfile2.rs").await; + + let entries = log.lock().unwrap(); + assert_eq!(entries.len(), 2); + assert_eq!(entries[0], "before:bash"); + assert_eq!(entries[1], "after:bash:file1.rs\nfile2.rs"); +} + +// ── Denier ─────────────────────────────────────────────────────────── + +/// Always denies tool execution. 
+struct Denier { + reason: String, +} + +#[async_trait] +impl ToolMiddleware for Denier { + async fn before_tool(&self, _tool_name: &str, _input: &Value) -> MiddlewareVerdict { + MiddlewareVerdict::Deny { + reason: self.reason.clone(), + } + } + + async fn after_tool(&self, _tool_name: &str, _output: &str) { + // Never called when denied. + } +} + +#[tokio::test] +async fn denier_middleware_returns_deny() { + let mw = Denier { + reason: "not allowed".into(), + }; + + let verdict = mw.before_tool("file_write", &serde_json::json!({})).await; + match verdict { + MiddlewareVerdict::Deny { reason } => { + assert_eq!(reason, "not allowed"); + } + other => panic!("Expected Deny, got {other:?}"), + } +} + +// ── RequireConfirmation ────────────────────────────────────────────── + +struct ConfirmationRequester; + +#[async_trait] +impl ToolMiddleware for ConfirmationRequester { + async fn before_tool(&self, tool_name: &str, _input: &Value) -> MiddlewareVerdict { + MiddlewareVerdict::RequireConfirmation { + reason: format!("{tool_name} needs approval"), + detail: "destructive action".into(), + } + } + + async fn after_tool(&self, _tool_name: &str, _output: &str) {} +} + +#[tokio::test] +async fn require_confirmation_variant() { + let mw = ConfirmationRequester; + let verdict = mw.before_tool("bash", &serde_json::json!({})).await; + match verdict { + MiddlewareVerdict::RequireConfirmation { reason, detail } => { + assert_eq!(reason, "bash needs approval"); + assert_eq!(detail, "destructive action"); + } + other => panic!("Expected RequireConfirmation, got {other:?}"), + } +} + +// ── Chain ordering ─────────────────────────────────────────────────── + +#[tokio::test] +async fn middleware_chain_short_circuits_on_deny() { + let log = Arc::new(Mutex::new(Vec::::new())); + let recorder = Arc::new(RecordingMiddleware { log: log.clone() }); + let denier = Arc::new(Denier { + reason: "blocked".into(), + }); + + // Denier first, recorder second — recorder should never fire. 
+ let chain: Vec> = vec![denier, recorder]; + + let input = serde_json::json!({}); + let mut denied = false; + for mw in &chain { + match mw.before_tool("bash", &input).await { + MiddlewareVerdict::Allow => {} + MiddlewareVerdict::Deny { .. } | MiddlewareVerdict::RequireConfirmation { .. } => { + denied = true; + break; + } + } + } + + assert!(denied); + assert!(log.lock().unwrap().is_empty(), "recorder should not have fired"); +} diff --git a/tests/voice_routing_tests.rs b/tests/voice_routing_tests.rs new file mode 100644 index 0000000..74a5dd5 --- /dev/null +++ b/tests/voice_routing_tests.rs @@ -0,0 +1,27 @@ +use rustyclaw::voice::{voice_routes_to_browse, strip_browse_prefix}; + +#[test] +fn routes_browse_prefix() { + assert!(voice_routes_to_browse("browse find the flight")); + assert!(voice_routes_to_browse("Browser open flights")); + assert!(voice_routes_to_browse("web find something")); + assert!(voice_routes_to_browse("go to amazon.com")); + assert!(voice_routes_to_browse("book a flight")); + assert!(voice_routes_to_browse("shop for coffee")); + assert!(voice_routes_to_browse("order pizza")); +} + +#[test] +fn does_not_route_plain_find() { + assert!(!voice_routes_to_browse("find the bug in my code")); + assert!(!voice_routes_to_browse("what's the capital of france")); + assert!(!voice_routes_to_browse("search for todo items")); + assert!(!voice_routes_to_browse("look up documentation")); +} + +#[test] +fn strip_preserves_case() { + assert_eq!(strip_browse_prefix("browse Find Tokyo flights"), "Find Tokyo flights"); + assert_eq!(strip_browse_prefix("Book a Hotel in Paris"), "a Hotel in Paris"); + assert_eq!(strip_browse_prefix("GO TO example.com"), "example.com"); +}