Skip to content
Merged
16 changes: 16 additions & 0 deletions src/browser/actions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,22 @@ pub async fn navigate(client: &CdpClient, url: &str, timeout_ms: u64) -> Result<
Ok((title, 200))
}

/// Ask the page for its current URL by evaluating `document.location.href`
/// through the `Runtime.evaluate` command.
///
/// Returns `Some(url)` when the reply carries a string at `result.value`.
/// Returns `None` when sending the command fails or when that field is
/// absent or not a string.
pub async fn current_url(client: &CdpClient) -> Option<String> {
    // `returnByValue` asks for the evaluated value itself in the reply.
    let params = json!({
        "expression": "document.location.href",
        "returnByValue": true,
    });

    // Any transport/protocol error is deliberately collapsed into `None`.
    let Ok(reply) = client.send("Runtime.evaluate", params).await else {
        return None;
    };

    reply["result"]["value"].as_str().map(str::to_owned)
}

/// Click an element by @ref.
pub async fn click(session: &mut BrowserSession, element_ref: &str) -> Result<String> {
let node_id = session.resolve_ref(element_ref)?;
Expand Down
77 changes: 65 additions & 12 deletions src/browser/approval_gate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,12 @@ pub struct ApprovalGateMiddleware {
denial_counts: Mutex<HashMap<String, u32>>,
/// Set when the same action is denied twice — triggers session termination.
user_denied: AtomicBool,
/// When true, also listen for a spoken "yes/approve" alongside the keyboard reply.
voice: bool,
/// Optional handle to the live browser session — used to resolve an @eN
/// ref to its real button label so button-text patterns (e.g. "buy now")
/// can actually match. None in tests where no browser is attached.
browser_session: Option<Arc<tokio::sync::Mutex<crate::browser::BrowserSession>>>,
}

impl ApprovalGateMiddleware {
Expand All @@ -239,6 +245,7 @@ impl ApprovalGateMiddleware {
current_url: Arc<tokio::sync::Mutex<String>>,
approval_tx: mpsc::Sender<ApprovalPrompt>,
step_counter: Arc<AtomicU32>,
voice: bool,
) -> Self {
Self {
gate,
Expand All @@ -248,9 +255,21 @@ impl ApprovalGateMiddleware {
step_counter,
denial_counts: Mutex::new(HashMap::new()),
user_denied: AtomicBool::new(false),
voice,
browser_session: None,
}
}

/// Builder-style setter for the optional browser session handle.
///
/// Consumes the middleware, stores `session` in `browser_session`
/// (replacing whatever was there, including with `None`), and hands the
/// middleware back so calls can be chained after construction.
pub fn with_browser_session(
    self,
    session: Option<Arc<tokio::sync::Mutex<crate::browser::BrowserSession>>>,
) -> Self {
    let mut configured = self;
    configured.browser_session = session;
    configured
}

/// Returns true if the user denied the same action twice, triggering termination.
pub fn is_user_denied(&self) -> bool {
self.user_denied.load(Ordering::SeqCst)
Expand Down Expand Up @@ -280,11 +299,28 @@ impl ToolMiddleware for ApprovalGateMiddleware {
}

let url = self.current_url.lock().await.clone();
let target_text = input["ref"]
// `ref_or_selector` is the raw identifier (e.g. "@e2" or ".btn-submit").
// It's what we use for the denial-counter key so the same action is
// tracked consistently across retries.
let ref_or_selector = input["ref"]
.as_str()
.or_else(|| input["selector"].as_str())
.unwrap_or("")
.to_string();
// `target_text` is what we match against `button_patterns`. For @eN
// refs, resolve to the element's accessible name via the browser
// session's ref-name map; otherwise fall back to the raw identifier.
let target_text = if ref_or_selector.starts_with('@')
&& let Some(session_arc) = &self.browser_session
{
let session = session_arc.lock().await;
session
.resolve_ref_name(&ref_or_selector)
.map(|s| s.to_string())
.unwrap_or_else(|| ref_or_selector.clone())
} else {
ref_or_selector.clone()
};

let gate_ctx = GateContext {
tool_name: tool_name.to_string(),
Expand Down Expand Up @@ -313,7 +349,8 @@ impl ToolMiddleware for ApprovalGateMiddleware {
MiddlewareVerdict::Allow
}
GateVerdict::RequireConfirmation { reason, .. } => {
let step = self.step_counter.load(Ordering::Relaxed);
// +1 because the step_emitter middleware increments after the gate runs.
let step = self.step_counter.load(Ordering::Relaxed) + 1;
let (tx, rx) = oneshot::channel();
let prompt = ApprovalPrompt {
step,
Expand All @@ -330,14 +367,34 @@ impl ToolMiddleware for ApprovalGateMiddleware {
};
}
use tokio::time::{timeout, Duration};
match timeout(Duration::from_secs(60), rx).await {
Ok(Ok(true)) => {
// If voice is on, race the keyboard reply against a voice-approval
// listener. Whichever resolves first wins. Voice only contributes
// an Approve vote (false/timeout is ignored unless no keyboard reply
// arrives either).
let approved_opt: Option<bool> = if self.voice {
tokio::select! {
kb = timeout(Duration::from_secs(60), rx) => match kb {
Ok(Ok(b)) => Some(b),
_ => None,
},
voice_yes = crate::voice::await_voice_approval(60) => {
if voice_yes { Some(true) } else { None }
}
}
} else {
match timeout(Duration::from_secs(60), rx).await {
Ok(Ok(b)) => Some(b),
_ => None,
}
};
match approved_opt {
Some(true) => {
// Approved — clear denial counter for this action.
let key = format!("{tool_name}:{target_text}");
self.denial_counts.lock().unwrap_or_else(|e| e.into_inner()).remove(&key);
MiddlewareVerdict::Allow
}
Ok(Ok(false)) => {
Some(false) => {
// User explicitly denied — increment counter; terminate after 2 denials.
let key = format!("{tool_name}:{target_text}");
let count = {
Expand All @@ -355,19 +412,15 @@ impl ToolMiddleware for ApprovalGateMiddleware {
}
MiddlewareVerdict::Deny { reason }
}
Ok(Err(_)) => MiddlewareVerdict::Deny {
reason: "approval channel dropped".into(),
},
Err(_) => MiddlewareVerdict::Deny {
reason: "approval timed out (60s)".into(),
None => MiddlewareVerdict::Deny {
reason: "approval timed out or channel dropped".into(),
},
}
}
}
}

/// Post-tool hook. As written, it adds one to the shared `step_counter`;
/// both arguments (`_tool_name`, `_output`) are ignored.
async fn after_tool(&self, _tool_name: &str, _output: &str) {
// Increment step counter after each tool execution.
self.step_counter.fetch_add(1, Ordering::Relaxed);
// NOTE(review): the comment below says step counting belongs to
// StepEmitterMiddleware, which conflicts with the increment above.
// Confirm whether the `fetch_add` should still be here — if both
// middlewares increment, each tool call would be counted twice.
// Step counting is owned by StepEmitterMiddleware (runs after this middleware).
}
}
Loading
Loading