Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion crates/browser-use-agent/src/tools/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -741,7 +741,7 @@ pub mod definitions {
pub fn update_goal() -> ToolDefinition {
ToolDefinition {
name: "update_goal".to_string(),
description: "Update the existing goal.\nUse this tool only to mark the goal achieved or genuinely blocked.\nSet status to `complete` only when the objective has actually been achieved and no required work remains.\nSet status to `blocked` only when the same blocking condition has repeated for at least three consecutive goal turns, counting the original/user-triggered turn and any automatic continuations, and the agent cannot make meaningful progress without user input or an external-state change.\nIf the user resumes a goal that was previously marked `blocked`, treat the resumed run as a fresh blocked audit. If the same blocking condition then repeats for at least three consecutive resumed goal turns, set status to `blocked` again.\nOnce the blocked threshold is satisfied, do not keep reporting that you are still blocked while leaving the goal active; set status to `blocked`.\nDo not use `blocked` merely because the work is hard, slow, uncertain, incomplete, or would benefit from clarification.\nDo not mark a goal complete merely because its budget is nearly exhausted or because you are stopping work.\nYou cannot use this tool to pause, resume, budget-limit, or usage-limit a goal; those status changes are controlled by the user or system.\nWhen marking a budgeted goal achieved with status `complete`, report the final token usage from the tool result to the user.".to_string(),
description: "Update the existing goal. Set status to `complete` only when the objective has actually been achieved and no required work remains; set status to `blocked` only when the same blocking condition has repeated for at least three consecutive goal turns (counting the original/user-triggered turn and any automatic continuations, and restarting a fresh audit when a previously blocked goal is resumed) and the agent cannot progress without user input or an external-state change.\nDo not use `blocked` merely because the work is hard, slow, uncertain, incomplete, or would benefit from clarification; do not mark complete merely because the budget is nearly exhausted or you are stopping; and do not use this tool to pause, resume, budget-limit, or usage-limit a goal (those are controlled by the user or system). When marking a budgeted goal `complete`, report the final token usage from the tool result to the user.".to_string(),
input_schema: json!({
"type": "object",
"properties": {
Expand Down
132 changes: 122 additions & 10 deletions crates/browser-use-browser/src/browser_script_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import base64
import gzip
import ipaddress
import json
import math
import os
Expand Down Expand Up @@ -1694,16 +1695,114 @@ def json(self):
raise ValueError(f"request failed for {self.url}: {self.error}")


def http_get(url, headers=None, timeout=20.0, binary=None):
def _is_private_or_local_host(host):
"""True for hosts the fetch proxy must never see: loopback, RFC1918/link-local
ranges, .local/.internal-style suffixes, and dotless intranet shortnames.
Routing these through the remote proxy would leak the URL/headers off-box and
fetch the WRONG target (the proxy's localhost, not the caller's)."""
host = str(host or "").strip().lower().rstrip(".").strip("[]")
if not host:
return True
if host == "localhost" or host.endswith(".localhost"):
return True
if host.endswith((".local", ".internal", ".lan", ".intranet", ".corp", ".home.arpa")):
return True
try:
ip = ipaddress.ip_address(host)
except ValueError:
return "." not in host
return ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved or ip.is_unspecified


class _ProxyFetchResponse:
"""Response shim mirroring fetch-use's FetchResponse attribute surface."""

__slots__ = ("status_code", "status", "headers", "url", "text", "content")

def __init__(self, status_code, headers, url, body, body_b64, is_binary):
self.status_code = status_code
self.status = status_code
self.headers = headers or {}
self.url = url
self.text = body or ""
if is_binary and body_b64:
self.content = base64.b64decode(body_b64)
else:
self.content = (body or "").encode("utf-8", errors="replace")


def _fetch_use_sync(url, headers=None, timeout_ms=30000, method="GET", body=None):
"""Vendored minimal Browser-Use Fetch client (mirrors the `fetch-use` pkg).

POSTs through fetch.browser-use.com so requests carry Chrome TLS
fingerprinting + rotating proxy IPs — the same un-blockable path browser-use
uses — instead of a bare urllib request that bot-protection blocks. Vendored
so it works even when the `fetch_use` package isn't installed in the sandbox.
"""
import uuid as _uuid

api_key = os.environ.get("BROWSER_USE_API_KEY", "")
if not api_key:
raise RuntimeError("BROWSER_USE_API_KEY not set")
service = (os.environ.get("FETCH_USE_URL") or "https://fetch.browser-use.com").rstrip("/")
session_id = (os.environ.get("SESSION_ID") or str(_uuid.uuid4()))[:36]
payload = {
"url": url,
"method": str(method or "GET").upper(),
"timeout_ms": min(int(timeout_ms), 120000),
"follow_redirects": True,
"max_redirects": 10,
"proxy_country": os.environ.get("FETCH_USE_PROXY_COUNTRY", "US"),
"session_id": session_id,
}
if headers:
payload["headers"] = dict(headers)
if body is not None:
payload["body"] = body
req_headers = {"Content-Type": "application/json", "X-Browser-Use-API-Key": api_key}
token = os.environ.get("FETCH_USE_AUTH_TOKEN")
if token:
req_headers["X-Fetch-Token"] = token
data = json.dumps(payload).encode("utf-8")
request = urllib.request.Request(service + "/fetch", data=data, headers=req_headers, method="POST")
with urllib.request.urlopen(request, timeout=(int(timeout_ms) / 1000) + 10) as resp:
result = json.loads(resp.read().decode("utf-8"))
if result.get("error"):
raise RuntimeError(f"fetch proxy error: {result['error']}")
return _ProxyFetchResponse(
result.get("status_code", 0),
result.get("headers", {}),
result.get("final_url", url),
result.get("body", ""),
result.get("body_base64", ""),
result.get("is_binary", False),
)


def http_get(url, headers=None, timeout=20.0, binary=None, use_proxy=None):
"""Pure HTTP fetch for static pages and APIs.

When BROWSER_USE_API_KEY is set and fetch_use is installed, route through
fetch-use like browser-harness. Otherwise fall back to local urllib with a
browser-like UA and gzip handling. Pass binary=True for bytes.
Public URLs route through the Browser-Use Fetch proxy (Chrome TLS
fingerprint + rotating IPs) when BROWSER_USE_API_KEY is set, so
bot-protected sites don't block us — preferring the installed `fetch_use`
package, else the vendored client above. Loopback/private/intranet hosts
are ALWAYS fetched directly (never sent to the proxy). On proxy failure the
request falls back to direct urllib and the proxy error is surfaced.
Pass binary=True for bytes. use_proxy: None=auto (public hosts only),
True=force the proxy, False=force direct.
"""
if os.environ.get("BROWSER_USE_API_KEY"):
proxy_error = None
want_proxy = (
use_proxy
if use_proxy is not None
else not _is_private_or_local_host(urlparse(url).hostname)
)
if want_proxy and os.environ.get("BROWSER_USE_API_KEY"):
try:
from fetch_use import fetch_sync
try:
from fetch_use import fetch_sync
except ImportError:
fetch_sync = _fetch_use_sync

response = fetch_sync(url, headers=headers, timeout_ms=int(float(timeout) * 1000))
status_code = getattr(response, "status_code", getattr(response, "status", None))
Expand All @@ -1726,8 +1825,16 @@ def http_get(url, headers=None, timeout=20.0, binary=None):
response_headers,
response_url,
)
except ImportError:
pass
except Exception as exc:
# Proxy unavailable / auth / schema / network error — fall back to a
# direct urllib request below, but keep the proxy error visible so a
# bot-blocked direct response isn't mistaken for proxy success.
proxy_error = exc
print(
f"http_get: fetch proxy failed ({exc}); retrying direct",
file=sys.stderr,
flush=True,
)
request_headers = {"User-Agent": "Mozilla/5.0", "Accept-Encoding": "gzip"}
if headers:
request_headers.update(headers)
Expand All @@ -1751,11 +1858,16 @@ def http_get(url, headers=None, timeout=20.0, binary=None):
f"{exc.code} for {url}. If this is bot/login protection, retry from the browser with js(fetch(...)), "
"pass site-specific headers/cookies, or configure the Browser Use fetch proxy with BROWSER_USE_API_KEY."
)
if proxy_error is not None:
guidance += f" (fetch proxy also failed: {proxy_error})"
raise RuntimeError(guidance) from exc
except (urllib.error.URLError, TimeoutError, OSError) as exc:
raise RuntimeError(
message = (
f"http_get failed for {url}: {exc}. Try a shorter timeout, browser js(fetch(...)), or a configured proxy if the site blocks direct HTTP."
) from exc
)
if proxy_error is not None:
message += f" (fetch proxy also failed: {proxy_error})"
raise RuntimeError(message) from exc


def http_get_many(urls, headers=None, timeout=20.0, binary=None, max_workers=8, return_errors=True):
Expand Down
137 changes: 135 additions & 2 deletions crates/browser-use-browser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,20 @@ use tungstenite::{connect, Message, WebSocket};
const BU_API: &str = "https://api.browser-use.com/api/v3";
const LOG_LIMIT: usize = 250;
const SCRIPT_MAX_OUTPUT_CHARS: usize = 120_000;
const BROWSER_SCRIPT_DEFAULT_INITIAL_WAIT_MS: u64 = 15_000;
const BROWSER_SCRIPT_DEFAULT_OBSERVE_MS: u64 = 1_000;
// Cost optimization (eval-everything): a script that finishes within the start
// call returns its result in ONE tool call — no separate `observe` model turns.
// Raised 15s->30s so the common scrape script (which finishes well under 30s)
// no longer forces a poll round-trip. This is a single, non-stacking block that
// still hands control back at 30s, so a stuck script can be cancelled/finalized
// (unlike the reverted "observe30", which STACKED 30s observe blocks and starved
// the run timebox — see DEFAULT_OBSERVE_TIMEOUT_MS doc in browser.rs).
const BROWSER_SCRIPT_DEFAULT_INITIAL_WAIT_MS: u64 = 30_000;
// The `next_observe_ms` HINT surfaced to the model ("call observe with
// observe_timeout_ms=N"). Raised 1s->15s to nudge the model to long-poll instead
// of issuing 1s "still running?" peeks (the dominant observe-churn cost). This is
// only a hint — the observe floor stays at 1s, so the model keeps full agency to
// bail early; we stay under the 30s window that previously regressed.
const BROWSER_SCRIPT_DEFAULT_OBSERVE_MS: u64 = 15_000;
const BROWSER_SCRIPT_HELPERS: &str = include_str!("browser_script_helpers.py");
const BROWSER_CONNECT_LOCAL_HANDSHAKE_TIMEOUT: Duration = Duration::from_secs(120);
const BROWSER_CONNECT_ATTACH_DEADLINE: Duration = Duration::from_secs(8);
Expand Down Expand Up @@ -13091,6 +13103,127 @@ print("http_get_many parity ok")
assert!(output.text.contains("http_get_many parity ok"));
}

// Runs an embedded Python script through `run_browser_script` and checks that
// it succeeds and prints "http_get vendored proxy ok".
//
// The script:
//   * asserts `_is_private_or_local_host` classifies loopback, RFC1918,
//     link-local, `.local`/`.internal` and dotless hosts as private, and
//     `example.com` / `8.8.8.8` as public;
//   * starts two throwaway servers on 127.0.0.1 with ephemeral ports: a fake
//     fetch proxy (POST /fetch, checks the API-key header, records each
//     requested URL, can be switched to answer 500) and a plain GET target
//     that answers "direct";
//   * points BROWSER_USE_API_KEY / FETCH_USE_URL at the fake proxy and then
//     exercises `http_get` in four scenarios: (1) public URL goes through the
//     proxy, (2) loopback URL bypasses it, (3) `use_proxy=True` forces a
//     loopback URL through it, (4) a failing proxy falls back to a direct
//     request, and when the direct request also fails the error message
//     mentions "fetch proxy also failed".
//
// NOTE(review): `sys.modules.pop("fetch_use", None)` only drops an already
// imported module; if the `fetch_use` package is installed in the script's
// interpreter, `from fetch_use import fetch_sync` would presumably import it
// again and bypass the vendored client — confirm the test environment never
// has it installed.
#[test]
fn browser_script_http_get_vendored_proxy_private_bypass_and_error_fallback() {
let temp = tempfile::tempdir().unwrap();
let output = run_browser_script(
"script-http-get-vendored-proxy",
temp.path(),
temp.path().join("artifacts"),
r#"
import http.server
import json
import os
import socketserver
import sys
import threading

assert _is_private_or_local_host("localhost")
assert _is_private_or_local_host("127.0.0.1")
assert _is_private_or_local_host("10.1.2.3")
assert _is_private_or_local_host("192.168.0.5")
assert _is_private_or_local_host("169.254.1.1")
assert _is_private_or_local_host("printer.local")
assert _is_private_or_local_host("wiki.internal")
assert _is_private_or_local_host("intranet-host")
assert not _is_private_or_local_host("example.com")
assert not _is_private_or_local_host("8.8.8.8")

proxy_calls = []
proxy_mode = {"fail": False}

class FakeFetchProxy(http.server.BaseHTTPRequestHandler):
def log_message(self, fmt, *args):
pass

def do_POST(self):
assert self.path == "/fetch"
assert self.headers.get("X-Browser-Use-API-Key") == "test-key"
req = json.loads(self.rfile.read(int(self.headers["Content-Length"])))
proxy_calls.append(req["url"])
if proxy_mode["fail"]:
self.send_response(500)
self.end_headers()
return
body = json.dumps({
"status_code": 200,
"status": "200 OK",
"headers": {"x-proxy": "yes"},
"body": "proxied:" + req["url"],
"body_base64": "",
"is_binary": False,
"final_url": req["url"],
"redirect_count": 0,
"protocol": "HTTP/2.0",
}).encode()
self.send_response(200)
self.send_header("Content-Type", "application/json")
self.send_header("Content-Length", str(len(body)))
self.end_headers()
self.wfile.write(body)

class DirectTarget(http.server.BaseHTTPRequestHandler):
def log_message(self, fmt, *args):
pass

def do_GET(self):
body = b"direct"
self.send_response(200)
self.send_header("Content-Type", "text/plain; charset=utf-8")
self.send_header("Content-Length", str(len(body)))
self.end_headers()
self.wfile.write(body)

proxy_server = socketserver.TCPServer(("127.0.0.1", 0), FakeFetchProxy)
target_server = socketserver.TCPServer(("127.0.0.1", 0), DirectTarget)
for server in (proxy_server, target_server):
threading.Thread(target=server.serve_forever, daemon=True).start()
target_base = f"http://127.0.0.1:{target_server.server_address[1]}"

sys.modules.pop("fetch_use", None) # force the VENDORED client path
os.environ["BROWSER_USE_API_KEY"] = "test-key"
os.environ["FETCH_USE_URL"] = f"http://127.0.0.1:{proxy_server.server_address[1]}"

try:
# 1) public URL goes through the vendored proxy client
proxied = http_get("https://public.example/data")
assert proxied == "proxied:https://public.example/data", proxied
assert proxied.status_code == 200 and proxied.headers["x-proxy"] == "yes"

# 2) loopback/private host bypasses the proxy entirely
before = len(proxy_calls)
direct = http_get(target_base + "/anything")
assert direct == "direct", direct
assert len(proxy_calls) == before, "private host must never reach the proxy"

# 3) use_proxy=True forces even a private host through the proxy
forced = http_get(target_base + "/anything", use_proxy=True)
assert forced == "proxied:" + target_base + "/anything", forced

# 4) proxy failure falls back to direct; both errors surfaced when direct also fails
proxy_mode["fail"] = True
fallback = http_get(target_base + "/anything", use_proxy=True, timeout=3)
assert fallback == "direct", fallback
try:
http_get("https://no-such-host.invalid/x", timeout=3)
except RuntimeError as exc:
assert "fetch proxy also failed" in str(exc), exc
else:
raise AssertionError("expected both proxy and direct to fail")
finally:
for server in (proxy_server, target_server):
server.shutdown()
server.server_close()
print("http_get vendored proxy ok")
"#,
// presumably the script timeout in seconds — TODO confirm against run_browser_script
20,
)
.unwrap();

// Surface both the structured error and the captured script output on failure.
assert!(output.ok, "{:?}\n{}", output.error, output.text);
assert!(output.text.contains("http_get vendored proxy ok"));
}

#[test]
fn browser_script_browser_fetch_single_returns_structured_errors_by_default() {
let temp = tempfile::tempdir().unwrap();
Expand Down
Loading
Loading