From 64d301ce07859145ae24167431881ecc1706b8b8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gregor=20=C5=BDuni=C4=8D?=
 <36313686+gregpr07@users.noreply.github.com>
Date: Fri, 12 Jun 2026 00:27:35 +0000
Subject: [PATCH 1/4] Trim verbose tool descriptions to cut fixed per-turn
 tokens

Collapse duplicated execution-model prose in the browser_script/browser/python
tool descriptions and compress the update_goal description. Helper names and
safety rules are kept; behavioral guidance is condensed rather than verbatim.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 .../browser-use-agent/src/tools/registry.rs   |  2 +-
 prompts/browser-script-tool-description.md    | 55 +++++-------
 prompts/browser-tool-description.md           | 89 +++++--------------
 prompts/python-tool-description.md            |  2 +-
 4 files changed, 44 insertions(+), 104 deletions(-)

diff --git a/crates/browser-use-agent/src/tools/registry.rs b/crates/browser-use-agent/src/tools/registry.rs
index 3a690e38..7c99216c 100644
--- a/crates/browser-use-agent/src/tools/registry.rs
+++ b/crates/browser-use-agent/src/tools/registry.rs
@@ -741,7 +741,7 @@ pub mod definitions {
     pub fn update_goal() -> ToolDefinition {
         ToolDefinition {
             name: "update_goal".to_string(),
-            description: "Update the existing goal.\nUse this tool only to mark the goal achieved or genuinely blocked.\nSet status to `complete` only when the objective has actually been achieved and no required work remains.\nSet status to `blocked` only when the same blocking condition has repeated for at least three consecutive goal turns, counting the original/user-triggered turn and any automatic continuations, and the agent cannot make meaningful progress without user input or an external-state change.\nIf the user resumes a goal that was previously marked `blocked`, treat the resumed run as a fresh blocked audit. If the same blocking condition then repeats for at least three consecutive resumed goal turns, set status to `blocked` again.\nOnce the blocked threshold is satisfied, do not keep reporting that you are still blocked while leaving the goal active; set status to `blocked`.\nDo not use `blocked` merely because the work is hard, slow, uncertain, incomplete, or would benefit from clarification.\nDo not mark a goal complete merely because its budget is nearly exhausted or because you are stopping work.\nYou cannot use this tool to pause, resume, budget-limit, or usage-limit a goal; those status changes are controlled by the user or system.\nWhen marking a budgeted goal achieved with status `complete`, report the final token usage from the tool result to the user.".to_string(),
+            description: "Update the existing goal. Set status to `complete` only when the objective has actually been achieved and no required work remains; set status to `blocked` only when the same blocking condition has repeated for at least three consecutive goal turns (counting the original/user-triggered turn and any automatic continuations, and restarting a fresh audit when a previously blocked goal is resumed) and the agent cannot progress without user input or an external-state change.\nDo not use `blocked` merely because the work is hard, slow, uncertain, incomplete, or would benefit from clarification; do not mark complete merely because the budget is nearly exhausted or you are stopping; and do not use this tool to pause, resume, budget-limit, or usage-limit a goal (those are controlled by the user or system). When marking a budgeted goal `complete`, report the final token usage from the tool result to the user.".to_string(),
             input_schema: json!({
                 "type": "object",
                 "properties": {
diff --git a/prompts/browser-script-tool-description.md b/prompts/browser-script-tool-description.md
index c4a354e9..0fcf0e10 100644
--- a/prompts/browser-script-tool-description.md
+++ b/prompts/browser-script-tool-description.md
@@ -6,17 +6,12 @@ Use the `browser` tool for connection/runtime work first. If the browser is not
 
 Important execution model:
 
-- Each `browser_script` call starts a fresh Python process.
-- Python variables do not persist across calls.
-- Browser/CDP state persists in Rust.
+- Each `browser_script` call starts a fresh Python process; Python variables do not persist across calls. Browser/CDP state persists in Rust.
 - Fast calls return their final result immediately. Long calls return `status: running` with a `run_id`; keep observing that same run until it finishes, fails, or is cancelled.
-- To listen to a running script, call this tool with `action="observe"`, the returned `run_id`, and optionally `observe_timeout_ms`. Prefer coarse waits such as 30000-120000 ms for long navigation or extraction scripts; do not burn many turns polling the same `run_id` with short waits.
-- To stop a running script, call this tool with `action="cancel"` and the `run_id`. Partial images and artifacts emitted before cancellation are preserved.
-- A failed `browser_script` call may include a short diagnosis. Read that diagnosis first: if it says the browser is still connected or the same page is usable, continue from the same page instead of reconnecting.
-- Helpers are preimported; you do not need imports for normal browser work.
-- CDP is the source of truth. If a helper is incomplete, use `cdp(...)` directly.
-- Keep browser actions sequential and deliberate.
-- Do not import Playwright, Selenium, or Pyppeteer.
+- To listen to a running script, call this tool with `action="observe"`, the `run_id`, and optionally `observe_timeout_ms`. Prefer coarse waits (30000-120000 ms) for long navigation/extraction; do not burn many turns polling with short waits. To stop a run, call `action="cancel"` with the `run_id`; partial images/artifacts emitted before cancellation are preserved.
+- A failed call may include a short diagnosis. Read it first: if it says the browser is still connected or the same page is usable, continue from the same page instead of reconnecting.
+- Helpers are preimported; no imports needed for normal browser work. CDP is the source of truth — if a helper is incomplete, use `cdp(...)` directly.
+- Keep browser actions sequential and deliberate. Do not import Playwright, Selenium, or Pyppeteer.
 
 Preimported helpers:
 
@@ -73,21 +68,16 @@ last_domain_skills(include_content=False)
 
 Usage guidance:
 
-- First navigation should usually be `new_tab(url)`, not `goto_url(url)`, because `goto_url(url)` mutates the current controlled tab. Both helpers send the CDP navigation command, perform a bounded readiness check, and emit a labeled `navigation` output with `status`, `page_info`, `page_state`, and `next_step`. If that output says `navigation_ready` and `page_info.url` is the expected page, trust it and inspect/extract from the current page instead of navigating to the same URL again. If you chain more work in the same script after navigation, explicitly wait or poll for the specific selector/state you need before reading/clicking.
-- If a navigation is blocked by the user's `/domains` policy (the error says so), call `nav_policy()` to see the allowed/denied sites and plan within them; pass a URL (`nav_policy("example.com")`) to check before navigating. If the task can't be completed within the policy, tell the user which site is blocked and suggest they allow it with `/domains` or adjust the task — don't keep retrying the blocked host.
-- Keep keyboard semantics browser-harness/Rod aligned: `press_key(...)` simulates physical keys or shortcuts, while `type_text(...)` inserts/pastes text into the focused element with `Input.insertText`.
-- For React/Vue/Svelte/controlled inputs, prefer `fill_input(selector, text, timeout=...)` over direct DOM value assignment. It focuses the element, clears with Cmd/Ctrl+A plus Backspace, types through physical key events, then fires final `input`/`change` events. Use stable selectors from labels, ids, names, placeholders, or visible DOM inspection; avoid brittle positional selectors such as `input:nth-of-type(2)` unless you just verified that exact selector on the current page.
-- Do not combine `Input.dispatchKeyEvent` carrying printable `text` with a manual `char` event for the same character; that double-inserts text in Chrome.
+- First navigation should usually be `new_tab(url)`, not `goto_url(url)`, because `goto_url(url)` mutates the current controlled tab. Both send the CDP navigation command, perform a bounded readiness check, and emit a labeled `navigation` output with `status`, `page_info`, `page_state`, and `next_step`. If that output says `navigation_ready` and `page_info.url` is the expected page, trust it and inspect/extract instead of navigating again. If you chain more work after navigation, explicitly wait or poll for the specific selector/state before reading/clicking.
+- If a navigation is blocked by the user's `/domains` policy (the error says so), call `nav_policy()` to see allowed/denied sites and plan within them; pass a URL (`nav_policy("example.com")`) to check before navigating. If the task can't be done within the policy, tell the user which site is blocked and suggest `/domains` or adjusting the task — don't keep retrying the blocked host.
+- Keyboard semantics: `press_key(...)` simulates physical keys/shortcuts; `type_text(...)` inserts/pastes text into the focused element via `Input.insertText`. Do not combine `Input.dispatchKeyEvent` carrying printable `text` with a manual `char` event for the same character; that double-inserts in Chrome.
+- For React/Vue/Svelte/controlled inputs, prefer `fill_input(selector, text, timeout=...)` over direct DOM value assignment. It focuses, clears with Cmd/Ctrl+A plus Backspace, types through physical key events, then fires final `input`/`change` events. Use stable selectors from labels, ids, names, placeholders, or visible DOM inspection; avoid brittle positional selectors like `input:nth-of-type(2)` unless you just verified that exact selector on the current page.
 - If the task is site-specific, call `domain_skills_for_url(url, include_content=True)` before inventing selectors, private API routes, or flows. `goto_url(url)` also returns matching `domain_skills` metadata when a skill root is available.
-- Be patient with loading pages by making several cheap observations, not one long blind wait. Prefer short waits such as `wait_for_load(1)`, `wait_for_element(selector, timeout=2)`, or `wait_for_network_idle(2)`, then inspect again. If a wait returns false, that is not a task failure; inspect the current page and continue from the best available state or decide whether it is stuck.
-- Use screenshots as labeled temporal checkpoints: initial load, before/after meaningful clicks, scrolls, route changes, dialogs, uploads, downloads, and final verification. For screenshot or visual-output tasks, verify the saved image is contentful and nonblank before `done`.
-- The common screenshot call is `screenshot(label)`, for example `screenshot("before_submit")`.
-- Screenshot/image artifacts are sent as `input_image` content to the next model turn. The user does not see those pixels inline in the terminal; describe what you see or provide the saved artifact path when the user asks for the screenshot.
-- If a script emits screenshots/images and then fails, the next model turn still receives the images alongside the failure diagnosis. Use those pixels to decide the next smaller retry.
-- If a running script emits screenshots/images before it finishes, `observe` returns those images as soon as they are available. Use the pixels to guide the next observe/retry.
-- Use `emit_output(value, label="...")` for structured observations that the next model turn may need, such as `page_info()`, extracted rows, selected DOM state, or API responses. The full value stays model-visible.
-- When a script emits labeled structured output, add a `# browser_summary:` JSON comment block at the top of the script that maps each emitted label to the compact transcript summary. Write the code/labels first mentally, then place or update this block before submitting the tool call; the runtime parses the whole script before execution.
-- Summary values may be literals, JSONPath-like selectors such as `$.url`, or template strings such as `Read ${$.length} employee rows`. Missing summary specs fall back to a generic `Recorded <label>` summary while preserving the full output.
+- Be patient with loading pages: make several cheap observations, not one long blind wait. Prefer short waits like `wait_for_load(1)`, `wait_for_element(selector, timeout=2)`, or `wait_for_network_idle(2)`, then inspect again. A wait returning false is not a task failure; inspect the current page and continue from the best available state or decide whether it is stuck.
+- Use screenshots as labeled temporal checkpoints: initial load, before/after meaningful clicks, scrolls, route changes, dialogs, uploads, downloads, and final verification. The common call is `screenshot(label)`, e.g. `screenshot("before_submit")`. For screenshot/visual-output tasks, verify the saved image is contentful and nonblank before `done`.
+- Screenshot/image artifacts are sent as `input_image` content to the next model turn (the user does not see those pixels inline in the terminal; describe what you see or give the saved artifact path when asked). They are delivered even when the script then fails, and `observe` returns them as soon as they are available on a running script — use the pixels to guide the next smaller retry.
+- Use `emit_output(value, label="...")` for structured observations the next turn may need (e.g. `page_info()`, extracted rows, selected DOM state, API responses). The full value stays model-visible.
+- When a script emits labeled structured output, add a `# browser_summary:` JSON comment block at the top mapping each emitted label to its compact transcript summary; the runtime parses the whole script before execution. Summary values may be literals, JSONPath-like selectors such as `$.url`, or template strings such as `Read ${$.length} employee rows`. Missing summary specs fall back to a generic `Recorded <label>` summary while preserving the full output.
 - Prefer this pattern over printing page or extraction objects:
 
 ```python
@@ -111,21 +101,16 @@ rows = [{"name": "Ada"}, {"name": "Grace"}]
 emit_output(rows, label="employee_rows")
 ```
 
-- Keep `print(...)` for short debug/status text only. Do not print large page, DOM, network, or extraction objects when `emit_output(...)` can carry the full value.
-- Prefer coordinate clicks for visible UI: screenshot, inspect pixels, `click_at_xy(x, y)`, wait, screenshot again.
-- Use `js(...)` for DOM inspection and raw `cdp(...)` for lower-level browser actions.
-- Use `js(function_source, *args)` when passing JSON-serializable Python values into JavaScript; use `target_id=` as a keyword for iframe targets.
+- Keep `print(...)` for short debug/status text only; do not print large page, DOM, network, or extraction objects when `emit_output(...)` can carry the full value.
+- Prefer coordinate clicks for visible UI: screenshot, inspect pixels, `click_at_xy(x, y)`, wait, screenshot again. Use `js(...)` for DOM inspection and raw `cdp(...)` for lower-level actions; pass JSON-serializable Python values into JavaScript with `js(function_source, *args)`, and use `target_id=` for iframe targets.
 - For real user forms, act like a browser user: screenshot, click the visible field/control, type with `type_text(...)`, `press_key(...)`, or `fill_input(...)`, then screenshot or otherwise verify. Use coordinate clicks for checkboxes, radios, buttons, dropdowns, and custom controls. Do not assign `element.value`, `element.checked`, `selectedIndex`, React private state, or MutationObserver restore loops on live forms. Do not synthesize `input`, `change`, `click`, or keyboard events in page JavaScript to make a form look filled. Those anti-patterns can desynchronize framework state from the visible DOM.
-- Use `http_get(...)` for one static page/API URL after the browser reveals a stable endpoint. Use `browser_fetch(...)` when the page's cookies, auth headers, or browser session are needed. Returned bodies are strings by default, bytes with `binary=True`, and expose `.status_code`, `.headers`, `.url`, `.text`, `.content`, and `.json()` for convenience. If direct HTTP hits bot or login protection, retry with `browser_fetch(...)`, site-specific headers/cookies, or the configured Browser Use fetch proxy. Do not replace source completion with blind bulk fetching; use small inspected chunks with progress, counts, missing fields, and source coverage.
+- Use `http_get(...)` for one static page/API URL after the browser reveals a stable endpoint. Use `browser_fetch(...)` when the page's cookies, auth headers, or browser session are needed. Returned bodies are strings by default, bytes with `binary=True`, and expose `.status_code`, `.headers`, `.url`, `.text`, `.content`, and `.json()`. If direct HTTP hits bot or login protection, retry with `browser_fetch(...)`, site-specific headers/cookies, or the configured Browser Use fetch proxy. Do not replace source completion with blind bulk fetching; use small inspected chunks with progress, counts, missing fields, and source coverage.
 
 - Extract only fields needed for the task. Do not emit full profile text, full DOM text, cookies, localStorage, or entire app caches unless you are debugging and the smaller field-level extraction failed.
-- Save complete generated result files under `outputs_dir()` or relative paths in the current working directory. Files written there are collected as artifacts automatically; `copy_artifact(...)` is for files created elsewhere.
-- For large structured results, write the full JSON/CSV/text to a file. If the task asks for an exact inline final format, return that content with `done(result=...)` and optionally include `result_file=path`; otherwise finish with `done(result_file=path)`.
-- For loops over multiple pages/items, emit short progress every item or every 2 seconds, whichever comes first. Progress can be a short `print(...)` line or compact `emit_output(..., label="progress")`.
+- Save complete generated result files under `outputs_dir()` or relative paths in the cwd — files written there are collected as artifacts automatically (`copy_artifact(...)` is for files created elsewhere). Write large structured results to a file: if the task asks for an exact inline final format, return that content with `done(result=...)` and optionally `result_file=path`; otherwise finish with `done(result_file=path)`.
+- For loops over multiple pages/items, emit short progress every item or every 2 seconds, whichever comes first (a short `print(...)` line or compact `emit_output(..., label="progress")`). Prefer bounded chunks with per-item micro timeouts and checkpoints written to files; inspect progress after each chunk, and if a chunk fails with a usable-page diagnosis, shrink the next chunk and resume from the last checkpoint.
 - For audits after a large result, run a small independent sample/count/schema check, then repair the specific gaps it finds until the required rows/fields are complete or the run is nearly out of turns. Do not rerun the whole crawl or full detail scrape just because counts fluctuate or some pages are intermittently empty; target the missing items, and mark a gap as a genuine absence only after checking its correct source path.
-- For list/profile extraction, filter the candidate list before navigating when the list page already contains enough information, such as employee versus contractor. Do not visit rows that cannot affect the final answer.
-- Poll until the record itself is ready before extracting fields. If a loaded record is missing a required field, inspect the correct source path before marking it absent; do not record required values as missing just because the first record view is null.
-- For long extraction or verification loops, prefer bounded chunks with checkpoints written to files. Use per-item micro timeouts and inspect progress after each chunk. If a chunk fails with a usable-page diagnosis, shrink the next chunk and resume from the last checkpoint.
+- For list/profile extraction, filter the candidate list before navigating when the list page already has enough information (e.g. employee versus contractor); do not visit rows that cannot affect the final answer. Poll until the record itself is ready before extracting fields; if a loaded record is missing a required field, inspect the correct source path before marking it absent — do not record required values as missing just because the first record view is null.
 
 Signing in / sign-ups: before signing up with a new email, check whether you're already logged in (you often drive the user's own profile) or have a saved credential for the site (listed under "Saved credentials") — if so, use it. If there's no existing login, ask the user whether to sign in with their own account (they save it via `/secrets`) or have you create a disposable account (you generate a throwaway inbox with `email_address()` and read its verification emails yourself), and wait for their choice. For the disposable path, call `email_address()`, record whatever context you need before submitting (`current_datetime()["utc"]`, existing `message_id`s from `email_inbox()`, or both), fill the email field, submit, then inspect/poll `email_inbox(sent_after=...)` or compare `timestamp`/`message_id` yourself (newest-first; `preview` already holds the code; `email_message(message_id)` has the full `text`/`html` for magic links).
 
diff --git a/prompts/browser-tool-description.md b/prompts/browser-tool-description.md
index 3d24b0d4..e6ef0ae1 100644
--- a/prompts/browser-tool-description.md
+++ b/prompts/browser-tool-description.md
@@ -2,98 +2,53 @@ Browser runtime control tool.
 
 This tool is the browser control plane. It manages which browser is connected, who owns it, how CDP is attached, what recovery is safe, and what the current runtime knows. It does not click, type, scrape, screenshot, run page JavaScript, or inspect pixels. Use `browser_script` for page interaction.
 
-The input is a single CLI-like command string. You may include the leading word `browser`, but it is optional:
-
-```text
-browser status --json
-browser preference --json
-browser preference use local
-browser profile suggest --domain example.com --json
-browser profile remember --domain example.com --profile google-chrome:Profile 2
-browser domain skills --domain example.com --json
-browser connect
-browser connect local
-browser local list --json
-browser local open --profile google-chrome:Profile 2
-browser local setup
-browser local setup --profile google-chrome:Profile 2
-browser connect managed --headed
-browser remote profiles --json
-browser remote start --profile-id <cloud-profile-id>
-browser recover reconnect-websocket
-browser script runs --json
-browser script cancel <run_id>
-```
+The input is a single CLI-like command string. The leading word `browser` is optional. See the full command reference under "Commands:" below.
 
 Mental model:
 
-- `browser` owns runtime/control/debug.
-- `browser_script` owns page interaction/data extraction.
-- Rust holds the CDP websocket, current target id, current session id, ownership, and connection generation.
-- Python in `browser_script` is fresh per call; Python variables do not persist.
+- `browser` owns runtime/control/debug; `browser_script` owns page interaction/data extraction.
+- Rust holds the CDP websocket, current target id, current session id, ownership, and connection generation. Python in `browser_script` is fresh per call; variables do not persist.
 - Nothing reloads, relaunches, closes, or switches tabs silently. If IDs may change, this tool reports that and you choose the next action.
-- `browser status --json` may include `last_issue`, a compact diagnosis from the most recent browser/browser_script failure. Use its `next_step`, `browser_usable`, and `page_usable` fields before deciding to reconnect.
-- `browser status --json` also lists active `browser_script` runs. Use the `browser_script` tool with `action="observe"` to listen to them; use `browser script cancel <run_id>` only for cleanup or explicit cancellation.
+- `browser status --json` may include `last_issue`, a compact diagnosis from the most recent failure; check its `next_step`, `browser_usable`, and `page_usable` before deciding to reconnect. It also lists active `browser_script` runs — use the `browser_script` tool with `action="observe"` to listen to them; use `browser script cancel <run_id>` only for cleanup or explicit cancellation.
 
 Preferences:
 
-- `browser preference --json` shows the remembered browser mode/profile preferences.
-- `browser preference use local|cloud|managed-headless|managed-headed` changes what plain `browser connect` means.
-- `browser profile suggest --domain <regex> --json` lists remembered and local profile options for a site in Local Chrome mode, and cloud profiles whose cookie domains match the regex in Browser Use Cloud mode.
-- `browser profile remember --domain <domain> --profile <profile-id> [--mode local|cloud]` stores the profile to use next time for that domain.
-- `browser domain skills --domain <domain> --json` lists matching browser-harness domain skill files. Use `--include-content` when you need to read the playbook before navigation.
-- If a site likely needs login and no profile is remembered, run `browser profile suggest --domain <regex> --json` before connecting. In cloud mode, choose a profile whose matching cookie domains fit the target login domain, run `browser profile remember --mode cloud --profile <profile-id>`, then `browser connect`. Do not guess friendly cloud profile names like `Work`.
+- `preference use local|cloud|managed-headless|managed-headed` changes what plain `browser connect` means.
+- `profile suggest --domain <regex>` lists remembered/local profiles (Local Chrome mode) and cloud profiles whose cookie domains match the regex (Cloud mode). If a site likely needs login and no profile is remembered, run it before connecting; in cloud mode pick a profile whose cookie domains fit the login domain, run `profile remember --mode cloud --profile <profile-id>`, then `browser connect`. Do not guess friendly cloud profile names like `Work`.
 - Do not silently attach to a different local profile when a profile is remembered.
+- `domain skills --domain <domain>` lists matching browser-harness domain skill files; use `--include-content` to read the playbook before navigation.
 - Tool commands returned in `next_step` are internal actions for you to run. Never tell the user to run `browser ...` commands manually.
 
 Local real browser:
 
-- `browser connect local` checks for a local Chromium-family browser exposing CDP and attaches only after the user enables remote debugging.
+- `browser connect local` attaches to a local Chromium-family browser exposing CDP, only after the user enables remote debugging.
 - Do not guess a browser family flag. The tool auto-detects Chrome, Chrome Canary, Chromium, Edge, Brave, Arc, Dia, Comet, and common forks through DevToolsActivePort.
-- If one candidate exists, it connects. If multiple candidates exist, ask the user which candidate to use, then run `browser connect local --candidate <id>`.
-- If Chrome blocks the connection with permission evidence such as 403 and `remote_debugging_enabled` is true, the checkbox is already enabled. Do not open the checkbox page. If the popup is not visible and `profile_recovery_command` is present, run it to open/focus the saved profile window, then ask the user to click Allow in Chrome's permission popup.
-- If the tool reports `state: "cdp-disabled"`, Chrome is open but not exposing CDP because the remote debugging checkbox is off. Call `browser local setup`; tell the user to enable the checkbox in Chrome, then reconnect.
-- If the port is closed or `DevToolsActivePort` is stale, Chrome is not exposing CDP right now. Do not tell the user remote debugging is disabled. If `profile_recovery_command` is present, run it to open the saved profile window, then retry `browser connect local`. Otherwise ask which local profile/browser to use.
+- A single candidate connects automatically; with multiple candidates, ask the user which one to use, then run `browser connect local --candidate <id>`.
+- If Chrome blocks with permission evidence such as 403 and `remote_debugging_enabled` is true, the checkbox is already enabled. Do not open the checkbox page. If the popup is not visible and `profile_recovery_command` is present, run it to open/focus the saved profile window, then ask the user to click Allow.
+- If `state: "cdp-disabled"`, Chrome is open but the remote debugging checkbox is off. Call `browser local setup`, tell the user to enable the checkbox, then reconnect.
+- If the port is closed or `DevToolsActivePort` is stale, Chrome is not exposing CDP. Do not tell the user remote debugging is disabled. If `profile_recovery_command` is present, run it then retry `browser connect local`; otherwise ask which local profile/browser to use.
 - Do not launch the user's real default Chrome profile with remote-debugging flags. Real logged-in profiles are attached while already open.
 
 Local profiles:
 
-- `browser local profiles --json` is built into Rust. It scans Chromium-family profile folders on disk and does not require any external CLI.
-- Use local profile listing when the user asks which local browser profiles exist or which profile likely contains a login.
-- Profiles have stable ids like `google-chrome:Default`; use that id for inspection when possible.
-- If a profile id or name contains spaces, quote it like `browser local profiles inspect 'google-chrome:Profile 2' --domains-only`.
-- `browser local profiles inspect <profile-id-or-name> --domains-only` copies the selected profile into a temporary browser profile, starts that temporary copy with CDP, and returns only cookie domain/count/expiry metadata.
-- Raw cookie values are never returned by default. Profile inspection is for choosing the right profile, not for dumping secrets.
+- `local profiles --json` (built into Rust, no external CLI) scans Chromium-family profile folders on disk. Use it when the user asks which local profiles exist or which likely contains a login. Profiles have stable ids like `google-chrome:Default`; quote ids/names with spaces, e.g. `local profiles inspect 'google-chrome:Profile 2' --domains-only`.
+- `local profiles inspect <id-or-name> --domains-only` copies the profile into a temp profile, starts it with CDP, and returns only cookie domain/count/expiry metadata. Raw cookie values are never returned by default; inspection is for choosing the right profile, not dumping secrets.
 
 Managed browser:
 
-- `browser connect managed` starts a Rust-owned browser with a temp profile by default.
-- Use `--headless` or `--headed`; default is headless.
-- Use `--profile <path>` only for an explicit non-default automation profile.
-- Rust may stop/restart this browser because Rust owns it. It is not the user's real logged-in Chrome.
+- `browser connect managed` starts a Rust-owned browser with a temp profile by default. Use `--headless` or `--headed` (default headless); use `--profile <path>` only for an explicit non-default automation profile. Rust owns it and may stop/restart it; it is not the user's real logged-in Chrome.
 
 Remote browsers:
 
-- `browser connect remote-cdp --url <http-url>` attaches to an external DevTools HTTP endpoint.
-- `browser connect remote-cdp --ws <ws-url>` attaches to an external CDP websocket.
-- `browser remote start ...` creates a Browser Use cloud browser and connects to it. Remote start means start and connect; do not copy the returned CDP URL into another command.
-- For login-sensitive cloud work, prefer `browser connect` after storing a cloud profile preference, or pass `--profile-id <uuid>` explicitly. If `--profile-name` fails, do not continue in a clean cloud browser; list profiles with `browser remote profiles --json` and choose by ID/cookie domains.
-- `browser remote stop` only stops a Browser Use cloud browser created by this runtime.
-- `browser remote profiles --json` lists cloud profiles without raw cookie values.
-
-Doctor:
-
-- `browser doctor` and `browser doctor --json` are read-only.
-- Doctor checks runtime state, local browser candidates, Rust local profile discovery, API key, CDP websocket health, current target health, and safe next steps.
-- Doctor never fixes state by itself. If a fix is available it prints an explicit command.
+- `browser connect remote-cdp --url <http-url>` or `--ws <ws-url>` attaches to an external DevTools HTTP endpoint or CDP websocket.
+- `browser remote start ...` creates a Browser Use cloud browser and connects to it (start and connect; do not copy the returned CDP URL into another command).
+- For login-sensitive cloud work, prefer `browser connect` after storing a cloud profile preference, or pass `--profile-id <uuid>` explicitly. If `--profile-name` fails, do not continue in a clean cloud browser; list with `remote profiles --json` and choose by ID/cookie domains.
+- `remote stop` only stops a Browser Use cloud browser created by this runtime. `remote profiles --json` lists cloud profiles without raw cookie values.
 
-Recovery:
+Doctor and recovery:
 
-- `browser recover reconnect-websocket`: reconnects the CDP websocket to the same endpoint. It never reloads the page.
-- `browser recover reattach-same-target`: attaches a fresh CDP session to the same target id. If the target is gone, it reports available targets and does not silently switch.
-- `browser recover restart-runtime`: resets the Rust connection holder and reconnects to the same endpoint. It does not kill Chrome.
-- `browser recover restart-owned-browser`: restarts only Rust-owned managed browsers.
-- `browser recover stop-owned-remote`: stops only Rust-owned Browser Use cloud browsers.
+- `browser doctor [--json]` is read-only: it checks runtime state, local candidates, profile discovery, API key, websocket/target health, and safe next steps, but never fixes state itself — if a fix is available it prints an explicit command.
+- `recover reconnect-websocket` reconnects the CDP websocket to the same endpoint (never reloads the page). `recover reattach-same-target` attaches a fresh session to the same target id (if the target is gone, it reports available targets and never silently switches). `recover restart-runtime` resets the Rust connection holder and reconnects to the same endpoint (does not kill Chrome). `recover restart-owned-browser` restarts only Rust-owned managed browsers; `recover stop-owned-remote` stops only Rust-owned cloud browsers.
 
 Commands:
 
diff --git a/prompts/python-tool-description.md b/prompts/python-tool-description.md
index 14991a3f..fe10ee30 100644
--- a/prompts/python-tool-description.md
+++ b/prompts/python-tool-description.md
@@ -8,7 +8,7 @@ CDP is the source of truth. Use raw CDP for basic browser control: `cdp("Page.na
 
 Use `js(function_source, *args)` when passing JSON-serializable Python values into JavaScript; use `target_id=` as a keyword for iframe targets.
 
-Do not import Playwright, Selenium, or Pyppeteer. Browser-harness workflow: first navigation should usually be `new_tab(url)`, not `goto_url(url)`, because `goto_url` mutates the active tab. `new_tab(url)` and `goto_url(url)` have zero implicit wait: they send the CDP navigation command and then return without waiting for readyState, network idle, selectors, paint, or sleeps. If you chain more work in the same script after navigation, explicitly wait or poll before reading/clicking. If navigation is the last action before yielding to the model, the LLM call itself may provide enough elapsed time; the next call must still inspect state before assuming the page loaded. If the task is site-specific and `domain_skills_for_url(url, include_content=True)` returns files, read those domain skills before inventing selectors, private API routes, or flows. Use screenshots as labeled temporal checkpoints: initial load, before/after meaningful clicks, scrolls, route changes, menus, dialogs, downloads, uploads, form submissions, and final verification. Prefer coordinate clicks for visible targets: `capture_screenshot` or `screenshot`, inspect the pixels, `click_at_xy(x, y)`, then screenshot again. Prefer capturing the action timeline inside one Python call when possible. Do not call `screenshot` repeatedly on an unchanged viewport; after a screenshot, act, inspect with CDP/JS, navigate, scroll, call `screenshot_clip(...)` for a different CSS-pixel region, wait briefly for an async transition, inspect again, or finish. Chrome hit-testing handles iframes, shadow DOM, and cross-origin content. Use `js(...)` and raw `cdp(...)` when coordinates are the wrong tool or helpers are incomplete. `js(...)` returns Python values, so use Python slicing/methods after the call; only use JavaScript methods inside the JavaScript expression.
+Do not import Playwright, Selenium, or Pyppeteer. Browser-harness workflow: first navigation should usually be `new_tab(url)`, not `goto_url(url)`, because `goto_url` mutates the active tab. `new_tab(url)` and `goto_url(url)` have zero implicit wait: they send the CDP navigation command and return without waiting for readyState, network idle, selectors, paint, or sleeps. If you chain more work in the same script after navigation, explicitly wait or poll before reading/clicking; if navigation is the last action before yielding, the next call must still inspect state before assuming the page loaded. If the task is site-specific and `domain_skills_for_url(url, include_content=True)` returns files, read those domain skills before inventing selectors, private API routes, or flows. Use screenshots as labeled temporal checkpoints: initial load, before/after meaningful clicks, scrolls, route changes, menus, dialogs, downloads, uploads, form submissions, and final verification. Prefer coordinate clicks for visible targets: `screenshot`, inspect the pixels, `click_at_xy(x, y)`, then screenshot again. Do not call `screenshot` repeatedly on an unchanged viewport; after a screenshot, act, inspect with CDP/JS, navigate, scroll, call `screenshot_clip(...)` for a different region, wait briefly, inspect again, or finish. Chrome hit-testing handles iframes, shadow DOM, and cross-origin content; use `js(...)` and raw `cdp(...)` when coordinates are the wrong tool or helpers are incomplete. `js(...)` returns Python values, so use Python slicing/methods after the call; only use JavaScript methods inside the JavaScript expression.
 
 To pass pixels directly to the next model turn, call raw `cdp("Page.captureScreenshot", format="png")`, `screenshot(label)`, `screenshot_clip(label, x, y, width, height)`, or `capture_screenshot(..., attach=True)`; use `emit_image(path)` for existing image files. Raw `Page.captureScreenshot` results are attached automatically. The user does not see attached pixels inline in the terminal; describe what you see or provide the saved artifact path when the user asks for a screenshot. Multiple labeled screenshots are good when they form a temporal trace, not when they repeat the same unchanged page.
 

From 1ffed02fed839282665e15d95680641e598e107f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gregor=20=C5=BDuni=C4=8D?=
 <36313686+gregpr07@users.noreply.github.com>
Date: Fri, 12 Jun 2026 00:27:35 +0000
Subject: [PATCH 2/4] Cut browser_script poll churn: longer start-wait +
 observe hint

Raise the script start initial-wait 15s->30s so the common scrape script
finishes in one tool call (no separate observe model-turns), and raise the
next_observe HINT 1s->15s to nudge long-polling over 1s 'still running?' peeks.
Observe floor stays at 1s (agency preserved); deliberately avoids the reverted
'observe30' forced-window regression. Each observe/status poll is a full model
call replaying 20-70k tokens, so this was the largest no-architecture cost lever.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 crates/browser-use-browser/src/lib.rs | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/crates/browser-use-browser/src/lib.rs b/crates/browser-use-browser/src/lib.rs
index 24c6fb13..b5652a32 100644
--- a/crates/browser-use-browser/src/lib.rs
+++ b/crates/browser-use-browser/src/lib.rs
@@ -28,8 +28,20 @@ use tungstenite::{connect, Message, WebSocket};
 const BU_API: &str = "https://api.browser-use.com/api/v3";
 const LOG_LIMIT: usize = 250;
 const SCRIPT_MAX_OUTPUT_CHARS: usize = 120_000;
-const BROWSER_SCRIPT_DEFAULT_INITIAL_WAIT_MS: u64 = 15_000;
-const BROWSER_SCRIPT_DEFAULT_OBSERVE_MS: u64 = 1_000;
+// Cost optimization (eval-everything): a script that finishes within the start
+// call returns its result in ONE tool call — no separate `observe` model turns.
+// Raised 15s->30s so the common scrape script (which finishes well under 30s)
+// no longer forces a poll round-trip. This is a single, non-stacking block that
+// still hands control back at 30s, so a stuck script can be cancelled/finalized
+// (unlike the reverted "observe30", which STACKED 30s observe blocks and starved
+// the run timebox — see DEFAULT_OBSERVE_TIMEOUT_MS doc in browser.rs).
+const BROWSER_SCRIPT_DEFAULT_INITIAL_WAIT_MS: u64 = 30_000;
+// The `next_observe_ms` HINT surfaced to the model ("call observe with
+// observe_timeout_ms=N"). Raised 1s->15s to nudge the model to long-poll instead
+// of issuing 1s "still running?" peeks (the dominant observe-churn cost). This is
+// only a hint — the observe floor stays at 1s, so the model keeps full agency to
+// bail early; we stay under the 30s window that previously regressed.
+const BROWSER_SCRIPT_DEFAULT_OBSERVE_MS: u64 = 15_000;
 const BROWSER_SCRIPT_HELPERS: &str = include_str!("browser_script_helpers.py");
 const BROWSER_CONNECT_LOCAL_HANDSHAKE_TIMEOUT: Duration = Duration::from_secs(120);
 const BROWSER_CONNECT_ATTACH_DEADLINE: Duration = Duration::from_secs(8);

From b206eefeb1ffb0d298f4ff475a6a2734fa5af201 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gregor=20=C5=BDuni=C4=8D?=
 <36313686+gregpr07@users.noreply.github.com>
Date: Fri, 12 Jun 2026 00:27:35 +0000
Subject: [PATCH 3/4] Always route http_get through the un-blockable fetch
 proxy

Vendor fetch-use's client inline so http_get uses the Browser-Use Fetch proxy
(Chrome TLS fingerprint + rotating proxy IPs) whenever BROWSER_USE_API_KEY is
set, even when the fetch_use package isn't installed in the sandbox. Falls back
to direct urllib on any proxy failure. Fixes blocked-by-bot-protection tasks
that previously returned null/partial results from native urllib.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 .../src/browser_script_helpers.py             | 82 +++++++++++++++++--
 1 file changed, 77 insertions(+), 5 deletions(-)

diff --git a/crates/browser-use-browser/src/browser_script_helpers.py b/crates/browser-use-browser/src/browser_script_helpers.py
index c0ee7c37..5d3d1c8b 100644
--- a/crates/browser-use-browser/src/browser_script_helpers.py
+++ b/crates/browser-use-browser/src/browser_script_helpers.py
@@ -1694,16 +1694,86 @@ def json(self):
         raise ValueError(f"request failed for {self.url}: {self.error}")
 
 
+class _ProxyFetchResponse:
+    """Response shim mirroring fetch-use's FetchResponse attribute surface."""
+
+    __slots__ = ("status_code", "status", "headers", "url", "text", "content")
+
+    def __init__(self, status_code, headers, url, body, body_b64, is_binary):
+        self.status_code = status_code
+        self.status = status_code
+        self.headers = headers or {}
+        self.url = url
+        self.text = body or ""
+        if is_binary and body_b64:
+            self.content = base64.b64decode(body_b64)
+        else:
+            self.content = (body or "").encode("utf-8", errors="replace")
+
+
+def _fetch_use_sync(url, headers=None, timeout_ms=30000, method="GET", body=None):
+    """Vendored minimal Browser-Use Fetch client (mirrors the `fetch-use` pkg).
+
+    POSTs through fetch.browser-use.com so requests carry Chrome TLS
+    fingerprinting + rotating proxy IPs — the same un-blockable path browser-use
+    uses — instead of a bare urllib request that bot-protection blocks. Vendored
+    so it works even when the `fetch_use` package isn't installed in the sandbox.
+    """
+    import uuid as _uuid
+
+    api_key = os.environ.get("BROWSER_USE_API_KEY", "")
+    if not api_key:
+        raise RuntimeError("BROWSER_USE_API_KEY not set")
+    service = (os.environ.get("FETCH_USE_URL") or "https://fetch.browser-use.com").rstrip("/")
+    session_id = (os.environ.get("SESSION_ID") or str(_uuid.uuid4()))[:36]
+    payload = {
+        "url": url,
+        "method": str(method or "GET").upper(),
+        "timeout_ms": min(int(timeout_ms), 120000),
+        "follow_redirects": True,
+        "max_redirects": 10,
+        "proxy_country": os.environ.get("FETCH_USE_PROXY_COUNTRY", "US"),
+        "session_id": session_id,
+    }
+    if headers:
+        payload["headers"] = dict(headers)
+    if body is not None:
+        payload["body"] = body
+    req_headers = {"Content-Type": "application/json", "X-Browser-Use-API-Key": api_key}
+    token = os.environ.get("FETCH_USE_AUTH_TOKEN")
+    if token:
+        req_headers["X-Fetch-Token"] = token
+    data = json.dumps(payload).encode("utf-8")
+    request = urllib.request.Request(service + "/fetch", data=data, headers=req_headers, method="POST")
+    with urllib.request.urlopen(request, timeout=(int(timeout_ms) / 1000) + 10) as resp:
+        result = json.loads(resp.read().decode("utf-8"))
+    if result.get("error"):
+        raise RuntimeError(f"fetch proxy error: {result['error']}")
+    return _ProxyFetchResponse(
+        result.get("status_code", 0),
+        result.get("headers", {}),
+        result.get("final_url", url),
+        result.get("body", ""),
+        result.get("body_base64", ""),
+        result.get("is_binary", False),
+    )
+
+
 def http_get(url, headers=None, timeout=20.0, binary=None):
     """Pure HTTP fetch for static pages and APIs.
 
-    When BROWSER_USE_API_KEY is set and fetch_use is installed, route through
-    fetch-use like browser-harness. Otherwise fall back to local urllib with a
-    browser-like UA and gzip handling. Pass binary=True for bytes.
+    When BROWSER_USE_API_KEY is set, route through the Browser-Use Fetch proxy
+    (Chrome TLS fingerprint + rotating IPs) so bot-protected sites don't block us
+    — preferring the installed `fetch_use` package, else the vendored client
+    above. Falls back to local urllib (browser-like UA + gzip) on any proxy
+    failure or when no API key is set. Pass binary=True for bytes.
     """
     if os.environ.get("BROWSER_USE_API_KEY"):
         try:
-            from fetch_use import fetch_sync
+            try:
+                from fetch_use import fetch_sync
+            except ImportError:
+                fetch_sync = _fetch_use_sync
 
             response = fetch_sync(url, headers=headers, timeout_ms=int(float(timeout) * 1000))
             status_code = getattr(response, "status_code", getattr(response, "status", None))
@@ -1726,7 +1796,9 @@ def http_get(url, headers=None, timeout=20.0, binary=None):
                 response_headers,
                 response_url,
             )
-        except ImportError:
+        except Exception:
+            # Proxy unavailable / network error / missing key — fall back to a
+            # direct urllib request below rather than failing the fetch outright.
             pass
     request_headers = {"User-Agent": "Mozilla/5.0", "Accept-Encoding": "gzip"}
     if headers:

From a55c03bad8794b242d4f2550ff76fcc2ecf9957e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gregor=20=C5=BDuni=C4=8D?=
 <36313686+gregpr07@users.noreply.github.com>
Date: Fri, 12 Jun 2026 00:55:41 +0000
Subject: [PATCH 4/4] Address review: private-host proxy bypass, error
 surfacing, vendored-path tests

- http_get never sends loopback/private/link-local/intranet hosts to the fetch
  proxy (URL+header leak, wrong-target fetch); new use_proxy=None/True/False
  override, default auto = public hosts only.
- Proxy failures are no longer swallowed: stderr note on fallback, and both
  errors surfaced when the direct request also fails (a bot-blocked direct
  response can't masquerade as proxy success).
- New test covers the vendored client against a local fake FETCH_USE_URL with
  fetch_use absent: proxy routing, private-host bypass (proxy never called),
  forced use_proxy=True, and proxy-failure fallback with dual-error message.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 .../src/browser_script_helpers.py             |  66 ++++++++--
 crates/browser-use-browser/src/lib.rs         | 121 ++++++++++++++++++
 2 files changed, 174 insertions(+), 13 deletions(-)

diff --git a/crates/browser-use-browser/src/browser_script_helpers.py b/crates/browser-use-browser/src/browser_script_helpers.py
index 5d3d1c8b..c9b59ccf 100644
--- a/crates/browser-use-browser/src/browser_script_helpers.py
+++ b/crates/browser-use-browser/src/browser_script_helpers.py
@@ -7,6 +7,7 @@
 
 import base64
 import gzip
+import ipaddress
 import json
 import math
 import os
@@ -1694,6 +1695,25 @@ def json(self):
         raise ValueError(f"request failed for {self.url}: {self.error}")
 
 
+def _is_private_or_local_host(host):
+    """True for hosts the fetch proxy must never see: loopback, RFC1918/link-local
+    ranges, .local/.internal-style suffixes, and dotless intranet shortnames.
+    Routing these through the remote proxy would leak the URL/headers off-box and
+    fetch the WRONG target (the proxy's localhost, not the caller's)."""
+    host = str(host or "").strip().lower().rstrip(".").strip("[]")
+    if not host:
+        return True
+    if host == "localhost" or host.endswith(".localhost"):
+        return True
+    if host.endswith((".local", ".internal", ".lan", ".intranet", ".corp", ".home.arpa")):
+        return True
+    try:
+        ip = ipaddress.ip_address(host)
+    except ValueError:
+        return "." not in host
+    return ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved or ip.is_unspecified
+
+
 class _ProxyFetchResponse:
     """Response shim mirroring fetch-use's FetchResponse attribute surface."""
 
@@ -1759,16 +1779,25 @@ def _fetch_use_sync(url, headers=None, timeout_ms=30000, method="GET", body=None
     )
 
 
-def http_get(url, headers=None, timeout=20.0, binary=None):
+def http_get(url, headers=None, timeout=20.0, binary=None, use_proxy=None):
     """Pure HTTP fetch for static pages and APIs.
 
-    When BROWSER_USE_API_KEY is set, route through the Browser-Use Fetch proxy
-    (Chrome TLS fingerprint + rotating IPs) so bot-protected sites don't block us
-    — preferring the installed `fetch_use` package, else the vendored client
-    above. Falls back to local urllib (browser-like UA + gzip) on any proxy
-    failure or when no API key is set. Pass binary=True for bytes.
+    Public URLs route through the Browser-Use Fetch proxy (Chrome TLS
+    fingerprint + rotating IPs) when BROWSER_USE_API_KEY is set, so
+    bot-protected sites don't block us — preferring the installed `fetch_use`
+    package, else the vendored client above. Loopback/private/intranet hosts
+    are fetched directly unless use_proxy=True. On proxy failure the request
+    falls back to direct urllib and the proxy error is surfaced.
+    Pass binary=True for bytes. use_proxy: None=auto (public hosts only),
+    True=force the proxy (still needs BROWSER_USE_API_KEY), False=force direct.
     """
-    if os.environ.get("BROWSER_USE_API_KEY"):
+    proxy_error = None
+    want_proxy = (
+        use_proxy
+        if use_proxy is not None
+        else not _is_private_or_local_host(urlparse(url).hostname)
+    )
+    if want_proxy and os.environ.get("BROWSER_USE_API_KEY"):
         try:
             try:
                 from fetch_use import fetch_sync
@@ -1796,10 +1825,16 @@ def http_get(url, headers=None, timeout=20.0, binary=None):
                 response_headers,
                 response_url,
             )
-        except Exception:
-            # Proxy unavailable / network error / missing key — fall back to a
-            # direct urllib request below rather than failing the fetch outright.
-            pass
+        except Exception as exc:
+            # Proxy unavailable / auth / schema / network error — fall back to a
+            # direct urllib request below, but keep the proxy error visible so a
+            # bot-blocked direct response isn't mistaken for proxy success.
+            proxy_error = exc
+            print(
+                f"http_get: fetch proxy failed ({exc}); retrying direct",
+                file=sys.stderr,
+                flush=True,
+            )
     request_headers = {"User-Agent": "Mozilla/5.0", "Accept-Encoding": "gzip"}
     if headers:
         request_headers.update(headers)
@@ -1823,11 +1858,16 @@ def http_get(url, headers=None, timeout=20.0, binary=None):
             f"{exc.code} for {url}. If this is bot/login protection, retry from the browser with js(fetch(...)), "
             "pass site-specific headers/cookies, or configure the Browser Use fetch proxy with BROWSER_USE_API_KEY."
         )
+        if proxy_error is not None:
+            guidance += f" (fetch proxy also failed: {proxy_error})"
         raise RuntimeError(guidance) from exc
     except (urllib.error.URLError, TimeoutError, OSError) as exc:
-        raise RuntimeError(
+        message = (
             f"http_get failed for {url}: {exc}. Try a shorter timeout, browser js(fetch(...)), or a configured proxy if the site blocks direct HTTP."
-        ) from exc
+        )
+        if proxy_error is not None:
+            message += f" (fetch proxy also failed: {proxy_error})"
+        raise RuntimeError(message) from exc
 
 
 def http_get_many(urls, headers=None, timeout=20.0, binary=None, max_workers=8, return_errors=True):
diff --git a/crates/browser-use-browser/src/lib.rs b/crates/browser-use-browser/src/lib.rs
index 26ece3f4..309c860d 100644
--- a/crates/browser-use-browser/src/lib.rs
+++ b/crates/browser-use-browser/src/lib.rs
@@ -13103,6 +13103,127 @@ print("http_get_many parity ok")
         assert!(output.text.contains("http_get_many parity ok"));
     }
 
+    #[test]
+    fn browser_script_http_get_vendored_proxy_private_bypass_and_error_fallback() {
+        let temp = tempfile::tempdir().unwrap();
+        let output = run_browser_script(
+            "script-http-get-vendored-proxy",
+            temp.path(),
+            temp.path().join("artifacts"),
+            r#"
+import http.server
+import json
+import os
+import socketserver
+import sys
+import threading
+
+assert _is_private_or_local_host("localhost")
+assert _is_private_or_local_host("127.0.0.1")
+assert _is_private_or_local_host("10.1.2.3")
+assert _is_private_or_local_host("192.168.0.5")
+assert _is_private_or_local_host("169.254.1.1")
+assert _is_private_or_local_host("printer.local")
+assert _is_private_or_local_host("wiki.internal")
+assert _is_private_or_local_host("intranet-host")
+assert not _is_private_or_local_host("example.com")
+assert not _is_private_or_local_host("8.8.8.8")
+
+proxy_calls = []
+proxy_mode = {"fail": False}
+
+class FakeFetchProxy(http.server.BaseHTTPRequestHandler):
+    def log_message(self, fmt, *args):
+        pass
+
+    def do_POST(self):
+        assert self.path == "/fetch"
+        assert self.headers.get("X-Browser-Use-API-Key") == "test-key"
+        req = json.loads(self.rfile.read(int(self.headers["Content-Length"])))
+        proxy_calls.append(req["url"])
+        if proxy_mode["fail"]:
+            self.send_response(500)
+            self.end_headers()
+            return
+        body = json.dumps({
+            "status_code": 200,
+            "status": "200 OK",
+            "headers": {"x-proxy": "yes"},
+            "body": "proxied:" + req["url"],
+            "body_base64": "",
+            "is_binary": False,
+            "final_url": req["url"],
+            "redirect_count": 0,
+            "protocol": "HTTP/2.0",
+        }).encode()
+        self.send_response(200)
+        self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(body)))
+        self.end_headers()
+        self.wfile.write(body)
+
+class DirectTarget(http.server.BaseHTTPRequestHandler):
+    def log_message(self, fmt, *args):
+        pass
+
+    def do_GET(self):
+        body = b"direct"
+        self.send_response(200)
+        self.send_header("Content-Type", "text/plain; charset=utf-8")
+        self.send_header("Content-Length", str(len(body)))
+        self.end_headers()
+        self.wfile.write(body)
+
+proxy_server = socketserver.TCPServer(("127.0.0.1", 0), FakeFetchProxy)
+target_server = socketserver.TCPServer(("127.0.0.1", 0), DirectTarget)
+for server in (proxy_server, target_server):
+    threading.Thread(target=server.serve_forever, daemon=True).start()
+target_base = f"http://127.0.0.1:{target_server.server_address[1]}"
+
+sys.modules["fetch_use"] = None  # force the VENDORED client path: a None entry makes `from fetch_use import ...` raise ImportError even if the package is installed
+os.environ["BROWSER_USE_API_KEY"] = "test-key"
+os.environ["FETCH_USE_URL"] = f"http://127.0.0.1:{proxy_server.server_address[1]}"
+
+try:
+    # 1) public URL goes through the vendored proxy client
+    proxied = http_get("https://public.example/data")
+    assert proxied == "proxied:https://public.example/data", proxied
+    assert proxied.status_code == 200 and proxied.headers["x-proxy"] == "yes"
+
+    # 2) loopback/private host bypasses the proxy entirely
+    before = len(proxy_calls)
+    direct = http_get(target_base + "/anything")
+    assert direct == "direct", direct
+    assert len(proxy_calls) == before, "private host must never reach the proxy"
+
+    # 3) use_proxy=True forces even a private host through the proxy
+    forced = http_get(target_base + "/anything", use_proxy=True)
+    assert forced == "proxied:" + target_base + "/anything", forced
+
+    # 4) proxy failure falls back to direct; both errors surfaced when direct also fails
+    proxy_mode["fail"] = True
+    fallback = http_get(target_base + "/anything", use_proxy=True, timeout=3)
+    assert fallback == "direct", fallback
+    try:
+        http_get("https://no-such-host.invalid/x", timeout=3)
+    except RuntimeError as exc:
+        assert "fetch proxy also failed" in str(exc), exc
+    else:
+        raise AssertionError("expected both proxy and direct to fail")
+finally:
+    for server in (proxy_server, target_server):
+        server.shutdown()
+        server.server_close()
+print("http_get vendored proxy ok")
+"#,
+            20,
+        )
+        .unwrap();
+
+        assert!(output.ok, "{:?}\n{}", output.error, output.text);
+        assert!(output.text.contains("http_get vendored proxy ok"));
+    }
+
     #[test]
     fn browser_script_browser_fetch_single_returns_structured_errors_by_default() {
         let temp = tempfile::tempdir().unwrap();