diff --git a/crates/browser-use-agent/src/tools/registry.rs b/crates/browser-use-agent/src/tools/registry.rs
index 3a690e38..7c99216c 100644
--- a/crates/browser-use-agent/src/tools/registry.rs
+++ b/crates/browser-use-agent/src/tools/registry.rs
@@ -741,7 +741,7 @@ pub mod definitions {
     pub fn update_goal() -> ToolDefinition {
         ToolDefinition {
             name: "update_goal".to_string(),
-            description: "Update the existing goal.\nUse this tool only to mark the goal achieved or genuinely blocked.\nSet status to `complete` only when the objective has actually been achieved and no required work remains.\nSet status to `blocked` only when the same blocking condition has repeated for at least three consecutive goal turns, counting the original/user-triggered turn and any automatic continuations, and the agent cannot make meaningful progress without user input or an external-state change.\nIf the user resumes a goal that was previously marked `blocked`, treat the resumed run as a fresh blocked audit. If the same blocking condition then repeats for at least three consecutive resumed goal turns, set status to `blocked` again.\nOnce the blocked threshold is satisfied, do not keep reporting that you are still blocked while leaving the goal active; set status to `blocked`.\nDo not use `blocked` merely because the work is hard, slow, uncertain, incomplete, or would benefit from clarification.\nDo not mark a goal complete merely because its budget is nearly exhausted or because you are stopping work.\nYou cannot use this tool to pause, resume, budget-limit, or usage-limit a goal; those status changes are controlled by the user or system.\nWhen marking a budgeted goal achieved with status `complete`, report the final token usage from the tool result to the user.".to_string(),
+            description: "Update the existing goal. Set status to `complete` only when the objective has actually been achieved and no required work remains; set status to `blocked` only when the same blocking condition has repeated for at least three consecutive goal turns (counting the original/user-triggered turn and any automatic continuations, and restarting a fresh audit when a previously blocked goal is resumed) and the agent cannot progress without user input or an external-state change.\nDo not use `blocked` merely because the work is hard, slow, uncertain, incomplete, or would benefit from clarification; do not mark complete merely because the budget is nearly exhausted or you are stopping; and do not use this tool to pause, resume, budget-limit, or usage-limit a goal (those are controlled by the user or system). When marking a budgeted goal `complete`, report the final token usage from the tool result to the user.".to_string(),
             input_schema: json!({
                 "type": "object",
                 "properties": {
diff --git a/crates/browser-use-browser/src/browser_script_helpers.py b/crates/browser-use-browser/src/browser_script_helpers.py
index c0ee7c37..c9b59ccf 100644
--- a/crates/browser-use-browser/src/browser_script_helpers.py
+++ b/crates/browser-use-browser/src/browser_script_helpers.py
@@ -7,6 +7,7 @@
 
 import base64
 import gzip
+import ipaddress
 import json
 import math
 import os
@@ -1694,16 +1695,114 @@ def json(self):
         raise ValueError(f"request failed for {self.url}: {self.error}")
 
 
-def http_get(url, headers=None, timeout=20.0, binary=None):
+def _is_private_or_local_host(host):
+    """Return True for hosts the fetch proxy must never see: empty hosts, localhost,
+    .local/.internal-style suffixes, dotless intranet shortnames, and any IP literal
+    that is private, loopback, link-local, reserved, unspecified, multicast or
+    otherwise non-global. Proxying these would leak the URL/headers off-box."""
+    host = str(host or "").strip().lower().rstrip(".").strip("[]")
+    if not host or host == "localhost":
+        return True
+    if host.endswith((".localhost", ".local", ".internal", ".lan", ".intranet", ".corp", ".home.arpa")):
+        return True
+    try:
+        ip = ipaddress.ip_address(host)
+    except ValueError:
+        return "." not in host  # not an IP literal: dotless names are intranet shortnames
+    # is_global is False for CGNAT/shared 100.64.0.0/10, which is_private does not cover.
+    legacy = ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved or ip.is_unspecified
+    return legacy or ip.is_multicast or not ip.is_global
+
+
+class _ProxyFetchResponse:
+    """Response object built by the vendored fetch client (meant to mirror fetch-use's FetchResponse)."""
+
+    __slots__ = ("status_code", "status", "headers", "url", "text", "content")
+
+    def __init__(self, status_code, headers, url, body, body_b64, is_binary):
+        # `status` is kept as an alias of `status_code`.
+        self.status_code = self.status = status_code
+        self.headers = headers if headers else {}
+        self.url = url
+        text = body if body else ""
+        self.text = text
+        # Binary replies carry their bytes as base64; otherwise content is the UTF-8 text.
+        use_b64 = bool(is_binary and body_b64)
+        self.content = base64.b64decode(body_b64) if use_b64 else text.encode("utf-8", errors="replace")
+
+
+def _fetch_use_sync(url, headers=None, timeout_ms=30000, method="GET", body=None):
+    """Vendored minimal Browser-Use Fetch client (mirrors the `fetch-use` pkg) so the
+    proxy path works even when `fetch_use` isn't installed in the sandbox.
+
+    POSTs a JSON job to `$FETCH_USE_URL/fetch` (default https://fetch.browser-use.com)
+    and returns a `_ProxyFetchResponse`. `timeout_ms` is clamped to 120s for both the
+    proxy job and (plus 10s slack) the local socket wait. Raises RuntimeError when the
+    API key is unset or the proxy replies with an error / non-object JSON; urllib
+    errors (HTTP status, network) propagate unchanged."""
+    import uuid as _uuid
+
+    api_key = os.environ.get("BROWSER_USE_API_KEY", "")
+    if not api_key:
+        raise RuntimeError("BROWSER_USE_API_KEY not set")
+    service = (os.environ.get("FETCH_USE_URL") or "https://fetch.browser-use.com").rstrip("/")
+    session_id = (os.environ.get("SESSION_ID") or str(_uuid.uuid4()))[:36]
+    timeout_ms = min(int(timeout_ms), 120000)  # clamp once; reused for the socket timeout below
+    payload = {
+        "url": url,
+        "method": str(method or "GET").upper(),
+        "timeout_ms": timeout_ms,
+        "follow_redirects": True,
+        "max_redirects": 10,
+        "proxy_country": os.environ.get("FETCH_USE_PROXY_COUNTRY", "US"),
+        "session_id": session_id,
+    }
+    if headers:
+        payload["headers"] = dict(headers)
+    if body is not None:
+        payload["body"] = body
+    req_headers = {"Content-Type": "application/json", "X-Browser-Use-API-Key": api_key}
+    token = os.environ.get("FETCH_USE_AUTH_TOKEN")
+    if token:
+        req_headers["X-Fetch-Token"] = token
+    data = json.dumps(payload).encode("utf-8")
+    request = urllib.request.Request(service + "/fetch", data=data, headers=req_headers, method="POST")
+    with urllib.request.urlopen(request, timeout=(timeout_ms / 1000) + 10) as resp:
+        result = json.loads(resp.read().decode("utf-8"))
+    if not isinstance(result, dict):
+        raise RuntimeError(f"fetch proxy error: unexpected {type(result).__name__} reply")
+    if result.get("error"):
+        raise RuntimeError(f"fetch proxy error: {result['error']}")
+    return _ProxyFetchResponse(
+        result.get("status_code", 0), result.get("headers", {}), result.get("final_url", url),
+        result.get("body", ""), result.get("body_base64", ""), result.get("is_binary", False),
+    )
+
+
+def http_get(url, headers=None, timeout=20.0, binary=None, use_proxy=None):
     """Pure HTTP fetch for static pages and APIs.
 
-    When BROWSER_USE_API_KEY is set and fetch_use is installed, route through
-    fetch-use like browser-harness. Otherwise fall back to local urllib with a
-    browser-like UA and gzip handling. Pass binary=True for bytes.
+    Public URLs route through the Browser-Use Fetch proxy (Chrome TLS
+    fingerprint + rotating IPs) when BROWSER_USE_API_KEY is set, so
+    bot-protected sites don't block us — preferring the installed `fetch_use`
+    package, else the vendored client above. In auto mode loopback/private/intranet
+    hosts are fetched directly (never sent to the proxy). On proxy failure the
+    request falls back to direct urllib and the proxy error is surfaced.
+    Pass binary=True for bytes. use_proxy: None=auto (public hosts only), True=use
+    the proxy even for private hosts (still needs BROWSER_USE_API_KEY), False=direct.
     """
-    if os.environ.get("BROWSER_USE_API_KEY"):
+    proxy_error = None
+    want_proxy = (
+        use_proxy
+        if use_proxy is not None
+        else not _is_private_or_local_host(urlparse(url).hostname)
+    )
+    if want_proxy and os.environ.get("BROWSER_USE_API_KEY"):
         try:
-            from fetch_use import fetch_sync
+            try:
+                from fetch_use import fetch_sync
+            except ImportError:
+                fetch_sync = _fetch_use_sync
 
             response = fetch_sync(url, headers=headers, timeout_ms=int(float(timeout) * 1000))
             status_code = getattr(response, "status_code", getattr(response, "status", None))
@@ -1726,8 +1825,16 @@ def http_get(url, headers=None, timeout=20.0, binary=None):
                 response_headers,
                 response_url,
             )
-        except ImportError:
-            pass
+        except Exception as exc:
+            # Proxy unavailable / auth / schema / network error — fall back to a
+            # direct urllib request below, but keep the proxy error visible so a
+            # bot-blocked direct response isn't mistaken for proxy success.
+            proxy_error = exc
+            print(
+                f"http_get: fetch proxy failed ({exc}); retrying direct",
+                file=sys.stderr,
+                flush=True,
+            )
     request_headers = {"User-Agent": "Mozilla/5.0", "Accept-Encoding": "gzip"}
     if headers:
         request_headers.update(headers)
@@ -1751,11 +1858,16 @@ def http_get(url, headers=None, timeout=20.0, binary=None):
             f"{exc.code} for {url}. If this is bot/login protection, retry from the browser with js(fetch(...)), "
             "pass site-specific headers/cookies, or configure the Browser Use fetch proxy with BROWSER_USE_API_KEY."
         )
+        if proxy_error is not None:
+            guidance += f" (fetch proxy also failed: {proxy_error})"
         raise RuntimeError(guidance) from exc
     except (urllib.error.URLError, TimeoutError, OSError) as exc:
-        raise RuntimeError(
+        message = (
             f"http_get failed for {url}: {exc}. Try a shorter timeout, browser js(fetch(...)), or a configured proxy if the site blocks direct HTTP."
-        ) from exc
+        )
+        if proxy_error is not None:
+            message += f" (fetch proxy also failed: {proxy_error})"
+        raise RuntimeError(message) from exc
 
 
 def http_get_many(urls, headers=None, timeout=20.0, binary=None, max_workers=8, return_errors=True):
diff --git a/crates/browser-use-browser/src/lib.rs b/crates/browser-use-browser/src/lib.rs
index 0e4f30e5..309c860d 100644
--- a/crates/browser-use-browser/src/lib.rs
+++ b/crates/browser-use-browser/src/lib.rs
@@ -28,8 +28,20 @@ use tungstenite::{connect, Message, WebSocket};
 const BU_API: &str = "https://api.browser-use.com/api/v3";
 const LOG_LIMIT: usize = 250;
 const SCRIPT_MAX_OUTPUT_CHARS: usize = 120_000;
-const BROWSER_SCRIPT_DEFAULT_INITIAL_WAIT_MS: u64 = 15_000;
-const BROWSER_SCRIPT_DEFAULT_OBSERVE_MS: u64 = 1_000;
+// Cost optimization (eval-everything): a script that finishes within the start
+// call returns its result in ONE tool call — no separate `observe` model turns.
+// Raised 15s->30s so the common scrape script (which finishes well under 30s)
+// no longer forces a poll round-trip. This is a single, non-stacking block that
+// still hands control back at 30s, so a stuck script can be cancelled/finalized
+// (unlike the reverted "observe30", which STACKED 30s observe blocks and starved
+// the run timebox — see DEFAULT_OBSERVE_TIMEOUT_MS doc in browser.rs).
+const BROWSER_SCRIPT_DEFAULT_INITIAL_WAIT_MS: u64 = 30_000;
+// The `next_observe_ms` HINT surfaced to the model ("call observe with
+// observe_timeout_ms=N"). Raised 1s->15s to nudge the model to long-poll instead
+// of issuing 1s "still running?" peeks (the dominant observe-churn cost). This is
+// only a hint — the observe floor stays at 1s, so the model keeps full agency to
+// bail early; we stay under the 30s window that previously regressed.
+const BROWSER_SCRIPT_DEFAULT_OBSERVE_MS: u64 = 15_000;
 const BROWSER_SCRIPT_HELPERS: &str = include_str!("browser_script_helpers.py");
 const BROWSER_CONNECT_LOCAL_HANDSHAKE_TIMEOUT: Duration = Duration::from_secs(120);
 const BROWSER_CONNECT_ATTACH_DEADLINE: Duration = Duration::from_secs(8);
@@ -13091,6 +13103,127 @@ print("http_get_many parity ok")
         assert!(output.text.contains("http_get_many parity ok"));
     }
 
+    #[test]
+    fn browser_script_http_get_vendored_proxy_private_bypass_and_error_fallback() {
+        let temp = tempfile::tempdir().unwrap();
+        let output = run_browser_script(
+            "script-http-get-vendored-proxy",
+            temp.path(),
+            temp.path().join("artifacts"),
+            r#"
+import http.server
+import json
+import os
+import socketserver
+import sys
+import threading
+
+assert _is_private_or_local_host("localhost")
+assert _is_private_or_local_host("127.0.0.1")
+assert _is_private_or_local_host("10.1.2.3")
+assert _is_private_or_local_host("192.168.0.5")
+assert _is_private_or_local_host("169.254.1.1")
+assert _is_private_or_local_host("printer.local")
+assert _is_private_or_local_host("wiki.internal")
+assert _is_private_or_local_host("intranet-host")
+assert not _is_private_or_local_host("example.com")
+assert not _is_private_or_local_host("8.8.8.8")
+
+proxy_calls = []
+proxy_mode = {"fail": False}
+
+class FakeFetchProxy(http.server.BaseHTTPRequestHandler):
+    def log_message(self, fmt, *args):
+        pass
+
+    def do_POST(self):
+        assert self.path == "/fetch"
+        assert self.headers.get("X-Browser-Use-API-Key") == "test-key"
+        req = json.loads(self.rfile.read(int(self.headers["Content-Length"])))
+        proxy_calls.append(req["url"])
+        if proxy_mode["fail"]:
+            self.send_response(500)
+            self.end_headers()
+            return
+        body = json.dumps({
+            "status_code": 200,
+            "status": "200 OK",
+            "headers": {"x-proxy": "yes"},
+            "body": "proxied:" + req["url"],
+            "body_base64": "",
+            "is_binary": False,
+            "final_url": req["url"],
+            "redirect_count": 0,
+            "protocol": "HTTP/2.0",
+        }).encode()
+        self.send_response(200)
+        self.send_header("Content-Type", "application/json")
+        self.send_header("Content-Length", str(len(body)))
+        self.end_headers()
+        self.wfile.write(body)
+
+class DirectTarget(http.server.BaseHTTPRequestHandler):
+    def log_message(self, fmt, *args):
+        pass
+
+    def do_GET(self):
+        body = b"direct"
+        self.send_response(200)
+        self.send_header("Content-Type", "text/plain; charset=utf-8")
+        self.send_header("Content-Length", str(len(body)))
+        self.end_headers()
+        self.wfile.write(body)
+
+proxy_server = socketserver.TCPServer(("127.0.0.1", 0), FakeFetchProxy)
+target_server = socketserver.TCPServer(("127.0.0.1", 0), DirectTarget)
+for server in (proxy_server, target_server):
+    threading.Thread(target=server.serve_forever, daemon=True).start()
+target_base = f"http://127.0.0.1:{target_server.server_address[1]}"
+
+sys.modules["fetch_use"] = None  # make `import fetch_use` raise ImportError so the VENDORED client runs even if the package is installed
+os.environ["BROWSER_USE_API_KEY"] = "test-key"
+os.environ["FETCH_USE_URL"] = f"http://127.0.0.1:{proxy_server.server_address[1]}"
+
+try:
+    # 1) public URL goes through the vendored proxy client
+    proxied = http_get("https://public.example/data")
+    assert proxied == "proxied:https://public.example/data", proxied
+    assert proxied.status_code == 200 and proxied.headers["x-proxy"] == "yes"
+
+    # 2) loopback/private host bypasses the proxy entirely
+    before = len(proxy_calls)
+    direct = http_get(target_base + "/anything")
+    assert direct == "direct", direct
+    assert len(proxy_calls) == before, "private host must never reach the proxy"
+
+    # 3) use_proxy=True forces even a private host through the proxy
+    forced = http_get(target_base + "/anything", use_proxy=True)
+    assert forced == "proxied:" + target_base + "/anything", forced
+
+    # 4) proxy failure falls back to direct; both errors surfaced when direct also fails
+    proxy_mode["fail"] = True
+    fallback = http_get(target_base + "/anything", use_proxy=True, timeout=3)
+    assert fallback == "direct", fallback
+    try:
+        http_get("https://no-such-host.invalid/x", timeout=3)
+    except RuntimeError as exc:
+        assert "fetch proxy also failed" in str(exc), exc
+    else:
+        raise AssertionError("expected both proxy and direct to fail")
+finally:
+    for server in (proxy_server, target_server):
+        server.shutdown()
+        server.server_close()
+print("http_get vendored proxy ok")
+"#,
+            20,
+        )
+        .unwrap();
+
+        assert!(output.ok, "{:?}\n{}", output.error, output.text);
+        assert!(output.text.contains("http_get vendored proxy ok"));
+    }
+
     #[test]
     fn browser_script_browser_fetch_single_returns_structured_errors_by_default() {
         let temp = tempfile::tempdir().unwrap();
diff --git a/prompts/browser-script-tool-description.md b/prompts/browser-script-tool-description.md
index c4a354e9..0fcf0e10 100644
--- a/prompts/browser-script-tool-description.md
+++ b/prompts/browser-script-tool-description.md
@@ -6,17 +6,12 @@ Use the `browser` tool for connection/runtime work first. If the browser is not
 
 Important execution model:
 
-- Each `browser_script` call starts a fresh Python process.
-- Python variables do not persist across calls.
-- Browser/CDP state persists in Rust.
+- Each `browser_script` call starts a fresh Python process; Python variables do not persist across calls. Browser/CDP state persists in Rust.
 - Fast calls return their final result immediately. Long calls return `status: running` with a `run_id`; keep observing that same run until it finishes, fails, or is cancelled.
-- To listen to a running script, call this tool with `action="observe"`, the returned `run_id`, and optionally `observe_timeout_ms`. Prefer coarse waits such as 30000-120000 ms for long navigation or extraction scripts; do not burn many turns polling the same `run_id` with short waits.
-- To stop a running script, call this tool with `action="cancel"` and the `run_id`. Partial images and artifacts emitted before cancellation are preserved.
-- A failed `browser_script` call may include a short diagnosis. Read that diagnosis first: if it says the browser is still connected or the same page is usable, continue from the same page instead of reconnecting.
-- Helpers are preimported; you do not need imports for normal browser work.
-- CDP is the source of truth. If a helper is incomplete, use `cdp(...)` directly.
-- Keep browser actions sequential and deliberate.
-- Do not import Playwright, Selenium, or Pyppeteer.
+- To listen to a running script, call this tool with `action="observe"`, the `run_id`, and optionally `observe_timeout_ms`. Prefer coarse waits (30000-120000 ms) for long navigation/extraction; do not burn many turns polling with short waits. To stop a run, call `action="cancel"` with the `run_id`; partial images/artifacts emitted before cancellation are preserved.
+- A failed call may include a short diagnosis. Read it first: if it says the browser is still connected or the same page is usable, continue from the same page instead of reconnecting.
+- Helpers are preimported; no imports needed for normal browser work. CDP is the source of truth — if a helper is incomplete, use `cdp(...)` directly.
+- Keep browser actions sequential and deliberate. Do not import Playwright, Selenium, or Pyppeteer.
 
 Preimported helpers:
 
@@ -73,21 +68,16 @@ last_domain_skills(include_content=False)
 
 Usage guidance:
 
-- First navigation should usually be `new_tab(url)`, not `goto_url(url)`, because `goto_url(url)` mutates the current controlled tab. Both helpers send the CDP navigation command, perform a bounded readiness check, and emit a labeled `navigation` output with `status`, `page_info`, `page_state`, and `next_step`. If that output says `navigation_ready` and `page_info.url` is the expected page, trust it and inspect/extract from the current page instead of navigating to the same URL again. If you chain more work in the same script after navigation, explicitly wait or poll for the specific selector/state you need before reading/clicking.
-- If a navigation is blocked by the user's `/domains` policy (the error says so), call `nav_policy()` to see the allowed/denied sites and plan within them; pass a URL (`nav_policy("example.com")`) to check before navigating. If the task can't be completed within the policy, tell the user which site is blocked and suggest they allow it with `/domains` or adjust the task — don't keep retrying the blocked host.
-- Keep keyboard semantics browser-harness/Rod aligned: `press_key(...)` simulates physical keys or shortcuts, while `type_text(...)` inserts/pastes text into the focused element with `Input.insertText`.
-- For React/Vue/Svelte/controlled inputs, prefer `fill_input(selector, text, timeout=...)` over direct DOM value assignment. It focuses the element, clears with Cmd/Ctrl+A plus Backspace, types through physical key events, then fires final `input`/`change` events. Use stable selectors from labels, ids, names, placeholders, or visible DOM inspection; avoid brittle positional selectors such as `input:nth-of-type(2)` unless you just verified that exact selector on the current page.
-- Do not combine `Input.dispatchKeyEvent` carrying printable `text` with a manual `char` event for the same character; that double-inserts text in Chrome.
+- First navigation should usually be `new_tab(url)`, not `goto_url(url)`, because `goto_url(url)` mutates the current controlled tab. Both send the CDP navigation command, perform a bounded readiness check, and emit a labeled `navigation` output with `status`, `page_info`, `page_state`, and `next_step`. If that output says `navigation_ready` and `page_info.url` is the expected page, trust it and inspect/extract instead of navigating again. If you chain more work after navigation, explicitly wait or poll for the specific selector/state before reading/clicking.
+- If a navigation is blocked by the user's `/domains` policy (the error says so), call `nav_policy()` to see allowed/denied sites and plan within them; pass a URL (`nav_policy("example.com")`) to check before navigating. If the task can't be done within the policy, tell the user which site is blocked and suggest `/domains` or adjusting the task — don't keep retrying the blocked host.
+- Keyboard semantics: `press_key(...)` simulates physical keys/shortcuts; `type_text(...)` inserts/pastes text into the focused element via `Input.insertText`. Do not combine `Input.dispatchKeyEvent` carrying printable `text` with a manual `char` event for the same character; that double-inserts in Chrome.
+- For React/Vue/Svelte/controlled inputs, prefer `fill_input(selector, text, timeout=...)` over direct DOM value assignment. It focuses, clears with Cmd/Ctrl+A plus Backspace, types through physical key events, then fires final `input`/`change` events. Use stable selectors from labels, ids, names, placeholders, or visible DOM inspection; avoid brittle positional selectors like `input:nth-of-type(2)` unless you just verified that exact selector on the current page.
 - If the task is site-specific, call `domain_skills_for_url(url, include_content=True)` before inventing selectors, private API routes, or flows. `goto_url(url)` also returns matching `domain_skills` metadata when a skill root is available.
-- Be patient with loading pages by making several cheap observations, not one long blind wait. Prefer short waits such as `wait_for_load(1)`, `wait_for_element(selector, timeout=2)`, or `wait_for_network_idle(2)`, then inspect again. If a wait returns false, that is not a task failure; inspect the current page and continue from the best available state or decide whether it is stuck.
-- Use screenshots as labeled temporal checkpoints: initial load, before/after meaningful clicks, scrolls, route changes, dialogs, uploads, downloads, and final verification. For screenshot or visual-output tasks, verify the saved image is contentful and nonblank before `done`.
-- The common screenshot call is `screenshot(label)`, for example `screenshot("before_submit")`.
-- Screenshot/image artifacts are sent as `input_image` content to the next model turn. The user does not see those pixels inline in the terminal; describe what you see or provide the saved artifact path when the user asks for the screenshot.
-- If a script emits screenshots/images and then fails, the next model turn still receives the images alongside the failure diagnosis. Use those pixels to decide the next smaller retry.
-- If a running script emits screenshots/images before it finishes, `observe` returns those images as soon as they are available. Use the pixels to guide the next observe/retry.
-- Use `emit_output(value, label="...")` for structured observations that the next model turn may need, such as `page_info()`, extracted rows, selected DOM state, or API responses. The full value stays model-visible.
-- When a script emits labeled structured output, add a `# browser_summary:` JSON comment block at the top of the script that maps each emitted label to the compact transcript summary. Write the code/labels first mentally, then place or update this block before submitting the tool call; the runtime parses the whole script before execution.
-- Summary values may be literals, JSONPath-like selectors such as `$.url`, or template strings such as `Read ${$.length} employee rows`. Missing summary specs fall back to a generic `Recorded <label>` summary while preserving the full output.
+- Be patient with loading pages: make several cheap observations, not one long blind wait. Prefer short waits like `wait_for_load(1)`, `wait_for_element(selector, timeout=2)`, or `wait_for_network_idle(2)`, then inspect again. A wait returning false is not a task failure; inspect the current page and continue from the best available state or decide whether it is stuck.
+- Use screenshots as labeled temporal checkpoints: initial load, before/after meaningful clicks, scrolls, route changes, dialogs, uploads, downloads, and final verification. The common call is `screenshot(label)`, e.g. `screenshot("before_submit")`. For screenshot/visual-output tasks, verify the saved image is contentful and nonblank before `done`.
+- Screenshot/image artifacts are sent as `input_image` content to the next model turn (the user does not see those pixels inline in the terminal; describe what you see or give the saved artifact path when asked). They are delivered even when the script then fails, and `observe` returns them as soon as they are available on a running script — use the pixels to guide the next smaller retry.
+- Use `emit_output(value, label="...")` for structured observations the next turn may need (e.g. `page_info()`, extracted rows, selected DOM state, API responses). The full value stays model-visible.
+- When a script emits labeled structured output, add a `# browser_summary:` JSON comment block at the top mapping each emitted label to its compact transcript summary; the runtime parses the whole script before execution. Summary values may be literals, JSONPath-like selectors such as `$.url`, or template strings such as `Read ${$.length} employee rows`. Missing summary specs fall back to a generic `Recorded <label>` summary while preserving the full output.
 - Prefer this pattern over printing page or extraction objects:
 
 ```python
@@ -111,21 +101,16 @@ rows = [{"name": "Ada"}, {"name": "Grace"}]
 emit_output(rows, label="employee_rows")
 ```
 
-- Keep `print(...)` for short debug/status text only. Do not print large page, DOM, network, or extraction objects when `emit_output(...)` can carry the full value.
-- Prefer coordinate clicks for visible UI: screenshot, inspect pixels, `click_at_xy(x, y)`, wait, screenshot again.
-- Use `js(...)` for DOM inspection and raw `cdp(...)` for lower-level browser actions.
-- Use `js(function_source, *args)` when passing JSON-serializable Python values into JavaScript; use `target_id=` as a keyword for iframe targets.
+- Keep `print(...)` for short debug/status text only; do not print large page, DOM, network, or extraction objects when `emit_output(...)` can carry the full value.
+- Prefer coordinate clicks for visible UI: screenshot, inspect pixels, `click_at_xy(x, y)`, wait, screenshot again. Use `js(...)` for DOM inspection and raw `cdp(...)` for lower-level actions; pass JSON-serializable Python values into JavaScript with `js(function_source, *args)`, and use `target_id=` for iframe targets.
 - For real user forms, act like a browser user: screenshot, click the visible field/control, type with `type_text(...)`, `press_key(...)`, or `fill_input(...)`, then screenshot or otherwise verify. Use coordinate clicks for checkboxes, radios, buttons, dropdowns, and custom controls. Do not assign `element.value`, `element.checked`, `selectedIndex`, React private state, or MutationObserver restore loops on live forms. Do not synthesize `input`, `change`, `click`, or keyboard events in page JavaScript to make a form look filled. Those anti-patterns can desynchronize framework state from the visible DOM.
-- Use `http_get(...)` for one static page/API URL after the browser reveals a stable endpoint. Use `browser_fetch(...)` when the page's cookies, auth headers, or browser session are needed. Returned bodies are strings by default, bytes with `binary=True`, and expose `.status_code`, `.headers`, `.url`, `.text`, `.content`, and `.json()` for convenience. If direct HTTP hits bot or login protection, retry with `browser_fetch(...)`, site-specific headers/cookies, or the configured Browser Use fetch proxy. Do not replace source completion with blind bulk fetching; use small inspected chunks with progress, counts, missing fields, and source coverage.
+- Use `http_get(...)` for one static page/API URL after the browser reveals a stable endpoint. Use `browser_fetch(...)` when the page's cookies, auth headers, or browser session are needed. Returned bodies are strings by default, bytes with `binary=True`, and expose `.status_code`, `.headers`, `.url`, `.text`, `.content`, and `.json()`. If direct HTTP hits bot or login protection, retry with `browser_fetch(...)`, site-specific headers/cookies, or the configured Browser Use fetch proxy. Do not replace source completion with blind bulk fetching; use small inspected chunks with progress, counts, missing fields, and source coverage.
 
 - Extract only fields needed for the task. Do not emit full profile text, full DOM text, cookies, localStorage, or entire app caches unless you are debugging and the smaller field-level extraction failed.
-- Save complete generated result files under `outputs_dir()` or relative paths in the current working directory. Files written there are collected as artifacts automatically; `copy_artifact(...)` is for files created elsewhere.
-- For large structured results, write the full JSON/CSV/text to a file. If the task asks for an exact inline final format, return that content with `done(result=...)` and optionally include `result_file=path`; otherwise finish with `done(result_file=path)`.
-- For loops over multiple pages/items, emit short progress every item or every 2 seconds, whichever comes first. Progress can be a short `print(...)` line or compact `emit_output(..., label="progress")`.
+- Save complete generated result files under `outputs_dir()` or relative paths in the cwd — files written there are collected as artifacts automatically (`copy_artifact(...)` is for files created elsewhere). Write large structured results to a file: if the task asks for an exact inline final format, return that content with `done(result=...)` and optionally `result_file=path`; otherwise finish with `done(result_file=path)`.
+- For loops over multiple pages/items, emit short progress every item or every 2 seconds, whichever comes first (a short `print(...)` line or compact `emit_output(..., label="progress")`). Prefer bounded chunks with per-item micro timeouts and checkpoints written to files; inspect progress after each chunk, and if a chunk fails with a usable-page diagnosis, shrink the next chunk and resume from the last checkpoint.
 - For audits after a large result, run a small independent sample/count/schema check, then repair the specific gaps it finds until the required rows/fields are complete or the run is nearly out of turns. Do not rerun the whole crawl or full detail scrape just because counts fluctuate or some pages are intermittently empty; target the missing items, and mark a gap as a genuine absence only after checking its correct source path.
-- For list/profile extraction, filter the candidate list before navigating when the list page already contains enough information, such as employee versus contractor. Do not visit rows that cannot affect the final answer.
-- Poll until the record itself is ready before extracting fields. If a loaded record is missing a required field, inspect the correct source path before marking it absent; do not record required values as missing just because the first record view is null.
-- For long extraction or verification loops, prefer bounded chunks with checkpoints written to files. Use per-item micro timeouts and inspect progress after each chunk. If a chunk fails with a usable-page diagnosis, shrink the next chunk and resume from the last checkpoint.
+- For list/profile extraction, filter the candidate list before navigating when the list page already has enough information (e.g. employee versus contractor); do not visit rows that cannot affect the final answer. Poll until the record itself is ready before extracting fields; if a loaded record is missing a required field, inspect the correct source path before marking it absent — do not record required values as missing just because the first record view is null.
 
 Signing in / sign-ups: before signing up with a new email, check whether you're already logged in (you often drive the user's own profile) or have a saved credential for the site (listed under "Saved credentials") — if so, use it. If there's no existing login, ask the user whether to sign in with their own account (they save it via `/secrets`) or have you create a disposable account (you generate a throwaway inbox with `email_address()` and read its verification emails yourself), and wait for their choice. For the disposable path, call `email_address()`, record whatever context you need before submitting (`current_datetime()["utc"]`, existing `message_id`s from `email_inbox()`, or both), fill the email field, submit, then inspect/poll `email_inbox(sent_after=...)` or compare `timestamp`/`message_id` yourself (newest-first; `preview` already holds the code; `email_message(message_id)` has the full `text`/`html` for magic links).
 
diff --git a/prompts/python-tool-description.md b/prompts/python-tool-description.md
index 14991a3f..fe10ee30 100644
--- a/prompts/python-tool-description.md
+++ b/prompts/python-tool-description.md
@@ -8,7 +8,7 @@ CDP is the source of truth. Use raw CDP for basic browser control: `cdp("Page.na
 
 Use `js(function_source, *args)` when passing JSON-serializable Python values into JavaScript; use `target_id=` as a keyword for iframe targets.
 
-Do not import Playwright, Selenium, or Pyppeteer. Browser-harness workflow: first navigation should usually be `new_tab(url)`, not `goto_url(url)`, because `goto_url` mutates the active tab. `new_tab(url)` and `goto_url(url)` have zero implicit wait: they send the CDP navigation command and then return without waiting for readyState, network idle, selectors, paint, or sleeps. If you chain more work in the same script after navigation, explicitly wait or poll before reading/clicking. If navigation is the last action before yielding to the model, the LLM call itself may provide enough elapsed time; the next call must still inspect state before assuming the page loaded. If the task is site-specific and `domain_skills_for_url(url, include_content=True)` returns files, read those domain skills before inventing selectors, private API routes, or flows. Use screenshots as labeled temporal checkpoints: initial load, before/after meaningful clicks, scrolls, route changes, menus, dialogs, downloads, uploads, form submissions, and final verification. Prefer coordinate clicks for visible targets: `capture_screenshot` or `screenshot`, inspect the pixels, `click_at_xy(x, y)`, then screenshot again. Prefer capturing the action timeline inside one Python call when possible. Do not call `screenshot` repeatedly on an unchanged viewport; after a screenshot, act, inspect with CDP/JS, navigate, scroll, call `screenshot_clip(...)` for a different CSS-pixel region, wait briefly for an async transition, inspect again, or finish. Chrome hit-testing handles iframes, shadow DOM, and cross-origin content. Use `js(...)` and raw `cdp(...)` when coordinates are the wrong tool or helpers are incomplete. `js(...)` returns Python values, so use Python slicing/methods after the call; only use JavaScript methods inside the JavaScript expression.
+Do not import Playwright, Selenium, or Pyppeteer. Browser-harness workflow: first navigation should usually be `new_tab(url)`, not `goto_url(url)`, because `goto_url` mutates the active tab. `new_tab(url)` and `goto_url(url)` have zero implicit wait: they send the CDP navigation command and return without waiting for readyState, network idle, selectors, paint, or sleeps. If you chain more work in the same script after navigation, explicitly wait or poll before reading/clicking; if navigation is the last action before yielding to the model, the next call must still inspect state before assuming the page loaded. If the task is site-specific and `domain_skills_for_url(url, include_content=True)` returns files, read those domain skills before inventing selectors, private API routes, or flows. Use screenshots as labeled temporal checkpoints: initial load, before/after meaningful clicks, scrolls, route changes, menus, dialogs, downloads, uploads, form submissions, and final verification. Prefer coordinate clicks for visible targets: `screenshot`, inspect the pixels, `click_at_xy(x, y)`, then screenshot again. Do not call `screenshot` repeatedly on an unchanged viewport; after a screenshot, act, inspect with CDP/JS, navigate, scroll, call `screenshot_clip(...)` for a different CSS-pixel region, wait briefly for an async transition, inspect again, or finish. Chrome hit-testing handles iframes, shadow DOM, and cross-origin content; use `js(...)` and raw `cdp(...)` when coordinates are the wrong tool or helpers are incomplete. `js(...)` returns Python values, so use Python slicing/methods after the call; only use JavaScript methods inside the JavaScript expression.
 
 To pass pixels directly to the next model turn, call raw `cdp("Page.captureScreenshot", format="png")`, `screenshot(label)`, `screenshot_clip(label, x, y, width, height)`, or `capture_screenshot(..., attach=True)`; use `emit_image(path)` for existing image files. Raw `Page.captureScreenshot` results are attached automatically. The user does not see attached pixels inline in the terminal; describe what you see or provide the saved artifact path when the user asks for a screenshot. Multiple labeled screenshots are good when they form a temporal trace, not when they repeat the same unchanged page.