From ddfc78c83e124d6ae1138b31cbc4a680adb49555 Mon Sep 17 00:00:00 2001 From: NiceCode666 Date: Wed, 8 Apr 2026 17:22:43 +0800 Subject: [PATCH 01/72] test: add failing test for CDP video recording all pages --- .github/workflows/windows-cli-test.yml | 3 - CLAUDE.md | 2 +- README.md | 46 + README_zh.md | 46 + bridgic/browser/__init__.py | 4 +- bridgic/browser/_cli_catalog.py | 2 +- bridgic/browser/_config.py | 18 +- bridgic/browser/_constants.py | 3 + bridgic/browser/cli/_client.py | 21 +- bridgic/browser/cli/_commands.py | 37 +- bridgic/browser/cli/_daemon.py | 211 ++- bridgic/browser/session/__init__.py | 4 +- bridgic/browser/session/_browser.py | 1269 +++++++++++++---- bridgic/browser/session/_video_recorder.py | 546 +++++++ docs/API.md | 4 +- docs/CDP_MODE.md | 108 ++ docs/KNOWN_LIMITATIONS.md | 69 + pyproject.toml | 2 +- .../bridgic-browser/references/cli-guide.md | 36 +- skills/bridgic-browser/references/env-vars.md | 3 + .../bridgic-browser/references/sdk-guide.md | 30 +- tests/unit/test_browser.py | 1117 ++++++++++++++- tests/unit/test_browser_methods.py | 234 +++ tests/unit/test_cli.py | 669 ++++++++- tests/unit/test_config.py | 143 ++ tests/unit/test_tools.py | 127 +- tests/unit/test_video_recorder.py | 248 ++++ uv.lock | 2 +- 28 files changed, 4666 insertions(+), 338 deletions(-) create mode 100644 bridgic/browser/session/_video_recorder.py create mode 100644 docs/CDP_MODE.md create mode 100644 docs/KNOWN_LIMITATIONS.md create mode 100644 tests/unit/test_video_recorder.py diff --git a/.github/workflows/windows-cli-test.yml b/.github/workflows/windows-cli-test.yml index c7dab9f..58f5b5e 100644 --- a/.github/workflows/windows-cli-test.yml +++ b/.github/workflows/windows-cli-test.yml @@ -22,9 +22,6 @@ jobs: matrix: python-version: ["3.10", "3.11", "3.12"] - env: - BRIDGIC_HEADLESS: "1" - steps: - name: Checkout repository uses: actions/checkout@v4 diff --git a/CLAUDE.md b/CLAUDE.md index b7a6ebc..50df904 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -194,7 +194,7 
@@ Key implementation details: - **`type`**: docstring explicitly states the text goes into the **currently focused element** and that the user must `click` or `focus` the target first. - **`mouse-move` / `mouse-click` / `mouse-drag`**: coordinates are **viewport pixels from the top-left corner**; documented in both docstrings and `_cli_catalog.py`. - **`eval-on`**: CODE must be an arrow function or named function that receives the element as its argument (e.g. `"(el) => el.textContent"`); this calling convention is documented in the docstring with examples. -- **Config loading**: `Browser.__init__` auto-loads config via `_config.py:_load_config_sources()`. The `--headed` CLI flag merges `{"headless": false}` into `BRIDGIC_BROWSER_JSON` before spawning the daemon. The `--clear-user-data` CLI flag merges `{"clear_user_data": true}` into `BRIDGIC_BROWSER_JSON`. +- **Config loading**: `Browser.__init__` auto-loads config via `_config.py:_load_config_sources()`. The `--headed` CLI flag merges `{"headless": false}` into `BRIDGIC_BROWSER_JSON` before spawning the daemon. The `--clear-user-data` CLI flag merges `{"clear_user_data": true}` into `BRIDGIC_BROWSER_JSON`. The `--cdp` CLI flag resolves the CDP input (port/url/auto) via `resolve_cdp_input()` on the client side, then passes the resolved `ws://` URL to the daemon via the `BRIDGIC_CDP` env var (overriding any inherited shell value). `run_daemon()` reads `BRIDGIC_CDP` and passes `cdp_url=...` to `Browser()` after running it through `resolve_cdp_input()` (a no-op on `ws://`/`wss://` inputs). CDP resolution failure raises `RuntimeError` — no silent fallback to launch mode. - **`close` command fast-path**: the daemon calls `browser.inspect_pending_close_artifacts()` to pre-allocate a session dir, trace path, and video paths (all grouped under `~/.bridgic/bridgic-browser/tmp/close--/`), responds to the client immediately with those paths, then sets `stop_event`. 
Actual `browser.close()` runs after the client disconnects. After close, `_write_close_report()` writes `close-report.json` in the session dir with status (`"success"`, `"success_with_timeouts"`, `"error"`, or `"timeout"`), artifact paths, and any errors. - **Daemon cleanup ownership guard**: after `browser.close()` finishes, `run_daemon()` reads the run-info file and compares its `pid` field to `os.getpid()` before calling `transport.cleanup()` / `remove_run_info()`. This prevents the outgoing daemon from deleting the new daemon's socket when a `close` is followed immediately by a new command (which starts a new daemon before the old one's shutdown completes). If the run-info is gone (`None`) the old daemon is still the owner and cleans up normally. diff --git a/README.md b/README.md index b3c5f96..78da96a 100644 --- a/README.md +++ b/README.md @@ -202,6 +202,50 @@ BRIDGIC_BROWSER_JSON='{"headless":false,"locale":"zh-CN"}' bridgic-browser open BRIDGIC_BROWSER_JSON='{"clear_user_data":true}' bridgic-browser open URL ``` +#### CDP Mode (Connect to Existing Browser) + +Instead of launching a new browser, `bridgic-browser` can connect to an already-running Chrome/Chromium instance via the [Chrome DevTools Protocol](https://chromedevtools.github.io/devtools-protocol/). + +Start Chrome with remote debugging enabled: + +```bash +# macOS +/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome \ + --remote-debugging-port=9222 --user-data-dir=/tmp/cdp-profile + +# Linux +google-chrome --remote-debugging-port=9222 --user-data-dir=/tmp/cdp-profile +``` + +Then connect with `--cdp`: + +```bash +bridgic-browser open https://example.com --cdp 9222 +bridgic-browser open https://example.com --cdp ws://localhost:9222/devtools/browser/... +bridgic-browser open https://example.com --cdp wss://cloud.example.com/chromium?token=... 
+bridgic-browser open https://example.com --cdp auto +``` + +| Format | Description | +|--------|-------------| +| `9222` | Bare port number -- queries `localhost:9222/json/version` to discover the WebSocket URL | +| `ws://...` / `wss://...` | Direct WebSocket URL (raw CDP or Playwright WS protocol), passed through as-is | +| `http://host:port` | HTTP discovery endpoint -- queries `/json/version` on that host | +| `auto` | Auto-scan local Chrome/Chromium/Brave profile directories (+ Canary variants) for an active `DevToolsActivePort` file | + +**Closing behavior:** `bridgic-browser close` disconnects from the remote browser but does **not** terminate the Chrome process. The browser keeps running and can be reconnected. + +**Use cases:** +- Reuse an existing Chrome session with its login state and extensions +- Connect to cloud browser services (Browserless, Steel.dev, etc.) +- Automate Electron apps that expose a CDP port + +SDK equivalent: + +```python +browser = Browser(cdp_url="ws://localhost:9222/devtools/browser/...") +``` + #### Command List | Category | Commands | @@ -480,6 +524,7 @@ browser = Browser( | `user_data_dir` | str/Path | None | Custom path for persistent profile (ignored when `clear_user_data=True`) | | `clear_user_data` | bool | False | If True, use ephemeral session (no profile); if False, use persistent profile | | `stealth` | bool/StealthConfig | True | Stealth mode configuration | +| `cdp_url` | str | None | WebSocket URL to connect to an existing Chrome via CDP (skips launch) | | `channel` | str | None | Browser channel (chrome, msedge, etc.) | | `proxy` | dict | None | Proxy settings | | `downloads_path` | str/Path | None | Download directory | @@ -579,3 +624,4 @@ MIT License - [Browser Tools Guide](docs/BROWSER_TOOLS_GUIDE.md) – Tool selection, ref vs coordinate, wait strategies, patterns. - [Snapshot and Page State](docs/SNAPSHOT_AND_STATE.md) – SnapshotOptions, EnhancedSnapshot, get_snapshot_text, get_element_by_ref. 
- [API Summary](docs/API.md) – Session and DownloadManager API reference. +- [Known Limitations](docs/KNOWN_LIMITATIONS.md) – Known issues and upstream bugs (e.g. Chrome "Show in Folder" not working). diff --git a/README_zh.md b/README_zh.md index 062273f..8f117bc 100644 --- a/README_zh.md +++ b/README_zh.md @@ -201,6 +201,50 @@ BRIDGIC_BROWSER_JSON='{"headless":false,"locale":"zh-CN"}' bridgic-browser open BRIDGIC_BROWSER_JSON='{"clear_user_data":true}' bridgic-browser open URL ``` +#### CDP 模式(连接已有浏览器) + +`bridgic-browser` 可以通过 [Chrome DevTools Protocol](https://chromedevtools.github.io/devtools-protocol/) 连接到已经运行的 Chrome/Chromium 实例,而非启动新浏览器。 + +首先启动 Chrome 并开启远程调试端口: + +```bash +# macOS +/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome \ + --remote-debugging-port=9222 --user-data-dir=/tmp/cdp-profile + +# Linux +google-chrome --remote-debugging-port=9222 --user-data-dir=/tmp/cdp-profile +``` + +然后使用 `--cdp` 连接: + +```bash +bridgic-browser open https://example.com --cdp 9222 +bridgic-browser open https://example.com --cdp ws://localhost:9222/devtools/browser/... +bridgic-browser open https://example.com --cdp wss://cloud.example.com/chromium?token=... 
+bridgic-browser open https://example.com --cdp auto +``` + +| 格式 | 说明 | +|--------|-------------| +| `9222` | 端口号 -- 向 `localhost:9222/json/version` 查询 WebSocket URL | +| `ws://...` / `wss://...` | 直接 WebSocket URL(原始 CDP 或 Playwright WS 协议),原样传递 | +| `http://host:port` | HTTP 发现端点 -- 向该主机的 `/json/version` 查询 | +| `auto` | 自动扫描本地 Chrome/Chromium/Brave 配置目录(含 Canary 变体),查找活跃的 `DevToolsActivePort` 文件 | + +**关闭行为:** `bridgic-browser close` 会断开与远程浏览器的连接,但**不会**终止 Chrome 进程。浏览器继续运行,可以重新连接。 + +**使用场景:** +- 复用已有 Chrome 会话及其登录状态和扩展 +- 连接云端浏览器服务(Browserless、Steel.dev 等) +- 自动化开放 CDP 端口的 Electron 应用 + +SDK 等效用法: + +```python +browser = Browser(cdp_url="ws://localhost:9222/devtools/browser/...") +``` + #### 命令列表 | 类别 | 命令 | @@ -479,6 +523,7 @@ browser = Browser( | `user_data_dir` | str/Path | None | 持久化 profile 自定义路径(`clear_user_data=True` 时忽略) | | `clear_user_data` | bool | False | True 时使用临时会话(无 profile);False 时使用持久化 profile | | `stealth` | bool/StealthConfig | True | 隐身模式配置 | +| `cdp_url` | str | None | 通过 CDP 连接已有 Chrome 的 WebSocket URL(跳过启动) | | `channel` | str | None | 浏览器通道(chrome、msedge 等) | | `proxy` | dict | None | 代理设置 | | `downloads_path` | str/Path | None | 下载目录 | @@ -578,3 +623,4 @@ MIT 许可证 - [浏览器工具指南](docs/BROWSER_TOOLS_GUIDE.md) — 工具选择、ref 与坐标、等待策略、常见模式。 - [快照与页面状态](docs/SNAPSHOT_AND_STATE.md) — SnapshotOptions、EnhancedSnapshot、get_snapshot_text、get_element_by_ref。 - [API 摘要](docs/API.md) — Session 与 DownloadManager API 说明。 +- [已知限制](docs/KNOWN_LIMITATIONS.md) — 已知问题与上游 bug(如 Chrome「在文件夹中打开」不可用)。 diff --git a/bridgic/browser/__init__.py b/bridgic/browser/__init__.py index b6ad50a..71cec46 100644 --- a/bridgic/browser/__init__.py +++ b/bridgic/browser/__init__.py @@ -3,7 +3,7 @@ from importlib.metadata import version from .utils._logging import configure_logging -from .session._browser import Browser +from .session._browser import Browser, find_cdp_url, resolve_cdp_input from .session._snapshot import EnhancedSnapshot, RefData, SnapshotGenerator, 
SnapshotOptions from .session._browser_model import PageDesc, PageInfo, PageSizeInfo, FullPageInfo from .session._stealth import StealthConfig, StealthArgsBuilder, create_stealth_config @@ -27,6 +27,8 @@ "configure_logging", # Browser session "Browser", + "find_cdp_url", + "resolve_cdp_input", # Snapshot types "EnhancedSnapshot", "RefData", diff --git a/bridgic/browser/_cli_catalog.py b/bridgic/browser/_cli_catalog.py index 969a8c6..fd8bca8 100644 --- a/bridgic/browser/_cli_catalog.py +++ b/bridgic/browser/_cli_catalog.py @@ -78,7 +78,7 @@ # command_name -> (ToolCategory, one-line description) CLI_COMMAND_META: dict[str, tuple[ToolCategory, str]] = { - "open": (ToolCategory.NAVIGATION, "Navigate to URL (starts a browser session if needed) [--headed] [--clear-user-data]"), + "open": (ToolCategory.NAVIGATION, "Navigate to URL (starts a browser session if needed) [--headed] [--clear-user-data] [--cdp PORT_OR_URL]"), "back": (ToolCategory.NAVIGATION, "Go back to the previous page"), "forward": (ToolCategory.NAVIGATION, "Go forward to the next page"), "reload": (ToolCategory.NAVIGATION, "Reload the current page"), diff --git a/bridgic/browser/_config.py b/bridgic/browser/_config.py index dbc0498..16befe8 100644 --- a/bridgic/browser/_config.py +++ b/bridgic/browser/_config.py @@ -58,7 +58,11 @@ def _load_config_sources() -> Dict[str, Any]: user_cfg = BRIDGIC_BROWSER_HOME / _CONFIG_FILENAME if user_cfg.is_file(): try: - cfg.update(json.loads(user_cfg.read_text(encoding="utf-8"))) + parsed = json.loads(user_cfg.read_text(encoding="utf-8")) + if not isinstance(parsed, dict): + logger.warning("user config %s: expected JSON object, got %s", user_cfg, type(parsed).__name__) + else: + cfg.update(parsed) except Exception: logger.warning("failed to parse user config %s", user_cfg, exc_info=True) @@ -66,7 +70,11 @@ def _load_config_sources() -> Dict[str, Any]: local_cfg = Path(_CONFIG_FILENAME) if local_cfg.is_file(): try: - 
cfg.update(json.loads(local_cfg.read_text(encoding="utf-8"))) + parsed = json.loads(local_cfg.read_text(encoding="utf-8")) + if not isinstance(parsed, dict): + logger.warning("local config %s: expected JSON object, got %s", local_cfg, type(parsed).__name__) + else: + cfg.update(parsed) except Exception: logger.warning("failed to parse local config %s", local_cfg, exc_info=True) @@ -74,7 +82,11 @@ def _load_config_sources() -> Dict[str, Any]: raw = os.environ.get(_ENV_VAR) if raw: try: - cfg.update(json.loads(raw)) + parsed = json.loads(raw) + if not isinstance(parsed, dict): + logger.warning("%s: expected JSON object, got %s", _ENV_VAR, type(parsed).__name__) + else: + cfg.update(parsed) except Exception: logger.warning("failed to parse %s: %s", _ENV_VAR, raw, exc_info=True) diff --git a/bridgic/browser/_constants.py b/bridgic/browser/_constants.py index fb4c279..a9c339b 100644 --- a/bridgic/browser/_constants.py +++ b/bridgic/browser/_constants.py @@ -21,6 +21,9 @@ # Default persistent user data directory for browser sessions BRIDGIC_USER_DATA_DIR = BRIDGIC_BROWSER_HOME / "user_data" +# Default directory for browser downloads (app-managed fallback) +BRIDGIC_DOWNLOADS_DIR = BRIDGIC_BROWSER_HOME / "downloads" + class ToolCategory(Enum): """Browser tool categories. diff --git a/bridgic/browser/cli/_client.py b/bridgic/browser/cli/_client.py index 7bb3dc2..9e4b432 100644 --- a/bridgic/browser/cli/_client.py +++ b/bridgic/browser/cli/_client.py @@ -141,6 +141,7 @@ def send_command( start_if_needed: bool = True, headed: bool = False, clear_user_data: bool = False, + cdp_url: Optional[str] = None, ) -> str: """Send *command* with *args* to the daemon. @@ -158,12 +159,16 @@ def send_command( If True, start the daemon with ``clear_user_data=True`` (ephemeral mode — no persistent browser profile). Only meaningful when *start_if_needed* is True and the daemon is not yet running. 
+ cdp_url: + If set, connect to an existing Chrome via this CDP WebSocket URL instead + of launching a new browser. Only meaningful when the daemon is not yet + running. """ if args is None: args = {} if start_if_needed: try: - ensure_daemon_running(headed=headed, clear_user_data=clear_user_data) + ensure_daemon_running(headed=headed, clear_user_data=clear_user_data, cdp_url=cdp_url) except BridgicBrowserCommandError: raise except Exception as exc: @@ -189,7 +194,7 @@ def send_command( # Daemon lifecycle helpers # --------------------------------------------------------------------------- -def _spawn_daemon(headed: bool = False, clear_user_data: bool = False) -> None: +def _spawn_daemon(headed: bool = False, clear_user_data: bool = False, cdp_url: Optional[str] = None) -> None: """Spawn the daemon as a detached subprocess and wait for its READY_SIGNAL. Uses a background reader thread so the 30-second timeout is always @@ -204,6 +209,12 @@ def _spawn_daemon(headed: bool = False, clear_user_data: bool = False) -> None: clear_user_data: If True, merge ``{"clear_user_data": true}`` into ``BRIDGIC_BROWSER_JSON`` so the daemon starts with an ephemeral browser profile (no persistence). + cdp_url: + If set, pass the already-resolved ws:// URL to the daemon via + ``BRIDGIC_CDP`` so it connects to an existing Chrome instance via CDP + instead of launching a new browser. Overrides any ``BRIDGIC_CDP`` + inherited from the parent shell, which matches the "CLI flag beats + env var" convention. 
""" env = os.environ.copy() if headed or clear_user_data: @@ -214,6 +225,8 @@ def _spawn_daemon(headed: bool = False, clear_user_data: bool = False) -> None: if clear_user_data: existing["clear_user_data"] = True env["BRIDGIC_BROWSER_JSON"] = _json.dumps(existing) + if cdp_url: + env["BRIDGIC_CDP"] = cdp_url popen_kwargs: dict[str, Any] = { "stdout": subprocess.PIPE, @@ -287,7 +300,7 @@ def _probe_socket_sync() -> bool: return get_transport().probe() -def ensure_daemon_running(headed: bool = False, clear_user_data: bool = False) -> None: +def ensure_daemon_running(headed: bool = False, clear_user_data: bool = False, cdp_url: Optional[str] = None) -> None: """Start the daemon if it is not already running.""" if RUN_INFO_PATH.exists(): if _probe_socket_sync(): @@ -306,4 +319,4 @@ def ensure_daemon_running(headed: bool = False, clear_user_data: bool = False) - ) from exc remove_run_info() - _spawn_daemon(headed=headed, clear_user_data=clear_user_data) + _spawn_daemon(headed=headed, clear_user_data=clear_user_data, cdp_url=cdp_url) diff --git a/bridgic/browser/cli/_commands.py b/bridgic/browser/cli/_commands.py index 915ed62..140c405 100644 --- a/bridgic/browser/cli/_commands.py +++ b/bridgic/browser/cli/_commands.py @@ -124,10 +124,26 @@ def cli() -> None: help="Launch the browser in headed (visible) mode.") @click.option("--clear-user-data", is_flag=True, default=False, help="Start with a fresh browser profile (no persistent user data). Ignored if a session is already running.") -def cmd_open(url: str, headed: bool, clear_user_data: bool) -> None: +@click.option( + "--cdp", default=None, metavar="PORT_OR_URL", + help=( + "Connect to a running browser instead of launching a new one. " + "Accepts: port number (9222), ws:// or wss:// URL, http://host:port, " + "or 'auto' to scan local Chrome/Chromium/Brave (+ Canary variants) profiles." 
+ ), +) +def cmd_open(url: str, headed: bool, clear_user_data: bool, cdp: str | None) -> None: """Navigate to URL (starts a browser session if needed).""" + cdp_url: str | None = None + if cdp: + from bridgic.browser.session._browser import resolve_cdp_input + try: + cdp_url = resolve_cdp_input(cdp) + except Exception as exc: + _err(exc) + return try: - _ok(send_command("open", {"url": url}, headed=headed, clear_user_data=clear_user_data)) + _ok(send_command("open", {"url": url}, headed=headed, clear_user_data=clear_user_data, cdp_url=cdp_url)) except Exception as exc: _err(exc) @@ -934,7 +950,12 @@ def cmd_trace_chunk(title: str) -> None: @click.option("--width", default=None, type=int, help="Video width in pixels.") @click.option("--height", default=None, type=int, help="Video height in pixels.") def cmd_video_start(width: int | None, height: int | None) -> None: - """Start video recording.""" + """Start video recording on ALL pages in the context. + + Mirrors the Playwright CLI: one start call records every tab, + including tabs opened afterwards. Each page gets its own .webm file + returned by ``video-stop``. + """ try: _ok(send_command("video_start", {"width": width, "height": height}, start_if_needed=False)) except Exception as exc: @@ -944,7 +965,15 @@ def cmd_video_start(width: int | None, height: int | None) -> None: @cli.command("video-stop", context_settings=CONTEXT_SETTINGS) @click.argument("path", required=False, default=None) def cmd_video_stop(path: str | None) -> None: - """Stop video recording and save to PATH (optional).""" + """Stop video recording and save files. + + PATH is optional. When omitted, recorded files stay in the temp dir. + When given: + * a directory → each recording is saved inside it + * a file path → first recording uses that exact path; extra + recordings from additional tabs get a ``-1``, ``-2`` … suffix + inserted before the ``.webm`` extension. 
+ """ try: abs_path = os.path.abspath(path) if path else None _ok(send_command("video_stop", {"path": abs_path}, start_if_needed=False)) diff --git a/bridgic/browser/cli/_daemon.py b/bridgic/browser/cli/_daemon.py index 81f61d7..360c567 100644 --- a/bridgic/browser/cli/_daemon.py +++ b/bridgic/browser/cli/_daemon.py @@ -20,8 +20,10 @@ import sys from pathlib import Path from typing import Any, Callable, Dict, Optional, TYPE_CHECKING +from urllib.parse import urlparse -from .._constants import BRIDGIC_BROWSER_HOME +from .._config import _load_config_sources +from .._constants import BRIDGIC_BROWSER_HOME, BRIDGIC_DOWNLOADS_DIR from ..errors import BridgicBrowserError, InvalidInputError from ._transport import ( get_transport, @@ -58,6 +60,26 @@ def _is_browser_closed_error(exc: BaseException) -> bool: return any(pat in msg for pat in _BROWSER_CLOSED_PATTERNS) +def _browser_closed_hint(cdp_url: Optional[str] = None) -> str: + """Return a BROWSER_CLOSED hint message tailored to the connection mode.""" + if cdp_url: + # For local Chrome (localhost/127.0.0.1), show port number instead of the full ws:// URL + # because the browser UUID in the URL changes on every Chrome restart. + _parsed = urlparse(cdp_url) + _host = (_parsed.hostname or "").lower() + if _host in ("localhost", "127.0.0.1", "::1"): + _cdp_hint = str(_parsed.port or 9222) + _msg = "Local Chrome closed or crashed." + else: + _cdp_hint = cdp_url + _msg = "Remote browser session closed (the cloud/remote browser disconnected or timed out)." 
+ return ( + f"{_msg} " + f"Run: bridgic-browser close && bridgic-browser open --cdp '{_cdp_hint}'" + ) + return _BROWSER_CLOSED_HINT + + def _response( *, success: bool, @@ -452,8 +474,11 @@ async def _handle_video_stop(browser: "Browser", args: Dict[str, Any]) -> str: # ── Lifecycle ───────────────────────────────────────────────────────────────── -async def _handle_close(browser: "Browser", _args: Dict[str, Any]) -> str: - return await browser.close() +# Note: there is no `_handle_close` here. The connection handler intercepts +# the `close` command directly (see the `if command == "close"` branch +# below) so it can pre-allocate the close-session directory and respond to +# the client *before* the actual browser teardown runs in the background. +# Adding a `_HANDLERS["close"]` entry would be dead code. async def _handle_resize(browser: "Browser", args: Dict[str, Any]) -> str: @@ -541,11 +566,31 @@ async def _handle_resize(browser: "Browser", args: Dict[str, Any]) -> str: "video_start": _handle_video_start, "video_stop": _handle_video_stop, # Lifecycle - "close": _handle_close, + # ("close" is intercepted in the connection handler — see comment above + # the lifecycle section.) "resize": _handle_resize, } +async def _cdp_reconnect(browser: "Browser") -> bool: + """Stop and restart *browser* to re-establish a dropped CDP/PW-WS connection. + + Returns True if the reconnect succeeded, False otherwise. + After a successful reconnect the browser is at about:blank (new session). 
+ """ + try: + await browser.close() + except Exception as exc: + logger.debug("[daemon] cdp_reconnect: close() error (ignored): %s", exc) + try: + await browser._start() + logger.info("[daemon] cdp_reconnect: reconnected successfully") + return True + except Exception as exc: + logger.error("[daemon] cdp_reconnect: _start() failed: %s", exc) + return False + + async def _dispatch(browser: "Browser", command: str, args: Dict[str, Any]) -> Dict[str, Any]: handler = _HANDLERS.get(command) if handler is None: @@ -554,39 +599,72 @@ async def _dispatch(browser: "Browser", command: str, args: Dict[str, Any]) -> D result=f"Unknown command: {command!r}", error_code="UNKNOWN_COMMAND", ) - try: - result = await handler(browser, args) - return _response( - success=True, - result=str(result), - ) - except BridgicBrowserError as exc: - if _is_browser_closed_error(exc): + + cdp_url: Optional[str] = getattr(browser, "_cdp_url", None) + # In CDP mode, attempt one automatic reconnect when the remote session drops. + # This helps with cloud-browser session timeouts (Browserless, Steel.dev, etc.). + # We do NOT reconnect for `close` (shutdown intent) or if there is no CDP URL. 
+ _max_attempts = 2 if (cdp_url and command != "close") else 1 + + for _attempt in range(_max_attempts): + try: + result = await handler(browser, args) + return _response( + success=True, + result=str(result), + ) + except BridgicBrowserError as exc: + if _is_browser_closed_error(exc): + if _attempt == 0 and _max_attempts > 1: + logger.warning( + "[daemon] CDP session closed during %r, attempting one-shot reconnect", + command, + ) + if await _cdp_reconnect(browser): + continue # retry the command with the refreshed connection + return _response( + success=False, + result=_browser_closed_hint(cdp_url), + error_code="BROWSER_CLOSED", + ) return _response( success=False, - result=_BROWSER_CLOSED_HINT, - error_code="BROWSER_CLOSED", + result=exc.message, + error_code=exc.code, + data=exc.details, + meta={"retryable": exc.retryable}, ) - return _response( - success=False, - result=exc.message, - error_code=exc.code, - data=exc.details, - meta={"retryable": exc.retryable}, - ) - except Exception as exc: - if _is_browser_closed_error(exc): + except Exception as exc: + if _is_browser_closed_error(exc): + if _attempt == 0 and _max_attempts > 1: + logger.warning( + "[daemon] CDP session closed during %r, attempting one-shot reconnect", + command, + ) + if await _cdp_reconnect(browser): + continue # retry + return _response( + success=False, + result=_browser_closed_hint(cdp_url), + error_code="BROWSER_CLOSED", + ) + logger.exception("[daemon] command=%s error", command) return _response( success=False, - result=_BROWSER_CLOSED_HINT, - error_code="BROWSER_CLOSED", + result=str(exc), + error_code="HANDLER_EXCEPTION", ) - logger.exception("[daemon] command=%s error", command) - return _response( - success=False, - result=str(exc), - error_code="HANDLER_EXCEPTION", - ) + # Unreachable: every iteration of the loop above always returns. 
The body + # only `continue`s on a successful reconnect, and the *retried* iteration + # itself either returns success or returns one of the BROWSER_CLOSED / + # HANDLER_EXCEPTION responses. Kept as a defensive safety net so that if + # a future edit accidentally adds a code path that exits the loop without + # returning, the daemon still answers the client with a clean error. + return _response( + success=False, + result=_browser_closed_hint(cdp_url), + error_code="BROWSER_CLOSED", + ) _READ_TIMEOUT = 60.0 # seconds to wait for a command line from the client @@ -686,8 +764,8 @@ async def _handle_connection( artifacts = browser.inspect_pending_close_artifacts() except Exception as exc: logger.warning(f"[close] inspect_pending_close_artifacts failed: {exc}") - artifacts = {"session_dir": None, "trace": [], "video": []} - session_dir = artifacts.get("session_dir") or "(unknown)" + artifacts = {"session_dir": "", "trace": [], "video": []} + session_dir = artifacts.get("session_dir") or "" lines = ["Browser closing in background."] if artifacts["trace"]: @@ -696,7 +774,14 @@ async def _handle_connection( if artifacts["video"]: lines.append("Video (generating in background, check later):") lines.extend(f" {p}" for p in artifacts["video"]) - lines.append(f"Close report (generating in background, check later): {session_dir}/close-report.json") + # The close-report is only written when there is at least one + # artifact (otherwise we would leak an empty session dir per + # close call). Only advertise the path when it actually exists. 
+ if session_dir: + lines.append( + f"Close report (generating in background, check later): " + f"{session_dir}/close-report.json" + ) resp = _response(success=True, result="\n".join(lines)) writer.write((json.dumps(resp) + "\n").encode()) @@ -763,11 +848,65 @@ def _write_close_report( logger.warning("[daemon] failed to write close-report.json: %s", exc) +def _resolve_default_downloads_dir() -> Path: + """Pick the best default downloads directory for the daemon. + + Strategy: prefer ~/Downloads (user-familiar), fall back to + ~/.bridgic/bridgic-browser/downloads/ if ~/Downloads is not + writable or cannot be created. + """ + user_downloads = Path.home() / "Downloads" + try: + user_downloads.mkdir(parents=True, exist_ok=True) + # Verify writable by testing with a temp file + probe = user_downloads / ".bridgic_probe" + probe.touch() + probe.unlink() + return user_downloads + except OSError: + pass + + BRIDGIC_DOWNLOADS_DIR.mkdir(parents=True, exist_ok=True) + logger.info( + "[daemon] ~/Downloads not writable, using fallback: %s", + BRIDGIC_DOWNLOADS_DIR, + ) + return BRIDGIC_DOWNLOADS_DIR + + async def run_daemon() -> None: - from bridgic.browser.session._browser import Browser + from bridgic.browser.session._browser import Browser, resolve_cdp_input + + # Resolve CDP connection if requested via env var. + # BRIDGIC_CDP accepts port/url/auto from the user shell, or an already- + # resolved ws:// URL injected by _spawn_daemon() when the CLI client has + # pre-resolved the --cdp flag. Both paths go through resolve_cdp_input(), + # which is a no-op on ws:///wss:// inputs. + _cdp_input: Optional[str] = os.environ.get("BRIDGIC_CDP") + cdp_url: Optional[str] = None + if _cdp_input: + try: + cdp_url = resolve_cdp_input(_cdp_input) + except (RuntimeError, ValueError, ConnectionError) as exc: + raise RuntimeError( + f"Failed to establish CDP connection: {exc}\n" + "Check that the browser is running with --remote-debugging-port " + "or that the CDP URL / port is correct." 
+ ) from exc # Browser.__init__ auto-loads config from files and env vars. - browser = Browser() + kwargs: Dict[str, Any] = {} + if cdp_url: + kwargs["cdp_url"] = cdp_url + + # Auto-enable downloads in daemon mode. + # SDK users are unaffected (they control downloads_path explicitly). + if "downloads_path" not in kwargs: + _cfg_check = _load_config_sources() + if "downloads_path" not in _cfg_check: + kwargs["downloads_path"] = str(_resolve_default_downloads_dir()) + + browser = Browser(**kwargs) logger.info("[daemon] browser ready (lazy start, config=%s)", {k: v for k, v in browser.get_config().items() if k != "proxy"}) stop_event = asyncio.Event() diff --git a/bridgic/browser/session/__init__.py b/bridgic/browser/session/__init__.py index c5df7c7..aeb0193 100644 --- a/bridgic/browser/session/__init__.py +++ b/bridgic/browser/session/__init__.py @@ -1,4 +1,4 @@ -from ._browser import Browser +from ._browser import Browser, find_cdp_url, resolve_cdp_input from ._snapshot import EnhancedSnapshot, RefData, SnapshotGenerator, SnapshotOptions from ._browser_model import PageDesc, PageInfo, PageSizeInfo, FullPageInfo from ._stealth import StealthConfig, StealthArgsBuilder, create_stealth_config @@ -6,6 +6,8 @@ __all__ = [ "Browser", + "find_cdp_url", + "resolve_cdp_input", "EnhancedSnapshot", "RefData", "SnapshotGenerator", diff --git a/bridgic/browser/session/_browser.py b/bridgic/browser/session/_browser.py index 40af57c..50c5bf7 100644 --- a/bridgic/browser/session/_browser.py +++ b/bridgic/browser/session/_browser.py @@ -8,7 +8,7 @@ import tempfile from urllib.parse import urlparse from pathlib import Path -from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Sequence, Union, NoReturn +from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Sequence, Set, Union, NoReturn if TYPE_CHECKING: try: @@ -33,6 +33,7 @@ from ._browser_model import FullPageInfo, PageDesc, PageInfo, PageSizeInfo from ._stealth import StealthConfig, 
StealthArgsBuilder from ._download import DownloadManager, DownloadedFile +from . import _video_recorder as _video_recorder_mod from ..utils import find_page_by_id, generate_page_id, model_to_llm_string from ..errors import ( BridgicBrowserError, @@ -46,6 +47,267 @@ _DEFAULT_SNAPSHOT_LIMIT = 10000 + +# Chromium-based browser profile directories per platform. +# Used by find_cdp_url(mode="scan") to auto-discover a running browser. +# Source: https://chromium.googlesource.com/chromium/src/+/main/docs/user_data_dir.md +_CDP_SCAN_DIRS: Dict[str, List[tuple]] = { + "darwin": [ + # (browser_label, profile_base_path) + ("Chrome", "~/Library/Application Support/Google/Chrome"), + ("Chrome Canary", "~/Library/Application Support/Google/Chrome Canary"), + ("Chromium", "~/Library/Application Support/Chromium"), + ("Brave", "~/Library/Application Support/BraveSoftware/Brave-Browser"), + ], + "linux": [ + ("Chrome", "~/.config/google-chrome"), + ("Chrome Canary", "~/.config/google-chrome-unstable"), + ("Chromium", "~/.config/chromium"), + ("Brave", "~/.config/BraveSoftware/Brave-Browser"), + ], + "win32": [ + ("Chrome", r"%LOCALAPPDATA%\Google\Chrome\User Data"), + ("Chrome Canary", r"%LOCALAPPDATA%\Google\Chrome SxS\User Data"), + ("Chromium", r"%LOCALAPPDATA%\Chromium\User Data"), + ("Brave", r"%LOCALAPPDATA%\BraveSoftware\Brave-Browser\User Data"), + ], +} + + +def _read_devtools_active_port(base: str) -> Optional[str]: + """Return the ws:// URL from a DevToolsActivePort file, or None if absent/invalid.""" + port_file = os.path.join(base, "DevToolsActivePort") + try: + with open(port_file) as f: + lines = f.read().strip().splitlines() + if len(lines) >= 2: + return f"ws://localhost:{lines[0]}{lines[1]}" + except (OSError, ValueError): + pass + return None + + +def find_cdp_url( + mode: str = "port", + port: int = 9222, + host: str = "localhost", + user_data_dir: Optional[str] = None, + channel: str = "stable", + ws_endpoint: Optional[str] = None, +) -> str: + """Resolve a 
Chrome CDP WebSocket URL. + + Parameters + ---------- + mode: + - ``"port"`` *(recommended)*: HTTP GET ``/json/version`` on ``host:port``. + Works for both local and remote Chrome, regardless of install path. + Chrome must be started with ``--remote-debugging-port=PORT``. + - ``"file"``: Read ``DevToolsActivePort`` from the Chrome profile directory. + Use ``user_data_dir`` to specify the exact profile path; falling back to + the ``channel`` guess is unreliable with custom installs or multiple instances. + - ``"scan"``: Auto-discover a running Chromium-based browser by scanning all + known profile directories on the current machine (Chrome, Chrome Canary, + Chromium, Brave). Returns the first active one found. + Raises ``RuntimeError`` with instructions if none are running with CDP enabled. + - ``"service"``: Return ``ws_endpoint`` directly (cloud providers such as + Browserless or Steel that give you a ``wss://`` URL). + port: + Debugging port (``"port"`` / ``"file"`` modes). Default 9222. + host: + Server address (``"port"`` mode). Default ``"localhost"``. + user_data_dir: + Explicit Chrome profile directory (``"file"`` mode). + channel: + Chrome channel for built-in path lookup when ``user_data_dir`` is not given + (``"file"`` mode). Values: ``"stable"``, ``"beta"``, ``"canary"``. + ws_endpoint: + Full ``ws://`` or ``wss://`` address (``"service"`` mode). + """ + import urllib.error + import urllib.request + + if mode == "service": + if not ws_endpoint: + raise ValueError("ws_endpoint is required when mode='service'") + return ws_endpoint + + if mode == "port": + # Bracket IPv6 hosts so the URL stays parseable + # (e.g. ``::1`` → ``[::1]``). Plain IPv4 / hostnames pass through + # unchanged. + host_in_url = f"[{host}]" if host and ":" in host else host + url = f"http://{host_in_url}:{port}/json/version" + try: + # Bypass system HTTP proxy for loopback hosts. 
macOS reads system + # network preferences (proxy_bypass_macosx_sysconf) and may NOT + # bypass localhost even though it should — when a system proxy is + # active, probes return misleading "HTTP 502 Bad Gateway" instead + # of the real "Connection refused" / "Connection timed out". + # Remote hosts (cloud browser services, SSH-tunneled CDP, etc.) + # MUST keep proxy support, so this branch is loopback-only. + host_lower = (host or "").lower().strip() + is_loopback = host_lower in ("localhost", "127.0.0.1", "::1") + if is_loopback: + opener = urllib.request.build_opener( + urllib.request.ProxyHandler({}) + ) + resp = opener.open(url, timeout=5) + else: + resp = urllib.request.urlopen(url, timeout=5) + data = json.loads(resp.read()) + ws_url: str = data["webSocketDebuggerUrl"] + except urllib.error.URLError as exc: + # URLError is the parent of HTTPError; catches connection refused, + # timeouts, DNS failures, and HTTP error responses alike. OSError + # subclasses (e.g. raw socket errors) also flow through URLError + # in practice via urlopen, so this single clause is sufficient. + raise ConnectionError( + f"Cannot reach Chrome debugging interface at {url}: {exc}\n" + f"Make sure Chrome was started with --remote-debugging-port={port}" + ) from exc + except (KeyError, json.JSONDecodeError) as exc: + raise ValueError(f"Failed to parse /json/version response: {exc}") from exc + # Chrome always reports localhost in the URL; replace with the actual + # host when the user passed a remote address. Compare against the + # *normalized* host_lower so callers passing "LOCALHOST" or other + # mixed-case loopback variants still get a clean lowercase URL + # rather than ws://LOCALHOST:9222/... For IPv6 hosts we substitute + # the bracketed form so the resulting URL is parseable. 
+ if host_lower != "localhost": + ws_url = ws_url.replace("localhost", host_in_url, 1) + return ws_url + + if mode == "scan": + platform = sys.platform + candidates = _CDP_SCAN_DIRS.get(platform, []) + if not candidates: + raise RuntimeError(f"Auto-scan is not supported on platform: {platform}") + for label, raw_path in candidates: + base = os.path.expandvars(os.path.expanduser(raw_path)) + ws_url = _read_devtools_active_port(base) + if ws_url: + logger.info("find_cdp_url(scan): found active CDP port via %s (%s)", label, base) + return ws_url + # Nothing found — build a helpful error with instructions + _browsers = ", ".join(label for label, _ in candidates) + raise RuntimeError( + "No locally running browser with remote debugging enabled was found.\n" + f"Scanned profiles for: {_browsers}.\n\n" + "To enable remote debugging, start your browser with:\n" + " --remote-debugging-port=9222\n\n" + "Examples:\n" + ' # macOS Chrome\n' + ' /Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome \\\n' + ' --remote-debugging-port=9222 --user-data-dir=/tmp/cdp-profile\n\n' + ' # Or connect to a cloud browser service:\n' + ' bridgic-browser open --cdp "wss:///chromium/playwright?token=..."' + ) + + if mode != "file": + raise ValueError( + f"Unknown mode {mode!r}. Valid modes: 'port', 'file', 'scan', 'service'." 
+ ) + + if user_data_dir: + base = os.path.expanduser(str(user_data_dir)) + else: + _dirs: Dict[str, Dict[str, str]] = { + "darwin": { + "stable": "~/Library/Application Support/Google/Chrome", + "beta": "~/Library/Application Support/Google/Chrome Beta", + "canary": "~/Library/Application Support/Google/Chrome Canary", + }, + "linux": { + "stable": "~/.config/google-chrome", + "beta": "~/.config/google-chrome-beta", + "canary": "~/.config/google-chrome-unstable", + }, + "win32": { + "stable": r"%LOCALAPPDATA%\Google\Chrome\User Data", + "beta": r"%LOCALAPPDATA%\Google\Chrome Beta\User Data", + "canary": r"%LOCALAPPDATA%\Google\Chrome SxS\User Data", + }, + } + if sys.platform not in _dirs: + raise RuntimeError(f"Unsupported platform for mode='file': {sys.platform}") + _platform_dirs = _dirs[sys.platform] + if channel not in _platform_dirs: + raise ValueError( + f"Unknown channel '{channel}' for platform '{sys.platform}'. " + f"Valid options: {list(_platform_dirs)}" + ) + base = os.path.expandvars(os.path.expanduser(_platform_dirs[channel])) + + port_file = os.path.join(base, "DevToolsActivePort") + if not os.path.exists(port_file): + extra = "" if user_data_dir else "\nOr specify user_data_dir explicitly instead of relying on channel path." + raise FileNotFoundError( + f"DevToolsActivePort not found: {port_file}\n" + f"Make sure Chrome has remote debugging enabled." + extra + ) + with open(port_file) as f: + lines = f.read().strip().splitlines() + if len(lines) < 2: + raise ValueError( + f"DevToolsActivePort file is malformed (expected 2 lines, got {len(lines)}): {port_file}" + ) + return f"ws://localhost:{lines[0]}{lines[1]}" + + +def resolve_cdp_input(value: str) -> str: + """Resolve a user-supplied CDP value to a WebSocket URL. 
+ + Parameters + ---------- + value: + Accepted formats: + + - ``"9222"`` — local Chrome on port 9222; queries /json/version + - ``"ws://..."`` / ``"wss://..."`` — used as-is (raw CDP or Playwright WS protocol) + - ``"http://host:port"`` — HTTP discovery; queries /json/version on that host + - ``"auto"`` / ``"scan"`` — auto-scan known Chrome/Chromium/Brave profile dirs (+ Canary variants) + + Returns + ------- + str + A ``ws://`` or ``wss://`` WebSocket URL ready to pass to ``Browser(cdp_url=...)``. + + Raises + ------ + ValueError + Input format is not recognised. + RuntimeError + ``auto``/``scan`` mode: no running browser with CDP found. + ConnectionError + Port/HTTP mode: cannot reach Chrome at the specified address. + """ + v = value.strip() + # Auto-scan all known local Chrome/Chromium/Brave profile directories + # (matches _CDP_SCAN_DIRS, including Canary variants) + if v.lower() in ("auto", "scan"): + return find_cdp_url(mode="scan") + # Direct WebSocket URL — pass through unchanged + if v.startswith("ws://") or v.startswith("wss://"): + return v + # HTTP discovery endpoint — extract host/port and query /json/version + if v.startswith("http://") or v.startswith("https://"): + parsed = urlparse(v) + host = parsed.hostname or "localhost" + port = parsed.port or 9222 + return find_cdp_url(mode="port", host=host, port=port) + # Bare port number — localhost auto-discover via /json/version + if v.isdigit(): + return find_cdp_url(mode="port", host="localhost", port=int(v)) + raise ValueError( + f"Invalid --cdp value: {v!r}.\n" + "Accepted formats:\n" + " 9222 — local Chrome on port 9222\n" + " ws://host:port/… — WebSocket URL (raw CDP or Playwright WS protocol)\n" + " http://host:port — HTTP discovery endpoint\n" + " auto — auto-scan local Chrome/Chromium/Brave profiles (+ Canary variants)" + ) + _LAUNCH_DEBUG_LOG = str(BRIDGIC_TMP_DIR / "launch-debug.json") @@ -399,7 +661,7 @@ class Browser: - device_scale_factor, is_mobile, has_touch: Device emulation - 
reduced_motion, forced_colors, contrast: Accessibility - accept_downloads: Auto-accept downloads - - record_har_*, record_video_*: Recording options + - record_har_*: HAR recording options - base_url, strict_selectors, service_workers: Navigation/selector options - client_certificates: TLS client authentication @@ -452,6 +714,8 @@ def __init__( clear_user_data: Optional[bool] = None, # === Stealth mode (enabled by default for best anti-detection) === stealth: Union[bool, StealthConfig, None] = None, + # === CDP connection (connect to an existing Chrome instance) === + cdp_url: Optional[str] = None, # === Browser launch parameters (commonly used) === channel: Optional[str] = None, executable_path: Optional[Union[str, Path]] = None, @@ -480,6 +744,25 @@ def __init__( # Resolve parameters: explicit (non-None) > config > default. # Always pop named-param keys from _cfg so they don't leak into # _extra_kwargs (which would corrupt get_config() and Playwright options). + cdp_url = cdp_url if cdp_url is not None else _cfg.pop('cdp_url', None) + # Normalize cdp_url for *all* sources (config file, explicit ctor arg). + # The CLI client and the daemon already run resolve_cdp_input() before + # they pass us a value, but a config file like {"cdp_url": "9222"} or + # {"cdp_url": "auto"} would otherwise reach Playwright's + # connect_over_cdp() unchanged and crash deep in the driver. ws:// + # and wss:// inputs short-circuit (no extra work, no I/O). 
+ if cdp_url is not None and not ( + isinstance(cdp_url, str) + and (cdp_url.startswith("ws://") or cdp_url.startswith("wss://")) + ): + try: + cdp_url = resolve_cdp_input(str(cdp_url)) + except (RuntimeError, ValueError, ConnectionError) as exc: + raise InvalidInputError( + f"Failed to resolve cdp_url={cdp_url!r}: {exc}", + code="INVALID_CDP_URL", + details={"cdp_url": cdp_url, "source": "config_or_argument"}, + ) from exc headless = headless if headless is not None else _cfg.pop('headless', True) stealth = stealth if stealth is not None else _cfg.pop('stealth', True) viewport = viewport if viewport is not None else _cfg.pop('viewport', None) @@ -503,11 +786,11 @@ def __init__( color_scheme = color_scheme if color_scheme is not None else _cfg.pop('color_scheme', None) # Remove any named-param keys that were skipped above (explicit value won) for _named_key in ( - 'headless', 'stealth', 'viewport', 'user_data_dir', 'clear_user_data', 'channel', - 'executable_path', 'proxy', 'timeout', 'slow_mo', 'args', - 'ignore_default_args', 'downloads_path', 'devtools', 'user_agent', - 'locale', 'timezone_id', 'ignore_https_errors', 'extra_http_headers', - 'offline', 'color_scheme', + 'cdp_url', 'headless', 'stealth', 'viewport', 'user_data_dir', + 'clear_user_data', 'channel', 'executable_path', 'proxy', 'timeout', + 'slow_mo', 'args', 'ignore_default_args', 'downloads_path', 'devtools', + 'user_agent', 'locale', 'timezone_id', 'ignore_https_errors', + 'extra_http_headers', 'offline', 'color_scheme', ): _cfg.pop(_named_key, None) @@ -540,7 +823,7 @@ def __init__( # Stealth configuration self._stealth_config: Optional[StealthConfig] = None self._stealth_builder: Optional[StealthArgsBuilder] = None - self._temp_video_dir: Optional[str] = None # For auto-created video dir + self._preallocated_trace_path: Optional[str] = None self._close_session_dir: Optional[str] = None @@ -560,6 +843,17 @@ def __init__( if self._stealth_config and self._stealth_config.enabled: 
self._stealth_builder = StealthArgsBuilder(self._stealth_config) + # CDP connection URL (if set, connect_over_cdp() is used instead of launch) + self._cdp_url = cdp_url + # Whether bridgic created the CDP context (vs borrowing an existing one). + # When True, close() will close the context; when False it only disconnects. + self._cdp_context_owned = False + # Pages bridgic explicitly created inside the (possibly borrowed) CDP + # context. close() uses this to clean up bridgic's own tabs without + # touching the user's existing tabs. Unused in non-CDP modes (kept + # empty as a defensive default). + self._cdp_owned_pages: Set[Any] = set() + # Browser launch parameters self._channel = channel self._executable_path = Path(executable_path).expanduser() if executable_path else None @@ -612,10 +906,15 @@ def __init__( # Context-scoped state (keyed by _get_context_key) self._tracing_state: Dict[str, bool] = {} self._video_state: Dict[str, bool] = {} - # Deferred video save requests from stop_video(): context_key → target filename. - # None means save to the Playwright temp path (stop_video called without filename). - # Key absent means stop_video was not called for this context. - self._pending_video_save_path: Dict[str, Optional[str]] = {} + # Multi-page CDP screencast video recording state. + # Mirrors Playwright CLI behaviour (packages/playwright-core/src/tools/ + # backend/context.ts — ``startVideoRecording`` / ``stopVideoRecording``): + # one ``start_video`` call records EVERY page in the context, including + # pages opened after start, each to its own .webm file. + self._video_recorders: Dict[Any, "_video_recorder_mod.VideoRecorder"] = {} + # When a recording session is active, holds {"width", "height", + # "context", "page_listener"}. None means no active session. 
+ self._video_session: Optional[Dict[str, Any]] = None # ==================== Properties ==================== @@ -624,9 +923,14 @@ def use_persistent_context(self) -> bool: """Whether to use persistent context mode (unrelated to headless/headed mode). Priority (highest to lowest): + - cdp_url is set → always False (connect to existing browser) - clear_user_data=True → always False (fresh launch+new_context, user_data_dir ignored) - clear_user_data=False → always True (persistent; user_data_dir if set, else default dir) """ + # CDP mode: connect to existing browser, never use persistent context + if self._cdp_url is not None: + return False + return not self._clear_user_data @property @@ -683,6 +987,47 @@ def channel(self) -> Optional[str]: """Browser distribution channel.""" return self._channel + @property + def last_close_artifacts(self) -> Dict[str, List[str]]: + """Trace and video paths produced by the most recent ``close()`` call. + + Returns + ------- + Dict[str, List[str]] + ``{"trace": [...], "video": [...]}``. The lists are empty + when ``close()`` ran but produced no artifacts, and also + when ``close()`` has never been called on this instance. + + Notes + ----- + Returns a fresh shallow copy on every access — mutating the + returned dict (or its inner lists) does not affect the + browser's internal state, and a subsequent ``close()`` will + not clobber the copy you already hold. + """ + src = self._last_shutdown_artifacts or {} + return { + "trace": list(src.get("trace", [])), + "video": list(src.get("video", [])), + } + + @property + def last_close_errors(self) -> List[str]: + """Warnings/errors collected during the most recent ``close()`` call. + + Returns + ------- + List[str] + One entry per cleanup step that raised. Empty when + ``close()`` succeeded cleanly or has never been called. + + Notes + ----- + Returns a fresh copy on every access; mutating it does not + affect the browser's internal state. 
+ """ + return list(self._last_shutdown_errors or []) + def get_config(self) -> Dict[str, Any]: """Get all current browser configuration. @@ -714,6 +1059,7 @@ def get_config(self) -> Dict[str, Any]: "extra_http_headers": self._extra_http_headers, "offline": self._offline, "color_scheme": self._color_scheme, + "cdp_url": self._cdp_url, "use_persistent_context": self.use_persistent_context, **self._extra_kwargs, } @@ -839,9 +1185,13 @@ def _get_launch_options(self) -> Dict[str, Any]: options["devtools"] = self._devtools if self._proxy is not None: options["proxy"] = self._proxy - # NOTE: Don't pass downloads_path to Playwright - DownloadManager handles it - # Passing downloads_path to Playwright causes files to be saved with hash names - # Our DownloadManager uses download.save_as() to save with correct filenames + # NOTE: We intentionally do NOT pass downloads_path to Playwright. + # Playwright uses CDP `Browser.setDownloadBehavior(allowAndName)` to + # intercept all downloads, which breaks Chrome's native download UI + # (e.g. "Show in Folder" does nothing). This is a known Chromium bug: + # https://issues.chromium.org/issues/324282051 + # Instead, DownloadManager uses download.save_as() to copy files with + # correct filenames to the user's downloads_path. 
if self._slow_mo is not None: options["slow_mo"] = self._slow_mo @@ -911,21 +1261,12 @@ def _get_context_options(self) -> Dict[str, Any]: "accept_downloads", "base_url", "strict_selectors", "service_workers", "record_har_path", "record_har_omit_content", "record_har_url_filter", "record_har_mode", "record_har_content", - "record_video_dir", "record_video_size", "client_certificates" } for key in context_keys: if key in self._extra_kwargs: options[key] = self._extra_kwargs[key] - # Auto-create a default video dir so video recording is always available - if "record_video_dir" not in options: - if not self._temp_video_dir: - self._temp_video_dir = str(BRIDGIC_TMP_DIR) - os.makedirs(self._temp_video_dir, exist_ok=True) - logger.info(f"Using default video dir: {self._temp_video_dir}") - options["record_video_dir"] = self._temp_video_dir - return options def _get_persistent_context_options(self) -> Dict[str, Any]: @@ -978,7 +1319,63 @@ async def _start(self) -> None: try: self._playwright = await async_playwright().start() - if self.use_persistent_context: + if self._cdp_url: + # Mode 0: Connect to an already-running Chrome via raw CDP. + # Stealth launch args and extensions cannot be applied to an existing + # browser process, so they are skipped here. The JS init script is + # still registered so that new pages opened in this session receive it. + logger.info("Using CDP connect mode (url=%s)", self._cdp_url) + self._browser = await self._playwright.chromium.connect_over_cdp(self._cdp_url) + # Playwright invariant for connect_over_cdp() (verified + # against playwright-core 1.57): + # chromium.ts _connectOverCDPImpl always passes + # persistent={noDefaultViewport: true}, so + # crBrowser.ts:_connect skips the early `if (!options.persistent)` + # branch and creates `_defaultContext`. The Node-side + # browserDispatcher then dispatches it as a `context` + # event, which the Python client appends to + # `Browser._contexts`. 
+ # Net effect: ``self._browser.contexts`` is never empty + # in current Playwright versions. The else branch below + # is a defensive fallback in case this invariant ever + # changes upstream. + if self._browser.contexts: + self._context = self._browser.contexts[0] + self._cdp_context_owned = False + else: + self._context = await self._browser.new_context(**self._get_context_options()) + self._cdp_context_owned = True + + # Inject JS stealth patches only in headless mode. Headed mode + # skips the script to avoid breaking Cloudflare Turnstile (same + # rationale as the non-CDP code path below). + if self._stealth_builder and self._headless: + init_script = self._stealth_builder.get_init_script(locale=self._locale) + if init_script: + await self._context.add_init_script(init_script) + + # Always create a new bridgic-owned tab. We never reuse a + # borrowed user tab — the very next navigate_to() would + # otherwise overwrite whatever the user was looking at. + # In owned-context mode the new context is empty anyway, so + # this is a no-op cost. + existing_count = len(self._context.pages) + self._page = await self._context.new_page() + self._cdp_owned_pages.add(self._page) + logger.info( + "[CDP] connected; created new bridgic tab " + "(borrowed_context=%s, preserved_existing_tabs=%d)", + not self._cdp_context_owned, + existing_count, + ) + + if self._download_manager: + self._download_manager.attach_to_context(self._context) + + logger.info("Playwright started (mode=cdp, stealth_js=%s)", self.stealth_enabled) + return + + elif self.use_persistent_context: # Mode 1: Persistent context (clear_user_data=False) logger.info("Using persistent context mode") persistent_options = self._get_persistent_context_options() @@ -1063,8 +1460,6 @@ async def _ensure_started(self) -> None: # shutdown so that a hung beforeunload handler cannot block forever. 
_PAGE_CLOSE_TIMEOUT = 5.0 _TRACE_STOP_TIMEOUT = 10.0 - _VIDEO_PATH_TIMEOUT = 10.0 - _VIDEO_SAVE_AS_TIMEOUT = 120.0 # save_as copies a file; large recordings need more time _CONTEXT_CLOSE_TIMEOUT = 15.0 _BROWSER_CLOSE_TIMEOUT = 15.0 _PLAYWRIGHT_STOP_TIMEOUT = 15.0 @@ -1193,21 +1588,21 @@ def inspect_pending_close_artifacts(self) -> Dict[str, Any]: Returns ------- Dict with keys: - session_dir : str — unique per-close directory under BRIDGIC_TMP_DIR + session_dir : str — unique per-close directory under + BRIDGIC_TMP_DIR, or "" when no + artifact will be produced trace : List[str] — pre-created trace path (if tracing is active) video : List[str] — pre-allocated video paths in session dir - """ - import random - from datetime import datetime - - ts = datetime.now().strftime("%Y%m%d-%H%M%S") - session_name = f"close-{ts}-{random.randint(0, 0xffff):04x}" - session_dir = Path(str(BRIDGIC_TMP_DIR)) / session_name - session_dir.mkdir(parents=True, exist_ok=True) - self._close_session_dir = str(session_dir) + Notes + ----- + We deliberately skip creating the session directory when no + tracing/video session is active. Otherwise every SDK ``close()`` + call would leak an empty ``close--`` directory under + ``BRIDGIC_TMP_DIR``, which previously accumulated indefinitely. + """ artifacts: Dict[str, Any] = { - "session_dir": str(session_dir), + "session_dir": "", "trace": [], "video": [], } @@ -1217,33 +1612,37 @@ def inspect_pending_close_artifacts(self) -> Dict[str, Any]: context_key = _get_context_key(self._context) + tracing_active = bool(self._tracing_state.get(context_key)) + video_count = len(self._video_recorders) + if not tracing_active and video_count == 0: + # Nothing to write — don't create a directory. 
+ return artifacts + + import random + from datetime import datetime + + ts = datetime.now().strftime("%Y%m%d-%H%M%S") + session_name = f"close-{ts}-{random.randint(0, 0xffff):04x}" + session_dir = Path(str(BRIDGIC_TMP_DIR)) / session_name + session_dir.mkdir(parents=True, exist_ok=True) + self._close_session_dir = str(session_dir) + artifacts["session_dir"] = str(session_dir) + # Pre-allocate trace path inside session dir - if self._tracing_state.get(context_key): + if tracing_active: trace_path = str(session_dir / "trace.zip") Path(trace_path).touch() # create empty file; tracing.stop() will overwrite self._preallocated_trace_path = trace_path artifacts["trace"].append(trace_path) - # Determine video artifact info - _absent: Any = object() - pending_raw = self._pending_video_save_path.get(context_key, _absent) - has_pending = pending_raw is not _absent - - if self._video_state.get(context_key) or has_pending: - if has_pending and pending_raw: - artifacts["video"].append(os.path.abspath(str(pending_raw))) + # Pre-allocate one video path per active recorder. Multi-page + # recording produces N files: video.webm, video-1.webm, ... + for i in range(video_count): + if i == 0: + video_path = str(session_dir / "video.webm") else: - # Pre-allocate video paths inside session dir so all artifacts - # are grouped together instead of scattered in tmp/ with hashes. - pages_with_video = [ - p for p in list(self._context.pages) - if getattr(p, "video", None) is not None - ] - need_suffix = len(pages_with_video) > 1 - for i in range(len(pages_with_video)): - suffix = f"_{i + 1}" if need_suffix else "" - video_path = str(session_dir / f"video{suffix}.webm") - artifacts["video"].append(video_path) + video_path = str(session_dir / f"video-{i}.webm") + artifacts["video"].append(video_path) return artifacts @@ -1256,6 +1655,11 @@ async def close(self) -> str: beforehand. Active tracing/video sessions are auto-finalized and their paths included in the result. 
+ **CDP mode**: only disconnects the Playwright session from the remote + browser — pages, tabs, and borrowed contexts are left intact. A + context created by bridgic (when ``connect_over_cdp`` returned no + existing contexts) is closed normally. + Safe to call even when the browser was never started — returns ``"Browser closed."`` immediately without raising. @@ -1283,6 +1687,10 @@ async def close(self) -> str: # the very end. This ensures no Playwright/Chromium process is left orphaned # just because one step was interrupted. _pending_cancel: Optional[BaseException] = None + _is_cdp = self._cdp_url is not None + # True when we are a guest on someone else's browser — must not + # close pages, navigate, or destroy the borrowed context. + _cdp_borrowed = _is_cdp and not self._cdp_context_owned # Auto-stop active tracing before context/page teardown so trace data is saved. if self._context: @@ -1334,102 +1742,61 @@ async def close(self) -> str: finally: self._tracing_state[context_key] = False - # Always clear page-scoped listeners/caches for every context page. - for page in list(self._context.pages): - self._clear_page_scoped_state(page, errors) - - # Navigate all pages to about:blank before video finalization to - # terminate service workers and ongoing network activity. This - # prevents context.close() from hanging later. + # Stop every active CDP screencast recorder (one per page). + # Mirrors Playwright CLI's context.ts ``dispose()`` → + # ``stopVideoRecording()``: when the context closes, every + # per-page recorder is finalized. # - # Must run BEFORE video finalization because _finalize_video() - # calls page.close() for each page — after that the page list is - # empty and about:blank would be a no-op. 
- for _nav_page in list(self._context.pages): - try: - await asyncio.wait_for( - _nav_page.goto("about:blank", wait_until="commit"), - timeout=self._PAGE_CLOSE_TIMEOUT, - ) - except Exception as exc: - logger.debug("close: about:blank navigation failed: %s", exc) - except BaseException as e: - if _pending_cancel is None: - _pending_cancel = e - - # Save videos when: (a) video_start() was called and never stopped, or - # (b) stop_video() deferred the save to close time. - # Use a sentinel because pop() returns None both for "absent" and "stored None". - _absent: Any = object() - pending_save_raw = self._pending_video_save_path.pop(context_key, _absent) - has_pending_save = pending_save_raw is not _absent - pending_filename: Optional[str] = pending_save_raw if has_pending_save else None # type: ignore[assignment] - - if self._video_state.get(context_key) or has_pending_save: - pages_with_video = [ - (p, p.video) - for p in list(self._context.pages) - if getattr(p, "video", None) is not None - ] - - need_suffix = len(pages_with_video) > 1 - dest_dir: Optional[str] = None - dest_stem: Optional[str] = None - dest_ext = ".webm" - if pending_filename: - dest_dir = os.path.dirname(pending_filename) - dest_stem = os.path.splitext(os.path.basename(pending_filename))[0] - elif self._close_session_dir: - # No explicit filename — save into session dir so all - # close artifacts are grouped together. - dest_dir = self._close_session_dir - dest_stem = "video" - - async def _finalize_video(page_: Any, video_: Any, idx: int) -> Optional[str]: - await asyncio.wait_for(page_.close(), timeout=self._PAGE_CLOSE_TIMEOUT) - if dest_dir is not None and dest_stem is not None: - suffix = f"_{idx}" if need_suffix else "" - dest = os.path.join(dest_dir, f"{dest_stem}{suffix}{dest_ext}") - await asyncio.wait_for( - video_.save_as(dest), - timeout=self._VIDEO_SAVE_AS_TIMEOUT, + # Why we snapshot the dict before awaiting: + # stop_video() and close() can race in the daemon flow. 
We + # clear the dict first so the other path observes "no work + # left" and skips the duplicate stop() call. + if self._video_recorders or self._video_session is not None: + # Detach the context "page" listener so new pages aren't + # auto-started during shutdown. + if self._video_session: + _listener = self._video_session.get("page_listener") + if _listener is not None: + try: + self._context.remove_listener("page", _listener) + except Exception: + pass + _recorders = list(self._video_recorders.items()) + self._video_recorders.clear() + self._video_session = None + for _idx, (_page_ref, _recorder) in enumerate(_recorders): + try: + rec_path = await asyncio.wait_for( + _recorder.stop(), timeout=10.0 ) - return dest - vp = await asyncio.wait_for( - video_.path(), - timeout=self._VIDEO_PATH_TIMEOUT, - ) - return os.path.abspath(str(vp)) + # Move the video file into the close-session dir, + # next to the trace. + if self._close_session_dir: + if _idx == 0: + dest_name = "video.webm" + else: + dest_name = f"video-{_idx}.webm" + dest = os.path.join(self._close_session_dir, dest_name) + self._move_video_local(Path(rec_path), dest) + shutdown_artifacts["video"].append(dest) + else: + shutdown_artifacts["video"].append(rec_path) + except asyncio.TimeoutError: + errors.append("video_recorder.stop: timeout after 10.0s") + except Exception as e: + errors.append(f"video_recorder.stop: {e}") + except BaseException as e: + errors.append(f"video_recorder.stop: {e}") + if _pending_cancel is None: + _pending_cancel = e + self._video_state.pop(context_key, None) - results = await asyncio.gather( - *(_finalize_video(p, v, i + 1) for i, (p, v) in enumerate(pages_with_video)), - return_exceptions=True, - ) - for r in results: - if isinstance(r, BaseException): - errors.append(f"video.finalize: {r}") - elif r is not None: - shutdown_artifacts["video"].append(r) - - self._video_state[context_key] = False - # We may have closed the current page above. 
- self._page = None + # Always clear page-scoped listeners/caches for every context page. + for page in list(self._context.pages): + self._clear_page_scoped_state(page, errors) else: self._clear_page_scoped_state(self._page, errors) - # Close page (with timeout to guard against hung beforeunload handlers) - if self._page: - _page = self._page - self._page = None - try: - await asyncio.wait_for( - _page.close(), timeout=self._PAGE_CLOSE_TIMEOUT, - ) - except BaseException as e: - errors.append(f"page.close: {e}") - if not isinstance(e, Exception) and _pending_cancel is None: - _pending_cancel = e - # Detach download manager before context closes to remove handlers if self._download_manager and self._context: try: @@ -1437,24 +1804,69 @@ async def _finalize_video(page_: Any, video_: Any, idx: int) -> Optional[str]: except Exception as e: errors.append(f"download_manager.detach: {e}") - # Close all remaining pages in context before closing context. - # This avoids context.close() hanging on beforeunload handlers of extra - # tabs the user may have opened manually (or pages we didn't track). + # Close every page in parallel (replaces the old serial + # about:blank → close walk). + # + # Why we no longer navigate to about:blank first: + # The previous code navigated each page to about:blank to stop + # service workers, then closed it. Playwright CLI does not do + # this — it just calls close() directly. ``run_before_unload= + # False`` already aborts in-flight activity, and parallel close + # is much faster than serial about:blank + close. + # + # Why asyncio.gather: + # Tab closes are independent; serializing them would compound + # the per-page timeout. Reference: Playwright's + # browserContext.ts ``close()`` also closes pages in parallel. + # + # CDP borrowed context: only close pages bridgic created itself + # (``_cdp_owned_pages``); never touch the user's existing tabs. 
+ self._page = None if self._context: - for extra_page in list(self._context.pages): - try: - await asyncio.wait_for( - extra_page.close(run_before_unload=False), - timeout=self._PAGE_CLOSE_TIMEOUT, + if _cdp_borrowed: + # Borrowed CDP context: only close pages bridgic created. + # Skip pages already closed by the user (via Chrome UI). + owned = [ + p for p in self._cdp_owned_pages + if not p.is_closed() + ] + if owned: + page_results = await asyncio.gather( + *(asyncio.wait_for( + p.close(run_before_unload=False), + timeout=self._PAGE_CLOSE_TIMEOUT, + ) for p in owned), + return_exceptions=True, ) - except BaseException as e: - if not isinstance(e, Exception) and _pending_cancel is None: - _pending_cancel = e - # best-effort; context.close() will handle remaining pages + for r in page_results: + if isinstance(r, BaseException): + if not isinstance(r, Exception) and _pending_cancel is None: + _pending_cancel = r + elif isinstance(r, Exception): + errors.append(f"cdp_owned_page.close: {r}") + else: + # Launch / persistent / owned-CDP-context: close all pages. + all_pages = list(self._context.pages) + if all_pages: + page_results = await asyncio.gather( + *(asyncio.wait_for( + p.close(run_before_unload=False), + timeout=self._PAGE_CLOSE_TIMEOUT, + ) for p in all_pages), + return_exceptions=True, + ) + for r in page_results: + if isinstance(r, BaseException): + if not isinstance(r, Exception) and _pending_cancel is None: + _pending_cancel = r + elif isinstance(r, Exception): + errors.append(f"page.close: {r}") # Close context # NOTE: In persistent context mode, closing context will auto close browser - if self._context: + # CDP mode: only close the context if bridgic created it; borrowed contexts + # belong to the remote browser and must not be destroyed. 
+ if self._context and not _cdp_borrowed: _context = self._context self._context = None try: @@ -1479,9 +1891,15 @@ async def _finalize_video(page_: Any, video_: Any, idx: int) -> Optional[str]: errors.append(f"context.close: {e}") if _pending_cancel is None: _pending_cancel = e + elif self._context: + # CDP borrowed context: release reference without closing + self._context = None - # Close browser (only needed in normal launch mode, not persistent context) - # In persistent context mode, browser is None or already closed + # Close browser. + # - Normal launch mode: closes browser process. + # - Persistent context mode: browser is None or already closed via context. + # - CDP mode: close() disconnects the Playwright session without killing the + # remote Chrome process (the process continues running after disconnect). if self._browser: _browser = self._browser self._browser = None @@ -1543,7 +1961,7 @@ async def _finalize_video(page_: Any, video_: Any, idx: int) -> Optional[str]: self._dialog_handlers.clear() self._tracing_state.clear() self._video_state.clear() - self._pending_video_save_path.clear() + self._cdp_owned_pages.clear() trace_paths = shutdown_artifacts.get("trace", []) video_paths = shutdown_artifacts.get("video", []) @@ -1651,6 +2069,8 @@ async def navigate_to( # All tabs were closed (e.g. via close_tab); _context is still alive. 
logger.info("No page is open, creating a new page in existing context") self._page = await self._context.new_page() + if self._cdp_url: + self._cdp_owned_pages.add(self._page) kwargs: Dict[str, Any] = {"wait_until": wait_until} if timeout is not None: @@ -1684,6 +2104,8 @@ async def _new_page( code="NO_BROWSER_CONTEXT", ) self._page = await self._context.new_page() + if self._cdp_url: + self._cdp_owned_pages.add(self._page) if url: await self.navigate_to(url, wait_until=wait_until, timeout=timeout) await self._page.bring_to_front() @@ -1714,8 +2136,26 @@ async def get_all_page_descs(self) -> List[PageDesc]: return page_descs def get_pages(self) -> List[Page]: + """Return the pages bridgic considers part of its session. + + Launch / persistent / CDP-with-owned-context modes: every page in + the context belongs to bridgic, so we return them all. + + CDP borrowed-context mode: bridgic is a guest on the user's real + Chrome session. We must only expose tabs bridgic explicitly + created (tracked in ``_cdp_owned_pages``); the user's tabs and + any pop-ups they spawn are off-limits — they should not appear + in ``get_tabs``, be selectable via ``switch_tab``, become the + fallback "current page" after ``close_tab``, or be visible to + any of the page-iterating tools. This invariant is documented + in ``docs/CDP_MODE.md``. + """ if not self._context: return [] + if self._cdp_url and not self._cdp_context_owned: + # Preserve the underlying tab order so indices stay stable. 
+ owned = self._cdp_owned_pages + return [p for p in self._context.pages if p in owned] return self._context.pages async def switch_to_page(self, page_id: str) -> tuple[bool, str]: @@ -1776,11 +2216,42 @@ async def _close_page(self, page: Page | str) -> tuple[bool, str]: if not page: logger.warning("Page is None, can't close") return False, "Page is None, can't close" - await page.close() - # If the closed page is the current page, switch to another + # If the page being closed is currently recording, stop its + # recorder first and remove it from the registry. Why: a + # VideoRecorder's CDP session is bound to a specific page; once + # the page is closed the CDP session is dead and any later + # stop()/detach() call would block waiting on a 10 s timeout. + # Recorders for other pages stay active — same multi-page + # semantics as Playwright CLI. + if page in self._video_recorders: + _recorder = self._video_recorders.pop(page) + try: + await asyncio.wait_for(_recorder.stop(), timeout=10.0) + logger.debug("[_close_page] auto-stopped video recorder for closing page") + except Exception as e: + logger.debug("[_close_page] video recorder stop error: %s", e) + + try: + await page.close() + finally: + # Drop our ownership reference now that the close attempt is + # done. Without this discard, every CDP-mode new-tab + + # close-tab cycle would leak a Page object (frames, + # listeners, cached resources) for the lifetime of the + # daemon. We discard inside `finally` so a raised + # page.close() can't leave a stale reference behind. + # discard() is a no-op in launch / persistent / + # owned-CDP-context modes where _cdp_owned_pages stays empty. + self._cdp_owned_pages.discard(page) + + # If the closed page is the current page, switch to another. + # Use the bridgic-visible page list (get_pages) so that in CDP + # borrowed mode we never silently land on a user tab — operating + # on the user's banking / email page after a close_tab would be + # a serious privacy violation. 
if self._page == page: - pages = self._context.pages + pages = self.get_pages() self._page = pages[0] if pages else None # Clear snapshot cache self._last_snapshot = None @@ -6450,90 +6921,363 @@ async def stop_tracing(self, filename: Optional[str] = None) -> str: logger.error(f"[stop_tracing] {error_msg}") _raise_operation_error(error_msg) + @staticmethod + def _allocate_video_temp_path() -> str: + """Generate a unique temp .webm path for one page's recording. + + Uses ``tempfile.mkstemp`` (O_EXCL) so the path is guaranteed + unique even when many recorders are allocated within the same + second — a previous timestamp+random scheme had a non-zero + collision risk under burst multi-page start_video() calls. + We immediately remove the empty file because ffmpeg insists on + creating the output itself. + """ + os.makedirs(BRIDGIC_TMP_DIR, exist_ok=True) + fd, path = tempfile.mkstemp( + prefix="video_", suffix=".webm", dir=str(BRIDGIC_TMP_DIR) + ) + os.close(fd) + try: + os.unlink(path) + except OSError: + pass + return path + + async def _start_page_video_recorder(self, page: Page) -> None: + """Start a VideoRecorder for one page within the active session. + + Idempotent — a no-op if the session is inactive or the page is + already being recorded. Mirrors Playwright CLI's + ``Context._startPageVideo`` (``tools/backend/context.ts``). + + In CDP borrowed-context mode, skip pages bridgic does not own + (the user's existing tabs and any pop-ups they spawn). Recording + a user's banking / email tab without consent would be a serious + privacy violation, and contradicts the tab-ownership invariant + documented in ``docs/CDP_MODE.md``. + """ + if self._video_session is None: + return + if page in self._video_recorders: + return + if page.is_closed(): + return + # CDP borrowed context: only record bridgic-owned tabs. 
+ if self._cdp_url and not self._cdp_context_owned and page not in self._cdp_owned_pages: + return + + output_path = self._allocate_video_temp_path() + w = int(self._video_session["width"]) + h = int(self._video_session["height"]) + recorder = _video_recorder_mod.VideoRecorder( + page.context, page, output_path, (w, h), + ) + try: + await recorder.start() + except Exception as e: + logger.warning( + "[start_video] failed to start recorder on page %s: %s", page, e, + ) + return + self._video_recorders[page] = recorder + logger.info("[start_video] recording page → %s", output_path) + async def start_video( self, width: Optional[int] = None, height: Optional[int] = None, ) -> str: - """Mark the current page's video recording session as active. - - Video recording is always running — Playwright starts recording as soon - as a page is created (using the ``record_video_dir`` set at browser - creation, which defaults to ``~/.bridgic/bridgic-browser/tmp``). This method simply - marks the session as "started" so that :meth:`stop_video` can later - register where to save the file. + """Start video recording on ALL pages in the context. - Use ``stop_video(filename)`` to designate a save path; the actual file - is written when the browser closes. + Mirrors the Playwright CLI behaviour + (``packages/playwright-core/src/tools/backend/context.ts`` — + ``startVideoRecording``): a single call records every page in the + browser context, and any page opened afterwards is auto-recorded + to its own .webm file. Uses CDP ``Page.startScreencast`` to + capture frames and pipes them to ffmpeg for VP8/WebM encoding — + no Playwright RPC streaming needed. Parameters ---------- width : Optional[int], optional - Accepted for API compatibility but **not used** — video resolution - is determined by ``record_video_size`` passed at ``Browser()`` - creation time, not here. + Video width in pixels. Defaults to the current viewport width + (rounded down to an even number). 
Pass an explicit value to + override — e.g. to downscale a 4K viewport. height : Optional[int], optional - Accepted for API compatibility but **not used** — see ``width``. + Video height in pixels. Defaults to the current viewport height + (rounded down to an even number). Returns ------- str - "Video recording started". - - Raises - ------ - StateError - If no active page is available, or if no video is attached to the - current page (should not occur under normal operation). - OperationError - If an unexpected error occurs. + "Video recording started" (plus the number of pages being + recorded). """ + logger.info(f"[start_video] start width={width} height={height}") + + # Validation runs BEFORE any state mutation so that "already active" / + # "no active page" errors cannot trigger the rollback path below — that + # path would otherwise tear down the *previous* successful session. + page = await self.get_current_page() + if page is None: + _raise_state_error("No active page available", code="NO_ACTIVE_PAGE") + + context = page.context + context_key = _get_context_key(context) + + if self._video_session is not None or self._video_state.get(context_key): + _raise_state_error("Video recording already active", code="VIDEO_ALREADY_ACTIVE") + + # Compute the recording size. + # + # NOTE: this intentionally diverges from Playwright's screencast.ts + # ``startScreencast()`` (lines 90-98), which caps the longest side at + # 800 px to keep encoder cost low. That cap is the dominant source of + # blur for bridgic recordings: with a typical 1280×800 viewport, Chrome + # downsamples to 800×500 *inside the browser* before frames ever reach + # ffmpeg, so no encoder tuning can recover the lost detail. Bridgic + # videos are usually replayed by humans inspecting an LLM session where + # legibility wins over a few extra MB of CPU and disk. + # + # Default policy: record at the page's actual CSS pixel dimensions. 
+ # We query ``window.innerWidth/innerHeight`` directly instead of + # trusting ``page.viewport_size``: + # + # - launch mode with explicit viewport: both agree + # - launch mode without an explicit viewport: both agree + # - CDP attach mode: ``page.viewport_size`` is ``None`` because + # bridgic never called ``setViewportSize`` on the foreign Chrome. + # Falling back to a hard-coded ``800×600`` is almost always wrong: + # the real window is wider (typically 16:9), so Chrome downsamples + # to fit within 800×600 and ffmpeg's ``pad`` filter adds a gray + # strip at the bottom to make up the difference. Querying + # ``window.innerWidth/innerHeight`` returns the true visible area + # for any of the three modes. + # ``& ~1``: round down to an even number — VP8 requires even + # width and height. + viewport_width = 1280 + viewport_height = 720 try: - logger.info(f"[start_video] start width={width} height={height}") + dims = await page.evaluate( + "() => ({w: window.innerWidth, h: window.innerHeight})" + ) + qw = int(dims.get("w") or 0) + qh = int(dims.get("h") or 0) + if qw > 0 and qh > 0: + viewport_width = qw + viewport_height = qh + else: + raise ValueError(f"non-positive dimensions: {dims}") + except Exception as exc: + # Fall back to viewport_size, then the hard default above. Logged + # but non-fatal so a hardened CSP page can still record. + logger.warning( + "[start_video] could not query window dimensions (%s); " + "falling back to page.viewport_size", exc, + ) + vp = page.viewport_size + if vp: + viewport_width = int(vp["width"]) or viewport_width + viewport_height = int(vp["height"]) or viewport_height + + w = (width or viewport_width) & ~1 + h = (height or viewport_height) & ~1 + + # Build the session record up front so _start_page_video_recorder + # picks up the parameters. From this point on, any failure must + # roll back the partially-set-up session state. 
+ self._video_session = { + "width": w, + "height": h, + "context": context, + "page_listener": None, + } + self._video_recorders = {} + self._video_state[context_key] = True - page = await self.get_current_page() - if page is None: - _raise_state_error("No active page available", code="NO_ACTIVE_PAGE") + try: + # Start a recorder on every existing page. + # Mirrors Playwright CLI's context.ts ``startVideoRecording``: + # for (const page of browserContext.pages()) + # await this._startPageVideo(page); + # + # CDP borrowed context: restrict to bridgic-owned pages so we + # never record the user's existing tabs. + # _start_page_video_recorder double-checks this, but + # filtering up front makes intent obvious and avoids spurious + # "page start failed" log lines. + if self._cdp_url and not self._cdp_context_owned: + existing_pages = [ + p for p in self._cdp_owned_pages if not p.is_closed() + ] + else: + existing_pages = [p for p in context.pages if not p.is_closed()] + for p in existing_pages: + try: + await self._start_page_video_recorder(p) + except Exception as e: + logger.warning("[start_video] page start failed: %s", e) + + # Subscribe to future pages so newly opened tabs are also + # recorded. Mirrors Playwright CLI's context.ts + # ``_onPageCreated`` → ``_startPageVideo``. Playwright + # Python's context.on("page") calls the handler synchronously + # with the new Page, so async work has to be scheduled as a + # task. 
+ def _on_page_created(new_page: Page) -> None: + try: + asyncio.get_running_loop().create_task( + self._start_page_video_recorder(new_page), + ) + except RuntimeError: + logger.warning( + "[start_video] no running loop to record new page", + ) - context = page.context - context_key = _get_context_key(context) + context.on("page", _on_page_created) + self._video_session["page_listener"] = _on_page_created - if page.video: - self._video_state[context_key] = True - result = "Video recording started" - logger.info(f"[start_video] done {result}") - return result - else: - _raise_state_error("No video recording available for this page", code="NO_ACTIVE_RECORDING") - except BridgicBrowserError: - raise + count = len(self._video_recorders) + result = f"Video recording started ({count} page{'s' if count != 1 else ''})" + logger.info("[start_video] %s", result) + return result except Exception as e: + # Rollback the session state we set up above so future + # start_video() calls are not blocked by a phantom session. + self._video_session = None + for _rec in list(self._video_recorders.values()): + try: + await _rec.stop() + except Exception: + pass + self._video_recorders.clear() + self._video_state.pop(context_key, None) + if isinstance(e, BridgicBrowserError): + raise error_msg = f"Failed to start video: {str(e)}" logger.error(f"[start_video] {error_msg}") _raise_operation_error(error_msg) + @staticmethod + def _resolve_video_dest(filename: str) -> str: + """Resolve a user-supplied filename to an absolute path. 
+ + Three input shapes are accepted: + "demo.webm" → cwd/demo.webm + "./videos/" → ./videos/video_.webm (auto-named) + "demo" → cwd/demo.webm (".webm" suffix auto-added) + """ + if filename.endswith(os.sep) or filename.endswith("/") or os.path.isdir(filename): + import time as _time + dest_dir = os.path.abspath(filename) + resolved = os.path.join(dest_dir, f"video_{_time.strftime('%Y%m%d_%H%M%S')}.webm") + else: + if not filename.lower().endswith(".webm"): + filename = f"{filename}.webm" + resolved = os.path.abspath(filename) + dest_dir = os.path.dirname(resolved) + if dest_dir: + os.makedirs(dest_dir, exist_ok=True) + return resolved + + @staticmethod + def _move_video_local(src: Path, dest: str) -> str: + """Move a video file locally (rename, falling back to copy). + + Why we do not use Playwright's ``video.save_as()``: + save_as() streams the file across the Node RPC bridge in 1 MB + base64 chunks. Large recordings can take tens of seconds or + even time out. A local ``os.rename`` is O(1); even when we + fall back to copy2 (cross-device move), it is orders of + magnitude faster than the RPC stream. + """ + os.makedirs(os.path.dirname(dest) or ".", exist_ok=True) + try: + os.rename(str(src), dest) + except OSError: + import shutil + shutil.copy2(str(src), dest) + try: + src.unlink(missing_ok=True) + except Exception: + pass + return os.path.abspath(dest) + + @staticmethod + def _resolve_multi_video_dests( + filename: Optional[str], count: int, + ) -> Optional[List[str]]: + """Build N destination paths for ``count`` recorded video files. + + Parameters + ---------- + filename : Optional[str] + User-supplied destination. ``None`` leaves files in temp dir. + A directory (``./videos/`` or existing dir) → each file keeps + its auto-generated basename inside that dir. + A file path (``./out.webm``) → first file uses the exact path, + subsequent files get ``-1``, ``-2``, … suffix inserted before + the extension. + count : int + Number of recorded videos. 
+ + Returns + ------- + Optional[List[str]] + ``None`` when ``filename`` is ``None`` (keep temp paths), + otherwise a list of ``count`` destination paths. + """ + if filename is None: + return None + if count == 0: + return [] + is_dir = ( + filename.endswith(os.sep) + or filename.endswith("/") + or os.path.isdir(filename) + ) + if is_dir: + import time as _time + dest_dir = os.path.abspath(filename) + os.makedirs(dest_dir, exist_ok=True) + ts = _time.strftime("%Y%m%d_%H%M%S") + out: List[str] = [] + for i in range(count): + name = f"video_{ts}.webm" if i == 0 else f"video_{ts}-{i}.webm" + out.append(os.path.join(dest_dir, name)) + return out + # Single-file target: use as base name; append -N for extras. + base = filename if filename.lower().endswith(".webm") else f"{filename}.webm" + base_abs = os.path.abspath(base) + dest_dir = os.path.dirname(base_abs) + if dest_dir: + os.makedirs(dest_dir, exist_ok=True) + stem, ext = os.path.splitext(base_abs) + return [base_abs if i == 0 else f"{stem}-{i}{ext}" for i in range(count)] + async def stop_video(self, filename: Optional[str] = None) -> str: - """Stop video recording. + """Stop video recording on all pages and save the files. - Marks the current recording session as stopped and registers the - destination path. The actual video files are written by Playwright - when pages close, so saving is deferred to ``browser_close()`` / - ``close_tab()`` — no pages are touched here. + Files are saved immediately — no need to wait for browser close. + Mirrors Playwright CLI context.ts ``stopVideoRecording``: returns + one .webm file per page that was being recorded. Parameters ---------- filename : Optional[str], optional - Destination path for the video file(s). Accepts a file path + Destination for the video files. Accepts a file path (``./videos/demo.webm``) or a directory (``./videos/``). The ``.webm`` extension is added automatically when missing. 
- If not provided, Playwright writes files to the temporary - recording directory automatically on page close. + When multiple pages are recorded and ``filename`` is a single + file path, the first file uses the given name and subsequent + files get a ``-1``, ``-2``, … suffix inserted before the + extension. If not provided, the files stay in the temporary + directory. Returns ------- str - Confirmation that recording was stopped and where files will be - saved (``Video will be saved to: on browser close``). + Confirmation with the saved file path(s). """ try: logger.info(f"[stop_video] start filename={filename}") @@ -6542,46 +7286,71 @@ async def stop_video(self, filename: Optional[str] = None) -> str: _raise_state_error("No context is open", code="NO_CONTEXT") context_key = _get_context_key(self._context) - if not self._video_state.get(context_key): - _raise_state_error("No active video recording. Use video-start first.", code="NO_ACTIVE_RECORDING") + if self._video_session is None and not self._video_recorders: + _raise_state_error( + "No active video recording. Use video-start first.", + code="NO_ACTIVE_RECORDING", + ) - # Resolve destination path now (before any context changes) and - # create the directory so the user gets an early error if the path - # is invalid. Actual file writing is deferred to browser close. - resolved: Optional[str] = None - if filename: - if filename.endswith(os.sep) or filename.endswith("/") or os.path.isdir(filename): - import time as _time - dest_dir = os.path.abspath(filename) - resolved = os.path.join(dest_dir, f"video_{_time.strftime('%Y%m%d_%H%M%S')}.webm") - else: - if not filename.lower().endswith(".webm"): - filename = f"{filename}.webm" - resolved = os.path.abspath(filename) - dest_dir = os.path.dirname(resolved) - if dest_dir: - os.makedirs(dest_dir, exist_ok=True) - - # Defer the actual save; no pages are closed or navigated here. 
- self._pending_video_save_path[context_key] = resolved + # Detach page-creation listener so stopping recording in + # parallel with a tab open doesn't race into a new recorder. + if self._video_session is not None: + listener = self._video_session.get("page_listener") + if listener is not None: + try: + self._context.remove_listener("page", listener) + except Exception: + pass + + # Snap the recorder dict to a local list first so a concurrent + # close() won't also try to stop them. + recorders = list(self._video_recorders.items()) + self._video_recorders = {} + self._video_session = None self._video_state[context_key] = False - if resolved: - dest_dir_display = os.path.dirname(resolved) - stem_display = os.path.splitext(os.path.basename(resolved))[0] - result = ( - f"Video recording stopped. " - f"Files will be saved to {dest_dir_display}/ " - f"as {stem_display}.webm (single tab) or " - f"{stem_display}_1.webm, {stem_display}_2.webm, ... (multiple tabs) " - f"when browser closes." - ) + if not recorders: + return "Video recording stopped (no pages were recorded)" + + # Stop every recorder; preserve order of pages. + async def _stop_one( + rec: "_video_recorder_mod.VideoRecorder", + ) -> Optional[str]: + try: + return await rec.stop() + except Exception as exc: + logger.warning("[stop_video] recorder stop failed: %s", exc) + return None + + temp_paths: List[Optional[str]] = [] + for _page_ref, _rec in recorders: + temp_paths.append(await _stop_one(_rec)) + good_paths = [p for p in temp_paths if p] + + if not good_paths: + return "Video recording stopped (no files were produced)" + + dests = self._resolve_multi_video_dests(filename, len(good_paths)) + if dests is None: + saved = list(good_paths) else: - result = ( - "Video recording stopped. " - "Files will be auto-saved to the recording directory when browser closes." 
- ) - logger.info(f"[stop_video] done (deferred) {result}") + saved = [] + for src, dest in zip(good_paths, dests): + try: + self._move_video_local(Path(src), dest) + saved.append(dest) + except Exception as move_err: + logger.error( + "[stop_video] move failed, file stays at: %s (%s)", + src, move_err, + ) + saved.append(src) + + if len(saved) == 1: + result = f"Video saved to: {saved[0]}" + else: + result = "Video files saved:\n" + "\n".join(saved) + logger.info(f"[stop_video] done: {result}") return result except BridgicBrowserError: raise diff --git a/bridgic/browser/session/_video_recorder.py b/bridgic/browser/session/_video_recorder.py new file mode 100644 index 0000000..6723f8b --- /dev/null +++ b/bridgic/browser/session/_video_recorder.py @@ -0,0 +1,546 @@ +"""CDP screencast video recorder — encodes to WebM via ffmpeg. + +The architecture mirrors Playwright CLI's recording pipeline: + Playwright CLI sources: + screencast.ts → manages the CDP screencast session + videoRecorder.ts → receives JPEG frames → pipes them to ffmpeg → WebM + This file combines both responsibilities. + +How it works: + 1. ``Page.startScreencast`` (CDP) tells Chrome to push JPEG snapshots. + 2. Each frame produced by Chrome fires a ``Page.screencastFrame`` event. + 3. We forward the frame bytes to an ffmpeg subprocess via its stdin pipe. + 4. ffmpeg encodes the JPEG stream as VP8/WebM straight to the output file. + 5. On stop(), closing the pipe lets ffmpeg flush and the file is immediately + usable. + +Compared with Playwright Python's ``record_video_dir`` option: + record_video_dir: starts ffmpeg at context-create time, records every page, + and the file is streamed back via RPC (1 MB base64 chunks). + CDP screencast: starts on demand, the file is ready as soon as stop() + returns, with zero RPC overhead. 
+ +Reference paths in the Playwright monorepo: + packages/playwright-core/src/server/screencast.ts + packages/playwright-core/src/server/chromium/videoRecorder.ts + +Usage:: + + recorder = VideoRecorder(context, page, "/tmp/video.webm", (800, 600)) + await recorder.start() + # ... drive the browser ... + path = await recorder.stop() # file is ready +""" + +from __future__ import annotations + +import asyncio +import base64 +import io +import logging +import math +import os +import platform +import re +import shutil +import time +from pathlib import Path +from typing import Any, List, Optional, Tuple + +logger = logging.getLogger("bridgic.browser") + +# 25 fps — matches Playwright's videoRecorder.ts (line 17: ``const fps = 25;``). +_FPS = 25 + +# Matches "ffmpeg-" so we can sort version directories numerically +# rather than lexicographically (otherwise "ffmpeg-999" sorts above +# "ffmpeg-1011", which would pin us to an older binary). +_FFMPEG_VERSION_RE = re.compile(r"^ffmpeg-(\d+)$") + + +# --------------------------------------------------------------------------- +# ffmpeg path discovery +# --------------------------------------------------------------------------- + +def _find_ffmpeg() -> str: + """Locate Playwright's bundled ffmpeg, or fall back to the one on PATH. + + When Playwright installs browsers it also downloads ffmpeg into its cache: + macOS: ~/Library/Caches/ms-playwright/ffmpeg-{revision}/ffmpeg-mac + Linux: ~/.cache/ms-playwright/ffmpeg-{revision}/ffmpeg-linux + Windows: %LOCALAPPDATA%/ms-playwright/ffmpeg-{revision}/ffmpeg-win64.exe + + The cache root can be overridden with ``PLAYWRIGHT_BROWSERS_PATH``. If no + Playwright copy is found we fall back to ``ffmpeg`` from ``$PATH``. 
+ """ + browsers_path = os.environ.get("PLAYWRIGHT_BROWSERS_PATH") + if not browsers_path: + system = platform.system() + if system == "Darwin": + browsers_path = os.path.expanduser("~/Library/Caches/ms-playwright") + elif system == "Linux": + browsers_path = os.path.expanduser("~/.cache/ms-playwright") + else: + browsers_path = os.path.join( + os.environ.get("LOCALAPPDATA", ""), "ms-playwright" + ) + + bp = Path(browsers_path) + if bp.is_dir(): + suffix_map = {"Darwin": "mac", "Linux": "linux", "Windows": "win64.exe"} + suffix = suffix_map.get(platform.system(), "linux") + # Pick the highest numeric revision (e.g. ffmpeg-1011 > ffmpeg-999). + # Lexicographic sort would pick ffmpeg-999 here, which is wrong. + candidates: List[Tuple[int, Path]] = [] + try: + entries = list(bp.iterdir()) + except OSError: + entries = [] + for entry in entries: + match = _FFMPEG_VERSION_RE.match(entry.name) + if match: + candidates.append((int(match.group(1)), entry)) + for _, entry in sorted(candidates, key=lambda c: c[0], reverse=True): + candidate = entry / f"ffmpeg-{suffix}" + if candidate.exists(): + return str(candidate) + + system_ffmpeg = shutil.which("ffmpeg") + if system_ffmpeg: + return system_ffmpeg + + raise FileNotFoundError( + "ffmpeg not found. Run 'playwright install ffmpeg' or install ffmpeg." + ) + + +# --------------------------------------------------------------------------- +# Empty-recording fallback frame +# --------------------------------------------------------------------------- + +# A baked 1×1 white JPEG. Used in the rare case where no real frame arrived +# before stop() (e.g. start_video → immediate stop_video). ffmpeg refuses to +# produce a valid WebM when its input pipe is empty, so we feed it this single +# byte sequence; the ``pad=W:H:0:0:gray`` filter then expands it to the target +# resolution by adding gray padding (the original 1×1 white pixel ends up in +# the top-left corner). 
# The resulting frame is intentionally minimal — the only goal is "produce a
# playable file", not "produce a meaningful frame".
# Playwright's videoRecorder.ts has an analogous fallback in writeFrame().
_FALLBACK_WHITE_JPEG_1X1 = (
    b"\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00"
    b"\xff\xdb\x00C\x00\x08\x06\x06\x07\x06\x05\x08\x07\x07\x07\t\t"
    b"\x08\n\x0c\x14\r\x0c\x0b\x0b\x0c\x19\x12\x13\x0f\x14\x1d\x1a"
    b"\x1f\x1e\x1d\x1a\x1c\x1c $.\x27 \",.+\x1c\x1c(7),01444\x1f\x27"
    b"9=82<.342\xff\xc0\x00\x0b\x08\x00\x01\x00\x01\x01\x01\x11\x00"
    b"\xff\xc4\x00\x1f\x00\x00\x01\x05\x01\x01\x01\x01\x01\x01\x00"
    b"\x00\x00\x00\x00\x00\x00\x00\x01\x02\x03\x04\x05\x06\x07\x08"
    b"\t\n\x0b\xff\xda\x00\x08\x01\x01\x00\x00?\x00T\xdb\x9e\x97\xf0"
    b"\x07\xff\xd9"
)


def _create_white_jpeg(width: int, height: int) -> bytes:
    """Return a JPEG used as a placeholder when no real frame was captured.

    If Pillow is available we render a true ``width × height`` white JPEG;
    otherwise we return the baked 1×1 fallback above and rely on ffmpeg's
    ``pad`` filter to expand it. The fallback path is taken in production
    because Pillow is not a project dependency — see the comment on
    ``_FALLBACK_WHITE_JPEG_1X1`` for the implications.

    Reference: Playwright's videoRecorder.ts uses an equivalent
    "ensure at least one frame" fallback inside writeFrame().
    """
    try:
        from PIL import Image  # type: ignore[import-untyped]

        img = Image.new("RGB", (width, height), (255, 255, 255))
        buf = io.BytesIO()
        img.save(buf, format="JPEG", quality=80)
        return buf.getvalue()
    except ImportError:
        return _FALLBACK_WHITE_JPEG_1X1


# ---------------------------------------------------------------------------
# VideoRecorder
# ---------------------------------------------------------------------------

class VideoRecorder:
    """Records a page to WebM via CDP screencast + ffmpeg.

    Pipeline overview:

    start():
      ┌─────────────┐   stdin (JPEG frames)   ┌──────────┐
      │ Chrome CDP  │ ──────────────────────► │  ffmpeg  │ ──► output.webm
      │ screencast  │  Page.screencastFrame   │  subproc │
      └─────────────┘                         └──────────┘

    stop():
      1. CDP Page.stopScreencast — Chrome stops pushing frames.
      2. Pad the tail with ≥1 second of the last frame so the video isn't
         truncated.
      3. Close ffmpeg's stdin → ffmpeg flushes → output file is ready.
      4. Detach the CDP session.

    Parameters
    ----------
    context : Playwright BrowserContext
        Used to create the CDP session (``context.new_cdp_session(page)``).
    page : Playwright Page
        The page to record.
    output_path : str
        Output file path; must end in ``.webm``.
    size : (width, height)
        Output dimensions. Both must be even (a VP8 encoder requirement).

    Raises
    ------
    ValueError
        If ``output_path`` does not end in ``.webm``.
    """

    def __init__(
        self,
        context: Any,
        page: Any,
        output_path: str,
        size: Tuple[int, int],
    ) -> None:
        if not output_path.endswith(".webm"):
            raise ValueError("Output file must have .webm extension")
        self._context = context
        self._page = page
        self._output_path = output_path
        self._width = size[0]
        self._height = size[1]

        self._cdp_session: Any = None
        self._ffmpeg: Optional[asyncio.subprocess.Process] = None

        # Frame state — mirrors FfmpegVideoRecorder in Playwright's
        # videoRecorder.ts (lines 98-103).
        self._first_frame_ts: float = 0.0  # timestamp of the first frame; used to compute frame numbers
        self._last_frame: Optional[Tuple[bytes, float, int]] = None  # (jpeg_bytes, timestamp, frame_number)
        self._last_write_time: float = 0.0  # monotonic time of the last write_frame() call
        self._frame_queue: List[bytes] = []  # frames waiting to be written to ffmpeg's stdin
        self._is_stopped = False
        self._write_lock = asyncio.Lock()  # serializes writes to ffmpeg's stdin
        self._flush_pending = False  # dedup flag: avoid scheduling a flush task per frame

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    async def start(self) -> None:
        """Start recording: spawn ffmpeg, then start the CDP screencast.

        If the CDP setup fails, the already-started ffmpeg subprocess is
        killed, the CDP session (if one was created) is detached, and the
        original exception is re-raised unchanged.
        """
        ffmpeg_path = _find_ffmpeg()
        os.makedirs(os.path.dirname(self._output_path) or ".", exist_ok=True)

        w, h = self._width, self._height

        # ffmpeg arguments — based on Playwright's videoRecorder.ts
        # ``_startProcess()`` but tuned for legibility instead of raw speed.
        # Playwright's defaults (``-b:v 1M -crf 8 -deadline realtime
        # -speed 8 -qmax 50``) bias hard toward "encode as fast as possible",
        # which leaves browser text smeared. Bridgic's recordings are usually
        # replayed by humans inspecting an LLM session, so sharpness matters
        # more than encode CPU.
        #
        # Input:
        #   -f image2pipe        read an image stream from stdin
        #   -c:v mjpeg           input is a JPEG stream
        # Output:
        #   -c:v vp8             VP8-encoded WebM
        #   -b:v 5M              5 Mbps target — enough headroom for crisp
        #                        text at typical 1280×800 viewports
        #   -crf 4               constant-rate factor (0 = best, 63 = worst);
        #                        4 is high quality but still bounded
        #   -qmin 0 -qmax 30     tighter quantizer cap → no muddy frames when
        #                        the page is busy (vs. Playwright's qmax 50)
        #   -deadline good       balanced encoder mode instead of "realtime";
        #                        ~2-3× slower per frame but visibly cleaner
        #   -speed 2             slower preset (valid 0-5 with deadline=good)
        #   -threads 2           extra worker to keep up with the slower preset
        # Filters:
        #   pad={w}:{h}:0:0:gray  pad smaller frames with a gray border
        #   crop={w}:{h}:0:0      crop to exact target size
        args = [
            ffmpeg_path,
            "-loglevel", "error",
            "-f", "image2pipe",
            "-avioflags", "direct",
            "-fpsprobesize", "0",
            "-probesize", "32",
            "-analyzeduration", "0",
            "-c:v", "mjpeg",
            "-i", "pipe:0",
            "-y", "-an",
            "-r", str(_FPS),
            "-c:v", "vp8",
            "-qmin", "0", "-qmax", "30",
            "-crf", "4",
            "-deadline", "good",
            "-speed", "2",
            "-b:v", "5M",
            "-threads", "2",
            "-vf", f"pad={w}:{h}:0:0:gray,crop={w}:{h}:0:0",
            self._output_path,
        ]
        # stdout/stderr → DEVNULL: ffmpeg launches with `-loglevel error`, so
        # the streams are silent on the happy path, but on errors (corrupt
        # JPEG, encoder failures) the text can be large enough to fill the OS
        # pipe buffer (~64 KB on Linux). With nothing reading those pipes,
        # ffmpeg's next write() blocks → its stdin reader stalls →
        # bridgic's `await stdin.drain()` deadlocks → stop() hangs and
        # eventually force-kills ffmpeg, corrupting the output. We never read
        # the streams anyway, so dropping them in-kernel is the simplest fix.
        self._ffmpeg = await asyncio.create_subprocess_exec(
            *args,
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.DEVNULL,
        )

        # Create the CDP session and start the screencast.
        # Reference: Playwright's screencast.ts startScreencast().
        # Reference: Chrome DevTools Protocol — Page.startScreencast
        # https://chromedevtools.github.io/devtools-protocol/tot/Page/#method-startScreencast
        try:
            self._cdp_session = await self._context.new_cdp_session(self._page)
            self._cdp_session.on(
                "Page.screencastFrame", self._on_screencast_frame
            )
            await self._cdp_session.send("Page.startScreencast", {
                "format": "jpeg",
                # JPEG quality of source frames coming out of Chrome. This
                # caps the ceiling regardless of encoder tuning — Playwright
                # uses q=80 by default but that visibly smudges browser
                # text. q=95 is essentially visually lossless and the cost
                # is bandwidth we already have to spare on a local CDP
                # connection.
                "quality": 95,
                # maxWidth/maxHeight is a *clamp*, not a target: Chrome
                # downsamples (preserving aspect) to fit within these bounds
                # but never upsamples. They MUST equal the actual viewport
                # the caller computed, otherwise an aspect-ratio mismatch
                # makes Chrome ship a frame smaller than (self._width,
                # self._height) and ffmpeg's pad filter fills the gap with
                # gray. See bridgic/browser/session/_browser.py
                # ``start_video()`` for the dimension-resolution comment.
                "maxWidth": self._width,
                "maxHeight": self._height,
            })
        except BaseException:
            # Roll back everything start() acquired so nothing leaks. Every
            # cleanup step is guarded: a failure here must never replace the
            # exception that actually explains why start() failed.
            self._kill_ffmpeg()
            self._ffmpeg = None
            session, self._cdp_session = self._cdp_session, None
            if session is not None:
                # new_cdp_session() succeeded but startScreencast did not;
                # without a detach the session would stay attached to the
                # page for the lifetime of the browser connection.
                try:
                    await session.detach()
                except Exception:
                    pass
            raise
        logger.debug(
            "[VideoRecorder] started screencast %dx%d → %s",
            self._width, self._height, self._output_path,
        )

    async def stop(self) -> str:
        """Stop recording and return the output path.

        On return the file is fully written. The shutdown sequence mirrors
        ``stop()`` in Playwright's videoRecorder.ts (lines 130-155).
        Calling ``stop()`` again after it has completed is a no-op that
        returns the same path.
        """
        if self._is_stopped:
            return self._output_path

        # Step 1: tell Chrome to stop pushing frames.
        # Reference: CDP Page.stopScreencast.
        if self._cdp_session:
            try:
                await self._cdp_session.send("Page.stopScreencast")
            except Exception:
                pass

        # Step 2: make sure at least one frame has been written. ffmpeg
        # refuses to produce a valid container with an empty input stream.
        # Reference: videoRecorder.ts lines 136-138.
        if not self._last_frame:
            white = _create_white_jpeg(self._width, self._height)
            self._write_frame(white, time.monotonic())

        # Step 3: pad the tail with ≥1 second of the last frame so the
        # output never ends abruptly. Sending an empty frame (b"") is the
        # sentinel that tells _write_frame to advance the frame counter
        # without replacing the cached JPEG bytes.
        # Reference: videoRecorder.ts lines 140-144.
        add_time = max(time.monotonic() - self._last_write_time, 1.0)
        self._write_frame(b"", self._last_frame[1] + add_time)  # type: ignore[index]

        # Must be set only after steps 2-3: _write_frame drops real
        # (non-empty) frames once this flag is on.
        self._is_stopped = True

        # Step 4: drain any frames still queued for ffmpeg's stdin.
        await self._flush_queue()

        # Step 5: close ffmpeg's stdin so it can finalize the file.
        if self._ffmpeg and self._ffmpeg.stdin:
            try:
                self._ffmpeg.stdin.close()
                await self._ffmpeg.stdin.wait_closed()
            except Exception:
                pass
            try:
                await asyncio.wait_for(self._ffmpeg.wait(), timeout=10.0)
            except asyncio.TimeoutError:
                self._kill_ffmpeg()
                logger.warning("[VideoRecorder] ffmpeg killed after timeout")

        # Step 6: detach the CDP session.
        if self._cdp_session:
            try:
                await self._cdp_session.detach()
            except Exception:
                pass
            self._cdp_session = None

        logger.debug("[VideoRecorder] stopped → %s", self._output_path)
        return self._output_path

    @property
    def output_path(self) -> str:
        """Path of the WebM file this recorder writes to."""
        return self._output_path

    @property
    def is_stopped(self) -> bool:
        """``True`` once ``stop()`` has passed its tail-padding step."""
        return self._is_stopped

    def _kill_ffmpeg(self) -> None:
        """Kill the ffmpeg subprocess, tolerating one that already exited.

        asyncio's ``Process.kill()`` raises ``ProcessLookupError`` when the
        child is already gone; callers use this from error paths where that
        must not surface.
        """
        if self._ffmpeg is None:
            return
        try:
            self._ffmpeg.kill()
        except ProcessLookupError:
            pass

    # ------------------------------------------------------------------
    # Frame handling — mirrors _writeFrame() in Playwright's
    # videoRecorder.ts (lines 195-213).
    # ------------------------------------------------------------------

    def _on_screencast_frame(self, params: dict) -> None:
        """Handle a ``Page.screencastFrame`` CDP event from Chrome.

        Reference: Chrome DevTools Protocol — Page.screencastFrame
        https://chromedevtools.github.io/devtools-protocol/tot/Page/#event-screencastFrame

        ``params`` carries:
        - data: base64-encoded JPEG bytes
        - metadata.timestamp: frame timestamp in seconds (wall-clock)
        - sessionId: ack token; Chrome will not send the next frame until
          ``Page.screencastFrameAck`` is replied with this id
        """
        if self._is_stopped:
            return

        try:
            data = base64.b64decode(params["data"])
        except Exception as exc:
            # Should not happen — Chrome always provides valid base64 — but
            # we want to surface a hint without taking down the event loop.
            logger.warning("[VideoRecorder] dropping malformed frame: %s", exc)
            return
        metadata = params.get("metadata", {})
        timestamp: float = metadata.get("timestamp", time.time())

        # Ack the frame so Chrome will push the next one. After stop() the
        # CDP session is detached, so the ack may fail — swallow the
        # exception via add_done_callback to keep the event loop quiet.
        # Reference: CDP Page.screencastFrameAck.
        session_id = params.get("sessionId")
        if session_id and self._cdp_session:
            task = asyncio.create_task(
                self._cdp_session.send(
                    "Page.screencastFrameAck", {"sessionId": session_id}
                )
            )
            task.add_done_callback(lambda t: t.exception() if not t.cancelled() else None)

        self._write_frame(data, timestamp)

    def _write_frame(self, frame: bytes, timestamp: float) -> None:
        """Queue a frame for ffmpeg, padding gaps with the previous frame.

        Mirrors ``_writeFrame()`` in Playwright's videoRecorder.ts
        (lines 195-213).

        Why padding: Chrome's screencast emits frames irregularly — it does
        not push anything while the page is idle — but ffmpeg's input needs
        a constant frame rate (``_FPS``). So whenever the new frame's
        frame_number is ``N`` and the previous one was ``M``, we re-emit the
        last JPEG ``N - M`` times to fill the gap.

            frame_number = floor((timestamp - first_frame_timestamp) * _FPS)
            repeat_count = current frame_number - previous frame_number

        Sentinel: an empty ``frame`` (b"") signals the tail-padding case
        used by stop(). It advances the frame counter without replacing the
        cached JPEG bytes.
        """
        if self._is_stopped and frame:
            return

        if not self._first_frame_ts:
            self._first_frame_ts = timestamp

        # Compute the current frame number — videoRecorder.ts line 200.
        frame_number = math.floor((timestamp - self._first_frame_ts) * _FPS)

        # Repeat the last frame to cover the gap up to the current frame
        # number. Reference: videoRecorder.ts lines 203-207.
        if self._last_frame is not None:
            repeat_count = frame_number - self._last_frame[2]
            for _ in range(max(repeat_count, 0)):
                self._frame_queue.append(self._last_frame[0])
            # Schedule an async flush. The dedup flag ensures that we only
            # have a single flush task pending at any time even if many
            # frames arrive in quick succession.
            if not self._flush_pending:
                try:
                    loop = asyncio.get_running_loop()
                    self._flush_pending = True

                    def _schedule_flush() -> None:
                        t = asyncio.create_task(self._flush_and_reset())
                        t.add_done_callback(lambda _t: _t.exception() if not _t.cancelled() else None)

                    loop.call_soon(_schedule_flush)
                except RuntimeError:
                    # No running loop: nothing can flush right now; leave
                    # the frames queued for a later explicit flush.
                    self._flush_pending = False

        if frame:
            # Real frame: replace the cached entry.
            self._last_frame = (frame, timestamp, frame_number)
        else:
            # Empty-frame sentinel: advance the counter, keep the JPEG
            # bytes (used by stop() to extend the tail).
            if self._last_frame is not None:
                self._last_frame = (self._last_frame[0], timestamp, frame_number)
        self._last_write_time = time.monotonic()

    async def _flush_queue(self) -> None:
        """Drain the frame queue into ffmpeg's stdin under a write lock.

        Frames are taken in batches so draining is linear in the number of
        queued frames; a long idle gap can queue thousands of repeats, and
        popping them one by one from the front of a list is quadratic.
        """
        async with self._write_lock:
            while self._frame_queue:
                # Snapshot and clear in place so the list object stays the
                # same and frames queued during the awaits below land in it.
                batch = list(self._frame_queue)
                self._frame_queue.clear()
                taken = 0
                try:
                    for frame_data in batch:
                        taken += 1
                        await self._send_frame(frame_data)
                finally:
                    # Interrupted mid-batch (e.g. task cancellation): put the
                    # untouched frames back at the front, in order.
                    if taken < len(batch):
                        self._frame_queue[:0] = batch[taken:]

    async def _flush_and_reset(self) -> None:
        """Flush the queue and clear the dedup flag (always, via finally)."""
        try:
            await self._flush_queue()
        finally:
            self._flush_pending = False

    async def _send_frame(self, frame: bytes) -> None:
        """Write a JPEG frame to ffmpeg's stdin.

        Errors are logged at WARNING level (with one-shot dedup) so a broken
        encoder is visible in the daemon log. Subsequent failures during the
        same recording are downgraded to DEBUG to avoid log spam.
        """
        if not self._ffmpeg or not self._ffmpeg.stdin or self._ffmpeg.stdin.is_closing():
            return
        try:
            self._ffmpeg.stdin.write(frame)
            await self._ffmpeg.stdin.drain()
        except Exception as e:
            if not getattr(self, "_ffmpeg_write_warned", False):
                logger.warning("[VideoRecorder] ffmpeg write error: %s", e)
                self._ffmpeg_write_warned = True
            else:
                logger.debug("[VideoRecorder] ffmpeg write error: %s", e)
When `cdp_url` is set, connects to an existing Chrome via CDP (`connect_over_cdp`) instead of launching a new browser. | | `await browser._start()` | Launch browser and create context. Called automatically by `navigate_to` / `search` (lazy start); call directly only when you need explicit startup before any navigation. | | `await browser.close()` | Stop the browser, auto-cleans active capture listeners. No-op if never started. | | `await browser.navigate_to(url, wait_until="domcontentloaded", timeout=None)` | Navigate to URL with optional auto-prefix when missing protocol. `wait_until`: `"domcontentloaded"` (default), `"load"`, `"networkidle"`, or `"commit"`. `timeout` in seconds. | @@ -26,6 +26,8 @@ Short reference for the main session and download APIs. For tool lists and selec | `browser.stealth_enabled` | `bool` — whether stealth mode is active. | | `browser.stealth_config` | `StealthConfig` or `None` — current stealth configuration. | | `browser.use_persistent_context` | `bool` — `True` when using `launch_persistent_context` (`clear_user_data=False`); `False` when using ephemeral `launch`+`new_context` (`clear_user_data=True`). | +| `browser.last_close_artifacts` | `dict` — trace and video paths produced by the most recent `close()` call. Shape: `{"trace": [str, ...], "video": [str, ...]}`. Empty lists before the first close, or when no tracing/video was active. Returns a fresh shallow copy on every access — mutating it does not affect the browser's internal state. | +| `browser.last_close_errors` | `list[str]` — warnings/errors collected during the most recent `close()` call (e.g. trace-stop timeouts, video-finalize failures). Empty list before the first close, or on a clean shutdown. Returns a fresh copy on every access. 
| ## DownloadManager diff --git a/docs/CDP_MODE.md b/docs/CDP_MODE.md new file mode 100644 index 0000000..18527bd --- /dev/null +++ b/docs/CDP_MODE.md @@ -0,0 +1,108 @@ +# CDP Connection Mode + +Connect to an already-running Chrome instance instead of launching a new one. + +```python +from bridgic.browser import Browser + +# SDK +browser = Browser(cdp_url="ws://localhost:9222/devtools/browser/abc") + +# CLI +bridgic-browser open https://example.com --cdp 9222 +bridgic-browser open https://example.com --cdp auto +bridgic-browser open https://example.com --cdp "ws://localhost:9222/..." +``` + +## How it works + +`Browser(cdp_url=...)` calls Playwright's `connect_over_cdp()` instead of `launch()`. The existing browser's default context is borrowed — bridgic operates as a guest on someone else's browser, sharing cookies, localStorage, and login state with the user's real Chrome session. (That session sharing is the whole point of CDP mode.) + +## Tab ownership in CDP mode + +After connecting via CDP, bridgic **always opens its own brand-new tab** in the borrowed browser context. **Your existing tabs are never navigated, refreshed, or closed.** When `close()` runs (or the daemon shuts down), bridgic only closes the tabs it created itself. + +Each call to `bridgic-browser new-tab` creates an additional bridgic-owned tab; all of them are tracked and cleaned up on shutdown. Tabs you opened manually in Chrome — or pop-ups (`target=_blank` etc.) spawned by pages bridgic was driving — are **not** tracked and will not be touched by bridgic. + +When bridgic connects, the daemon log records which Chrome instance was joined and how many user tabs were preserved: + +``` +[CDP] connected; created new bridgic tab (borrowed_context=True, preserved_existing_tabs=3) +``` + +This is especially useful with `--cdp auto` (scan mode), where bridgic auto-discovers a running Chrome instance — check this log line to confirm bridgic actually attached to the browser you expected. 
+ +## Limitations + +### Launch parameters are ignored + +The browser is already running, so these constructor parameters have **no effect** in CDP mode: + +| Parameter | Reason | +|-----------|--------| +| `headless` | Cannot change headed/headless after launch | +| `args` / `ignore_default_args` | Chrome flags must be set at launch time | +| `channel` / `executable_path` | Binary already selected | +| `proxy` | Proxy must be configured at launch time | +| `slow_mo` / `timeout` | These are `launch()`-level parameters | +| `devtools` | Cannot toggle DevTools panel | + +### Context options do not apply to borrowed contexts + +When connecting via CDP, bridgic borrows the browser's existing default context (`browser.contexts[0]`). Context-level options cannot be changed after creation: + +| Parameter | Status | +|-----------|--------| +| `viewport` | Keeps the existing context's viewport | +| `user_agent` | Cannot modify | +| `locale` / `timezone_id` | Cannot modify | +| `color_scheme` | Cannot modify | +| `ignore_https_errors` | Cannot modify | +| `extra_http_headers` | Cannot modify | +| `user_data_dir` | Ignored — CDP mode never uses persistent context | + +### Stealth mode is partially effective + +| Stealth capability | CDP status | Reason | +|--------------------|-----------|--------| +| Chrome launch args (50+ flags) | **Not applied** | Browser already running | +| `--disable-component-update`, etc. | **Not applied** | Same as above | +| JS init script (navigator patches) | **Headless only** | Injected via `add_init_script()` — works on new pages | +| Headed-mode system Chrome switch | **Not applied** | Browser already running | + +If the remote Chrome was not started with stealth flags, bridgic's JS patches can cover some fingerprints (navigator, webdriver, plugins) but cannot modify signals that require launch arguments (e.g., Blink feature disabling). 
+ +### Video recording is restricted to bridgic-owned tabs + +bridgic records video via Chrome's CDP `Page.startScreencast` (piped to ffmpeg), **not** Playwright's `record_video` context option — so video recording works on borrowed contexts. There are two CDP-mode constraints worth knowing: + +- **Only bridgic's own tabs are recorded.** `start_video()` skips every page in the borrowed context that bridgic did not create itself, and the future-page listener applies the same filter. The user's banking, email, or chat tabs are never captured. Pop-ups (`target=_blank`) spawned by pages bridgic was driving are also untracked, and therefore not recorded either. +- **Recording stops cleanly without touching user tabs.** `stop_video()` only finalizes the screencast sessions for bridgic-owned pages, so no user page is closed or refreshed. + +**Tracing is not affected** — `tracing.stop()` works at any time without closing pages or contexts. + +### `close()` only disconnects + +`close()` in CDP mode preserves the remote browser state — only bridgic's own tabs are cleaned up: + +| Operation | Launch mode | CDP (borrowed context) | +|-----------|------------|----------------------| +| Navigate pages to about:blank | Yes | **Skipped** | +| `page.close()` on user tabs | Yes | **Skipped** | +| `page.close()` on bridgic-owned tabs | Yes | Yes | +| `context.close()` | Yes | **Skipped** | +| `browser.close()` | Kills process | **Disconnects only** | +| Save tracing artifacts | Yes | Yes | +| Save video artifacts | Yes | Yes (bridgic-owned tabs only) | + +After `close()`, the remote Chrome continues running with all of the **user's** tabs intact; only the tabs bridgic explicitly created are gone. + +### Connection drops + +The CDP WebSocket connection can be lost due to: + +- Remote browser closed or crashed +- Network interruption +- Cloud browser service timeout (Browserless, Steel.dev, etc.) 
+ +The CLI daemon automatically attempts **one reconnect** when a command fails with a connection error. After reconnect the session starts fresh (about:blank) — previous page state is lost. If the remote browser is gone, the reconnect fails and the error is reported to the user. diff --git a/docs/KNOWN_LIMITATIONS.md b/docs/KNOWN_LIMITATIONS.md new file mode 100644 index 0000000..746b59f --- /dev/null +++ b/docs/KNOWN_LIMITATIONS.md @@ -0,0 +1,69 @@ +# Known Limitations + +## Chrome "Show in Folder" Does Not Work for Downloads + +### Symptom + +When using bridgic-browser in headed mode, files download successfully with correct +filenames to the configured `downloads_path`. However, clicking **"Show in Folder"** +(or "Show in Finder" on macOS) in Chrome's download panel has no effect — the +button does nothing, or shows "file deleted". + +### Root Cause + +This is a **Chromium bug**: when the CDP command `Browser.setDownloadBehavior` +is called with `eventsEnabled: true`, links on Chrome's download page and +download bubble become non-clickable. The bug was originally reported by a +Puppeteer user ([puppeteer #11871](https://github.com/puppeteer/puppeteer/issues/11871)) +and then filed upstream on the Chromium bug tracker: +[chromium #324282051](https://issues.chromium.org/issues/324282051). + +**Any tool that uses this CDP command (Puppeteer, Playwright, etc.) is affected.** + +Playwright internally uses `Browser.setDownloadBehavior` with +`behavior: 'allowAndName'` to intercept all downloads, so it is equally +affected: + +```js +// Playwright internal code (chromium/crBrowser.ts) +behavior: this._options.acceptDownloads === 'accept' ? 'allowAndName' : 'deny' +``` + +Once `setDownloadBehavior(allowAndName)` is active: + +1. Links on `chrome://downloads` page and inside the download bubble + (including "Show in Folder") become broken. +2. Playwright saves files to an internal temp directory with UUID filenames. +3. 
bridgic-browser's `DownloadManager` then copies files via `download.save_as()` + to the user's `downloads_path` with correct filenames. + +### Verification + +This was verified by testing with **raw Playwright** (no bridgic-browser code): + +```python +context = await p.chromium.launch_persistent_context( + user_data_dir="...", + headless=False, + accept_downloads=True, + downloads_path=str(Path.home() / "Downloads"), +) +``` + +The same "Show in Folder" failure occurs — confirming it is a Chromium-level bug +triggered by the CDP `setDownloadBehavior` command, not a bridgic-browser issue. + +### Workarounds + +- **Manual navigation**: Open the downloads folder directly in your file manager. + The files are saved with correct filenames at the configured `downloads_path` + (defaults to `~/Downloads` in daemon mode). +- **Programmatic access**: Use `DownloadManager.downloaded_files` to get the list + of downloaded files with their paths. + +### References + +- **[Chromium #324282051 — setDownloadBehavior breaks download page links and download bubble (root cause)](https://issues.chromium.org/issues/324282051)** +- [Puppeteer #11871 — Original bug report with reproduction steps](https://github.com/puppeteer/puppeteer/issues/11871) +- [Playwright #19885 — Playwright maintainer confirms setDownloadBehavior as the cause](https://github.com/microsoft/playwright/issues/19885) +- [Playwright Downloads Documentation](https://playwright.dev/python/docs/downloads) diff --git a/pyproject.toml b/pyproject.toml index d0e1c12..f8b3cc8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "bridgic-browser" -version = "0.0.3" +version = "0.0.4.dev1" license = {text = "MIT"} classifiers = [ "Programming Language :: Python :: 3.10", diff --git a/skills/bridgic-browser/references/cli-guide.md b/skills/bridgic-browser/references/cli-guide.md index de15afd..b65c88b 100644 --- a/skills/bridgic-browser/references/cli-guide.md +++ 
b/skills/bridgic-browser/references/cli-guide.md @@ -9,8 +9,9 @@ Use this guide when the task should be executed directly from terminal commands 3. [Command Groups](#command-groups) 4. [High-Frequency Examples](#high-frequency-examples) 5. [Runtime and Configuration](#runtime-and-configuration) -6. [Non-Obvious CLI Behavior](#non-obvious-cli-behavior) -7. [When to Load Other References](#when-to-load-other-references) +6. [CDP Mode (Connect to Existing Browser)](#cdp-mode-connect-to-existing-browser) +7. [Non-Obvious CLI Behavior](#non-obvious-cli-behavior) +8. [When to Load Other References](#when-to-load-other-references) ## Quick Start @@ -120,6 +121,37 @@ Config precedence (low -> high): Environment variables and login state persistence are documented in `env-vars.md`. +## CDP Mode (Connect to Existing Browser) + +Connect to a running Chrome instead of launching a new one: + +```bash +# Start Chrome with remote debugging +/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome \ + --remote-debugging-port=9222 --user-data-dir=/tmp/cdp-profile + +# Connect by port +bridgic-browser open https://example.com --cdp 9222 + +# Connect by WebSocket URL +bridgic-browser open https://example.com --cdp ws://localhost:9222/devtools/browser/... + +# Connect to cloud service +bridgic-browser open https://example.com --cdp wss://cloud.example.com/chromium?token=... + +# Auto-scan local Chrome/Brave/Edge/Arc profiles +bridgic-browser open https://example.com --cdp auto +``` + +| Format | Description | +|--------|-------------| +| `9222` | Bare port -- queries `localhost:9222/json/version` | +| `ws://...` / `wss://...` | Direct WebSocket URL, passed through as-is | +| `http://host:port` | HTTP discovery endpoint | +| `auto` | Scan local browser profiles for `DevToolsActivePort` | + +`close` disconnects from the remote browser but does **not** kill the Chrome process. + ## Non-Obvious CLI Behavior - Refs come from the latest snapshot. 
If page changed, re-run `snapshot` before interaction. diff --git a/skills/bridgic-browser/references/env-vars.md b/skills/bridgic-browser/references/env-vars.md index d766c0e..c0573ea 100644 --- a/skills/bridgic-browser/references/env-vars.md +++ b/skills/bridgic-browser/references/env-vars.md @@ -8,6 +8,7 @@ Use this reference when the task needs environment variable behavior or login st |---|---|---|---| | `BRIDGIC_LOG_LEVEL` | SDK + CLI | `INFO` | Log level for the `bridgic.browser` logger. | | `BRIDGIC_BROWSER_JSON` | SDK + CLI | unset | JSON string to override Browser constructor kwargs. Loaded by `Browser()` and CLI daemon. | +| `BRIDGIC_CDP` | CLI daemon | unset | Connect to an existing Chrome via CDP. Accepts: port (`9222`), `ws://`/`wss://` URL, `http://host:port`, or `auto` (scan local profiles). Resolved at daemon startup. Also set internally by the CLI client (as an already-resolved `ws://` URL) when `--cdp` is passed, so the flag overrides any value inherited from the shell. | | `BRIDGIC_SOCKET` | CLI (Unix only) | platform default | Override Unix socket path for the daemon client/transport. | | `BRIDGIC_DAEMON_RESPONSE_TIMEOUT` | CLI client | `90` | Seconds to wait for a daemon response. | | `BRIDGIC_DAEMON_STOP_TIMEOUT` | CLI daemon | `45` | Seconds to wait for daemon shutdown. | @@ -17,6 +18,7 @@ Notes: - Config file precedence (SDK + CLI, lowest -> highest): defaults, `~/.bridgic/bridgic-browser/bridgic-browser.json`, `./bridgic-browser.json`, `BRIDGIC_BROWSER_JSON`. - To start the daemon in headed mode, pass `--headed` to `bridgic-browser open` / `bridgic-browser search`, or set `{"headless": false}` in `BRIDGIC_BROWSER_JSON`. - To start with an ephemeral (no persistent profile) session, pass `--clear-user-data` to `bridgic-browser open` / `bridgic-browser search`, or set `{"clear_user_data": true}` in `BRIDGIC_BROWSER_JSON`. These flags are only meaningful when starting a new daemon; they are ignored if a session is already running. 
+- To connect to an existing Chrome via CDP, pass `--cdp` to `bridgic-browser open`, or set the `BRIDGIC_CDP` env var. The `--cdp` flag accepts a port number, `ws://`/`wss://` URL, `http://host:port`, or `auto`. - When `headless=false` (headed mode) with stealth enabled and neither `channel` nor `executable_path` is specified, the daemon **auto-switches to system Chrome** (`channel=”chrome”`) if detected on the machine. This avoids Playwright’s bundled “Chrome for Testing” which is blocked by Google OAuth and shows a “test” label in the macOS Dock. If system Chrome is not installed, it falls back to Chrome for Testing. ### Config Files and `BRIDGIC_BROWSER_JSON` Values @@ -31,6 +33,7 @@ Notes: | `viewport` | `{ "width": int, "height": int }` or `null` | Default `1600x900` when `no_viewport` is not set. | | `user_data_dir` | string (path) | Custom path for persistent profile. Ignored when `clear_user_data=true`. | | `clear_user_data` | `true | false` | Default `false`. If `true`, use ephemeral session (`launch`+`new_context`, no profile saved). If `false`, use persistent profile (defaults to `~/.bridgic/bridgic-browser/user_data/`). | +| `cdp_url` | string (ws:// URL) | Connect to existing Chrome via CDP instead of launching. Set via `BRIDGIC_CDP` env var or `--cdp` CLI flag (not via config JSON). | | `stealth` | `true | false` or object | Object uses the StealthConfig keys below. | | `channel` | string | Examples: `"chrome"`, `"msedge"`, `"chromium"`. | | `executable_path` | string (path) | Custom browser binary path. | diff --git a/skills/bridgic-browser/references/sdk-guide.md b/skills/bridgic-browser/references/sdk-guide.md index 76445d9..1d3b815 100644 --- a/skills/bridgic-browser/references/sdk-guide.md +++ b/skills/bridgic-browser/references/sdk-guide.md @@ -10,9 +10,10 @@ Use this guide when the output should be Python automation code (`bridgic.browse 4. [Snapshot and Ref Rules](#snapshot-and-ref-rules) 5. [Frequent SDK Methods](#frequent-sdk-methods) 6. 
[Tool Set Builder (for Agent Integration)](#tool-set-builder-for-agent-integration) -7. [Non-Obvious SDK Behavior](#non-obvious-sdk-behavior) -8. [SDK Error Handling](#sdk-error-handling) -9. [When to Load Other References](#when-to-load-other-references) +7. [CDP Mode (Connect to Existing Browser)](#cdp-mode-connect-to-existing-browser) +8. [Non-Obvious SDK Behavior](#non-obvious-sdk-behavior) +9. [SDK Error Handling](#sdk-error-handling) +10. [When to Load Other References](#when-to-load-other-references) ## Installation and Imports @@ -123,6 +124,29 @@ builder2 = BrowserToolSetBuilder.for_tool_names(browser, "verify_url") tools = [*builder1.build()["tool_specs"], *builder2.build()["tool_specs"]] ``` +## CDP Mode (Connect to Existing Browser) + +To connect to an already-running Chrome instead of launching a new one, pass `cdp_url`: + +```python +browser = Browser(cdp_url="ws://localhost:9222/devtools/browser/...") +``` + +Use `resolve_cdp_input()` to convert user-friendly formats (port, URL, `"auto"`) into a WebSocket URL: + +```python +from bridgic.browser import resolve_cdp_input + +ws_url = resolve_cdp_input("9222") # queries localhost:9222/json/version +ws_url = resolve_cdp_input("auto") # scans local Chrome/Brave/Edge profiles +browser = Browser(cdp_url=ws_url) +``` + +Notes: +- Stealth launch args are **not** applied (the Chrome process is already running), but the JS init script is still registered for new pages. +- `close()` disconnects from the remote browser but does **not** terminate the Chrome process. +- The daemon auto-reconnects once if the CDP session drops (useful for cloud browser session timeouts). 
+ ## Non-Obvious SDK Behavior - `wait_for` uses seconds for all time parameters: diff --git a/tests/unit/test_browser.py b/tests/unit/test_browser.py index 9b49809..d5a1800 100644 --- a/tests/unit/test_browser.py +++ b/tests/unit/test_browser.py @@ -555,10 +555,19 @@ async def test_stop_auto_saves_active_trace_and_video(self, mock_playwright): context.tracing = MagicMock() context.tracing.stop = AsyncMock() context.pages = [page] - - page.video = MagicMock() - page.video.path = AsyncMock(return_value="/tmp/playwright-video.webm") - page.video.save_as = AsyncMock() + context.remove_listener = MagicMock() + + # Create a mock CDP screencast recorder + import tempfile + _tmp_video_fd, _tmp_video_path = tempfile.mkstemp(suffix=".webm") + os.close(_tmp_video_fd) + mock_recorder = MagicMock() + mock_recorder.stop = AsyncMock(return_value=_tmp_video_path) + browser._video_recorders = {page: mock_recorder} + browser._video_session = { + "width": 800, "height": 600, "context": context, + "page_listener": lambda *_: None, + } context_key = browser_module._get_context_key(context) browser._tracing_state[context_key] = True @@ -572,12 +581,10 @@ async def test_stop_auto_saves_active_trace_and_video(self, mock_playwright): assert "close-" in trace_path assert trace_path.endswith("trace.zip") - page.close.assert_awaited() + mock_recorder.stop.assert_awaited_once() assert browser._last_shutdown_artifacts["trace"] == [os.path.abspath(trace_path)] - # Video saved via save_as into session dir assert len(browser._last_shutdown_artifacts["video"]) == 1 video_path = browser._last_shutdown_artifacts["video"][0] - assert "close-" in video_path assert "video" in video_path assert context_key not in browser._tracing_state assert context_key not in browser._video_state @@ -601,10 +608,19 @@ async def test_stop_reports_auto_saved_paths(self, mock_playwright): context.tracing = MagicMock() context.tracing.stop = AsyncMock() context.pages = [page] - - page.video = MagicMock() - page.video.path 
= AsyncMock(return_value="/tmp/auto_video.webm") - page.video.save_as = AsyncMock() + context.remove_listener = MagicMock() + + # Create a mock CDP screencast recorder + import tempfile + _tmp_video_fd, _tmp_video_path = tempfile.mkstemp(suffix=".webm") + os.close(_tmp_video_fd) + mock_recorder = MagicMock() + mock_recorder.stop = AsyncMock(return_value=_tmp_video_path) + browser._video_recorders = {page: mock_recorder} + browser._video_session = { + "width": 800, "height": 600, "context": context, + "page_listener": lambda *_: None, + } context_key = browser_module._get_context_key(context) browser._tracing_state[context_key] = True @@ -616,6 +632,94 @@ async def test_stop_reports_auto_saved_paths(self, mock_playwright): assert "trace.zip" in result assert "video" in result + @pytest.mark.asyncio + async def test_close_auto_stops_cdp_recorder(self, mock_playwright): + """close() should auto-stop the CDP screencast recorder and save the video.""" + from bridgic.browser.session import _browser as browser_module + + with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_playwright) + + browser = Browser(stealth=False) + await browser._start() + + context = browser._context + page = browser._page + assert context is not None + assert page is not None + context.pages = [page] + context.remove_listener = MagicMock() + + # Create a mock VideoRecorder + import tempfile + _tmp_fd, _tmp_path = tempfile.mkstemp(suffix=".webm") + os.close(_tmp_fd) + mock_recorder = MagicMock() + mock_recorder.stop = AsyncMock(return_value=_tmp_path) + browser._video_recorders = {page: mock_recorder} + browser._video_session = { + "width": 800, "height": 600, "context": context, + "page_listener": lambda *_: None, + } + + context_key = browser_module._get_context_key(context) + browser._video_state[context_key] = True + + await browser.close() + + mock_recorder.stop.assert_awaited_once() + assert 
browser._video_recorders == {} + assert browser._video_session is None + assert len(browser._last_shutdown_artifacts["video"]) == 1 + assert context_key not in browser._video_state + + @pytest.mark.asyncio + async def test_close_page_auto_stops_recorder(self, mock_playwright): + """_close_page() should auto-stop the recorder when closing the recorded page.""" + from bridgic.browser.session import _browser as browser_module + + with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_playwright) + + browser = Browser(stealth=False) + await browser._start() + + context = browser._context + page = browser._page + assert context is not None + assert page is not None + + # Mock a second page so _close_page has a tab to switch to + second_page = MagicMock() + second_page.url = "https://example.com/2" + second_page.title = AsyncMock(return_value="Page 2") + second_page.close = AsyncMock() + context.pages = [page, second_page] + + # Set up mock recorder on the current page only (second page + # is not being recorded in this scenario) + mock_recorder = MagicMock() + mock_recorder.stop = AsyncMock(return_value="/tmp/rec.webm") + browser._video_recorders = {page: mock_recorder} + browser._video_session = { + "width": 800, "height": 600, "context": context, + "page_listener": lambda *_: None, + } + + context_key = browser_module._get_context_key(context) + browser._video_state[context_key] = True + + # Close the page that has the recorder + success, msg = await browser._close_page(page) + assert success + + # Recorder for the closed page should have been stopped and removed + mock_recorder.stop.assert_awaited_once() + assert page not in browser._video_recorders + # Session stays active since other tabs may still be recording + assert browser._video_session is not None + assert browser._video_state.get(context_key) is True + @pytest.mark.asyncio async def 
test_stop_warns_on_trace_finalize_failure(self, mock_playwright): """stop() should report warnings when trace auto-save fails.""" @@ -664,11 +768,6 @@ async def test_stop_clears_page_scoped_handlers_before_auto_video_finalize(self, assert page is not None context.pages = [page] - page.video = MagicMock() - page.video.path = AsyncMock(return_value="/tmp/auto_listener_video.webm") - - context_key = browser_module._get_context_key(context) - browser._video_state[context_key] = True page_key = browser_module._get_page_key(page) console_handler = MagicMock() @@ -714,6 +813,129 @@ async def _slow_close(): for warning in browser._last_shutdown_errors ) + def test_last_close_properties_default_empty_before_close(self): + """Before any close() runs, both properties return empty defaults.""" + browser = Browser(stealth=False) + assert browser.last_close_artifacts == {"trace": [], "video": []} + assert browser.last_close_errors == [] + + @pytest.mark.asyncio + async def test_last_close_properties_after_clean_close(self, mock_playwright): + """A clean close() with no tracing/video leaves both properties empty.""" + with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_playwright) + + browser = Browser(stealth=False) + await browser._start() + await browser.close() + + assert browser.last_close_artifacts == {"trace": [], "video": []} + assert browser.last_close_errors == [] + + @pytest.mark.asyncio + async def test_last_close_properties_populated_after_trace_video_close(self, mock_playwright): + """close() with active trace+video populates the properties, and the + returned objects are independent copies (mutating them does not affect + the browser's internal state).""" + from bridgic.browser.session import _browser as browser_module + + with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_playwright) + + browser = 
Browser(stealth=False) + await browser._start() + + context = browser._context + page = browser._page + assert context is not None + assert page is not None + + context.tracing = MagicMock() + context.tracing.stop = AsyncMock() + context.pages = [page] + context.remove_listener = MagicMock() + + import tempfile + _tmp_video_fd, _tmp_video_path = tempfile.mkstemp(suffix=".webm") + os.close(_tmp_video_fd) + mock_recorder = MagicMock() + mock_recorder.stop = AsyncMock(return_value=_tmp_video_path) + browser._video_recorders = {page: mock_recorder} + browser._video_session = { + "width": 800, "height": 600, "context": context, + "page_listener": lambda *_: None, + } + + context_key = browser_module._get_context_key(context) + browser._tracing_state[context_key] = True + browser._video_state[context_key] = True + + await browser.close() + + artifacts = browser.last_close_artifacts + assert len(artifacts["trace"]) == 1 + assert artifacts["trace"][0].endswith("trace.zip") + assert len(artifacts["video"]) == 1 + assert "video" in artifacts["video"][0] + assert browser.last_close_errors == [] + + # Defensive copy: mutating the returned dict and lists must + # not affect the browser's stored state. + artifacts["trace"].clear() + artifacts["video"].clear() + artifacts["trace"].append("hacked") + errors = browser.last_close_errors + errors.append("hacked") + + re_read = browser.last_close_artifacts + assert len(re_read["trace"]) == 1 + assert re_read["trace"][0].endswith("trace.zip") + assert len(re_read["video"]) == 1 + assert browser.last_close_errors == [] + + @pytest.mark.asyncio + async def test_inspect_close_artifacts_skips_dir_when_nothing_active(self, mock_playwright): + """Regression: SDK close() with no tracing/video must not leak an + empty close-session directory under BRIDGIC_TMP_DIR. + + Previously inspect_pending_close_artifacts() always created a + ``close--/`` directory, so every plain ``Browser.close()`` + accumulated an empty directory for the SDK user. 
The fix returns an + empty ``session_dir`` when there is nothing to write, and close() + propagates that — no directory should be created. + """ + from bridgic.browser._constants import BRIDGIC_TMP_DIR + + with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_playwright) + + browser = Browser(stealth=False) + await browser._start() + + # Snapshot the existing close-* directories so we can verify + # nothing new was created (the directory may already exist + # from prior tests/sessions in the same temp root). + tmp_root = Path(str(BRIDGIC_TMP_DIR)) + before = set() + if tmp_root.exists(): + before = {p.name for p in tmp_root.iterdir() if p.name.startswith("close-")} + + artifacts = browser.inspect_pending_close_artifacts() + assert artifacts["session_dir"] == "" + assert artifacts["trace"] == [] + assert artifacts["video"] == [] + assert browser._close_session_dir is None + + await browser.close() + + after = set() + if tmp_root.exists(): + after = {p.name for p in tmp_root.iterdir() if p.name.startswith("close-")} + new_dirs = after - before + assert new_dirs == set(), ( + f"close() leaked empty session dirs: {new_dirs}" + ) + @pytest.mark.asyncio async def test_ensure_started_recovers_from_inconsistent_state(self, mock_playwright, mock_context, mock_page): """_ensure_started() resets cleanly when _playwright is set but _context is None.""" @@ -732,6 +954,32 @@ async def test_ensure_started_recovers_from_inconsistent_state(self, mock_playwr assert browser._playwright is not None assert browser._context is not None + @pytest.mark.asyncio + async def test_launch_mode_close_records_page_close_failure(self, mock_playwright, mock_page): + """Launch / persistent mode: page.close() failures must be recorded in + _last_shutdown_errors, mirroring the borrowed-CDP branch (symmetry). 
+ + Regression guard for H1: the non-borrowed branch in Browser.close() used + to silently swallow regular Exception results from asyncio.gather(). + """ + with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_playwright) + + browser = Browser(stealth=False) + await browser._start() + + # Simulate page.close() raising a regular Exception (not BaseException). + mock_page.close = AsyncMock(side_effect=RuntimeError("page-boom")) + + await browser.close() + + assert any("page-boom" in e for e in browser._last_shutdown_errors), ( + f"expected 'page-boom' in errors, got: {browser._last_shutdown_errors}" + ) + # Downstream cleanup must still complete. + assert browser._page is None + assert browser._context is None + class TestBrowserNavigation: """Tests for Browser navigation methods.""" @@ -1259,3 +1507,840 @@ async def test_aria_ref_iframe_uses_frame_locator_chain(self): browser._page.frame_locator.assert_called_once_with("iframe") frame_locator_mock.nth.assert_called_once_with(0) nth_mock.locator.assert_called_once_with("aria-ref=f1e99") + + +# ───────────────────────────────────────────────────────────────────────────── +# Browser._start() CDP mode +# ───────────────────────────────────────────────────────────────────────────── + +class TestBrowserStartCdp: + """Tests for Browser._start() in CDP connect mode (connect_over_cdp).""" + + def _make_cdp_mocks(self, pages=None, contexts_count=1): + """Return (mock_pw, mock_cdp_browser, mock_ctx, mock_page) tuple.""" + mock_pg = MagicMock() + mock_pg.bring_to_front = AsyncMock() + + mock_ctx = MagicMock() + mock_ctx.add_init_script = AsyncMock() + mock_ctx.new_page = AsyncMock(return_value=mock_pg) + mock_ctx.pages = pages if pages is not None else [mock_pg] + + mock_cdp_browser = MagicMock() + mock_cdp_browser.contexts = [mock_ctx] * contexts_count + mock_cdp_browser.new_context = AsyncMock(return_value=mock_ctx) + + mock_pw = MagicMock() + 
mock_pw.chromium.connect_over_cdp = AsyncMock(return_value=mock_cdp_browser) + mock_pw.stop = AsyncMock() + + return mock_pw, mock_cdp_browser, mock_ctx, mock_pg + + @pytest.mark.asyncio + async def test_cdp_url_calls_connect_over_cdp(self): + mock_pw, mock_cdp_brow, mock_ctx, _ = self._make_cdp_mocks() + cdp_url = "ws://localhost:9222/devtools/browser/abc" + browser = Browser(cdp_url=cdp_url, stealth=False) + with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_pw) + await browser._start() + mock_pw.chromium.connect_over_cdp.assert_awaited_once_with(cdp_url) + mock_pw.chromium.launch.assert_not_called() + + @pytest.mark.asyncio + async def test_existing_contexts_reused(self): + mock_pw, mock_cdp_brow, mock_ctx, _ = self._make_cdp_mocks() + browser = Browser(cdp_url="ws://localhost:9222/devtools/browser/abc", stealth=False) + with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_pw) + await browser._start() + assert browser._context is mock_ctx + mock_cdp_brow.new_context.assert_not_called() + + @pytest.mark.asyncio + async def test_empty_contexts_calls_new_context(self): + mock_pw, mock_cdp_brow, mock_ctx, _ = self._make_cdp_mocks(contexts_count=0) + mock_cdp_brow.contexts = [] + mock_cdp_brow.new_context = AsyncMock(return_value=mock_ctx) + browser = Browser(cdp_url="ws://localhost:9222/devtools/browser/abc", stealth=False) + with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_pw) + await browser._start() + mock_cdp_brow.new_context.assert_awaited_once() + + @pytest.mark.asyncio + async def test_stealth_true_headless_calls_add_init_script(self): + mock_pw, _, mock_ctx, _ = self._make_cdp_mocks() + browser = Browser(cdp_url="ws://localhost:9222/devtools/browser/abc", stealth=True, headless=True) + with 
patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_pw) + await browser._start() + mock_ctx.add_init_script.assert_awaited_once() + + @pytest.mark.asyncio + async def test_stealth_true_headed_skips_init_script(self): + """Headed CDP mode must skip init script (same as non-CDP headed mode).""" + mock_pw, _, mock_ctx, _ = self._make_cdp_mocks() + browser = Browser(cdp_url="ws://localhost:9222/devtools/browser/abc", stealth=True, headless=False) + with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_pw) + await browser._start() + mock_ctx.add_init_script.assert_not_called() + + @pytest.mark.asyncio + async def test_stealth_false_no_add_init_script(self): + mock_pw, _, mock_ctx, _ = self._make_cdp_mocks() + browser = Browser(cdp_url="ws://localhost:9222/devtools/browser/abc", stealth=False) + with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_pw) + await browser._start() + mock_ctx.add_init_script.assert_not_called() + + @pytest.mark.asyncio + async def test_cdp_always_creates_new_page_in_borrowed_context(self): + """CDP mode must NEVER reuse a borrowed user tab. Always create a new + bridgic-owned page so the user's existing tabs stay untouched.""" + page1, page2 = MagicMock(), MagicMock() + page1.bring_to_front = AsyncMock() + page2.bring_to_front = AsyncMock() + # mock_pg is the page returned by mock_ctx.new_page() — this is the + # page bridgic should adopt as self._page, NOT page2. 
+ mock_pw, _, mock_ctx, mock_pg = self._make_cdp_mocks(pages=[page1, page2]) + browser = Browser(cdp_url="ws://localhost:9222/devtools/browser/abc", stealth=False) + with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_pw) + await browser._start() + mock_ctx.new_page.assert_awaited_once() + assert browser._page is mock_pg + assert browser._page is not page2 # CRITICAL: never hijack user's tab + assert browser._page in browser._cdp_owned_pages + + @pytest.mark.asyncio + async def test_cdp_new_page_called_unconditionally(self): + """Even when the borrowed context has no pages, _start() still calls + new_page() and tracks the result in _cdp_owned_pages.""" + mock_pw, _, mock_ctx, mock_pg = self._make_cdp_mocks(pages=[]) + browser = Browser(cdp_url="ws://localhost:9222/devtools/browser/abc", stealth=False) + with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_pw) + await browser._start() + mock_ctx.new_page.assert_awaited_once() + assert browser._page is mock_pg + assert browser._page in browser._cdp_owned_pages + + @pytest.mark.asyncio + async def test_download_manager_attached(self, tmp_path): + mock_pw, _, mock_ctx, _ = self._make_cdp_mocks() + downloads_dir = tmp_path / "dl" + downloads_dir.mkdir() + browser = Browser( + cdp_url="ws://localhost:9222/devtools/browser/abc", + stealth=False, + downloads_path=str(downloads_dir), + ) + with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_pw) + with patch.object(browser._download_manager, "attach_to_context") as mock_attach: + await browser._start() + mock_attach.assert_called_once_with(mock_ctx) + + +# ───────────────────────────────────────────────────────────────────────────── +# Browser.use_persistent_context — CDP mode +# 
───────────────────────────────────────────────────────────────────────────── + +class TestBrowserUsePersistentContextCdp: + """Tests for use_persistent_context property in CDP vs normal mode.""" + + def test_cdp_url_returns_false(self): + browser = Browser( + cdp_url="ws://localhost:9222/devtools/browser/abc", + user_data_dir="/tmp/profile", + ) + assert browser.use_persistent_context is False + + def test_no_cdp_with_user_data_dir_returns_true(self): + browser = Browser(user_data_dir="/tmp/profile") + assert browser.use_persistent_context is True + + +# ───────────────────────────────────────────────────────────────────────────── +# Browser.close() — CDP mode +# ───────────────────────────────────────────────────────────────────────────── + +class TestBrowserCloseCdp: + """Tests for Browser.close() in CDP mode — must disconnect without + destroying pages/context in the remote browser.""" + + def _make_cdp_mocks(self, pages=None, contexts_count=1): + """Return (mock_pw, mock_cdp_browser, mock_ctx, mock_page) tuple. + + ``mock_page`` is the page returned by ``mock_ctx.new_page()`` — i.e. 
the + bridgic-owned page in CDP mode.""" + mock_pg = MagicMock() + mock_pg.bring_to_front = AsyncMock() + mock_pg.close = AsyncMock() + mock_pg.goto = AsyncMock() + mock_pg.video = None + mock_pg.is_closed = MagicMock(return_value=False) + + mock_ctx = MagicMock() + mock_ctx.add_init_script = AsyncMock() + mock_ctx.new_page = AsyncMock(return_value=mock_pg) + mock_ctx.pages = pages if pages is not None else [mock_pg] + mock_ctx.close = AsyncMock() + mock_ctx.tracing = MagicMock() + mock_ctx.tracing.stop = AsyncMock() + + mock_cdp_browser = MagicMock() + mock_cdp_browser.contexts = [mock_ctx] * contexts_count + mock_cdp_browser.new_context = AsyncMock(return_value=mock_ctx) + mock_cdp_browser.close = AsyncMock() + + mock_pw = MagicMock() + mock_pw.chromium.connect_over_cdp = AsyncMock(return_value=mock_cdp_browser) + mock_pw.stop = AsyncMock() + + return mock_pw, mock_cdp_browser, mock_ctx, mock_pg + + async def _start_cdp_browser(self, mock_pw, *, cdp_url="ws://localhost:9222/devtools/browser/abc", **kwargs): + """Create and start a Browser in CDP mode.""" + browser = Browser(cdp_url=cdp_url, stealth=False, **kwargs) + with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_pw) + await browser._start() + return browser + + @pytest.mark.asyncio + async def test_cdp_close_does_not_close_borrowed_pages(self): + """close() in CDP borrowed context must NOT close any user tabs. + It MUST close the bridgic-owned page (the one new_page() returned).""" + # Set up two distinct pages: a borrowed user tab and the bridgic-owned + # page that mock_ctx.new_page() returns. 
+ borrowed_pg = MagicMock() + borrowed_pg.close = AsyncMock() + borrowed_pg.goto = AsyncMock() + borrowed_pg.bring_to_front = AsyncMock() + borrowed_pg.video = None + borrowed_pg.is_closed = MagicMock(return_value=False) + + mock_pw, _, mock_ctx, bridgic_pg = self._make_cdp_mocks(pages=[borrowed_pg]) + bridgic_pg.is_closed = MagicMock(return_value=False) + browser = await self._start_cdp_browser(mock_pw) + + # After _start: bridgic should have created its own page; borrowed page untouched. + assert browser._page is bridgic_pg + assert bridgic_pg in browser._cdp_owned_pages + + await browser.close() + + # User's borrowed tab MUST NOT be closed. + borrowed_pg.close.assert_not_called() + # bridgic's owned page MUST be closed. + bridgic_pg.close.assert_awaited_once() + + @pytest.mark.asyncio + async def test_cdp_close_does_not_close_borrowed_context(self): + """close() in CDP mode must NOT call context.close() on borrowed context.""" + mock_pw, _, mock_ctx, _ = self._make_cdp_mocks() + browser = await self._start_cdp_browser(mock_pw) + + assert browser._cdp_context_owned is False + await browser.close() + + mock_ctx.close.assert_not_called() + + @pytest.mark.asyncio + async def test_cdp_close_closes_owned_context(self): + """close() in CDP mode SHOULD close a context that bridgic created.""" + mock_pw, mock_cdp_browser, mock_ctx, _ = self._make_cdp_mocks(contexts_count=0) + mock_cdp_browser.contexts = [] + browser = await self._start_cdp_browser(mock_pw) + + assert browser._cdp_context_owned is True + await browser.close() + + mock_ctx.close.assert_awaited_once() + + @pytest.mark.asyncio + async def test_cdp_close_does_not_navigate_about_blank(self): + """close() in CDP mode must NOT navigate pages to about:blank.""" + mock_pw, _, mock_ctx, mock_pg = self._make_cdp_mocks() + browser = await self._start_cdp_browser(mock_pw) + + await browser.close() + + mock_pg.goto.assert_not_called() + + @pytest.mark.asyncio + async def test_cdp_close_disconnects_browser(self): + 
"""close() in CDP mode must call _browser.close() to disconnect.""" + mock_pw, mock_cdp_browser, _, _ = self._make_cdp_mocks() + browser = await self._start_cdp_browser(mock_pw) + + await browser.close() + + mock_cdp_browser.close.assert_awaited_once() + + @pytest.mark.asyncio + async def test_cdp_close_stops_playwright(self): + """close() in CDP mode must stop the Playwright driver.""" + mock_pw, _, _, _ = self._make_cdp_mocks() + browser = await self._start_cdp_browser(mock_pw) + + await browser.close() + + mock_pw.stop.assert_awaited_once() + + @pytest.mark.asyncio + async def test_cdp_close_clears_internal_references(self): + """close() in CDP mode must clear all internal references.""" + mock_pw, _, _, _ = self._make_cdp_mocks() + browser = await self._start_cdp_browser(mock_pw) + + await browser.close() + + assert browser._playwright is None + assert browser._browser is None + assert browser._context is None + assert browser._page is None + + @pytest.mark.asyncio + async def test_cdp_close_multiple_borrowed_pages_not_closed(self): + """close() in CDP borrowed mode must leave all user tabs alone but + still close the bridgic-owned page.""" + page1 = MagicMock() + page1.close = AsyncMock() + page1.goto = AsyncMock() + page1.bring_to_front = AsyncMock() + page1.video = None + page1.is_closed = MagicMock(return_value=False) + page2 = MagicMock() + page2.close = AsyncMock() + page2.goto = AsyncMock() + page2.bring_to_front = AsyncMock() + page2.video = None + page2.is_closed = MagicMock(return_value=False) + mock_pw, _, mock_ctx, bridgic_pg = self._make_cdp_mocks(pages=[page1, page2]) + bridgic_pg.is_closed = MagicMock(return_value=False) + browser = await self._start_cdp_browser(mock_pw) + + await browser.close() + + # User tabs MUST NOT be closed or navigated. + page1.close.assert_not_called() + page2.close.assert_not_called() + page1.goto.assert_not_called() + page2.goto.assert_not_called() + # bridgic's owned page MUST be closed. 
+ bridgic_pg.close.assert_awaited_once() + + # --- Owned CDP context: pages and context ARE cleaned up --- + + @pytest.mark.asyncio + async def test_cdp_owned_context_closes_page(self): + """Owned CDP context: page.close() IS called (bridgic created it).""" + mock_pw, mock_cdp_browser, mock_ctx, mock_pg = self._make_cdp_mocks(contexts_count=0) + mock_cdp_browser.contexts = [] + browser = await self._start_cdp_browser(mock_pw) + + assert browser._cdp_context_owned is True + await browser.close() + + # page.close() is called (either directly or via extra-pages loop) + assert mock_pg.close.await_count >= 1 + + @pytest.mark.asyncio + async def test_cdp_owned_context_closes_pages(self): + """Owned CDP context: pages are closed in parallel before context close.""" + mock_pw, mock_cdp_browser, mock_ctx, mock_pg = self._make_cdp_mocks(contexts_count=0) + mock_cdp_browser.contexts = [] + browser = await self._start_cdp_browser(mock_pw) + + await browser.close() + + # Pages are closed via parallel asyncio.gather with run_before_unload=False + mock_pg.close.assert_awaited() + + +# ───────────────────────────────────────────────────────────────────────────── +# Browser._cdp_owned_pages — bridgic-owned page tracking in CDP mode +# ───────────────────────────────────────────────────────────────────────────── + +class TestBrowserCdpOwnedPages: + """Tests for the _cdp_owned_pages tracking set used to clean up only + bridgic-created tabs in CDP borrowed-context mode.""" + + def _make_cdp_mocks(self, pages=None, contexts_count=1, new_page_factory=None): + """Return (mock_pw, mock_cdp_browser, mock_ctx, mock_page) tuple.""" + mock_pg = MagicMock() + mock_pg.bring_to_front = AsyncMock() + mock_pg.close = AsyncMock() + mock_pg.goto = AsyncMock() + mock_pg.video = None + mock_pg.is_closed = MagicMock(return_value=False) + + mock_ctx = MagicMock() + mock_ctx.add_init_script = AsyncMock() + if new_page_factory is not None: + mock_ctx.new_page = AsyncMock(side_effect=new_page_factory) + 
else: + mock_ctx.new_page = AsyncMock(return_value=mock_pg) + mock_ctx.pages = pages if pages is not None else [mock_pg] + mock_ctx.close = AsyncMock() + mock_ctx.tracing = MagicMock() + mock_ctx.tracing.stop = AsyncMock() + + mock_cdp_browser = MagicMock() + mock_cdp_browser.contexts = [mock_ctx] * contexts_count + mock_cdp_browser.new_context = AsyncMock(return_value=mock_ctx) + mock_cdp_browser.close = AsyncMock() + + mock_pw = MagicMock() + mock_pw.chromium.connect_over_cdp = AsyncMock(return_value=mock_cdp_browser) + mock_pw.stop = AsyncMock() + + return mock_pw, mock_cdp_browser, mock_ctx, mock_pg + + async def _start_cdp_browser(self, mock_pw, **kwargs): + browser = Browser( + cdp_url="ws://localhost:9222/devtools/browser/abc", + stealth=False, + **kwargs, + ) + with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_pw) + await browser._start() + return browser + + @staticmethod + def _make_page(name="page"): + p = MagicMock(name=name) + p.bring_to_front = AsyncMock() + p.close = AsyncMock() + p.goto = AsyncMock() + p.video = None + p.is_closed = MagicMock(return_value=False) + return p + + @pytest.mark.asyncio + async def test_cdp_owned_page_added_on_start(self): + """After _start() in CDP mode, _cdp_owned_pages contains exactly the + bridgic-owned page.""" + mock_pw, _, _, mock_pg = self._make_cdp_mocks(pages=[self._make_page("user")]) + browser = await self._start_cdp_browser(mock_pw) + + assert len(browser._cdp_owned_pages) == 1 + assert browser._page in browser._cdp_owned_pages + assert browser._page is mock_pg + + @pytest.mark.asyncio + async def test_cdp_borrowed_close_keeps_borrowed_pages_closes_owned(self): + """Two borrowed user tabs survive close(); the bridgic-owned page is closed.""" + u1 = self._make_page("u1") + u2 = self._make_page("u2") + mock_pw, _, _, bridgic_pg = self._make_cdp_mocks(pages=[u1, u2]) + browser = await self._start_cdp_browser(mock_pw) + + await 
browser.close()
+
+        u1.close.assert_not_called()
+        u2.close.assert_not_called()
+        bridgic_pg.close.assert_awaited_once()
+
+    @pytest.mark.asyncio
+    async def test_cdp_owned_page_already_closed_handled(self):
+        """If the bridgic-owned page was already closed (user closed via Chrome
+        UI), close() must NOT call page.close() on it and must not raise."""
+        mock_pw, _, _, bridgic_pg = self._make_cdp_mocks(pages=[self._make_page("u")])
+        browser = await self._start_cdp_browser(mock_pw)
+
+        # Simulate user closing the bridgic tab manually before close().
+        bridgic_pg.is_closed = MagicMock(return_value=True)
+
+        await browser.close()
+
+        bridgic_pg.close.assert_not_called()
+        assert browser._cdp_owned_pages == set()
+
+    @pytest.mark.asyncio
+    async def test_cdp_owned_page_close_failure_recorded(self):
+        """If a bridgic-owned page raises during close(), the error is recorded
+        in _last_shutdown_errors and close() still completes."""
+        mock_pw, _, _, bridgic_pg = self._make_cdp_mocks(pages=[self._make_page("u")])
+        bridgic_pg.close = AsyncMock(side_effect=RuntimeError("boom"))
+        browser = await self._start_cdp_browser(mock_pw)
+
+        await browser.close()
+
+        # close() must complete and record the failure
+        assert any("boom" in e for e in browser._last_shutdown_errors), (
+            f"expected error to mention 'boom', got: {browser._last_shutdown_errors}"
+        )
+        # downstream cleanup still ran
+        assert browser._page is None
+        assert browser._context is None
+
+    @pytest.mark.asyncio
+    async def test_cdp_new_page_helper_tracks_owned(self):
+        """In CDP mode, calling _new_page() also adds the new page to
+        _cdp_owned_pages so the CLI's `new-tab` command stays trackable."""
+        # Build both pages with the class's _make_page() helper, exactly as the
+        # sibling tests do, instead of hand-rolling two identical mock set-ups.
+        first = self._make_page("first")
+        second = self._make_page("second")
+
+        # Mutable counter so the factory closure can tell its first call
+        # (made during _start()) from every later call.
+        call_count = {"n": 0}
+
+        async def _factory():
+            call_count["n"] += 1
+            return first if call_count["n"] == 1 else second
+
+        mock_pw, _, _, _ = self._make_cdp_mocks(
+            pages=[self._make_page("u")], new_page_factory=_factory
+        )
+        browser = await self._start_cdp_browser(mock_pw)
+
+        # _start() consumed first via new_page()
+        assert browser._page is first
+        assert first in browser._cdp_owned_pages
+        assert len(browser._cdp_owned_pages) == 1
+
+        # second invocation via _new_page() should track the new page too
+        result = await browser._new_page()
+        assert result is second
+        assert second in browser._cdp_owned_pages
+        assert len(browser._cdp_owned_pages) == 2
+
+    @pytest.mark.asyncio
+    async def test_navigate_to_recovery_tracks_owned_in_cdp(self):
+        """In CDP mode, navigate_to() recovery (when self._page is None) must
+        track the recovery-created page in _cdp_owned_pages."""
+        first = self._make_page("first")
+        recovery = self._make_page("recovery")
+
+        call_count = {"n": 0}
+
+        async def _factory():
+            call_count["n"] += 1
+            return first if call_count["n"] == 1 else recovery
+
+        mock_pw, _, _, _ = self._make_cdp_mocks(
+            pages=[self._make_page("u")], new_page_factory=_factory
+        )
+        browser = await self._start_cdp_browser(mock_pw)
+
+        assert browser._page is first
+        assert first in browser._cdp_owned_pages
+
+        # Simulate "all tabs closed" — navigate_to() will create a new page.
+ browser._page = None + await browser.navigate_to("about:blank") + + assert browser._page is recovery + assert recovery in browser._cdp_owned_pages + assert len(browser._cdp_owned_pages) == 2 + + @pytest.mark.asyncio + async def test_owned_pages_cleared_after_close(self): + """_cdp_owned_pages must be reset to an empty set after close().""" + mock_pw, _, _, _ = self._make_cdp_mocks(pages=[self._make_page("u")]) + browser = await self._start_cdp_browser(mock_pw) + + assert len(browser._cdp_owned_pages) == 1 + await browser.close() + + assert browser._cdp_owned_pages == set() + + @pytest.mark.asyncio + async def test_close_page_discards_from_cdp_owned_pages(self): + """_close_page() must remove the closed page from _cdp_owned_pages. + + Regression guard for M1: long-running CDP daemon would otherwise leak + Page references for every new-tab + close-tab cycle, holding onto + frames, listeners, and cached resources. + """ + first = self._make_page("first") + second = self._make_page("second") + # _close_page() awaits title() on the new active page after switching. + second.title = AsyncMock(return_value="second-title") + second.url = "https://second.example/" + + call_count = {"n": 0} + + async def _factory(): + call_count["n"] += 1 + return first if call_count["n"] == 1 else second + + mock_pw, _, mock_ctx, _ = self._make_cdp_mocks( + pages=[self._make_page("u")], new_page_factory=_factory + ) + browser = await self._start_cdp_browser(mock_pw) + + # _start() consumed first via new_page() and added it to the owned set. + assert first in browser._cdp_owned_pages + assert browser._page is first + + # Open a second bridgic-owned tab; ownership grows to 2. + second_returned = await browser._new_page() + assert second_returned is second + assert second in browser._cdp_owned_pages + assert len(browser._cdp_owned_pages) == 2 + + # After _close_page(first), the post-close switch reads + # self._context.pages[0] — make that be `second` so title() works. 
+ mock_ctx.pages = [second] + + await browser._close_page(first) + + # first must vanish from the owned set; second must remain. + assert first not in browser._cdp_owned_pages + assert second in browser._cdp_owned_pages + assert len(browser._cdp_owned_pages) == 1 + + @pytest.mark.asyncio + async def test_cdp_owned_pages_unused_in_launch_mode(self): + """Launch mode (no cdp_url) must NOT touch _cdp_owned_pages — the + tracking logic is CDP-specific and must not leak elsewhere.""" + mock_pg = MagicMock() + mock_pg.bring_to_front = AsyncMock() + mock_pg.close = AsyncMock() + mock_pg.video = None + mock_pg.is_closed = MagicMock(return_value=False) + + new_pg = MagicMock() + new_pg.bring_to_front = AsyncMock() + new_pg.close = AsyncMock() + new_pg.video = None + new_pg.is_closed = MagicMock(return_value=False) + + mock_ctx = MagicMock() + mock_ctx.add_init_script = AsyncMock() + mock_ctx.new_page = AsyncMock(return_value=new_pg) + mock_ctx.pages = [mock_pg] + mock_ctx.close = AsyncMock() + mock_ctx.tracing = MagicMock() + mock_ctx.tracing.stop = AsyncMock() + + mock_browser = MagicMock() + mock_browser.new_context = AsyncMock(return_value=mock_ctx) + mock_browser.close = AsyncMock() + + mock_pw = MagicMock() + mock_pw.chromium.launch_persistent_context = AsyncMock(return_value=mock_ctx) + mock_pw.chromium.launch = AsyncMock(return_value=mock_browser) + mock_pw.stop = AsyncMock() + + browser = Browser(stealth=False, clear_user_data=True) + with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_pw) + await browser._start() + # Launch mode: no tracking + assert browser._cdp_owned_pages == set() + await browser._new_page() + # _new_page should NOT add to the set in launch mode + assert browser._cdp_owned_pages == set() + + +# ───────────────────────────────────────────────────────────────────────────── +# find_cdp_url() — system proxy bypass for loopback hosts +# 
─────────────────────────────────────────────────────────────────────────────
+
+class TestFindCdpUrlProxyBypass:
+    """find_cdp_url(mode="port") must bypass the system HTTP proxy when probing
+    loopback hosts (localhost / 127.0.0.1 / ::1) so a misconfigured proxy cannot
+    return misleading 502 errors for ports that are simply not listening.
+
+    Remote hosts (cloud browser services, SSH-tunneled CDP, etc.) MUST keep
+    proxy support."""
+
+    def _make_fake_response(self, payload: dict):
+        """Return an object with a .read() method returning JSON bytes."""
+        import json as _json
+        fake = MagicMock()
+        fake.read = MagicMock(return_value=_json.dumps(payload).encode("utf-8"))
+        return fake
+
+    def test_find_cdp_url_localhost_bypasses_system_proxy(self, monkeypatch):
+        """Localhost probes must build an opener with empty ProxyHandler({})."""
+        import urllib.request
+        from bridgic.browser.session import find_cdp_url
+
+        # Set a system proxy that would obviously break the probe if used.
+        monkeypatch.setenv("HTTP_PROXY", "http://127.0.0.1:1")
+        monkeypatch.setenv("HTTPS_PROXY", "http://127.0.0.1:1")
+
+        # Every build_opener(*handlers) call is recorded here so the handler
+        # list can be inspected after the probe.
+        captured_handlers = []
+
+        def _spy_build_opener(*handlers):
+            captured_handlers.append(handlers)
+            opener = MagicMock()
+            opener.open = MagicMock(
+                return_value=self._make_fake_response(
+                    {"webSocketDebuggerUrl": "ws://localhost:9222/devtools/browser/abc"}
+                )
+            )
+            return opener
+
+        # Track whether default urlopen was used (it must NOT be).
+        urlopen_calls = []
+
+        def _spy_urlopen(*args, **kwargs):
+            urlopen_calls.append((args, kwargs))
+            return self._make_fake_response(
+                {"webSocketDebuggerUrl": "ws://localhost:9222/devtools/browser/abc"}
+            )
+
+        # monkeypatch restores both attributes at teardown, so the real
+        # callables do not need to be saved by hand.
+        monkeypatch.setattr(urllib.request, "build_opener", _spy_build_opener)
+        monkeypatch.setattr(urllib.request, "urlopen", _spy_urlopen)
+
+        result = find_cdp_url(mode="port", host="localhost", port=9222)
+
+        assert result == "ws://localhost:9222/devtools/browser/abc"
+        # build_opener was called once for the loopback bypass path.
+        assert len(captured_handlers) == 1, (
+            f"Expected 1 build_opener call, got {len(captured_handlers)}"
+        )
+        # The handler list must contain a ProxyHandler with empty proxies dict.
+        handler_types = [type(h).__name__ for h in captured_handlers[0]]
+        assert "ProxyHandler" in handler_types, (
+            f"Expected ProxyHandler in handlers, got: {handler_types}"
+        )
+        for h in captured_handlers[0]:
+            if isinstance(h, urllib.request.ProxyHandler):
+                # Empty dict means: no proxies, bypass system config entirely.
+                assert h.proxies == {}, (
+                    f"ProxyHandler must be constructed with empty dict, got: {h.proxies}"
+                )
+        # Default urlopen must not be used for loopback hosts.
+        assert urlopen_calls == [], (
+            f"Default urlopen must not be used for localhost, got: {urlopen_calls}"
+        )
+
+    def test_find_cdp_url_127_0_0_1_bypasses_system_proxy(self, monkeypatch):
+        """127.0.0.1 must also trigger the loopback bypass path."""
+        import urllib.request
+        from bridgic.browser.session import find_cdp_url
+
+        captured_handlers = []
+
+        def _spy_build_opener(*handlers):
+            captured_handlers.append(handlers)
+            opener = MagicMock()
+            opener.open = MagicMock(
+                return_value=self._make_fake_response(
+                    {"webSocketDebuggerUrl": "ws://localhost:9222/devtools/browser/abc"}
+                )
+            )
+            return opener
+
+        monkeypatch.setattr(urllib.request, "build_opener", _spy_build_opener)
+
+        result = find_cdp_url(mode="port", host="127.0.0.1", port=9222)
+
+        # The fake payload advertises ws://localhost:...; the expected result
+        # carries the probed host 127.0.0.1 instead.
+        assert result == "ws://127.0.0.1:9222/devtools/browser/abc"
+        assert len(captured_handlers) == 1
+        assert any(
+            isinstance(h, urllib.request.ProxyHandler) and h.proxies == {}
+            for h in captured_handlers[0]
+        )
+
+    def test_find_cdp_url_remote_uses_default_opener(self, monkeypatch):
+        """Remote hosts must keep proxy support and use the default urlopen."""
+        import urllib.request
+        from bridgic.browser.session import find_cdp_url
+
+        build_opener_calls = []
+
+        def _spy_build_opener(*handlers):
+            build_opener_calls.append(handlers)
+            return MagicMock()
+
+        urlopen_calls = []
+
+        def _spy_urlopen(*args, **kwargs):
+            urlopen_calls.append((args, kwargs))
+            return self._make_fake_response(
+                {"webSocketDebuggerUrl": "ws://localhost:9222/devtools/browser/abc"}
+            )
+
+        monkeypatch.setattr(urllib.request, "build_opener", _spy_build_opener)
+        monkeypatch.setattr(urllib.request, "urlopen", _spy_urlopen)
+
+        result = find_cdp_url(mode="port", host="example.com", port=9222)
+
+        # Remote host: replace localhost in the returned URL with the actual host.
+        assert result == "ws://example.com:9222/devtools/browser/abc"
+        # Loopback bypass branch must NOT have been taken.
+ assert build_opener_calls == [], ( + f"Remote host must not call build_opener, got {build_opener_calls}" + ) + # Default urlopen must have been used exactly once. + assert len(urlopen_calls) == 1, ( + f"Expected 1 urlopen call for remote host, got {len(urlopen_calls)}" + ) + + def test_find_cdp_url_localhost_returns_connection_error_when_port_dead(self): + """End-to-end check: probing a dead local port surfaces a clean + ConnectionError that mentions the port number, not a proxy-shaped + message like '502 Bad Gateway'. + + Note: the original macOS system-proxy bug cannot be reproduced via + env-var proxies in unit tests because urllib auto-bypasses 127.0.0.1 + for env-var proxies (proxy_bypass_environment). The two preceding tests + cover the bypass mechanism directly via build_opener spying. This test + guards against regressions in the basic localhost path.""" + import socket + from bridgic.browser.session import find_cdp_url + + # Find a free port by binding then releasing it. + s = socket.socket() + try: + s.bind(("127.0.0.1", 0)) + dead_port = s.getsockname()[1] + finally: + s.close() + + with pytest.raises(ConnectionError) as exc_info: + find_cdp_url(mode="port", host="127.0.0.1", port=dead_port) + + # Error message must mention the port and not look like a proxy error. 
+ msg = str(exc_info.value) + assert str(dead_port) in msg, f"Expected port {dead_port} in error: {msg}" + assert "502" not in msg, f"Error must not mention 502 Bad Gateway: {msg}" + assert "Bad Gateway" not in msg, f"Error must not mention Bad Gateway: {msg}" + + +# ───────────────────────────────────────────────────────────────────────────── +# Public API exposure +# ───────────────────────────────────────────────────────────────────────────── + +class TestApiExposure: + """Smoke tests verifying find_cdp_url and resolve_cdp_input are callable + and present in the public API (bridgic.browser and bridgic.browser.session).""" + + def test_importable_from_bridgic_browser(self): + from bridgic.browser import find_cdp_url, resolve_cdp_input + assert callable(find_cdp_url) + assert callable(resolve_cdp_input) + + def test_importable_from_bridgic_browser_session(self): + from bridgic.browser.session import find_cdp_url, resolve_cdp_input + assert callable(find_cdp_url) + assert callable(resolve_cdp_input) + + def test_in_all(self): + import bridgic.browser as pkg + assert "find_cdp_url" in pkg.__all__ + assert "resolve_cdp_input" in pkg.__all__ diff --git a/tests/unit/test_browser_methods.py b/tests/unit/test_browser_methods.py index 206f24f..a4c439b 100644 --- a/tests/unit/test_browser_methods.py +++ b/tests/unit/test_browser_methods.py @@ -82,6 +82,14 @@ def _make_browser_with_mock_page() -> tuple: browser._dialog_handlers = {} browser._tracing_state = {} browser._video_state = {} + browser._video_recorders = {} + browser._video_session = None + # CDP-mode attributes — required by start_video / get_pages / _close_page + # which inspect them to decide whether to filter out user tabs. Tests in + # this file simulate launch-mode (non-CDP), so both default to "not CDP". 
+ browser._cdp_url = None + browser._cdp_context_owned = False + browser._cdp_owned_pages = set() browser._context = MagicMock() browser._page = MagicMock() # get_current_page() returns self._page @@ -119,3 +127,229 @@ async def test_stop_tracing_guard(): with pytest.raises(StateError) as exc_info: await browser.stop_tracing() assert exc_info.value.code == "NO_ACTIVE_TRACING" + + +@pytest.mark.asyncio +async def test_start_video_uses_window_inner_dimensions_not_viewport_size(): + """Regression: start_video() must derive its recording size from + ``window.innerWidth/innerHeight`` (queried via JS), NOT from + ``page.viewport_size``. + + In CDP attach mode bridgic never calls ``setViewportSize`` on the + foreign Chrome, so ``page.viewport_size`` returns ``None`` and the + old code fell back to a hard-coded 800×600. Chrome then captured at + the real (e.g. 16:9) window aspect ratio and downsampled to fit + within 800×600, which: + 1. blurred the page (37% downscale) + 2. left a gray strip at the bottom from ffmpeg's pad filter + Querying the page directly avoids both. + """ + browser = _make_browser_with_mock_page() + + fake_context = MagicMock() + fake_context.pages = [] # no pages → no recorders to start + fake_context.on = MagicMock() + + fake_page = MagicMock() + fake_page.context = fake_context + # Simulate CDP attach mode: viewport_size is None. + fake_page.viewport_size = None + fake_page.is_closed = MagicMock(return_value=False) + # window.innerWidth/innerHeight reports the real window — 16:9, much + # larger than the old 800×600 fallback. + fake_page.evaluate = AsyncMock(return_value={"w": 1366, "h": 768}) + browser.get_current_page = AsyncMock(return_value=fake_page) + + await browser.start_video() + + # JS query was performed. 
+ fake_page.evaluate.assert_awaited_once() + call_arg = fake_page.evaluate.await_args.args[0] + assert "innerWidth" in call_arg + assert "innerHeight" in call_arg + + # Recording size matches the queried dimensions, NOT the 800×600 + # fallback. (& ~1 rounds to even, both are already even here.) + session = browser._video_session + assert session is not None + assert session["width"] == 1366 + assert session["height"] == 768 + + # Cleanup so subsequent tests don't see a leaked session. + browser._video_session = None + browser._video_state.clear() + + +@pytest.mark.asyncio +async def test_start_video_falls_back_to_viewport_size_when_evaluate_fails(): + """If ``page.evaluate`` raises (hardened CSP, page closed mid-call, + etc.), start_video() should fall back to ``page.viewport_size`` + instead of crashing.""" + browser = _make_browser_with_mock_page() + + fake_context = MagicMock() + fake_context.pages = [] + fake_context.on = MagicMock() + + fake_page = MagicMock() + fake_page.context = fake_context + fake_page.viewport_size = {"width": 1280, "height": 800} + fake_page.is_closed = MagicMock(return_value=False) + fake_page.evaluate = AsyncMock(side_effect=RuntimeError("CSP blocked")) + browser.get_current_page = AsyncMock(return_value=fake_page) + + await browser.start_video() + + session = browser._video_session + assert session is not None + assert session["width"] == 1280 + assert session["height"] == 800 + + browser._video_session = None + browser._video_state.clear() + + +@pytest.mark.asyncio +async def test_start_video_already_active_does_not_destroy_existing_session(): + """Regression: a duplicate start_video() must raise VIDEO_ALREADY_ACTIVE + *without* tearing down the previously-started session. + + Earlier the rollback `except` block fired unconditionally, wiping out + `_video_session` and stopping every recorder in `_video_recorders` — + so calling `start_video()` twice silently destroyed the user's first + recording while reporting "already active". 
+ """ + browser = _make_browser_with_mock_page() + + fake_context = MagicMock() + fake_context.pages = [] # no pages → no recorders to start + fake_context.on = MagicMock() + + fake_page = MagicMock() + fake_page.context = fake_context + fake_page.viewport_size = {"width": 800, "height": 600} + fake_page.is_closed = MagicMock(return_value=False) + browser.get_current_page = AsyncMock(return_value=fake_page) + + # First call: sets up a session. + await browser.start_video() + sentinel_session = browser._video_session + assert sentinel_session is not None + + # Second call: must error out without touching the existing session. + with pytest.raises(StateError) as exc_info: + await browser.start_video() + assert exc_info.value.code == "VIDEO_ALREADY_ACTIVE" + + assert browser._video_session is sentinel_session + assert browser._video_state # context_key entry still present + + +# --------------------------------------------------------------------------- +# CDP borrowed-context isolation: bridgic must never expose, record, or +# auto-switch to user-owned tabs in CDP borrowed mode. These tests cover +# the regression set R2-A / R2-B / R2-C found in the second-round CR. 
+# --------------------------------------------------------------------------- + +def _make_borrowed_cdp_browser_with_pages(owned_page, user_page): + """Build a Browser configured as if it had connected to a user's Chrome + via CDP, with one bridgic-owned tab and one user-owned tab in the + same context.""" + browser = _make_browser_with_mock_page() + browser._cdp_url = "ws://localhost:9222/devtools/browser/abc" + browser._cdp_context_owned = False # borrowed + browser._cdp_owned_pages = {owned_page} + fake_context = MagicMock() + # Order matters — get_pages preserves the underlying tab order + fake_context.pages = [user_page, owned_page] + browser._context = fake_context + browser._page = owned_page + return browser + + +def test_get_pages_filters_user_tabs_in_cdp_borrowed_mode(): + """get_pages must hide user-owned tabs when bridgic borrowed the context.""" + owned = MagicMock(name="bridgic_tab") + user = MagicMock(name="user_tab") + browser = _make_borrowed_cdp_browser_with_pages(owned, user) + + visible = browser.get_pages() + assert visible == [owned] + assert user not in visible + + +def test_get_pages_returns_all_pages_when_context_owned(): + """When bridgic owns the context (launch / persistent / owned-CDP), + every page in the context should be visible.""" + browser = _make_browser_with_mock_page() + browser._cdp_url = None # launch mode + browser._cdp_context_owned = False + p1 = MagicMock(name="p1") + p2 = MagicMock(name="p2") + browser._context.pages = [p1, p2] + + assert browser.get_pages() == [p1, p2] + + +@pytest.mark.asyncio +async def test_close_page_does_not_switch_to_user_tab_in_cdp_borrowed_mode(): + """Closing the last bridgic tab must NOT silently land self._page on + a user-owned tab — that would route subsequent commands into the + user's banking / email page. 
+ """ + owned = MagicMock(name="bridgic_tab") + owned.close = AsyncMock() + owned.title = AsyncMock(return_value="bridgic") + user = MagicMock(name="user_tab") + browser = _make_borrowed_cdp_browser_with_pages(owned, user) + + success, _msg = await browser._close_page(owned) + assert success + # No bridgic-owned pages remain → self._page must be None, + # NOT the user's still-open tab. + assert browser._page is None + + +@pytest.mark.asyncio +async def test_start_video_records_all_tabs_in_cdp_borrowed_mode(): + """start_video() MUST install a recorder on every page (including the + user's existing tabs) when bridgic is a guest on a borrowed CDP context. + """ + owned = MagicMock(name="bridgic_tab") + owned.is_closed = MagicMock(return_value=False) + + user = MagicMock(name="user_tab") + user.is_closed = MagicMock(return_value=False) + + browser = _make_browser_with_mock_page() + browser._cdp_url = "ws://localhost:9222/devtools/browser/abc" + browser._cdp_context_owned = False + browser._cdp_owned_pages = {owned} + + fake_context = MagicMock() + fake_context.pages = [owned, user] + + owned.context = fake_context + user.context = fake_context + + fake_context.on = MagicMock() + browser._context = fake_context + + started: list = [] + + async def _fake_starter(page): + started.append(page) + + browser._start_page_video_recorder = _fake_starter # type: ignore[method-assign] + browser.get_current_page = AsyncMock(return_value=owned) + owned.evaluate = AsyncMock(return_value={"w": 1280, "h": 720}) + + await browser.start_video() + + # Both bridgic-owned tab AND the user's pre-existing tab must be recorded. + assert owned in started + assert user in started + + # Cleanup: avoid leaking the fake session into other tests. 
+ browser._video_session = None + browser._video_state.clear() diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index 5f32e70..6de2dc8 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -16,8 +16,10 @@ import logging import os import stat +import tempfile from types import SimpleNamespace -from typing import Any +from pathlib import Path +from typing import Any, Dict from unittest.mock import AsyncMock, MagicMock, patch import pytest @@ -32,6 +34,10 @@ ) from bridgic.browser.cli._commands import _strip_ref, cli, SectionedGroup from bridgic.browser.cli._daemon import ( + _BROWSER_CLOSED_HINT, + _browser_closed_hint, + _cdp_reconnect, + _resolve_default_downloads_dir, _dispatch, _handle_connection, _handle_open, @@ -234,6 +240,7 @@ def make_browser() -> MagicMock: "trace": [], "video": [], }) + b._cdp_url = None # explicit None so _dispatch treats as local-launch mode return b @@ -417,15 +424,73 @@ class TestCliCommandRouting: def test_open(self): _, sc = invoke(["open", "https://example.com"]) - sc.assert_called_once_with("open", {"url": "https://example.com"}, headed=False, clear_user_data=False) + sc.assert_called_once_with("open", {"url": "https://example.com"}, headed=False, clear_user_data=False, cdp_url=None) def test_open_headed(self): _, sc = invoke(["open", "--headed", "https://example.com"]) - sc.assert_called_once_with("open", {"url": "https://example.com"}, headed=True, clear_user_data=False) + sc.assert_called_once_with("open", {"url": "https://example.com"}, headed=True, clear_user_data=False, cdp_url=None) def test_open_clear_user_data(self): _, sc = invoke(["open", "--clear-user-data", "https://example.com"]) - sc.assert_called_once_with("open", {"url": "https://example.com"}, headed=False, clear_user_data=True) + sc.assert_called_once_with("open", {"url": "https://example.com"}, headed=False, clear_user_data=True, cdp_url=None) + + def test_open_cdp_ws_url_passthrough(self): + """--cdp ws://... 
passes through without resolution.""" + with patch("bridgic.browser.session._browser.find_cdp_url") as mock_find: + _, sc = invoke(["open", "--cdp", "ws://localhost:9222/devtools/browser/abc", "https://example.com"]) + mock_find.assert_not_called() + sc.assert_called_once_with( + "open", {"url": "https://example.com"}, + headed=False, clear_user_data=False, cdp_url="ws://localhost:9222/devtools/browser/abc", + ) + + def test_open_cdp_port_number(self): + """--cdp 9222 calls find_cdp_url(mode='port', host='localhost', port=9222).""" + with patch("bridgic.browser.session._browser.find_cdp_url", return_value="ws://localhost:9222/devtools/browser/xyz") as mock_find: + _, sc = invoke(["open", "--cdp", "9222", "https://example.com"]) + mock_find.assert_called_once_with(mode="port", host="localhost", port=9222) + sc.assert_called_once_with( + "open", {"url": "https://example.com"}, + headed=False, clear_user_data=False, cdp_url="ws://localhost:9222/devtools/browser/xyz", + ) + + def test_open_cdp_http_url(self): + """--cdp http://host:port calls find_cdp_url(mode='port', host=..., port=...).""" + with patch("bridgic.browser.session._browser.find_cdp_url", return_value="ws://1.2.3.4:9222/devtools/browser/xyz") as mock_find: + _, sc = invoke(["open", "--cdp", "http://1.2.3.4:9222", "https://example.com"]) + mock_find.assert_called_once_with(mode="port", host="1.2.3.4", port=9222) + sc.assert_called_once_with( + "open", {"url": "https://example.com"}, + headed=False, clear_user_data=False, cdp_url="ws://1.2.3.4:9222/devtools/browser/xyz", + ) + + def test_open_cdp_auto(self): + """--cdp auto calls find_cdp_url(mode='scan').""" + with patch("bridgic.browser.session._browser.find_cdp_url", return_value="ws://localhost:57234/devtools/browser/auto") as mock_find: + _, sc = invoke(["open", "--cdp", "auto", "https://example.com"]) + mock_find.assert_called_once_with(mode="scan") + sc.assert_called_once_with( + "open", {"url": "https://example.com"}, + headed=False, 
clear_user_data=False, cdp_url="ws://localhost:57234/devtools/browser/auto", + ) + + def test_open_cdp_wss_url_passthrough(self): + """--cdp wss://... passes through unchanged (cloud services like Browserless, Steel.dev).""" + wss_url = "wss://production.browserless.io/chromium/playwright?token=abc123" + with patch("bridgic.browser.session._browser.find_cdp_url") as mock_find: + _, sc = invoke(["open", "--cdp", wss_url, "https://example.com"]) + mock_find.assert_not_called() + sc.assert_called_once_with( + "open", {"url": "https://example.com"}, + headed=False, clear_user_data=False, cdp_url=wss_url, + ) + + def test_open_cdp_invalid_format_shows_error(self): + """--cdp with unrecognized format prints an error and does NOT call send_command.""" + result, sc = invoke(["open", "--cdp", "not-a-valid-cdp", "https://example.com"]) + sc.assert_not_called() + assert result.exit_code == 1 # _err() calls sys.exit(1) + assert "Invalid --cdp value" in result.output def test_back(self): _, sc = invoke(["back"]) @@ -2239,3 +2304,599 @@ def test_unix_transport_inject_auth_is_noop(self): result = t.inject_auth(req) assert result == req assert "_token" not in result + + +# ───────────────────────────────────────────────────────────────────────────── +# resolve_cdp_input unit tests +# ───────────────────────────────────────────────────────────────────────────── + +class TestResolveCdpInput: + """Direct unit tests for resolve_cdp_input() — all branches.""" + + def test_port_number_calls_find_cdp_url(self, monkeypatch): + monkeypatch.setattr( + "bridgic.browser.session._browser.find_cdp_url", + lambda mode, host, port: f"ws://{host}:{port}/fake", + ) + from bridgic.browser.session._browser import resolve_cdp_input + assert resolve_cdp_input("9222") == "ws://localhost:9222/fake" + + def test_ws_passthrough(self): + from bridgic.browser.session._browser import resolve_cdp_input + url = "ws://localhost:9222/devtools/browser/abc123" + assert resolve_cdp_input(url) == url + + def 
test_wss_passthrough(self): + from bridgic.browser.session._browser import resolve_cdp_input + url = "wss://production.browserless.io/chromium/playwright?token=xyz" + assert resolve_cdp_input(url) == url + + def test_http_url_calls_find_cdp_url(self, monkeypatch): + monkeypatch.setattr( + "bridgic.browser.session._browser.find_cdp_url", + lambda mode, host, port: f"ws://{host}:{port}/fake", + ) + from bridgic.browser.session._browser import resolve_cdp_input + assert resolve_cdp_input("http://remote.host:9222") == "ws://remote.host:9222/fake" + + def test_auto_calls_scan(self, monkeypatch): + monkeypatch.setattr( + "bridgic.browser.session._browser.find_cdp_url", + lambda mode: "ws://localhost:54321/fake", + ) + from bridgic.browser.session._browser import resolve_cdp_input + assert resolve_cdp_input("auto") == "ws://localhost:54321/fake" + + def test_scan_alias_calls_scan(self, monkeypatch): + monkeypatch.setattr( + "bridgic.browser.session._browser.find_cdp_url", + lambda mode: "ws://localhost:54321/fake", + ) + from bridgic.browser.session._browser import resolve_cdp_input + assert resolve_cdp_input("scan") == "ws://localhost:54321/fake" + + def test_invalid_raises_value_error(self): + from bridgic.browser.session._browser import resolve_cdp_input + with pytest.raises(ValueError, match="Invalid --cdp value"): + resolve_cdp_input("not-a-valid-input") + + def test_whitespace_stripped(self, monkeypatch): + monkeypatch.setattr( + "bridgic.browser.session._browser.find_cdp_url", + lambda mode, host, port: f"ws://{host}:{port}/fake", + ) + from bridgic.browser.session._browser import resolve_cdp_input + assert resolve_cdp_input(" 9222 ") == "ws://localhost:9222/fake" + + +# ───────────────────────────────────────────────────────────────────────────── +# find_cdp_url() unit tests +# ───────────────────────────────────────────────────────────────────────────── + +class TestFindCdpUrl: + """Direct unit tests for find_cdp_url() — all branches, all mocked.""" + + def 
test_service_mode_returns_ws_endpoint(self): + from bridgic.browser import find_cdp_url + url = "wss://my-cloud-service.io/browser?token=abc" + assert find_cdp_url(mode="service", ws_endpoint=url) == url + + def test_service_mode_no_endpoint_raises_value_error(self): + from bridgic.browser import find_cdp_url + with pytest.raises(ValueError, match="ws_endpoint is required"): + find_cdp_url(mode="service") + + def _make_loopback_opener_patch(self, mock_resp): + """Return a patch context manager for urllib.request.build_opener that + returns an opener whose .open() returns mock_resp. Used for loopback + host tests because find_cdp_url() bypasses the system proxy via + ProxyHandler({}) on loopback hosts.""" + opener = MagicMock() + opener.open = MagicMock(return_value=mock_resp) + return patch("urllib.request.build_opener", return_value=opener), opener + + def test_port_mode_returns_ws_url(self): + from bridgic.browser import find_cdp_url + mock_resp = MagicMock() + mock_resp.read.return_value = b'{"webSocketDebuggerUrl": "ws://localhost:9222/devtools/browser/abc"}' + patch_ctx, _ = self._make_loopback_opener_patch(mock_resp) + with patch_ctx: + url = find_cdp_url(mode="port", port=9222) + assert url == "ws://localhost:9222/devtools/browser/abc" + + def test_port_remote_host_replaces_localhost(self): + from bridgic.browser import find_cdp_url + mock_resp = MagicMock() + mock_resp.read.return_value = b'{"webSocketDebuggerUrl": "ws://localhost:9222/devtools/browser/abc"}' + with patch("urllib.request.urlopen", return_value=mock_resp) as mock_open: + url = find_cdp_url(mode="port", host="192.168.1.100", port=9222) + assert url == "ws://192.168.1.100:9222/devtools/browser/abc" + mock_open.assert_called_once_with("http://192.168.1.100:9222/json/version", timeout=5) + + def test_port_localhost_uppercase_keeps_localhost(self): + """host='LOCALHOST' must be normalized to lowercase 'localhost' so the + ws_url is not rewritten with a misleading uppercase host. 
Regression + guard for L2.""" + from bridgic.browser import find_cdp_url + mock_resp = MagicMock() + mock_resp.read.return_value = b'{"webSocketDebuggerUrl": "ws://localhost:9222/devtools/browser/abc"}' + patch_ctx, _ = self._make_loopback_opener_patch(mock_resp) + with patch_ctx: + url = find_cdp_url(mode="port", host="LOCALHOST", port=9222) + # Must NOT contain uppercase LOCALHOST in the result. + assert url == "ws://localhost:9222/devtools/browser/abc" + + def test_port_chrome_not_running_raises_connection_error(self): + import urllib.error + from bridgic.browser import find_cdp_url + # Loopback path: patch build_opener so .open() raises URLError. + opener = MagicMock() + opener.open = MagicMock(side_effect=urllib.error.URLError("Connection refused")) + with patch("urllib.request.build_opener", return_value=opener): + with pytest.raises(ConnectionError, match="--remote-debugging-port=9222"): + find_cdp_url(mode="port", port=9222) + + def test_port_invalid_json_raises_value_error(self): + from bridgic.browser import find_cdp_url + mock_resp = MagicMock() + mock_resp.read.return_value = b'hey' + patch_ctx, _ = self._make_loopback_opener_patch(mock_resp) + with patch_ctx: + with pytest.raises(ValueError, match="Failed to parse /json/version response"): + find_cdp_url(mode="port", port=9222) + + def test_port_missing_key_raises_value_error(self): + from bridgic.browser import find_cdp_url + mock_resp = MagicMock() + mock_resp.read.return_value = b'{"Browser": "Chrome/124"}' + patch_ctx, _ = self._make_loopback_opener_patch(mock_resp) + with patch_ctx: + with pytest.raises(ValueError, match="Failed to parse /json/version response"): + find_cdp_url(mode="port", port=9222) + + def test_port_urlopen_uses_timeout_5(self): + from bridgic.browser import find_cdp_url + mock_resp = MagicMock() + mock_resp.read.return_value = b'{"webSocketDebuggerUrl": "ws://localhost:9222/fake"}' + # Loopback path uses build_opener(...).open(url, timeout=5). 
+ patch_ctx, opener = self._make_loopback_opener_patch(mock_resp) + with patch_ctx: + find_cdp_url(mode="port", port=9222) + _, kwargs = opener.open.call_args + assert kwargs.get("timeout") == 5 + + def test_scan_mode_returns_url_from_file(self): + from bridgic.browser import find_cdp_url + fake_url = "ws://localhost:9222/devtools/browser/chrome-uuid" + with patch("bridgic.browser.session._browser._read_devtools_active_port", return_value=fake_url): + url = find_cdp_url(mode="scan") + assert url == fake_url + + def test_scan_mode_returns_first_active(self): + from bridgic.browser import find_cdp_url + chrome_url = "ws://localhost:9222/devtools/browser/chrome-uuid" + + def fake_read(base): + if "Chrome" in base and "Canary" not in base and "Beta" not in base: + return chrome_url + return None + + with patch("bridgic.browser.session._browser._read_devtools_active_port", side_effect=fake_read): + result = find_cdp_url(mode="scan") + assert result == chrome_url + + def test_scan_mode_no_profiles_raises_runtime_error(self): + from bridgic.browser import find_cdp_url + with patch("bridgic.browser.session._browser._read_devtools_active_port", return_value=None): + with pytest.raises(RuntimeError, match="--remote-debugging-port=9222"): + find_cdp_url(mode="scan") + + def test_scan_mode_unsupported_platform_raises_runtime_error(self): + from bridgic.browser import find_cdp_url + with patch("sys.platform", "freebsd"): + with pytest.raises(RuntimeError, match="not supported on platform"): + find_cdp_url(mode="scan") + + +# ───────────────────────────────────────────────────────────────────────────── +# _read_devtools_active_port() unit tests +# ───────────────────────────────────────────────────────────────────────────── + +class TestReadDevToolsActivePort: + """Unit tests for _read_devtools_active_port() using tempfile.""" + + def _fn(self): + from bridgic.browser.session._browser import _read_devtools_active_port + return _read_devtools_active_port + + def 
test_valid_file_returns_ws_url(self): + fn = self._fn() + with tempfile.TemporaryDirectory() as d: + open(os.path.join(d, "DevToolsActivePort"), "w").write("9222\n/devtools/browser/abc123\n") + result = fn(d) + assert result == "ws://localhost:9222/devtools/browser/abc123" + + def test_missing_file_returns_none(self): + fn = self._fn() + result = fn("/tmp/nonexistent-bridgic-profile-xyz-abc") + assert result is None + + def test_single_line_file_returns_none(self): + fn = self._fn() + with tempfile.TemporaryDirectory() as d: + with open(os.path.join(d, "DevToolsActivePort"), "w") as f: + f.write("9222\n") + result = fn(d) + assert result is None + + def test_no_read_permission_returns_none(self): + fn = self._fn() + with tempfile.TemporaryDirectory() as d: + p = os.path.join(d, "DevToolsActivePort") + with open(p, "w") as f: + f.write("9222\n/devtools/browser/abc\n") + os.chmod(p, 0o000) + try: + result = fn(d) + finally: + os.chmod(p, 0o644) + assert result is None + + +# ───────────────────────────────────────────────────────────────────────────── +# _browser_closed_hint() unit tests +# ───────────────────────────────────────────────────────────────────────────── + +class TestBrowserClosedHint: + """Unit tests for _browser_closed_hint().""" + + def test_no_cdp_returns_default_hint(self): + assert _browser_closed_hint(None) == _BROWSER_CLOSED_HINT + assert _browser_closed_hint() == _BROWSER_CLOSED_HINT + + @pytest.mark.parametrize("host,url_host", [ + ("localhost", "localhost"), + ("127.0.0.1", "127.0.0.1"), + ("::1", "[::1]"), + ]) + def test_local_host_shows_port_only(self, host, url_host): + url = f"ws://{url_host}:9222/devtools/browser/some-uuid" + msg = _browser_closed_hint(url) + assert "9222" in msg + assert "some-uuid" not in msg + assert "Local Chrome" in msg + assert "bridgic-browser close" in msg + + def test_remote_host_exposes_full_url(self): + url = "wss://my-cloud.io/browser?token=secret123" + msg = _browser_closed_hint(url) + assert url in msg + 
assert "Remote browser session" in msg + assert "bridgic-browser close" in msg + + +# ───────────────────────────────────────────────────────────────────────────── +# find_cdp_url() — invalid mode +# ───────────────────────────────────────────────────────────────────────────── + +class TestFindCdpUrlInvalidMode: + def test_invalid_mode_raises_value_error(self): + from bridgic.browser import find_cdp_url + with pytest.raises(ValueError, match="Unknown mode"): + find_cdp_url(mode="bogus") + + +# ───────────────────────────────────────────────────────────────────────────── +# _cdp_reconnect() unit tests +# ───────────────────────────────────────────────────────────────────────────── + +class TestCdpReconnect: + """Unit tests for _cdp_reconnect() using AsyncMock.""" + + async def test_close_and_start_succeed_returns_true(self): + browser = MagicMock() + browser.close = AsyncMock() + browser._start = AsyncMock() + result = await _cdp_reconnect(browser) + assert result is True + browser.close.assert_awaited_once() + browser._start.assert_awaited_once() + + async def test_close_raises_ignored_start_called_returns_true(self): + browser = MagicMock() + browser.close = AsyncMock(side_effect=RuntimeError("already closed")) + browser._start = AsyncMock() + result = await _cdp_reconnect(browser) + assert result is True + browser._start.assert_awaited_once() + + async def test_start_fails_returns_false(self): + browser = MagicMock() + browser.close = AsyncMock() + browser._start = AsyncMock(side_effect=ConnectionError("Chrome not found")) + result = await _cdp_reconnect(browser) + assert result is False + + async def test_close_and_start_both_fail_returns_false(self): + browser = MagicMock() + browser.close = AsyncMock(side_effect=RuntimeError("gone")) + browser._start = AsyncMock(side_effect=ConnectionError("still gone")) + result = await _cdp_reconnect(browser) + assert result is False + + +# ───────────────────────────────────────────────────────────────────────────── +# 
_dispatch() CDP reconnect logic +# ───────────────────────────────────────────────────────────────────────────── + +class TestDispatchCdpReconnect: + """Tests for _dispatch() CDP reconnect retry logic.""" + + def _make_cdp_browser(self, cdp_url="ws://cloud.io/browser/abc"): + b = make_browser() + b._cdp_url = cdp_url + return b + + async def test_cdp_browser_closed_reconnect_success_retry_success(self): + browser = self._make_cdp_browser() + call_count = 0 + + async def navigate(url): + nonlocal call_count + call_count += 1 + if call_count == 1: + raise RuntimeError("browser has been closed") + return "Navigated" + + browser.navigate_to = navigate + with patch("bridgic.browser.cli._daemon._cdp_reconnect", new=AsyncMock(return_value=True)): + resp = await _dispatch(browser, "open", {"url": "x"}) + + assert resp["success"] is True + assert resp["result"] == "Navigated" + assert call_count == 2 + + async def test_cdp_browser_closed_reconnect_success_retry_fails(self): + browser = self._make_cdp_browser() + browser.navigate_to = AsyncMock( + side_effect=RuntimeError("browser has been closed") + ) + with patch("bridgic.browser.cli._daemon._cdp_reconnect", new=AsyncMock(return_value=True)): + resp = await _dispatch(browser, "open", {"url": "x"}) + + assert resp["success"] is False + assert resp["error_code"] == "BROWSER_CLOSED" + assert browser.navigate_to.await_count == 2 + + async def test_cdp_browser_closed_reconnect_fails(self): + browser = self._make_cdp_browser() + browser.navigate_to = AsyncMock( + side_effect=RuntimeError("browser has been closed") + ) + with patch("bridgic.browser.cli._daemon._cdp_reconnect", new=AsyncMock(return_value=False)): + resp = await _dispatch(browser, "open", {"url": "x"}) + + assert resp["success"] is False + assert resp["error_code"] == "BROWSER_CLOSED" + browser.navigate_to.assert_awaited_once() + + async def test_cdp_close_command_no_reconnect(self): + browser = self._make_cdp_browser() + browser.inspect_pending_close_artifacts = 
MagicMock(return_value={ + "session_dir": "/tmp/close-test", "trace": [], "video": [], + }) + mock_reconnect = AsyncMock(return_value=True) + with patch("bridgic.browser.cli._daemon._cdp_reconnect", new=mock_reconnect): + with patch("bridgic.browser.cli._daemon._HANDLERS", { + "close": AsyncMock(side_effect=RuntimeError("browser has been closed")) + }): + resp = await _dispatch(browser, "close", {}) + mock_reconnect.assert_not_called() + assert resp["error_code"] == "BROWSER_CLOSED" + + async def test_non_cdp_browser_closed_no_reconnect(self): + browser = make_browser() # _cdp_url = None + browser.navigate_to = AsyncMock( + side_effect=RuntimeError("browser has been closed") + ) + mock_reconnect = AsyncMock(return_value=True) + with patch("bridgic.browser.cli._daemon._cdp_reconnect", new=mock_reconnect): + resp = await _dispatch(browser, "open", {"url": "x"}) + mock_reconnect.assert_not_called() + assert resp["error_code"] == "BROWSER_CLOSED" + browser.navigate_to.assert_awaited_once() + + async def test_cdp_non_browser_closed_error_no_reconnect(self): + browser = self._make_cdp_browser() + browser.navigate_to = AsyncMock( + side_effect=OperationError(code="ELEMENT_NOT_FOUND", message="element not found") + ) + mock_reconnect = AsyncMock(return_value=True) + with patch("bridgic.browser.cli._daemon._cdp_reconnect", new=mock_reconnect): + resp = await _dispatch(browser, "open", {"url": "x"}) + mock_reconnect.assert_not_called() + assert resp["error_code"] == "ELEMENT_NOT_FOUND" + browser.navigate_to.assert_awaited_once() + + async def test_cdp_plain_exception_with_closed_message_triggers_reconnect(self): + browser = self._make_cdp_browser() + call_count = 0 + + async def navigate(url): + nonlocal call_count + call_count += 1 + if call_count == 1: + raise RuntimeError("Target page, context or browser has been closed") + return "Navigated" + + browser.navigate_to = navigate + with patch("bridgic.browser.cli._daemon._cdp_reconnect", new=AsyncMock(return_value=True)): + 
resp = await _dispatch(browser, "open", {"url": "x"}) + + assert resp["success"] is True + assert call_count == 2 + + +# ───────────────────────────────────────────────────────────────────────────── +# _spawn_daemon() env var passing +# ───────────────────────────────────────────────────────────────────────────── + +class TestSpawnDaemonEnv: + """Unit tests for _spawn_daemon() environment variable propagation.""" + + def _fake_popen_factory(self, captured_env: dict): + """Return a fake Popen that records the env and signals READY.""" + def fake_popen(cmd, **kwargs): + captured_env.update(kwargs.get("env", {})) + m = MagicMock() + m.stdout = MagicMock() + lines = [b"BRIDGIC_DAEMON_READY\n"] + m.stdout.__iter__ = lambda self: iter(lines) + m.stdout.close = MagicMock() + return m + return fake_popen + + def _run_spawn(self, captured_env, **kwargs): + from bridgic.browser.cli._client import _spawn_daemon + fake_popen = self._fake_popen_factory(captured_env) + with patch("subprocess.Popen", side_effect=fake_popen): + _spawn_daemon(**kwargs) + + def test_cdp_url_sets_env_var(self): + captured_env: dict = {} + self._run_spawn(captured_env, cdp_url="ws://localhost:9222/devtools/browser/abc") + assert captured_env.get("BRIDGIC_CDP") == "ws://localhost:9222/devtools/browser/abc" + + def test_no_cdp_url_env_var_absent(self): + captured_env: dict = {} + with patch.dict(os.environ, {}, clear=False): + os.environ.pop("BRIDGIC_CDP", None) + self._run_spawn(captured_env) + assert "BRIDGIC_CDP" not in captured_env + + def test_headed_and_cdp_url_both_set(self): + captured_env: dict = {} + self._run_spawn( + captured_env, + headed=True, + cdp_url="ws://localhost:9222/devtools/browser/abc", + ) + assert captured_env.get("BRIDGIC_HEADLESS") is None or "BRIDGIC_BROWSER_JSON" in captured_env + assert captured_env.get("BRIDGIC_CDP") == "ws://localhost:9222/devtools/browser/abc" + + +# --------------------------------------------------------------------------- +# TestDaemonDownloadsPath — 
default downloads_path injection in run_daemon() +# --------------------------------------------------------------------------- + + +class TestDaemonDownloadsPath: + """Verify that run_daemon() auto-injects downloads_path when not configured.""" + + @pytest.mark.asyncio + async def test_daemon_injects_default_downloads_path(self): + """When no config sets downloads_path, daemon injects a default.""" + captured_kwargs: Dict[str, Any] = {} + + def fake_browser(**kw: Any) -> MagicMock: + captured_kwargs.update(kw) + b = MagicMock() + b.get_config.return_value = kw + return b + + with ( + patch("bridgic.browser.cli._daemon._load_config_sources", return_value={}), + patch("bridgic.browser.cli._daemon._resolve_default_downloads_dir", return_value=Path.home() / "Downloads"), + patch("bridgic.browser.session._browser.Browser", side_effect=fake_browser), + patch("bridgic.browser.cli._daemon.get_transport") as mock_transport, + patch("bridgic.browser.cli._daemon.write_run_info"), + patch("bridgic.browser.cli._daemon.asyncio.Event") as mock_event, + ): + mock_event.return_value.wait = AsyncMock() + mock_server = AsyncMock() + mock_transport.return_value.start_server = AsyncMock(return_value=mock_server) + mock_transport.return_value.build_run_info.return_value = {} + mock_transport.return_value.verify_auth = None + mock_event.return_value.is_set.return_value = True + + with patch("bridgic.browser.cli._daemon.logger"): + from bridgic.browser.cli._daemon import run_daemon + + with patch("sys.stdout"): + try: + await run_daemon() + except Exception: + pass + + assert "downloads_path" in captured_kwargs + assert captured_kwargs["downloads_path"] == str(Path.home() / "Downloads") + + @pytest.mark.asyncio + async def test_daemon_respects_config_downloads_path(self): + """When config already sets downloads_path, daemon does not override.""" + captured_kwargs: Dict[str, Any] = {} + + def fake_browser(**kw: Any) -> MagicMock: + captured_kwargs.update(kw) + b = MagicMock() + 
b.get_config.return_value = kw + return b + + with ( + patch("bridgic.browser.cli._daemon._load_config_sources", return_value={"downloads_path": "/custom/path"}), + patch("bridgic.browser.session._browser.Browser", side_effect=fake_browser), + patch("bridgic.browser.cli._daemon.get_transport") as mock_transport, + patch("bridgic.browser.cli._daemon.write_run_info"), + patch("bridgic.browser.cli._daemon.asyncio.Event") as mock_event, + ): + mock_event.return_value.wait = AsyncMock() + mock_server = AsyncMock() + mock_transport.return_value.start_server = AsyncMock(return_value=mock_server) + mock_transport.return_value.build_run_info.return_value = {} + mock_transport.return_value.verify_auth = None + mock_event.return_value.is_set.return_value = True + + with patch("bridgic.browser.cli._daemon.logger"): + from bridgic.browser.cli._daemon import run_daemon + + with patch("sys.stdout"): + try: + await run_daemon() + except Exception: + pass + + # Should NOT have downloads_path injected (config already has it) + assert captured_kwargs.get("downloads_path") is None + + def test_resolve_default_downloads_dir_prefers_user_downloads(self, tmp_path: Path): + """When ~/Downloads is writable, it is preferred.""" + fake_home = tmp_path / "home" + fake_downloads = fake_home / "Downloads" + fake_downloads.mkdir(parents=True) + + with patch("bridgic.browser.cli._daemon.Path.home", return_value=fake_home): + result = _resolve_default_downloads_dir() + + assert result == fake_downloads + + def test_resolve_default_downloads_dir_fallback(self, tmp_path: Path): + """When ~/Downloads is not writable, falls back to app-managed dir.""" + fake_home = tmp_path / "home" + fake_downloads = fake_home / "Downloads" + fake_downloads.mkdir(parents=True) + # Make ~/Downloads read-only so probe.touch() fails + fake_downloads.chmod(0o444) + + fallback_dir = tmp_path / "fallback" + + with ( + patch("bridgic.browser.cli._daemon.Path.home", return_value=fake_home), + 
patch("bridgic.browser.cli._daemon.BRIDGIC_DOWNLOADS_DIR", fallback_dir), + ): + result = _resolve_default_downloads_dir() + + assert result == fallback_dir + assert fallback_dir.exists() + + # Restore permissions for cleanup + fake_downloads.chmod(0o755) diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index bf173ed..810490a 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -12,6 +12,8 @@ import os from unittest.mock import MagicMock, patch +import pytest + from bridgic.browser._config import _load_config_sources, load_browser_config @@ -135,6 +137,35 @@ def test_invalid_env_var_ignored(self, tmp_path): assert cfg == {} + def test_non_dict_user_config_ignored(self, tmp_path): + """User config with non-dict JSON (e.g. array) is ignored.""" + fake_browser_home = tmp_path / ".bridgic" + fake_browser_home.mkdir() + (fake_browser_home / "bridgic-browser.json").write_text('[1, 2, 3]') + + mock_local = MagicMock() + mock_local.is_file.return_value = False + with ( + patch("bridgic.browser._config.BRIDGIC_BROWSER_HOME", fake_browser_home), + patch("bridgic.browser._config.Path", return_value=mock_local), + patch.dict(os.environ, {}, clear=False), + ): + os.environ.pop("BRIDGIC_BROWSER_JSON", None) + cfg = _load_config_sources() + + assert cfg == {} + + def test_non_dict_env_var_ignored(self, tmp_path): + """BRIDGIC_BROWSER_JSON with non-dict JSON (e.g. 
string) is ignored.""" + fake_browser_home = tmp_path / ".bridgic" + fake_browser_home.mkdir() + + p1, p2 = _no_config_patches(fake_browser_home) + with p1, p2, patch.dict(os.environ, {"BRIDGIC_BROWSER_JSON": '"just a string"'}, clear=False): + cfg = _load_config_sources() + + assert cfg == {} + # ── load_browser_config ─────────────────────────────────────────────── @@ -459,3 +490,115 @@ def test_explicit_params_do_not_leak_to_extra_kwargs(self, tmp_path): assert "headless" not in browser._extra_kwargs assert "channel" not in browser._extra_kwargs assert "locale" not in browser._extra_kwargs + + def test_config_cdp_url_loaded(self, tmp_path): + """Browser() picks up cdp_url from config file.""" + from bridgic.browser.session._browser import Browser + + fake_browser_home = tmp_path / ".bridgic" + fake_browser_home.mkdir() + (fake_browser_home / "bridgic-browser.json").write_text( + json.dumps({"cdp_url": "ws://localhost:9222/devtools/browser/abc"}) + ) + + mock_local = MagicMock() + mock_local.is_file.return_value = False + with ( + patch("bridgic.browser._config.BRIDGIC_BROWSER_HOME", fake_browser_home), + patch("bridgic.browser._config.Path", return_value=mock_local), + patch.dict(os.environ, {}, clear=False), + ): + os.environ.pop("BRIDGIC_BROWSER_JSON", None) + browser = Browser() + + assert browser._cdp_url == "ws://localhost:9222/devtools/browser/abc" + assert "cdp_url" not in browser._extra_kwargs + + def test_explicit_cdp_url_overrides_config(self, tmp_path): + """Browser(cdp_url=...) 
overrides config's cdp_url.""" + from bridgic.browser.session._browser import Browser + + fake_browser_home = tmp_path / ".bridgic" + fake_browser_home.mkdir() + (fake_browser_home / "bridgic-browser.json").write_text( + json.dumps({"cdp_url": "ws://localhost:9222/devtools/browser/old"}) + ) + + mock_local = MagicMock() + mock_local.is_file.return_value = False + with ( + patch("bridgic.browser._config.BRIDGIC_BROWSER_HOME", fake_browser_home), + patch("bridgic.browser._config.Path", return_value=mock_local), + patch.dict(os.environ, {}, clear=False), + ): + os.environ.pop("BRIDGIC_BROWSER_JSON", None) + browser = Browser(cdp_url="ws://localhost:9222/devtools/browser/new") + + assert browser._cdp_url == "ws://localhost:9222/devtools/browser/new" + assert "cdp_url" not in browser._extra_kwargs + + # ── M2: cdp_url normalization in __init__ ───────────────────────── + + def test_config_cdp_url_port_string_normalized(self, tmp_path): + """Browser() should normalize a bare port number from config to ws:// URL. + + Regression guard for M2: previously, a config like ``{"cdp_url":"9222"}`` + was passed unchanged to Playwright's connect_over_cdp(), which crashes + deep in the driver because the value is not a WebSocket URL. 
+ """ + from bridgic.browser.session._browser import Browser + + fake_browser_home = tmp_path / ".bridgic" + fake_browser_home.mkdir() + (fake_browser_home / "bridgic-browser.json").write_text( + json.dumps({"cdp_url": "9222"}) + ) + + mock_local = MagicMock() + mock_local.is_file.return_value = False + with ( + patch("bridgic.browser._config.BRIDGIC_BROWSER_HOME", fake_browser_home), + patch("bridgic.browser._config.Path", return_value=mock_local), + patch.dict(os.environ, {}, clear=False), + patch( + "bridgic.browser.session._browser.find_cdp_url", + return_value="ws://localhost:9222/devtools/browser/zzz", + ), + ): + os.environ.pop("BRIDGIC_BROWSER_JSON", None) + browser = Browser() + + assert browser._cdp_url == "ws://localhost:9222/devtools/browser/zzz" + + def test_config_cdp_url_invalid_raises(self, tmp_path): + """Browser() with malformed cdp_url in config should raise InvalidInputError.""" + from bridgic.browser.session._browser import Browser + from bridgic.browser.errors import InvalidInputError + + fake_browser_home = tmp_path / ".bridgic" + fake_browser_home.mkdir() + (fake_browser_home / "bridgic-browser.json").write_text( + json.dumps({"cdp_url": "this-is-not-valid"}) + ) + + mock_local = MagicMock() + mock_local.is_file.return_value = False + with ( + patch("bridgic.browser._config.BRIDGIC_BROWSER_HOME", fake_browser_home), + patch("bridgic.browser._config.Path", return_value=mock_local), + patch.dict(os.environ, {}, clear=False), + ): + os.environ.pop("BRIDGIC_BROWSER_JSON", None) + with pytest.raises(InvalidInputError, match="Failed to resolve cdp_url"): + Browser() + + def test_explicit_cdp_url_port_normalized(self, monkeypatch): + """Browser(cdp_url='9222') as an explicit argument is also normalized.""" + from bridgic.browser.session._browser import Browser + + monkeypatch.setattr( + "bridgic.browser.session._browser.find_cdp_url", + lambda mode, host, port: f"ws://{host}:{port}/devtools/browser/normalized", + ) + browser = 
Browser(cdp_url="9222") + assert browser._cdp_url == "ws://localhost:9222/devtools/browser/normalized" diff --git a/tests/unit/test_tools.py b/tests/unit/test_tools.py index 91e122b..e4e24b2 100644 --- a/tests/unit/test_tools.py +++ b/tests/unit/test_tools.py @@ -398,7 +398,6 @@ class TestTabManagementTools: @pytest.mark.asyncio async def test_new_tab(self, mock_browser): """Test new_tab with no URL opens a blank page.""" - mock_browser._new_page.return_value = MagicMock() result = await Browser.new_tab(mock_browser) @@ -409,7 +408,6 @@ async def test_new_tab(self, mock_browser): @pytest.mark.asyncio async def test_new_tab_with_url(self, mock_browser): """Test new_tab with URL.""" - mock_browser._new_page.return_value = MagicMock() result = await Browser.new_tab(mock_browser, "https://example.com") @@ -1514,19 +1512,136 @@ async def test_stop_tracing(self, mock_browser, temp_dir): @pytest.mark.asyncio async def test_start_video(self, mock_browser): - """Test starting video recording.""" + """Test starting video recording — multi-page: all existing + pages in the context get a per-page recorder, and context.on('page') + is subscribed so future pages auto-record too. + """ + import types + + page = mock_browser._page + page.viewport_size = {"width": 800, "height": 600} + page.is_closed = MagicMock(return_value=False) + mock_context = page.context + mock_context.pages = [page] + mock_context.on = MagicMock() + mock_browser._video_state = {} + mock_browser._video_recorders = {} + mock_browser._video_session = None + # Bind the real helper so start_video can drive _start_page_video_recorder. 
+ mock_browser._start_page_video_recorder = types.MethodType( + Browser._start_page_video_recorder, mock_browser, + ) - result = await Browser.start_video(mock_browser) + mock_recorder = MagicMock() + mock_recorder.start = AsyncMock() + with patch("bridgic.browser.session._browser._video_recorder_mod.VideoRecorder", return_value=mock_recorder): + result = await Browser.start_video(mock_browser) - assert result == "Video recording started" + assert "Video recording started" in result + assert "1 page" in result + assert mock_browser._video_recorders[page] is mock_recorder + assert mock_browser._video_session is not None + # context.on('page', handler) must be registered for auto-recording + # of newly opened tabs. + assert mock_context.on.called + assert mock_context.on.call_args.args[0] == "page" @pytest.mark.asyncio async def test_stop_video(self, mock_browser): - """Test stopping video recording.""" + """Test stopping video recording when no session is active.""" + mock_browser._video_recorders = {} + mock_browser._video_session = None + mock_browser._video_state = {} with pytest.raises(StateError) as exc_info: await Browser.stop_video(mock_browser) assert exc_info.value.code == "NO_ACTIVE_RECORDING" + @pytest.mark.asyncio + async def test_start_video_records_all_pages(self, mock_browser): + """start_video should record every existing page, not just current.""" + import types + + page1 = mock_browser._page + page1.viewport_size = {"width": 800, "height": 600} + page1.is_closed = MagicMock(return_value=False) + + page2 = MagicMock() + page2.viewport_size = {"width": 800, "height": 600} + page2.is_closed = MagicMock(return_value=False) + page2.context = page1.context + + mock_context = page1.context + mock_context.pages = [page1, page2] + mock_context.on = MagicMock() + mock_browser._video_state = {} + mock_browser._video_recorders = {} + mock_browser._video_session = None + mock_browser._start_page_video_recorder = types.MethodType( + 
Browser._start_page_video_recorder, mock_browser, + ) + + created_recorders = [] + + def _factory(context, page, output_path, size): + rec = MagicMock() + rec.start = AsyncMock() + rec.output_path = output_path + created_recorders.append(rec) + return rec + + with patch( + "bridgic.browser.session._browser._video_recorder_mod.VideoRecorder", + side_effect=_factory, + ): + result = await Browser.start_video(mock_browser) + + assert "2 pages" in result + assert len(mock_browser._video_recorders) == 2 + assert page1 in mock_browser._video_recorders + assert page2 in mock_browser._video_recorders + assert len(created_recorders) == 2 + for rec in created_recorders: + rec.start.assert_awaited_once() + + @pytest.mark.asyncio + async def test_stop_video_returns_multiple_paths(self, mock_browser, tmp_path): + """stop_video should stop all page recorders and return all paths.""" + from bridgic.browser.session import _browser as browser_module + + mock_browser._context.remove_listener = MagicMock() + context_key = browser_module._get_context_key(mock_browser._context) + mock_browser._video_state = {context_key: True} + # Bind real static helpers — otherwise `self._resolve_multi_video_dests` + # on a MagicMock returns another MagicMock (truthy) and the code + # takes the wrong branch. 
+ mock_browser._resolve_multi_video_dests = Browser._resolve_multi_video_dests + mock_browser._move_video_local = Browser._move_video_local + + page1 = MagicMock() + page2 = MagicMock() + rec1 = MagicMock() + rec1.stop = AsyncMock(return_value=str(tmp_path / "a.webm")) + rec2 = MagicMock() + rec2.stop = AsyncMock(return_value=str(tmp_path / "b.webm")) + (tmp_path / "a.webm").write_bytes(b"") + (tmp_path / "b.webm").write_bytes(b"") + + mock_browser._video_recorders = {page1: rec1, page2: rec2} + mock_browser._video_session = { + "width": 800, "height": 600, "context": mock_browser._context, + "page_listener": lambda *_: None, + } + + result = await Browser.stop_video(mock_browser) + + rec1.stop.assert_awaited_once() + rec2.stop.assert_awaited_once() + assert "Video files saved" in result + assert str(tmp_path / "a.webm") in result + assert str(tmp_path / "b.webm") in result + assert mock_browser._video_recorders == {} + assert mock_browser._video_session is None + # ==================== State Tools Tests ==================== class TestStateTools: diff --git a/tests/unit/test_video_recorder.py b/tests/unit/test_video_recorder.py new file mode 100644 index 0000000..799e23b --- /dev/null +++ b/tests/unit/test_video_recorder.py @@ -0,0 +1,248 @@ +"""Unit tests for the CDP screencast VideoRecorder.""" + +import asyncio +import os +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from bridgic.browser.session._video_recorder import ( + VideoRecorder, + _create_white_jpeg, + _find_ffmpeg, +) + + +# --------------------------------------------------------------------------- +# _find_ffmpeg +# --------------------------------------------------------------------------- + +class TestFindFfmpeg: + def test_returns_system_ffmpeg(self, tmp_path: Path) -> None: + """Falls back to system ffmpeg when no Playwright ffmpeg found.""" + with patch.dict(os.environ, {"PLAYWRIGHT_BROWSERS_PATH": str(tmp_path)}): + with 
patch("shutil.which", return_value="/usr/bin/ffmpeg"): + assert _find_ffmpeg() == "/usr/bin/ffmpeg" + + def test_raises_when_not_found(self, tmp_path: Path) -> None: + with patch.dict(os.environ, {"PLAYWRIGHT_BROWSERS_PATH": str(tmp_path)}): + with patch("shutil.which", return_value=None): + with pytest.raises(FileNotFoundError, match="ffmpeg not found"): + _find_ffmpeg() + + def test_finds_playwright_ffmpeg(self, tmp_path: Path) -> None: + """Finds ffmpeg in Playwright cache directory.""" + ffmpeg_dir = tmp_path / "ffmpeg-1011" + ffmpeg_dir.mkdir() + ffmpeg_bin = ffmpeg_dir / "ffmpeg-mac" + ffmpeg_bin.touch() + with patch.dict(os.environ, {"PLAYWRIGHT_BROWSERS_PATH": str(tmp_path)}): + with patch("platform.system", return_value="Darwin"): + assert _find_ffmpeg() == str(ffmpeg_bin) + + def test_picks_highest_numeric_version_not_lexicographic(self, tmp_path: Path) -> None: + """Regression: ffmpeg-1011 must beat ffmpeg-999 (numeric, not lex). + + Lexicographic sort would pick 'ffmpeg-999' because '9' > '1'. The + production code must extract the numeric part and sort numerically. + """ + for rev in ("999", "1011", "1000"): + d = tmp_path / f"ffmpeg-{rev}" + d.mkdir() + (d / "ffmpeg-mac").touch() + # Distractor: a non-version directory must be ignored. 
+ (tmp_path / "ffmpeg-").mkdir() + with patch.dict(os.environ, {"PLAYWRIGHT_BROWSERS_PATH": str(tmp_path)}): + with patch("platform.system", return_value="Darwin"): + resolved = _find_ffmpeg() + assert resolved == str(tmp_path / "ffmpeg-1011" / "ffmpeg-mac") + + +# --------------------------------------------------------------------------- +# _create_white_jpeg +# --------------------------------------------------------------------------- + +class TestCreateWhiteJpeg: + def test_returns_bytes(self) -> None: + data = _create_white_jpeg(100, 100) + assert isinstance(data, bytes) + assert len(data) > 0 + + def test_starts_with_jpeg_soi(self) -> None: + """JPEG data must start with SOI marker 0xFFD8.""" + data = _create_white_jpeg(200, 150) + assert data[:2] == b"\xff\xd8" + + def test_fallback_without_pillow(self) -> None: + """Even without Pillow, a valid JPEG is returned.""" + with patch.dict("sys.modules", {"PIL": None, "PIL.Image": None}): + # Force ImportError path + import importlib + from bridgic.browser.session import _video_recorder as mod + # Call the function — it should use the fallback bytes + data = mod._create_white_jpeg(1, 1) + assert data[:2] == b"\xff\xd8" + + +# --------------------------------------------------------------------------- +# VideoRecorder +# --------------------------------------------------------------------------- + +class TestVideoRecorder: + def _make_recorder(self, tmp_path: Path) -> VideoRecorder: + ctx = MagicMock() + page = MagicMock() + output = str(tmp_path / "test.webm") + return VideoRecorder(ctx, page, output, (800, 600)) + + def test_init_validates_extension(self, tmp_path: Path) -> None: + with pytest.raises(ValueError, match="must have .webm extension"): + VideoRecorder(MagicMock(), MagicMock(), str(tmp_path / "bad.mp4"), (800, 600)) + + def test_init_sets_state(self, tmp_path: Path) -> None: + rec = self._make_recorder(tmp_path) + assert rec.is_stopped is False + assert rec.output_path == str(tmp_path / "test.webm") + 
+ @pytest.mark.asyncio + async def test_stop_returns_immediately_when_already_stopped(self, tmp_path: Path) -> None: + rec = self._make_recorder(tmp_path) + rec._is_stopped = True + path = await rec.stop() + assert path == rec.output_path + + @pytest.mark.asyncio + async def test_start_kills_ffmpeg_on_cdp_failure(self, tmp_path: Path) -> None: + """If CDP session creation fails, ffmpeg process must be killed.""" + rec = self._make_recorder(tmp_path) + + mock_proc = MagicMock() + mock_proc.kill = MagicMock() + mock_proc.stdin = MagicMock() + + with patch("bridgic.browser.session._video_recorder._find_ffmpeg", return_value="/usr/bin/ffmpeg"): + with patch("asyncio.create_subprocess_exec", new_callable=AsyncMock, return_value=mock_proc): + rec._context.new_cdp_session = AsyncMock(side_effect=RuntimeError("CDP failed")) + with pytest.raises(RuntimeError, match="CDP failed"): + await rec.start() + # ffmpeg must have been killed + mock_proc.kill.assert_called_once() + assert rec._ffmpeg is None + + def test_write_frame_queues_frames(self, tmp_path: Path) -> None: + """_write_frame should queue repeated frames based on timestamp diff.""" + rec = self._make_recorder(tmp_path) + # First frame — sets _first_frame_ts + rec._write_frame(b"frame1", 1000.0) + assert rec._last_frame is not None + assert rec._last_frame[0] == b"frame1" + assert len(rec._frame_queue) == 0 # no repeat yet + + # Second frame 1 second later — should queue ~25 repeats of frame1 + rec._write_frame(b"frame2", 1001.0) + assert len(rec._frame_queue) == 25 # 25 fps * 1 second + assert all(f == b"frame1" for f in rec._frame_queue) + + def test_write_frame_empty_sentinel_pads(self, tmp_path: Path) -> None: + """Empty frame sentinel should pad with last frame data.""" + rec = self._make_recorder(tmp_path) + rec._write_frame(b"frame1", 1000.0) + rec._frame_queue.clear() + + # Empty sentinel 0.5s later + rec._write_frame(b"", 1000.5) + # Should queue ~12 repeats (floor(0.5 * 25) = 12) + assert 
len(rec._frame_queue) == 12 + assert all(f == b"frame1" for f in rec._frame_queue) + + def test_write_frame_ignores_when_stopped(self, tmp_path: Path) -> None: + rec = self._make_recorder(tmp_path) + rec._is_stopped = True + rec._write_frame(b"data", 1000.0) + assert rec._last_frame is None + + @pytest.mark.asyncio + async def test_flush_queue_writes_to_ffmpeg(self, tmp_path: Path) -> None: + rec = self._make_recorder(tmp_path) + mock_stdin = MagicMock() + mock_stdin.is_closing = MagicMock(return_value=False) + mock_stdin.write = MagicMock() + mock_stdin.drain = AsyncMock() + mock_proc = MagicMock() + mock_proc.stdin = mock_stdin + rec._ffmpeg = mock_proc + + rec._frame_queue = [b"a", b"b", b"c"] + await rec._flush_queue() + + assert mock_stdin.write.call_count == 3 + assert mock_stdin.drain.await_count == 3 + assert len(rec._frame_queue) == 0 + + @pytest.mark.asyncio + async def test_send_frame_handles_write_error(self, tmp_path: Path) -> None: + rec = self._make_recorder(tmp_path) + mock_stdin = MagicMock() + mock_stdin.is_closing = MagicMock(return_value=False) + mock_stdin.write = MagicMock(side_effect=BrokenPipeError("pipe closed")) + mock_stdin.drain = AsyncMock() + mock_proc = MagicMock() + mock_proc.stdin = mock_stdin + rec._ffmpeg = mock_proc + + # Should not raise + await rec._send_frame(b"data") + + @pytest.mark.asyncio + async def test_send_frame_skips_when_stdin_closing(self, tmp_path: Path) -> None: + rec = self._make_recorder(tmp_path) + mock_stdin = MagicMock() + mock_stdin.is_closing = MagicMock(return_value=True) + mock_stdin.write = MagicMock() + mock_proc = MagicMock() + mock_proc.stdin = mock_stdin + rec._ffmpeg = mock_proc + + await rec._send_frame(b"data") + mock_stdin.write.assert_not_called() + + @pytest.mark.asyncio + async def test_start_uses_devnull_for_stdout_stderr(self, tmp_path: Path) -> None: + """ffmpeg stdout/stderr must be DEVNULL to avoid pipe-buffer back-pressure. 
+ + Regression guard for M3: PIPE without a reader fills the OS pipe buffer + (~64 KB on Linux) when ffmpeg writes errors, which then blocks ffmpeg's + next write() call, which back-pressures stdin.drain(), which deadlocks + the recorder's stop() path. + """ + rec = self._make_recorder(tmp_path) + + captured: dict = {} + + async def fake_create(*args, **kwargs): + captured.update(kwargs) + m = MagicMock() + m.stdin = MagicMock() + m.stdin.is_closing = MagicMock(return_value=False) + m.kill = MagicMock() + return m + + rec._context.new_cdp_session = AsyncMock() + rec._context.new_cdp_session.return_value.on = MagicMock() + rec._context.new_cdp_session.return_value.send = AsyncMock() + + with patch( + "bridgic.browser.session._video_recorder._find_ffmpeg", + return_value="/usr/bin/ffmpeg", + ): + with patch( + "asyncio.create_subprocess_exec", + side_effect=fake_create, + ): + await rec.start() + + assert captured.get("stdout") == asyncio.subprocess.DEVNULL + assert captured.get("stderr") == asyncio.subprocess.DEVNULL + # stdin must remain PIPE — bridgic feeds JPEG bytes into it. + assert captured.get("stdin") == asyncio.subprocess.PIPE diff --git a/uv.lock b/uv.lock index 4250d91..9669faa 100644 --- a/uv.lock +++ b/uv.lock @@ -196,7 +196,7 @@ wheels = [ [[package]] name = "bridgic-browser" -version = "0.0.3" +version = "0.0.4.dev1" source = { editable = "." 
} dependencies = [ { name = "bridgic-core" }, From eb8f85601044a95f81e815580a985d92b86624bd Mon Sep 17 00:00:00 2001 From: NiceCode666 Date: Wed, 8 Apr 2026 17:32:41 +0800 Subject: [PATCH 02/72] refactor: remove CDP ownership guard from _start_page_video_recorder --- bridgic/browser/session/_browser.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/bridgic/browser/session/_browser.py b/bridgic/browser/session/_browser.py index 50c5bf7..76f16cf 100644 --- a/bridgic/browser/session/_browser.py +++ b/bridgic/browser/session/_browser.py @@ -6950,11 +6950,8 @@ async def _start_page_video_recorder(self, page: Page) -> None: already being recorded. Mirrors Playwright CLI's ``Context._startPageVideo`` (``tools/backend/context.ts``). - In CDP borrowed-context mode, skip pages bridgic does not own - (the user's existing tabs and any pop-ups they spawn). Recording - a user's banking / email tab without consent would be a serious - privacy violation, and contradicts the tab-ownership invariant - documented in ``docs/CDP_MODE.md``. + In CDP borrowed-context mode ALL pages in the context are recorded, + including the user's pre-existing tabs. """ if self._video_session is None: return @@ -6962,9 +6959,6 @@ async def _start_page_video_recorder(self, page: Page) -> None: return if page.is_closed(): return - # CDP borrowed context: only record bridgic-owned tabs. 
- if self._cdp_url and not self._cdp_context_owned and page not in self._cdp_owned_pages: - return output_path = self._allocate_video_temp_path() w = int(self._video_session["width"]) From 855676ee334e876a1de54e270d9db4375614aca6 Mon Sep 17 00:00:00 2001 From: NiceCode666 Date: Wed, 8 Apr 2026 17:37:08 +0800 Subject: [PATCH 03/72] feat: record all CDP pages in start_video, not just bridgic-owned tabs --- bridgic/browser/session/_browser.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/bridgic/browser/session/_browser.py b/bridgic/browser/session/_browser.py index 76f16cf..c455ccb 100644 --- a/bridgic/browser/session/_browser.py +++ b/bridgic/browser/session/_browser.py @@ -7095,17 +7095,9 @@ async def start_video( # for (const page of browserContext.pages()) # await this._startPageVideo(page); # - # CDP borrowed context: restrict to bridgic-owned pages so we - # never record the user's existing tabs. - # _start_page_video_recorder double-checks this, but - # filtering up front makes intent obvious and avoids spurious - # "page start failed" log lines. - if self._cdp_url and not self._cdp_context_owned: - existing_pages = [ - p for p in self._cdp_owned_pages if not p.is_closed() - ] - else: - existing_pages = [p for p in context.pages if not p.is_closed()] + # CDP borrowed context: all pages in the context are recorded, + # including the user's pre-existing tabs. 
+ existing_pages = [p for p in context.pages if not p.is_closed()] for p in existing_pages: try: await self._start_page_video_recorder(p) From 27e3bd4266560467e2b4bc1f2864539fd374fb17 Mon Sep 17 00:00:00 2001 From: NiceCode666 Date: Wed, 8 Apr 2026 17:41:49 +0800 Subject: [PATCH 04/72] =?UTF-8?q?docs:=20update=20CDP=5FMODE.md=20?= =?UTF-8?q?=E2=80=94=20start=5Fvideo=20now=20records=20all=20pages?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/CDP_MODE.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/CDP_MODE.md b/docs/CDP_MODE.md index 18527bd..ec1b6d6 100644 --- a/docs/CDP_MODE.md +++ b/docs/CDP_MODE.md @@ -72,12 +72,12 @@ When connecting via CDP, bridgic borrows the browser's existing default context If the remote Chrome was not started with stealth flags, bridgic's JS patches can cover some fingerprints (navigator, webdriver, plugins) but cannot modify signals that require launch arguments (e.g., Blink feature disabling). -### Video recording is restricted to bridgic-owned tabs +### Video recording covers all tabs in the context -bridgic records video via Chrome's CDP `Page.startScreencast` (piped to ffmpeg), **not** Playwright's `record_video` context option — so video recording works on borrowed contexts. There are two CDP-mode constraints worth knowing: +bridgic records video via Chrome's CDP `Page.startScreencast` (piped to ffmpeg), **not** Playwright's `record_video` context option — so video recording works on borrowed contexts. -- **Only bridgic's own tabs are recorded.** `start_video()` skips every page in the borrowed context that bridgic did not create itself, and the future-page listener applies the same filter. The user's banking, email, or chat tabs are never captured. Pop-ups (`target=_blank`) spawned by pages bridgic was driving are also untracked, and therefore not recorded either. 
-- **Recording stops cleanly without touching user tabs.** `stop_video()` only finalizes the screencast sessions for bridgic-owned pages, so no user page is closed or refreshed. +- **All pages are recorded.** `start_video()` starts a screencast session for every open page in the borrowed context, including the user's pre-existing tabs and any pages opened after recording starts. Each page is saved to its own `.webm` file. +- **Recording stops cleanly without touching user tabs.** `stop_video()` finalizes every screencast session and saves the files; no page is closed or navigated. **Tracing is not affected** — `tracing.stop()` works at any time without closing pages or contexts. @@ -93,7 +93,7 @@ bridgic records video via Chrome's CDP `Page.startScreencast` (piped to ffmpeg), | `context.close()` | Yes | **Skipped** | | `browser.close()` | Kills process | **Disconnects only** | | Save tracing artifacts | Yes | Yes | -| Save video artifacts | Yes | Yes (bridgic-owned tabs only) | +| Save video artifacts | Yes | Yes (all tabs in context) | After `close()`, the remote Chrome continues running with all of the **user's** tabs intact; only the tabs bridgic explicitly created are gone. 
From f89f5129674841b4a76334eae51160b25b85284c Mon Sep 17 00:00:00 2001 From: NiceCode666 Date: Wed, 8 Apr 2026 18:44:30 +0800 Subject: [PATCH 05/72] refactor: remove _cdp_owned_pages, CDP close() now pure disconnect Co-Authored-By: Claude Sonnet 4.6 --- .gitignore | 1 + bridgic/browser/session/_browser.py | 144 +++++++--------------------- 2 files changed, 35 insertions(+), 110 deletions(-) diff --git a/.gitignore b/.gitignore index 59d39dc..c1e6b36 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,7 @@ docs/site/ docs/debug.log docs/docs/reference docs/mkdocs.yml +docs/superpowers/ # Cursor/Editor .cursor/ diff --git a/bridgic/browser/session/_browser.py b/bridgic/browser/session/_browser.py index c455ccb..e3515a0 100644 --- a/bridgic/browser/session/_browser.py +++ b/bridgic/browser/session/_browser.py @@ -8,7 +8,7 @@ import tempfile from urllib.parse import urlparse from pathlib import Path -from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Sequence, Set, Union, NoReturn +from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Sequence, Union, NoReturn if TYPE_CHECKING: try: @@ -848,11 +848,6 @@ def __init__( # Whether bridgic created the CDP context (vs borrowing an existing one). # When True, close() will close the context; when False it only disconnects. self._cdp_context_owned = False - # Pages bridgic explicitly created inside the (possibly borrowed) CDP - # context. close() uses this to clean up bridgic's own tabs without - # touching the user's existing tabs. Unused in non-CDP modes (kept - # empty as a defensive default). - self._cdp_owned_pages: Set[Any] = set() # Browser launch parameters self._channel = channel @@ -1361,7 +1356,6 @@ async def _start(self) -> None: # this is a no-op cost. 
existing_count = len(self._context.pages) self._page = await self._context.new_page() - self._cdp_owned_pages.add(self._page) logger.info( "[CDP] connected; created new bridgic tab " "(borrowed_context=%s, preserved_existing_tabs=%d)", @@ -1688,8 +1682,6 @@ async def close(self) -> str: # just because one step was interrupted. _pending_cancel: Optional[BaseException] = None _is_cdp = self._cdp_url is not None - # True when we are a guest on someone else's browser — must not - # close pages, navigate, or destroy the borrowed context. _cdp_borrowed = _is_cdp and not self._cdp_context_owned # Auto-stop active tracing before context/page teardown so trace data is saved. @@ -1804,69 +1796,32 @@ async def close(self) -> str: except Exception as e: errors.append(f"download_manager.detach: {e}") - # Close every page in parallel (replaces the old serial - # about:blank → close walk). - # - # Why we no longer navigate to about:blank first: - # The previous code navigated each page to about:blank to stop - # service workers, then closed it. Playwright CLI does not do - # this — it just calls close() directly. ``run_before_unload= - # False`` already aborts in-flight activity, and parallel close - # is much faster than serial about:blank + close. - # - # Why asyncio.gather: - # Tab closes are independent; serializing them would compound - # the per-page timeout. Reference: Playwright's - # browserContext.ts ``close()`` also closes pages in parallel. - # - # CDP borrowed context: only close pages bridgic created itself - # (``_cdp_owned_pages``); never touch the user's existing tabs. + # Close every page in parallel. + # CDP mode: skip page cleanup entirely — just disconnect. + # The remote browser manages its own tab lifecycle. + # Launch / persistent: close all pages explicitly before context close. self._page = None - if self._context: - if _cdp_borrowed: - # Borrowed CDP context: only close pages bridgic created. - # Skip pages already closed by the user (via Chrome UI). 
- owned = [ - p for p in self._cdp_owned_pages - if not p.is_closed() - ] - if owned: - page_results = await asyncio.gather( - *(asyncio.wait_for( - p.close(run_before_unload=False), - timeout=self._PAGE_CLOSE_TIMEOUT, - ) for p in owned), - return_exceptions=True, - ) - for r in page_results: - if isinstance(r, BaseException): - if not isinstance(r, Exception) and _pending_cancel is None: - _pending_cancel = r - elif isinstance(r, Exception): - errors.append(f"cdp_owned_page.close: {r}") - else: - # Launch / persistent / owned-CDP-context: close all pages. - all_pages = list(self._context.pages) - if all_pages: - page_results = await asyncio.gather( - *(asyncio.wait_for( - p.close(run_before_unload=False), - timeout=self._PAGE_CLOSE_TIMEOUT, - ) for p in all_pages), - return_exceptions=True, - ) - for r in page_results: - if isinstance(r, BaseException): - if not isinstance(r, Exception) and _pending_cancel is None: - _pending_cancel = r - elif isinstance(r, Exception): - errors.append(f"page.close: {r}") - - # Close context - # NOTE: In persistent context mode, closing context will auto close browser - # CDP mode: only close the context if bridgic created it; borrowed contexts - # belong to the remote browser and must not be destroyed. - if self._context and not _cdp_borrowed: + if self._context and not _is_cdp: + all_pages = list(self._context.pages) + if all_pages: + page_results = await asyncio.gather( + *(asyncio.wait_for( + p.close(run_before_unload=False), + timeout=self._PAGE_CLOSE_TIMEOUT, + ) for p in all_pages), + return_exceptions=True, + ) + for r in page_results: + if isinstance(r, BaseException): + if not isinstance(r, Exception) and _pending_cancel is None: + _pending_cancel = r + elif isinstance(r, Exception): + errors.append(f"page.close: {r}") + + # Close context. + # NOTE: In persistent context mode, closing context will auto close browser. + # CDP mode: skip context.close() — just release the reference and disconnect. 
+ if self._context and not _is_cdp: _context = self._context self._context = None try: @@ -1892,7 +1847,7 @@ async def close(self) -> str: if _pending_cancel is None: _pending_cancel = e elif self._context: - # CDP borrowed context: release reference without closing + # CDP mode: release reference without closing (disconnect only) self._context = None # Close browser. @@ -1961,7 +1916,6 @@ async def close(self) -> str: self._dialog_handlers.clear() self._tracing_state.clear() self._video_state.clear() - self._cdp_owned_pages.clear() trace_paths = shutdown_artifacts.get("trace", []) video_paths = shutdown_artifacts.get("video", []) @@ -2069,8 +2023,6 @@ async def navigate_to( # All tabs were closed (e.g. via close_tab); _context is still alive. logger.info("No page is open, creating a new page in existing context") self._page = await self._context.new_page() - if self._cdp_url: - self._cdp_owned_pages.add(self._page) kwargs: Dict[str, Any] = {"wait_until": wait_until} if timeout is not None: @@ -2104,8 +2056,6 @@ async def _new_page( code="NO_BROWSER_CONTEXT", ) self._page = await self._context.new_page() - if self._cdp_url: - self._cdp_owned_pages.add(self._page) if url: await self.navigate_to(url, wait_until=wait_until, timeout=timeout) await self._page.bring_to_front() @@ -2136,26 +2086,15 @@ async def get_all_page_descs(self) -> List[PageDesc]: return page_descs def get_pages(self) -> List[Page]: - """Return the pages bridgic considers part of its session. - - Launch / persistent / CDP-with-owned-context modes: every page in - the context belongs to bridgic, so we return them all. - - CDP borrowed-context mode: bridgic is a guest on the user's real - Chrome session. 
We must only expose tabs bridgic explicitly - created (tracked in ``_cdp_owned_pages``); the user's tabs and - any pop-ups they spawn are off-limits — they should not appear - in ``get_tabs``, be selectable via ``switch_tab``, become the - fallback "current page" after ``close_tab``, or be visible to - any of the page-iterating tools. This invariant is documented - in ``docs/CDP_MODE.md``. + """Return all pages in the current browser context. + + In CDP mode bridgic operates as a guest on the remote browser, so all + tabs — including pre-existing user tabs and pop-ups spawned by pages + bridgic was driving — are part of the session and are reachable via + ``get_tabs`` / ``switch_tab``. """ if not self._context: return [] - if self._cdp_url and not self._cdp_context_owned: - # Preserve the underlying tab order so indices stay stable. - owned = self._cdp_owned_pages - return [p for p in self._context.pages if p in owned] return self._context.pages async def switch_to_page(self, page_id: str) -> tuple[bool, str]: @@ -2232,24 +2171,9 @@ async def _close_page(self, page: Page | str) -> tuple[bool, str]: except Exception as e: logger.debug("[_close_page] video recorder stop error: %s", e) - try: - await page.close() - finally: - # Drop our ownership reference now that the close attempt is - # done. Without this discard, every CDP-mode new-tab + - # close-tab cycle would leak a Page object (frames, - # listeners, cached resources) for the lifetime of the - # daemon. We discard inside `finally` so a raised - # page.close() can't leave a stale reference behind. - # discard() is a no-op in launch / persistent / - # owned-CDP-context modes where _cdp_owned_pages stays empty. - self._cdp_owned_pages.discard(page) + await page.close() # If the closed page is the current page, switch to another. 
- # Use the bridgic-visible page list (get_pages) so that in CDP - # borrowed mode we never silently land on a user tab — operating - # on the user's banking / email page after a close_tab would be - # a serious privacy violation. if self._page == page: pages = self.get_pages() self._page = pages[0] if pages else None From 5d00349cffc115ba2be94f37e68aef565646c272 Mon Sep 17 00:00:00 2001 From: NiceCode666 Date: Wed, 8 Apr 2026 18:53:00 +0800 Subject: [PATCH 06/72] refactor: remove dead _cdp_borrowed variable and update stale comments --- bridgic/browser/session/_browser.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/bridgic/browser/session/_browser.py b/bridgic/browser/session/_browser.py index e3515a0..de33b73 100644 --- a/bridgic/browser/session/_browser.py +++ b/bridgic/browser/session/_browser.py @@ -1349,9 +1349,9 @@ async def _start(self) -> None: if init_script: await self._context.add_init_script(init_script) - # Always create a new bridgic-owned tab. We never reuse a - # borrowed user tab — the very next navigate_to() would - # otherwise overwrite whatever the user was looking at. + # Always create a new tab for bridgic to drive. We never + # reuse an existing user tab — the very next navigate_to() + # would otherwise overwrite whatever the user was looking at. # In owned-context mode the new context is empty anyway, so # this is a no-op cost. existing_count = len(self._context.pages) @@ -1650,9 +1650,7 @@ async def close(self) -> str: paths included in the result. **CDP mode**: only disconnects the Playwright session from the remote - browser — pages, tabs, and borrowed contexts are left intact. A - context created by bridgic (when ``connect_over_cdp`` returned no - existing contexts) is closed normally. + browser — pages, tabs, and contexts are left intact. Safe to call even when the browser was never started — returns ``"Browser closed."`` immediately without raising. 
@@ -1682,7 +1680,6 @@ async def close(self) -> str: # just because one step was interrupted. _pending_cancel: Optional[BaseException] = None _is_cdp = self._cdp_url is not None - _cdp_borrowed = _is_cdp and not self._cdp_context_owned # Auto-stop active tracing before context/page teardown so trace data is saved. if self._context: @@ -6874,8 +6871,7 @@ async def _start_page_video_recorder(self, page: Page) -> None: already being recorded. Mirrors Playwright CLI's ``Context._startPageVideo`` (``tools/backend/context.ts``). - In CDP borrowed-context mode ALL pages in the context are recorded, - including the user's pre-existing tabs. + In CDP mode all pages in the context are recorded. """ if self._video_session is None: return From 0816cc1279ffe5a1eb6668e527eb8f5d268b02eb Mon Sep 17 00:00:00 2001 From: NiceCode666 Date: Wed, 8 Apr 2026 18:56:23 +0800 Subject: [PATCH 07/72] test: remove _cdp_owned_pages assertions and delete TestBrowserCdpOwnedPages --- tests/unit/test_browser.py | 336 +++---------------------------------- 1 file changed, 19 insertions(+), 317 deletions(-) diff --git a/tests/unit/test_browser.py b/tests/unit/test_browser.py index d5a1800..585ed10 100644 --- a/tests/unit/test_browser.py +++ b/tests/unit/test_browser.py @@ -1613,12 +1613,11 @@ async def test_cdp_always_creates_new_page_in_borrowed_context(self): mock_ctx.new_page.assert_awaited_once() assert browser._page is mock_pg assert browser._page is not page2 # CRITICAL: never hijack user's tab - assert browser._page in browser._cdp_owned_pages @pytest.mark.asyncio async def test_cdp_new_page_called_unconditionally(self): """Even when the borrowed context has no pages, _start() still calls - new_page() and tracks the result in _cdp_owned_pages.""" + new_page() to create a tab for bridgic to drive.""" mock_pw, _, mock_ctx, mock_pg = self._make_cdp_mocks(pages=[]) browser = Browser(cdp_url="ws://localhost:9222/devtools/browser/abc", stealth=False) with 
patch("bridgic.browser.session._browser.async_playwright") as mock_ap: @@ -1626,7 +1625,6 @@ async def test_cdp_new_page_called_unconditionally(self): await browser._start() mock_ctx.new_page.assert_awaited_once() assert browser._page is mock_pg - assert browser._page in browser._cdp_owned_pages @pytest.mark.asyncio async def test_download_manager_attached(self, tmp_path): @@ -1713,10 +1711,8 @@ async def _start_cdp_browser(self, mock_pw, *, cdp_url="ws://localhost:9222/devt @pytest.mark.asyncio async def test_cdp_close_does_not_close_borrowed_pages(self): - """close() in CDP borrowed context must NOT close any user tabs. - It MUST close the bridgic-owned page (the one new_page() returned).""" - # Set up two distinct pages: a borrowed user tab and the bridgic-owned - # page that mock_ctx.new_page() returns. + """close() in CDP borrowed context must NOT close any pages — bridgic + just disconnects and leaves the remote browser intact.""" borrowed_pg = MagicMock() borrowed_pg.close = AsyncMock() borrowed_pg.goto = AsyncMock() @@ -1728,16 +1724,13 @@ async def test_cdp_close_does_not_close_borrowed_pages(self): bridgic_pg.is_closed = MagicMock(return_value=False) browser = await self._start_cdp_browser(mock_pw) - # After _start: bridgic should have created its own page; borrowed page untouched. assert browser._page is bridgic_pg - assert bridgic_pg in browser._cdp_owned_pages await browser.close() - # User's borrowed tab MUST NOT be closed. + # No page is closed — bridgic only disconnects. borrowed_pg.close.assert_not_called() - # bridgic's owned page MUST be closed. 
- bridgic_pg.close.assert_awaited_once() + bridgic_pg.close.assert_not_called() @pytest.mark.asyncio async def test_cdp_close_does_not_close_borrowed_context(self): @@ -1751,8 +1744,9 @@ async def test_cdp_close_does_not_close_borrowed_context(self): mock_ctx.close.assert_not_called() @pytest.mark.asyncio - async def test_cdp_close_closes_owned_context(self): - """close() in CDP mode SHOULD close a context that bridgic created.""" + async def test_cdp_close_does_not_close_owned_context(self): + """close() in CDP mode must NOT call context.close() even when bridgic + created the context — the remote browser manages its own lifecycle.""" mock_pw, mock_cdp_browser, mock_ctx, _ = self._make_cdp_mocks(contexts_count=0) mock_cdp_browser.contexts = [] browser = await self._start_cdp_browser(mock_pw) @@ -1760,7 +1754,7 @@ async def test_cdp_close_closes_owned_context(self): assert browser._cdp_context_owned is True await browser.close() - mock_ctx.close.assert_awaited_once() + mock_ctx.close.assert_not_called() @pytest.mark.asyncio async def test_cdp_close_does_not_navigate_about_blank(self): @@ -1807,8 +1801,7 @@ async def test_cdp_close_clears_internal_references(self): @pytest.mark.asyncio async def test_cdp_close_multiple_borrowed_pages_not_closed(self): - """close() in CDP borrowed mode must leave all user tabs alone but - still close the bridgic-owned page.""" + """close() in CDP borrowed mode must not close or navigate any page.""" page1 = MagicMock() page1.close = AsyncMock() page1.goto = AsyncMock() @@ -1827,19 +1820,18 @@ async def test_cdp_close_multiple_borrowed_pages_not_closed(self): await browser.close() - # User tabs MUST NOT be closed or navigated. + # No page is closed or navigated — bridgic only disconnects. page1.close.assert_not_called() page2.close.assert_not_called() page1.goto.assert_not_called() page2.goto.assert_not_called() - # bridgic's owned page MUST be closed. 
- bridgic_pg.close.assert_awaited_once() + bridgic_pg.close.assert_not_called() - # --- Owned CDP context: pages and context ARE cleaned up --- + # --- Owned CDP context: still just disconnect, no page/context cleanup --- @pytest.mark.asyncio - async def test_cdp_owned_context_closes_page(self): - """Owned CDP context: page.close() IS called (bridgic created it).""" + async def test_cdp_owned_context_does_not_close_pages(self): + """Owned CDP context: page.close() is NOT called — bridgic only disconnects.""" mock_pw, mock_cdp_browser, mock_ctx, mock_pg = self._make_cdp_mocks(contexts_count=0) mock_cdp_browser.contexts = [] browser = await self._start_cdp_browser(mock_pw) @@ -1847,308 +1839,18 @@ async def test_cdp_owned_context_closes_page(self): assert browser._cdp_context_owned is True await browser.close() - # page.close() is called (either directly or via extra-pages loop) - assert mock_pg.close.await_count >= 1 + mock_pg.close.assert_not_called() @pytest.mark.asyncio - async def test_cdp_owned_context_closes_pages(self): - """Owned CDP context: pages are closed in parallel before context close.""" + async def test_cdp_owned_context_does_not_close_context(self): + """Owned CDP context: context.close() is NOT called — bridgic only disconnects.""" mock_pw, mock_cdp_browser, mock_ctx, mock_pg = self._make_cdp_mocks(contexts_count=0) mock_cdp_browser.contexts = [] browser = await self._start_cdp_browser(mock_pw) await browser.close() - # Pages are closed via parallel asyncio.gather with run_before_unload=False - mock_pg.close.assert_awaited() - - -# ───────────────────────────────────────────────────────────────────────────── -# Browser._cdp_owned_pages — bridgic-owned page tracking in CDP mode -# ───────────────────────────────────────────────────────────────────────────── - -class TestBrowserCdpOwnedPages: - """Tests for the _cdp_owned_pages tracking set used to clean up only - bridgic-created tabs in CDP borrowed-context mode.""" - - def _make_cdp_mocks(self, 
pages=None, contexts_count=1, new_page_factory=None): - """Return (mock_pw, mock_cdp_browser, mock_ctx, mock_page) tuple.""" - mock_pg = MagicMock() - mock_pg.bring_to_front = AsyncMock() - mock_pg.close = AsyncMock() - mock_pg.goto = AsyncMock() - mock_pg.video = None - mock_pg.is_closed = MagicMock(return_value=False) - - mock_ctx = MagicMock() - mock_ctx.add_init_script = AsyncMock() - if new_page_factory is not None: - mock_ctx.new_page = AsyncMock(side_effect=new_page_factory) - else: - mock_ctx.new_page = AsyncMock(return_value=mock_pg) - mock_ctx.pages = pages if pages is not None else [mock_pg] - mock_ctx.close = AsyncMock() - mock_ctx.tracing = MagicMock() - mock_ctx.tracing.stop = AsyncMock() - - mock_cdp_browser = MagicMock() - mock_cdp_browser.contexts = [mock_ctx] * contexts_count - mock_cdp_browser.new_context = AsyncMock(return_value=mock_ctx) - mock_cdp_browser.close = AsyncMock() - - mock_pw = MagicMock() - mock_pw.chromium.connect_over_cdp = AsyncMock(return_value=mock_cdp_browser) - mock_pw.stop = AsyncMock() - - return mock_pw, mock_cdp_browser, mock_ctx, mock_pg - - async def _start_cdp_browser(self, mock_pw, **kwargs): - browser = Browser( - cdp_url="ws://localhost:9222/devtools/browser/abc", - stealth=False, - **kwargs, - ) - with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: - mock_ap.return_value.start = AsyncMock(return_value=mock_pw) - await browser._start() - return browser - - @staticmethod - def _make_page(name="page"): - p = MagicMock(name=name) - p.bring_to_front = AsyncMock() - p.close = AsyncMock() - p.goto = AsyncMock() - p.video = None - p.is_closed = MagicMock(return_value=False) - return p - - @pytest.mark.asyncio - async def test_cdp_owned_page_added_on_start(self): - """After _start() in CDP mode, _cdp_owned_pages contains exactly the - bridgic-owned page.""" - mock_pw, _, _, mock_pg = self._make_cdp_mocks(pages=[self._make_page("user")]) - browser = await self._start_cdp_browser(mock_pw) - - assert 
len(browser._cdp_owned_pages) == 1 - assert browser._page in browser._cdp_owned_pages - assert browser._page is mock_pg - - @pytest.mark.asyncio - async def test_cdp_borrowed_close_keeps_borrowed_pages_closes_owned(self): - """Two borrowed user tabs survive close(); the bridgic-owned page is closed.""" - u1 = self._make_page("u1") - u2 = self._make_page("u2") - mock_pw, _, _, bridgic_pg = self._make_cdp_mocks(pages=[u1, u2]) - browser = await self._start_cdp_browser(mock_pw) - - await browser.close() - - u1.close.assert_not_called() - u2.close.assert_not_called() - bridgic_pg.close.assert_awaited_once() - - @pytest.mark.asyncio - async def test_cdp_owned_page_already_closed_handled(self): - """If the bridgic-owned page was already closed (user closed via Chrome - UI), close() must NOT call page.close() on it and must not raise.""" - mock_pw, _, _, bridgic_pg = self._make_cdp_mocks(pages=[self._make_page("u")]) - browser = await self._start_cdp_browser(mock_pw) - - # Simulate user closing the bridgic tab manually before close(). 
- bridgic_pg.is_closed = MagicMock(return_value=True) - - await browser.close() - - bridgic_pg.close.assert_not_called() - assert browser._cdp_owned_pages == set() - - @pytest.mark.asyncio - async def test_cdp_owned_page_close_failure_recorded(self): - """If a bridgic-owned page raises during close(), the error is recorded - in _last_shutdown_errors and close() still completes.""" - mock_pw, _, _, bridgic_pg = self._make_cdp_mocks(pages=[self._make_page("u")]) - bridgic_pg.close = AsyncMock(side_effect=RuntimeError("boom")) - browser = await self._start_cdp_browser(mock_pw) - - await browser.close() - - # close() must complete and record the failure - assert any("boom" in e for e in browser._last_shutdown_errors), ( - f"expected error to mention 'boom', got: {browser._last_shutdown_errors}" - ) - # downstream cleanup still ran - assert browser._page is None - assert browser._context is None - - @pytest.mark.asyncio - async def test_cdp_new_page_helper_tracks_owned(self): - """In CDP mode, calling _new_page() also adds the new page to - _cdp_owned_pages so the CLI's `new-tab` command stays trackable.""" - first = MagicMock(name="first") - first.bring_to_front = AsyncMock() - first.close = AsyncMock() - first.goto = AsyncMock() - first.video = None - first.is_closed = MagicMock(return_value=False) - - second = MagicMock(name="second") - second.bring_to_front = AsyncMock() - second.close = AsyncMock() - second.goto = AsyncMock() - second.video = None - second.is_closed = MagicMock(return_value=False) - - call_count = {"n": 0} - - async def _factory(): - call_count["n"] += 1 - return first if call_count["n"] == 1 else second - - mock_pw, _, _, _ = self._make_cdp_mocks( - pages=[self._make_page("u")], new_page_factory=_factory - ) - browser = await self._start_cdp_browser(mock_pw) - - # _start() consumed first via new_page() - assert browser._page is first - assert first in browser._cdp_owned_pages - assert len(browser._cdp_owned_pages) == 1 - - # second invocation via 
_new_page() should track the new page too - result = await browser._new_page() - assert result is second - assert second in browser._cdp_owned_pages - assert len(browser._cdp_owned_pages) == 2 - - @pytest.mark.asyncio - async def test_navigate_to_recovery_tracks_owned_in_cdp(self): - """In CDP mode, navigate_to() recovery (when self._page is None) must - track the recovery-created page in _cdp_owned_pages.""" - first = self._make_page("first") - recovery = self._make_page("recovery") - - call_count = {"n": 0} - - async def _factory(): - call_count["n"] += 1 - return first if call_count["n"] == 1 else recovery - - mock_pw, _, _, _ = self._make_cdp_mocks( - pages=[self._make_page("u")], new_page_factory=_factory - ) - browser = await self._start_cdp_browser(mock_pw) - - assert browser._page is first - assert first in browser._cdp_owned_pages - - # Simulate "all tabs closed" — navigate_to() will create a new page. - browser._page = None - await browser.navigate_to("about:blank") - - assert browser._page is recovery - assert recovery in browser._cdp_owned_pages - assert len(browser._cdp_owned_pages) == 2 - - @pytest.mark.asyncio - async def test_owned_pages_cleared_after_close(self): - """_cdp_owned_pages must be reset to an empty set after close().""" - mock_pw, _, _, _ = self._make_cdp_mocks(pages=[self._make_page("u")]) - browser = await self._start_cdp_browser(mock_pw) - - assert len(browser._cdp_owned_pages) == 1 - await browser.close() - - assert browser._cdp_owned_pages == set() - - @pytest.mark.asyncio - async def test_close_page_discards_from_cdp_owned_pages(self): - """_close_page() must remove the closed page from _cdp_owned_pages. - - Regression guard for M1: long-running CDP daemon would otherwise leak - Page references for every new-tab + close-tab cycle, holding onto - frames, listeners, and cached resources. 
- """ - first = self._make_page("first") - second = self._make_page("second") - # _close_page() awaits title() on the new active page after switching. - second.title = AsyncMock(return_value="second-title") - second.url = "https://second.example/" - - call_count = {"n": 0} - - async def _factory(): - call_count["n"] += 1 - return first if call_count["n"] == 1 else second - - mock_pw, _, mock_ctx, _ = self._make_cdp_mocks( - pages=[self._make_page("u")], new_page_factory=_factory - ) - browser = await self._start_cdp_browser(mock_pw) - - # _start() consumed first via new_page() and added it to the owned set. - assert first in browser._cdp_owned_pages - assert browser._page is first - - # Open a second bridgic-owned tab; ownership grows to 2. - second_returned = await browser._new_page() - assert second_returned is second - assert second in browser._cdp_owned_pages - assert len(browser._cdp_owned_pages) == 2 - - # After _close_page(first), the post-close switch reads - # self._context.pages[0] — make that be `second` so title() works. - mock_ctx.pages = [second] - - await browser._close_page(first) - - # first must vanish from the owned set; second must remain. 
- assert first not in browser._cdp_owned_pages - assert second in browser._cdp_owned_pages - assert len(browser._cdp_owned_pages) == 1 - - @pytest.mark.asyncio - async def test_cdp_owned_pages_unused_in_launch_mode(self): - """Launch mode (no cdp_url) must NOT touch _cdp_owned_pages — the - tracking logic is CDP-specific and must not leak elsewhere.""" - mock_pg = MagicMock() - mock_pg.bring_to_front = AsyncMock() - mock_pg.close = AsyncMock() - mock_pg.video = None - mock_pg.is_closed = MagicMock(return_value=False) - - new_pg = MagicMock() - new_pg.bring_to_front = AsyncMock() - new_pg.close = AsyncMock() - new_pg.video = None - new_pg.is_closed = MagicMock(return_value=False) - - mock_ctx = MagicMock() - mock_ctx.add_init_script = AsyncMock() - mock_ctx.new_page = AsyncMock(return_value=new_pg) - mock_ctx.pages = [mock_pg] - mock_ctx.close = AsyncMock() - mock_ctx.tracing = MagicMock() - mock_ctx.tracing.stop = AsyncMock() - - mock_browser = MagicMock() - mock_browser.new_context = AsyncMock(return_value=mock_ctx) - mock_browser.close = AsyncMock() - - mock_pw = MagicMock() - mock_pw.chromium.launch_persistent_context = AsyncMock(return_value=mock_ctx) - mock_pw.chromium.launch = AsyncMock(return_value=mock_browser) - mock_pw.stop = AsyncMock() - - browser = Browser(stealth=False, clear_user_data=True) - with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: - mock_ap.return_value.start = AsyncMock(return_value=mock_pw) - await browser._start() - # Launch mode: no tracking - assert browser._cdp_owned_pages == set() - await browser._new_page() - # _new_page should NOT add to the set in launch mode - assert browser._cdp_owned_pages == set() + mock_ctx.close.assert_not_called() # ───────────────────────────────────────────────────────────────────────────── From 56d77468f1096ed6a734d7cdc54303c00300aa5f Mon Sep 17 00:00:00 2001 From: NiceCode666 Date: Wed, 8 Apr 2026 18:59:05 +0800 Subject: [PATCH 08/72] test: remove _cdp_owned_pages from helpers 
and update CDP borrowed-mode assertions --- tests/unit/test_browser_methods.py | 45 +++++++++--------------------- 1 file changed, 13 insertions(+), 32 deletions(-) diff --git a/tests/unit/test_browser_methods.py b/tests/unit/test_browser_methods.py index a4c439b..379d787 100644 --- a/tests/unit/test_browser_methods.py +++ b/tests/unit/test_browser_methods.py @@ -89,7 +89,6 @@ def _make_browser_with_mock_page() -> tuple: # this file simulate launch-mode (non-CDP), so both default to "not CDP". browser._cdp_url = None browser._cdp_context_owned = False - browser._cdp_owned_pages = set() browser._context = MagicMock() browser._page = MagicMock() # get_current_page() returns self._page @@ -246,19 +245,16 @@ async def test_start_video_already_active_does_not_destroy_existing_session(): # --------------------------------------------------------------------------- -# CDP borrowed-context isolation: bridgic must never expose, record, or -# auto-switch to user-owned tabs in CDP borrowed mode. These tests cover -# the regression set R2-A / R2-B / R2-C found in the second-round CR. +# CDP borrowed-context behaviour: get_pages returns all tabs, start_video +# records all tabs, _close_page switches to the next available tab. 
# --------------------------------------------------------------------------- def _make_borrowed_cdp_browser_with_pages(owned_page, user_page): """Build a Browser configured as if it had connected to a user's Chrome - via CDP, with one bridgic-owned tab and one user-owned tab in the - same context.""" + via CDP, with two tabs in the same context.""" browser = _make_browser_with_mock_page() browser._cdp_url = "ws://localhost:9222/devtools/browser/abc" browser._cdp_context_owned = False # borrowed - browser._cdp_owned_pages = {owned_page} fake_context = MagicMock() # Order matters — get_pages preserves the underlying tab order fake_context.pages = [user_page, owned_page] @@ -267,23 +263,10 @@ def _make_borrowed_cdp_browser_with_pages(owned_page, user_page): return browser -def test_get_pages_filters_user_tabs_in_cdp_borrowed_mode(): - """get_pages must hide user-owned tabs when bridgic borrowed the context.""" - owned = MagicMock(name="bridgic_tab") - user = MagicMock(name="user_tab") - browser = _make_borrowed_cdp_browser_with_pages(owned, user) - - visible = browser.get_pages() - assert visible == [owned] - assert user not in visible - - -def test_get_pages_returns_all_pages_when_context_owned(): - """When bridgic owns the context (launch / persistent / owned-CDP), - every page in the context should be visible.""" +def test_get_pages_returns_all_context_pages(): + """get_pages() must return every page in the context regardless of how + the browser was started (launch, persistent, or CDP).""" browser = _make_browser_with_mock_page() - browser._cdp_url = None # launch mode - browser._cdp_context_owned = False p1 = MagicMock(name="p1") p2 = MagicMock(name="p2") browser._context.pages = [p1, p2] @@ -292,22 +275,21 @@ def test_get_pages_returns_all_pages_when_context_owned(): @pytest.mark.asyncio -async def test_close_page_does_not_switch_to_user_tab_in_cdp_borrowed_mode(): - """Closing the last bridgic tab must NOT silently land self._page on - a user-owned tab — that 
would route subsequent commands into the - user's banking / email page. - """ +async def test_close_page_switches_to_remaining_tab_in_cdp_borrowed_mode(): + """After closing the active tab in CDP mode, self._page must be set to + the next available page in the context (there is no ownership filter).""" owned = MagicMock(name="bridgic_tab") owned.close = AsyncMock() owned.title = AsyncMock(return_value="bridgic") user = MagicMock(name="user_tab") + user.is_closed = MagicMock(return_value=False) + user.title = AsyncMock(return_value="user-tab-title") browser = _make_borrowed_cdp_browser_with_pages(owned, user) success, _msg = await browser._close_page(owned) assert success - # No bridgic-owned pages remain → self._page must be None, - # NOT the user's still-open tab. - assert browser._page is None + # A remaining page exists → self._page switches to it. + assert browser._page is user @pytest.mark.asyncio @@ -324,7 +306,6 @@ async def test_start_video_records_all_tabs_in_cdp_borrowed_mode(): browser = _make_browser_with_mock_page() browser._cdp_url = "ws://localhost:9222/devtools/browser/abc" browser._cdp_context_owned = False - browser._cdp_owned_pages = {owned} fake_context = MagicMock() fake_context.pages = [owned, user] From bf26f6723dc6e1a58c7f98a3c953bd3d3a5e8a4d Mon Sep 17 00:00:00 2001 From: NiceCode666 Date: Wed, 8 Apr 2026 18:59:36 +0800 Subject: [PATCH 09/72] =?UTF-8?q?docs:=20update=20CDP=5FMODE.md=20?= =?UTF-8?q?=E2=80=94=20close()=20is=20pure=20disconnect,=20all=20tabs=20vi?= =?UTF-8?q?sible?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/CDP_MODE.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/docs/CDP_MODE.md b/docs/CDP_MODE.md index ec1b6d6..7c908fd 100644 --- a/docs/CDP_MODE.md +++ b/docs/CDP_MODE.md @@ -20,9 +20,11 @@ bridgic-browser open https://example.com --cdp "ws://localhost:9222/..." 
## Tab ownership in CDP mode -After connecting via CDP, bridgic **always opens its own brand-new tab** in the borrowed browser context. **Your existing tabs are never navigated, refreshed, or closed.** When `close()` runs (or the daemon shuts down), bridgic only closes the tabs it created itself. +After connecting via CDP, bridgic **always opens its own brand-new tab** in the borrowed browser context. **Your existing tabs are never navigated, refreshed, or closed.** -Each call to `bridgic-browser new-tab` creates an additional bridgic-owned tab; all of them are tracked and cleaned up on shutdown. Tabs you opened manually in Chrome — or pop-ups (`target=_blank` etc.) spawned by pages bridgic was driving — are **not** tracked and will not be touched by bridgic. +All tabs in the context — including the ones you had open before bridgic connected, and any pop-up tabs (`target=_blank`, `window.open()`) spawned by pages bridgic is driving — are fully visible via `get_tabs` / `switch_tab` / `close_tab`. + +When `close()` runs (or the daemon shuts down), bridgic **only disconnects** — no tabs are closed. The remote Chrome continues running exactly as the user left it. 
When bridgic connects, the daemon log records which Chrome instance was joined and how many user tabs were preserved: @@ -83,19 +85,18 @@ bridgic records video via Chrome's CDP `Page.startScreencast` (piped to ffmpeg), ### `close()` only disconnects -`close()` in CDP mode preserves the remote browser state — only bridgic's own tabs are cleaned up: +`close()` in CDP mode is a pure disconnect — no pages or contexts are touched: -| Operation | Launch mode | CDP (borrowed context) | -|-----------|------------|----------------------| +| Operation | Launch mode | CDP mode | +|-----------|------------|---------| | Navigate pages to about:blank | Yes | **Skipped** | -| `page.close()` on user tabs | Yes | **Skipped** | -| `page.close()` on bridgic-owned tabs | Yes | Yes | +| `page.close()` | Yes | **Skipped** | | `context.close()` | Yes | **Skipped** | | `browser.close()` | Kills process | **Disconnects only** | | Save tracing artifacts | Yes | Yes | | Save video artifacts | Yes | Yes (all tabs in context) | -After `close()`, the remote Chrome continues running with all of the **user's** tabs intact; only the tabs bridgic explicitly created are gone. +After `close()`, the remote Chrome continues running with all tabs intact. 
### Connection drops From 61ee33ca0cf20e363b07cf74d20747af0ead015b Mon Sep 17 00:00:00 2001 From: NiceCode666 Date: Wed, 8 Apr 2026 19:20:04 +0800 Subject: [PATCH 10/72] perf: parallelize page.title() calls in get_all_page_descs with asyncio.gather --- bridgic/browser/session/_browser.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/bridgic/browser/session/_browser.py b/bridgic/browser/session/_browser.py index de33b73..faf037d 100644 --- a/bridgic/browser/session/_browser.py +++ b/bridgic/browser/session/_browser.py @@ -2075,12 +2075,19 @@ async def get_page_desc(self, page: Optional[Page] = None) -> Optional[PageDesc] async def get_all_page_descs(self) -> List[PageDesc]: pages = self.get_pages() - page_descs = [] - for page in pages: - page_desc = await self.get_page_desc(page) - if page_desc: - page_descs.append(page_desc) - return page_descs + if not pages: + return [] + + async def _safe_desc(page: Page) -> Optional[PageDesc]: + try: + page_id = generate_page_id(page) + title = await page.title() + return PageDesc(url=page.url, title=title, page_id=page_id) + except Exception: + return None + + results = await asyncio.gather(*(_safe_desc(p) for p in pages)) + return [d for d in results if d is not None] def get_pages(self) -> List[Page]: """Return all pages in the current browser context. From fe639b726c044fa778cee0241eb129ac30f8192b Mon Sep 17 00:00:00 2001 From: NiceCode666 Date: Fri, 10 Apr 2026 11:12:09 +0800 Subject: [PATCH 11/72] refactor: enhance CLI command descriptions and improve wait command functionality - Updated descriptions for CLI commands to clarify usage and parameters, including `fill-form`, `press`, `mouse-down`, `mouse-up`, `wait`, `switch-tab`, and `close-tab`. - Enhanced the `wait` command to support a custom timeout option, allowing users to specify a maximum wait duration. - Adjusted the implementation of the `wait` command to include timeout handling in the underlying logic. 
- Improved mock setups in tests to reflect changes in command behavior and ensure accurate testing of new functionalities. --- bridgic/browser/_cli_catalog.py | 14 +- bridgic/browser/cli/_commands.py | 45 +- bridgic/browser/cli/_daemon.py | 17 +- bridgic/browser/session/_browser.py | 1245 ++++++++++++++------ bridgic/browser/session/_video_recorder.py | 109 +- tests/conftest.py | 17 +- tests/unit/test_browser.py | 364 +++++- tests/unit/test_browser_methods.py | 88 +- tests/unit/test_cli.py | 12 +- tests/unit/test_tools.py | 146 +-- tests/unit/test_video_recorder.py | 81 ++ 11 files changed, 1596 insertions(+), 542 deletions(-) diff --git a/bridgic/browser/_cli_catalog.py b/bridgic/browser/_cli_catalog.py index fd8bca8..98cd5b0 100644 --- a/bridgic/browser/_cli_catalog.py +++ b/bridgic/browser/_cli_catalog.py @@ -97,8 +97,8 @@ "drag": (ToolCategory.ELEMENT_INTERACTION, "Drag from START_REF to END_REF"), "options": (ToolCategory.ELEMENT_INTERACTION, "Get all available options for a dropdown element by ref"), "upload": (ToolCategory.ELEMENT_INTERACTION, "Upload a file at PATH to a file input element by ref"), - "fill-form": (ToolCategory.ELEMENT_INTERACTION, "Fill multiple form fields via JSON array [--submit]"), - "press": (ToolCategory.KEYBOARD, "Press a key or combination (Enter, Control+A, ...)"), + "fill-form": (ToolCategory.ELEMENT_INTERACTION, "Fill multiple form fields [--submit]; FIELDS_JSON: '[{\"ref\":\"REF\",\"value\":\"TEXT\"}]'"), + "press": (ToolCategory.KEYBOARD, "Press a key or combination (Enter, Control+A, ...); macOS: use Meta for Cmd (Meta+A, Meta+C)"), "type": (ToolCategory.KEYBOARD, "Type TEXT into the focused element character-by-character (use 'click'/'focus' first) [--submit]"), "key-down": (ToolCategory.KEYBOARD, "Press and hold a keyboard key"), "key-up": (ToolCategory.KEYBOARD, "Release a held keyboard key"), @@ -106,13 +106,13 @@ "mouse-move": (ToolCategory.MOUSE, "Move the mouse to viewport-pixel coordinates (X Y from top-left)"), 
"mouse-click": (ToolCategory.MOUSE, "Click mouse at viewport-pixel coordinates (X Y) [--button left|right|middle] [--count N]"), "mouse-drag": (ToolCategory.MOUSE, "Drag mouse from viewport-pixel (X1 Y1) to (X2 Y2)"), - "mouse-down": (ToolCategory.MOUSE, "Press and hold a mouse button [--button left]"), - "mouse-up": (ToolCategory.MOUSE, "Release a held mouse button [--button left]"), - "wait": (ToolCategory.WAIT, "Wait N seconds (unit: SECONDS not ms) or until TEXT appears; TEXT --gone waits for disappearance"), + "mouse-down": (ToolCategory.MOUSE, "Press and hold a mouse button at current position [--button left]; call mouse-move first"), + "mouse-up": (ToolCategory.MOUSE, "Release a held mouse button at current position [--button left]; call mouse-move first"), + "wait": (ToolCategory.WAIT, "Wait N seconds (unit: SECONDS not ms) or until TEXT appears [--timeout S]; TEXT --gone waits for disappearance"), "tabs": (ToolCategory.TABS, "List all open tabs"), "new-tab": (ToolCategory.TABS, "Open a new tab [URL]"), - "switch-tab": (ToolCategory.TABS, "Switch to a tab by page_id"), - "close-tab": (ToolCategory.TABS, "Close a tab by page_id (or current tab if omitted)"), + "switch-tab": (ToolCategory.TABS, "Switch to a tab by page_id; run 'tabs' first to list available page IDs"), + "close-tab": (ToolCategory.TABS, "Close a tab by page_id (or current tab if omitted); run 'tabs' first to list page IDs"), "screenshot": (ToolCategory.CAPTURE, "Save a screenshot to PATH [--full-page]"), "pdf": (ToolCategory.CAPTURE, "Save the current page as PDF"), "console-start": (ToolCategory.DEVELOPER, "Start capturing browser console output"), diff --git a/bridgic/browser/cli/_commands.py b/bridgic/browser/cli/_commands.py index 140c405..e844599 100644 --- a/bridgic/browser/cli/_commands.py +++ b/bridgic/browser/cli/_commands.py @@ -366,7 +366,12 @@ def cmd_upload(ref: str, path: str) -> None: @click.option("--submit", is_flag=True, default=False, help="Press Enter after filling the 
last field.") def cmd_fill_form(fields_json: str, submit: bool) -> None: - """Fill multiple form fields all at once. FIELDS_JSON is a JSON array like '[{"ref":"8d4a07a9","value":"hi"}]'.""" + """Fill multiple form fields all at once. + + FIELDS_JSON is a JSON array of {"ref": "REF", "value": "TEXT"} objects. + Example: '[{"ref":"8d4a07a9","value":"Alice"},{"ref":"9e5f18b0","value":"secret"}]' + Get refs from the 'snapshot' command. + """ try: _ok(send_command("fill_form", {"fields": fields_json, "submit": submit}, start_if_needed=False)) except Exception as exc: @@ -378,7 +383,10 @@ def cmd_fill_form(fields_json: str, submit: bool) -> None: @cli.command("press", context_settings=CONTEXT_SETTINGS) @click.argument("key") def cmd_press(key: str) -> None: - """Press a keyboard key or combination (Enter, Control+A, Shift+Tab…).""" + """Press a keyboard key or combination (Enter, Control+A, Shift+Tab…). + + On macOS use Meta for the Command key (e.g. Meta+A for select-all, Meta+C for copy). + """ try: _ok(send_command("press", {"key": key}, start_if_needed=False)) except Exception as exc: @@ -477,7 +485,10 @@ def cmd_mouse_drag(x1: float, y1: float, x2: float, y2: float) -> None: type=click.Choice(["left", "right", "middle"], case_sensitive=False), help="Mouse button to press (default: left).") def cmd_mouse_down(button: str) -> None: - """Press and hold a mouse button.""" + """Press and hold a mouse button at the current cursor position. + + Call mouse-move first to position the cursor before pressing. + """ try: _ok(send_command("mouse_down", {"button": button}, start_if_needed=False)) except Exception as exc: @@ -489,7 +500,10 @@ def cmd_mouse_down(button: str) -> None: type=click.Choice(["left", "right", "middle"], case_sensitive=False), help="Mouse button to release (default: left).") def cmd_mouse_up(button: str) -> None: - """Release a held mouse button.""" + """Release a held mouse button at the current cursor position. 
+ + Call mouse-move first to position the cursor before releasing. + """ try: _ok(send_command("mouse_up", {"button": button}, start_if_needed=False)) except Exception as exc: @@ -502,28 +516,33 @@ def cmd_mouse_up(button: str) -> None: @click.argument("seconds_or_text") @click.option("--gone", is_flag=True, default=False, help="Wait for SECONDS_OR_TEXT to disappear instead of appear.") -def cmd_wait(seconds_or_text: str, gone: bool) -> None: +@click.option("--timeout", "timeout_seconds", default=30.0, show_default=True, type=float, + help="Max seconds to wait for text to appear/disappear (ignored for numeric waits).") +def cmd_wait(seconds_or_text: str, gone: bool, timeout_seconds: float) -> None: """Wait for N seconds (float) or until TEXT appears/disappears. \b SECONDS_OR_TEXT: If a number → wait exactly that many seconds (e.g. 2, 0.5). Max 60. NOTE: unit is SECONDS, not milliseconds. - --gone is ignored when a number is given. + --gone and --timeout are ignored when a number is given. If text → wait until that text appears on the page. Add --gone to wait until it disappears instead. + Use --timeout to set a custom wait limit (default: 30s). \b Examples: - bridgic-browser wait 2 # wait for 2 seconds - bridgic-browser wait 0.5 # wait for 0.5 senond - bridgic-browser wait "Submit" # wait for text to appear - bridgic-browser wait --gone "Loading" # wait for text to disappear + bridgic-browser wait 2 # wait for 2 seconds + bridgic-browser wait 0.5 # wait for 0.5 second + bridgic-browser wait "Submit" # wait for text to appear (30s limit) + bridgic-browser wait --timeout 5 "Submit" # wait up to 5 seconds + bridgic-browser wait --gone "Loading" # wait for text to disappear + bridgic-browser wait --gone --timeout 10 "Spinner" # disappear within 10s """ value = seconds_or_text try: # Try to parse as a number for time-based wait. - # --gone is irrelevant for numeric waits (no text to watch for). + # --gone and --timeout are irrelevant for numeric waits. 
try: seconds = float(value) except ValueError: @@ -532,9 +551,9 @@ def cmd_wait(seconds_or_text: str, gone: bool) -> None: if seconds is not None: _ok(send_command("wait", {"seconds": seconds}, start_if_needed=False)) elif gone: - _ok(send_command("wait", {"text_gone": value}, start_if_needed=False)) + _ok(send_command("wait", {"text_gone": value, "timeout": timeout_seconds}, start_if_needed=False)) else: - _ok(send_command("wait", {"text": value}, start_if_needed=False)) + _ok(send_command("wait", {"text": value, "timeout": timeout_seconds}, start_if_needed=False)) except Exception as exc: _err(exc) diff --git a/bridgic/browser/cli/_daemon.py b/bridgic/browser/cli/_daemon.py index 360c567..2040a26 100644 --- a/bridgic/browser/cli/_daemon.py +++ b/bridgic/browser/cli/_daemon.py @@ -265,11 +265,14 @@ async def _handle_mouse_up(browser: "Browser", args: Dict[str, Any]) -> str: # ── Wait ────────────────────────────────────────────────────────────────────── async def _handle_wait(browser: "Browser", args: Dict[str, Any]) -> str: - return await browser.wait_for( - time_seconds=args.get("seconds"), - text=args.get("text"), - text_gone=args.get("text_gone"), - ) + kwargs: dict = { + "time_seconds": args.get("seconds"), + "text": args.get("text"), + "text_gone": args.get("text_gone"), + } + if "timeout" in args: + kwargs["timeout"] = float(args["timeout"]) + return await browser.wait_for(**kwargs) # ── Tabs ────────────────────────────────────────────────────────────────────── @@ -669,9 +672,9 @@ async def _dispatch(browser: "Browser", command: str, args: Dict[str, Any]) -> D _READ_TIMEOUT = 60.0 # seconds to wait for a command line from the client try: - _DAEMON_STOP_TIMEOUT = float(os.environ.get("BRIDGIC_DAEMON_STOP_TIMEOUT", "45")) + _DAEMON_STOP_TIMEOUT = float(os.environ.get("BRIDGIC_DAEMON_STOP_TIMEOUT", "300")) except (ValueError, TypeError): - _DAEMON_STOP_TIMEOUT = 45.0 + _DAEMON_STOP_TIMEOUT = 300.0 def _setup_signal_handlers(stop_event: asyncio.Event) -> None: 
diff --git a/bridgic/browser/session/_browser.py b/bridgic/browser/session/_browser.py index faf037d..12c1b5e 100644 --- a/bridgic/browser/session/_browser.py +++ b/bridgic/browser/session/_browser.py @@ -455,13 +455,11 @@ def _css_attr_equals(name: str, value: str) -> str: async def _prefer_visible_locators(locators: list) -> list: """Keep only visible locators when possible, otherwise preserve original order.""" - visible = [] - for locator in locators: - try: - if await locator.is_visible(): - visible.append(locator) - except Exception: - continue + results = await asyncio.gather( + *[locator.is_visible() for locator in locators], + return_exceptions=True, + ) + visible = [loc for loc, r in zip(locators, results) if r is True] return visible or locators @@ -505,27 +503,149 @@ async def _get_dropdown_option_locators(page, locator) -> list: async def _is_native_checkbox_or_radio(locator) -> bool: - """Return True when locator points to .""" + """Return True when locator points to . + + Uses ``get_attribute("type")`` instead of ``evaluate()`` to avoid + Playwright's ``_mainContext()`` hang on pre-existing CDP tabs. + Only ```` elements carry those type values, so + the tagName check is redundant. + """ try: - tag_name = await locator.evaluate("el => el.tagName.toLowerCase()") + input_type = (await locator.get_attribute("type") or "").strip().lower() + return input_type in {"checkbox", "radio"} except Exception: return False - if tag_name != "input": - return False - input_type = (await locator.get_attribute("type") or "").strip().lower() - return input_type in {"checkbox", "radio"} async def _is_checked(locator) -> bool: - """Check both native .checked and aria-checked state.""" - return bool( - await locator.evaluate( - "el => el.checked === true || el.getAttribute('aria-checked') === 'true'" + """Check both native .checked and aria-checked state. 
+ + Uses ``is_checked()`` (CDP-backed, has timeout) plus ``get_attribute`` + instead of ``evaluate()`` to avoid the ``_mainContext()`` hang on + pre-existing CDP tabs. + """ + try: + if await locator.is_checked(): + return True + except Exception: + pass + try: + aria = (await locator.get_attribute("aria-checked") or "").strip().lower() + return aria == "true" + except Exception: + return False + + +async def _cdp_evaluate_on_element(cdp_context, page, locator, code: str) -> Any: + """Evaluate *code* (an arrow function ``el => ...``) on the DOM element + identified by *locator*, using a raw CDPSession. + + Resolves the element via bounding-box coordinates + ``document.elementFromPoint`` + so it bypasses Playwright's ``_mainContext()`` which hangs on pre-existing + CDP-borrowed tabs. Raises on any failure (caller must handle). + """ + bbox = await locator.bounding_box() + if bbox is None: + raise RuntimeError("Element has no bounding box — cannot resolve via CDPSession") + cx = int(bbox["x"] + bbox["width"] / 2) + cy = int(bbox["y"] + bbox["height"] / 2) + session = await cdp_context.new_cdp_session(page) + try: + # Step 1: get the element's objectId via Runtime.evaluate (works in CDP borrowed mode) + elem_result = await asyncio.wait_for( + session.send("Runtime.evaluate", { + "expression": f"document.elementFromPoint({cx},{cy})", + "returnByValue": False, + }), + timeout=5.0, ) - ) + object_id = elem_result.get("result", {}).get("objectId") + if not object_id: + raise RuntimeError("No element found at coordinates via CDPSession") + # Step 2: call the user's arrow function with the element as the first + # argument (matching Playwright's locator.evaluate() calling convention). + # objectId is used as the execution context; arguments[0] passes it as + # the first parameter so ``(el) => el.value`` receives the element. 
+ call_result = await asyncio.wait_for( + session.send("Runtime.callFunctionOn", { + "functionDeclaration": code, + "objectId": object_id, + "arguments": [{"objectId": object_id}], + "returnByValue": True, + "awaitPromise": True, + }), + timeout=30.0, + ) + if call_result.get("exceptionDetails"): + raise RuntimeError(f"JS exception: {call_result['exceptionDetails']}") + return call_result.get("result", {}).get("value") + finally: + try: + await session.detach() + except Exception: + pass + +async def _check_element_covered(locator, cx: float, cy: float, cdp_context=None) -> bool: + """Return True when another element sits on top of (cx, cy). -async def _click_checkable_target(page, locator, bbox) -> None: + In CDP borrowed mode (``cdp_context`` provided) ``locator.evaluate()`` + hangs because Playwright's ``_mainContext()`` never resolves for + pre-existing tabs. We return ``False`` immediately so callers fall + through to ``locator.click()`` which uses the utility world and handles + overlays internally. + """ + if cdp_context is not None: + return False + try: + return await asyncio.wait_for( + locator.evaluate( + f"(el) => {{ if (window.parent !== window) return false; " + f"const t = document.elementFromPoint({cx}, {cy}); " + f"return !!t && t !== el && !el.contains(t) && !t.contains(el); }}" + ), + timeout=10.0, + ) + except Exception: + return False + + +async def _click_covering_element(page, locator, cx: float, cy: float, cdp_context=None) -> None: + """Click the element that covers position (cx, cy). + + In CDP borrowed mode (``cdp_context`` provided) uses a raw CDPSession + ``Runtime.evaluate`` to click the topmost element at the coordinates, + bypassing ``page.evaluate()`` which hangs on pre-existing tabs. + Falls back to ``locator.dispatch_event("click")`` on any failure. 
+ """ + if cdp_context is not None: + session = None + try: + session = await cdp_context.new_cdp_session(page) + expr = f"document.elementFromPoint({cx}, {cy})?.click()" + await asyncio.wait_for( + session.send("Runtime.evaluate", {"expression": expr}), + timeout=5.0, + ) + except Exception: + await locator.dispatch_event("click") + finally: + if session: + try: + await session.detach() + except Exception: + pass + return + try: + await asyncio.wait_for( + page.evaluate(f"document.elementFromPoint({cx}, {cy})?.click()"), + timeout=10.0, + ) + except Exception: + await locator.dispatch_event("click") + + +async def _click_checkable_target(page, locator, bbox, cdp_context=None) -> None: """Click a checkable target with overlay handling and shadow DOM fallback.""" if bbox is not None: cx = bbox["x"] + bbox["width"] / 2 @@ -535,15 +655,11 @@ async def _click_checkable_target(page, locator, bbox) -> None: await locator.dispatch_event("click") return - covered = await locator.evaluate( - f"(el) => {{ if (window.parent !== window) return false; " - f"const t = document.elementFromPoint({cx}, {cy}); " - f"return !!t && t !== el && !el.contains(t) && !t.contains(el); }}" - ) + covered = await _check_element_covered(locator, cx, cy, cdp_context=cdp_context) if covered: logger.debug("_click_checkable_target: covered at (%.1f, %.1f), clicking intercepting element", cx, cy) if page: - await page.evaluate(f"document.elementFromPoint({cx}, {cy})?.click()") + await _click_covering_element(page, locator, cx, cy, cdp_context=cdp_context) else: await locator.dispatch_event("click") else: @@ -901,12 +1017,10 @@ def __init__( # Context-scoped state (keyed by _get_context_key) self._tracing_state: Dict[str, bool] = {} self._video_state: Dict[str, bool] = {} - # Multi-page CDP screencast video recording state. 
- # Mirrors Playwright CLI behaviour (packages/playwright-core/src/tools/ - # backend/context.ts — ``startVideoRecording`` / ``stopVideoRecording``): - # one ``start_video`` call records EVERY page in the context, including - # pages opened after start, each to its own .webm file. - self._video_recorders: Dict[Any, "_video_recorder_mod.VideoRecorder"] = {} + # Single-stream video recording: one ffmpeg process records the + # active tab. When the user switches tabs the screencast source + # is hot-swapped via VideoRecorder.switch_page(). + self._video_recorder: Optional["_video_recorder_mod.VideoRecorder"] = None # When a recording session is active, holds {"width", "height", # "context", "page_listener"}. None means no active session. self._video_session: Optional[Dict[str, Any]] = None @@ -1457,6 +1571,8 @@ async def _ensure_started(self) -> None: _CONTEXT_CLOSE_TIMEOUT = 15.0 _BROWSER_CLOSE_TIMEOUT = 15.0 _PLAYWRIGHT_STOP_TIMEOUT = 15.0 + _VIDEO_PREPARE_STOP_TIMEOUT = 15.0 # single recorder prepare_stop() in close() + _VIDEO_FINALIZE_TIMEOUT = 30.0 # single ffmpeg finalize() in close() @staticmethod async def _force_kill_playwright_driver(pw: Any) -> None: @@ -1607,7 +1723,7 @@ def inspect_pending_close_artifacts(self) -> Dict[str, Any]: context_key = _get_context_key(self._context) tracing_active = bool(self._tracing_state.get(context_key)) - video_count = len(self._video_recorders) + video_count = 1 if self._video_recorder is not None else 0 if not tracing_active and video_count == 0: # Nothing to write — don't create a directory. return artifacts @@ -1674,6 +1790,9 @@ async def close(self) -> str: errors: List[str] = [] shutdown_artifacts: Dict[str, List[str]] = {"trace": [], "video": []} context_key: Optional[str] = None + # Recorders whose prepare_stop() has run but finalize() is deferred + # until after Chrome exits (two-phase video shutdown). 
+ _deferred_recorders: list = [] # Deferred re-raise: if CancelledError / KeyboardInterrupt arrives during any # cleanup await we record it here, finish ALL cleanup steps, then re-raise at # the very end. This ensures no Playwright/Chromium process is left orphaned @@ -1731,16 +1850,26 @@ async def close(self) -> str: finally: self._tracing_state[context_key] = False - # Stop every active CDP screencast recorder (one per page). - # Mirrors Playwright CLI's context.ts ``dispose()`` → - # ``stopVideoRecording()``: when the context closes, every - # per-page recorder is finalized. + # Two-phase video recorder shutdown. + # + # Phase 1 (here, before Chrome exits): prepare_stop() each + # recorder — stops the CDP screencast, pads frames, detaches + # the CDP session. Fast (~milliseconds per recorder). + # + # Phase 2 (after Chrome exits): finalize() each recorder — + # flushes the frame queue to ffmpeg and waits for the process + # to write the .webm file. Slow (seconds), but Chrome is + # already dead so user_data_dir is released. + # + # Why two phases: the old single-phase stop() held Chrome + # alive while 50 ffmpeg processes fought for CPU, blocking + # user_data_dir release. Splitting lets Chrome exit ASAP. # # Why we snapshot the dict before awaiting: # stop_video() and close() can race in the daemon flow. We # clear the dict first so the other path observes "no work # left" and skips the duplicate stop() call. - if self._video_recorders or self._video_session is not None: + if self._video_recorder is not None or self._video_session is not None: # Detach the context "page" listener so new pages aren't # auto-started during shutdown. 
if self._video_session: @@ -1750,42 +1879,42 @@ async def close(self) -> str: self._context.remove_listener("page", _listener) except Exception: pass - _recorders = list(self._video_recorders.items()) - self._video_recorders.clear() + _recorder = self._video_recorder + self._video_recorder = None self._video_session = None - for _idx, (_page_ref, _recorder) in enumerate(_recorders): + + # Phase 1: prepare_stop() the single recorder (fast). + if _recorder is not None: try: - rec_path = await asyncio.wait_for( - _recorder.stop(), timeout=10.0 + await asyncio.wait_for( + _recorder.prepare_stop(), + timeout=self._VIDEO_PREPARE_STOP_TIMEOUT, ) - # Move the video file into the close-session dir, - # next to the trace. - if self._close_session_dir: - if _idx == 0: - dest_name = "video.webm" - else: - dest_name = f"video-{_idx}.webm" - dest = os.path.join(self._close_session_dir, dest_name) - self._move_video_local(Path(rec_path), dest) - shutdown_artifacts["video"].append(dest) - else: - shutdown_artifacts["video"].append(rec_path) - except asyncio.TimeoutError: - errors.append("video_recorder.stop: timeout after 10.0s") - except Exception as e: - errors.append(f"video_recorder.stop: {e}") - except BaseException as e: - errors.append(f"video_recorder.stop: {e}") + except Exception as _pr: + logger.warning( + "[close] prepare_stop failed: %s(%r)", + type(_pr).__name__, str(_pr), + ) + _recorder._is_stopped = True + _recorder._cdp_session = None + except BaseException as _pr: + logger.warning("[close] prepare_stop cancelled: %s", _pr) + _recorder._is_stopped = True + _recorder._cdp_session = None if _pending_cancel is None: - _pending_cancel = e - self._video_state.pop(context_key, None) + _pending_cancel = _pr + # Stash for Phase 2 (runs after Chrome exits). + _deferred_recorders = [("single", _recorder)] + + logger.debug("[close] Phase 1 done, clearing page state") # Always clear page-scoped listeners/caches for every context page. 
for page in list(self._context.pages): self._clear_page_scoped_state(page, errors) else: self._clear_page_scoped_state(self._page, errors) + logger.debug("[close] disconnecting browser") # Detach download manager before context closes to remove handlers if self._download_manager and self._context: try: @@ -1891,6 +2020,37 @@ async def close(self) -> str: if _pending_cancel is None: _pending_cancel = e + # Phase 2: finalize() deferred video recorders. + # Chrome is dead, user_data_dir is released. Now flush the ffmpeg + # frame queues with a semaphore to bound CPU usage. + if _deferred_recorders: + logger.info("[close] Phase 2: finalize single recorder") + _, _rec_to_finalize = _deferred_recorders[0] + try: + rec_path: str = await asyncio.wait_for( + _rec_to_finalize.finalize(), + timeout=self._VIDEO_FINALIZE_TIMEOUT, + ) + if self._close_session_dir: + dest = os.path.join(self._close_session_dir, "video.webm") + self._move_video_local(Path(rec_path), dest) + shutdown_artifacts["video"].append(dest) + else: + shutdown_artifacts["video"].append(rec_path) + except asyncio.TimeoutError: + errors.append( + f"video_recorder.finalize: timeout after " + f"{self._VIDEO_FINALIZE_TIMEOUT:.1f}s" + ) + except Exception as _fin_err: + errors.append(f"video_recorder.finalize: {_fin_err}") + except BaseException as _fin_err: + errors.append(f"video_recorder.finalize: {_fin_err}") + if _pending_cancel is None: + _pending_cancel = _fin_err + if context_key is not None: + self._video_state.pop(context_key, None) + # Clear snapshot cache self._last_snapshot = None self._last_snapshot_url = None @@ -2020,6 +2180,7 @@ async def navigate_to( # All tabs were closed (e.g. via close_tab); _context is still alive. 
logger.info("No page is open, creating a new page in existing context") self._page = await self._context.new_page() + await self._switch_video_to_page(self._page) kwargs: Dict[str, Any] = {"wait_until": wait_until} if timeout is not None: @@ -2053,11 +2214,94 @@ async def _new_page( code="NO_BROWSER_CONTEXT", ) self._page = await self._context.new_page() + await self._switch_video_to_page(self._page) if url: await self.navigate_to(url, wait_until=wait_until, timeout=timeout) await self._page.bring_to_front() return self._page + async def _cdp_navigate_history(self, page: "Page", delta: int) -> None: + """Navigate browser history by *delta* (-1 = back, +1 = forward) using a + raw CDPSession, bypassing ``page.go_back/forward()`` which relies on + Playwright's ``_mainContext()`` tracking. That tracking can hang on tabs + opened before bridgic attached (CDP borrowed mode). + """ + session = None + try: + session = await self._context.new_cdp_session(page) + history = await asyncio.wait_for( + session.send("Page.getNavigationHistory"), + timeout=5.0, + ) + current_idx = history.get("currentIndex", 0) + entries = history.get("entries", []) + target_idx = current_idx + delta + if target_idx < 0 or target_idx >= len(entries): + direction = "back" if delta < 0 else "forward" + _raise_state_error( + f"Cannot navigate {direction}: no history entry", + code="NO_HISTORY_ENTRY", + retryable=False, + ) + entry_id = entries[target_idx]["id"] + await asyncio.wait_for( + session.send("Page.navigateToHistoryEntry", {"entryId": entry_id}), + timeout=15.0, + ) + finally: + if session: + try: + await session.detach() + except Exception: + pass + # Wait for page to reach domcontentloaded; ignore timeout (navigation may + # already be complete when we get here for cached/fast pages). 
+ try: + await asyncio.wait_for( + page.wait_for_load_state("domcontentloaded"), + timeout=10.0, + ) + except Exception: + pass + + async def _get_page_title(self, page: Page) -> str: + """Return the title of *page*, handling CDP borrowed-mode pages correctly. + + ``page.title()`` internally calls Playwright's ``frame._mainContext()``, + which waits on a Promise that is resolved when Playwright sees the CDP + ``Runtime.executionContextCreated`` event. For **pre-existing tabs** + when bridgic connects via ``connect_over_cdp()``, Playwright may have + missed that event (it fired before Playwright registered its listener), + so the Promise never resolves and ``page.title()`` hangs indefinitely. + + In CDP borrowed-mode we bypass Playwright's context-tracking entirely by + opening a fresh ``CDPSession`` directly to the target and sending + ``Runtime.evaluate`` ourselves. Chrome responds immediately regardless + of Playwright's internal state. For pages that genuinely cannot run JS + (e.g. ``chrome://`` internal pages) we fall back to the URL. 
+ """ + if self._cdp_url and not self._cdp_context_owned and self._context: + session = None + try: + session = await self._context.new_cdp_session(page) + result = await asyncio.wait_for( + session.send( + "Runtime.evaluate", + {"expression": "document.title", "returnByValue": True}, + ), + timeout=5.0, + ) + return result.get("result", {}).get("value", "") or page.url + except Exception: + return page.url + finally: + if session: + try: + await session.detach() + except Exception: + pass + return await page.title() + async def get_page_desc(self, page: Optional[Page] = None) -> Optional[PageDesc]: if not page: page = self._page @@ -2065,7 +2309,7 @@ async def get_page_desc(self, page: Optional[Page] = None) -> Optional[PageDesc] logger.warning("No page is open") return None page_id = generate_page_id(page) - title = await page.title() + title = await self._get_page_title(page) page_desc = PageDesc( url=page.url, title=title, @@ -2078,11 +2322,11 @@ async def get_all_page_descs(self) -> List[PageDesc]: if not pages: return [] - async def _safe_desc(page: Page) -> Optional[PageDesc]: + async def _safe_desc(p: Page) -> Optional[PageDesc]: try: - page_id = generate_page_id(page) - title = await page.title() - return PageDesc(url=page.url, title=title, page_id=page_id) + page_id = generate_page_id(p) + title = await self._get_page_title(p) + return PageDesc(url=p.url, title=title, page_id=page_id) except Exception: return None @@ -2124,10 +2368,11 @@ async def switch_to_page(self, page_id: str) -> tuple[bool, str]: return False, f"Page with page_id '{page_id}' not found" await page.bring_to_front() self._page = page + await self._switch_video_to_page(page) # Clear snapshot cache after switching pages self._last_snapshot = None self._last_snapshot_url = None - title = await page.title() + title = await self._get_page_title(page) return True, f"Switched to tab {page_id}: {page.url} (title: {title})" async def _close_page(self, page: Page | str) -> tuple[bool, str]: @@ 
-2160,20 +2405,34 @@ async def _close_page(self, page: Page | str) -> tuple[bool, str]: logger.warning("Page is None, can't close") return False, "Page is None, can't close" - # If the page being closed is currently recording, stop its - # recorder first and remove it from the registry. Why: a - # VideoRecorder's CDP session is bound to a specific page; once - # the page is closed the CDP session is dead and any later - # stop()/detach() call would block waiting on a 10 s timeout. - # Recorders for other pages stay active — same multi-page - # semantics as Playwright CLI. - if page in self._video_recorders: - _recorder = self._video_recorders.pop(page) - try: - await asyncio.wait_for(_recorder.stop(), timeout=10.0) - logger.debug("[_close_page] auto-stopped video recorder for closing page") - except Exception as e: - logger.debug("[_close_page] video recorder stop error: %s", e) + # If the page being closed is the one currently recorded, + # switch the single-stream recorder to a remaining page BEFORE + # closing — the CDP session is bound to this page and will die + # once the page is gone. + if ( + self._video_recorder is not None + and not self._video_recorder.is_stopped + and self._video_recorder.current_page == page + ): + remaining = [p for p in self.get_pages() if p != page and not p.is_closed()] + if remaining: + try: + await self._video_recorder.switch_page(remaining[0]) + logger.debug("[_close_page] video switched to remaining page") + except Exception as e: + logger.debug("[_close_page] video switch error: %s", e) + else: + # Last page — stop screencast but keep ffmpeg alive for finalize. 
+ if self._video_recorder._cdp_session: + try: + await self._video_recorder._cdp_session.send("Page.stopScreencast") + except Exception: + pass + try: + await self._video_recorder._cdp_session.detach() + except Exception: + pass + self._video_recorder._cdp_session = None await page.close() @@ -2187,7 +2446,7 @@ async def _close_page(self, page: Page | str) -> tuple[bool, str]: if self._page: now_id = generate_page_id(self._page) - now_title = await self._page.title() + now_title = await self._get_page_title(self._page) return True, f"Closed tab {page_id}. Now on {now_id}: {self._page.url} (title: {now_title})" return True, f"Closed tab {page_id}. No tabs remaining" @@ -2195,96 +2454,36 @@ async def get_page_size_info(self) -> Optional[PageSizeInfo]: if not self._page: logger.warning("No page is open") return None - # use CDP to get page size info - if self._context: - cdp_session = None + try: + # Use CDP Page.getLayoutMetrics directly — avoids page.evaluate() which hangs + # indefinitely on pre-existing tabs in CDP borrowed mode (Playwright misses the + # Runtime.executionContextCreated event for those tabs). + session = None try: - # NOTE: CDP sessions are only supported on Chromium-based browsers. 
- # create cdp session for the page - cdp_session = await self._context.new_cdp_session(self._page) - # get page size info:more information see https://chromedevtools.github.io/devtools-protocol/tot/Page/#method-getLayoutMetrics - result = await cdp_session.send("Page.getLayoutMetrics") - logger.debug(f"Page size info: {result}") - # use modern css properties if available - layout_viewport = result.get('cssLayoutViewport') or result.get('layoutViewport', {}) - content_size = result.get('cssContentSize') or result.get('contentSize', {}) - visual_viewport = result.get('cssVisualViewport') or result.get('visualViewport') - # viewport size (visualViewport is more accurate, considering zoom) - if visual_viewport: - viewport_width = int(visual_viewport.get('clientWidth') or 0) - viewport_height = int(visual_viewport.get('clientHeight') or 0) - else: - viewport_width = int(layout_viewport.get('clientWidth') or 0) - viewport_height = int(layout_viewport.get('clientHeight') or 0) - - # scroll position (get pageX/pageY from layoutViewport) - scroll_x = int(layout_viewport.get('pageX') or 0) - scroll_y = int(layout_viewport.get('pageY') or 0) - - # page total size (contentSize contains all scrollable content) - page_width = int(content_size.get('width') or viewport_width) - page_height = int(content_size.get('height') or viewport_height) - - # calculate scrollable distance - pixels_above = scroll_y - pixels_below = max(0, page_height - viewport_height - scroll_y) - pixels_left = scroll_x - pixels_right = max(0, page_width - viewport_width - scroll_x) - - return PageSizeInfo( - viewport_width=viewport_width, - viewport_height=viewport_height, - page_width=page_width, - page_height=page_height, - scroll_x=scroll_x, - scroll_y=scroll_y, - pixels_above=pixels_above, - pixels_below=pixels_below, - pixels_left=pixels_left, - pixels_right=pixels_right, + session = await self._context.new_cdp_session(self._page) + metrics = await asyncio.wait_for( + 
session.send("Page.getLayoutMetrics"), + timeout=5.0, ) - except Exception as e: - logger.debug(f"Failed to get page size info: {e}") finally: - # Always detach CDP session to prevent resource leak - if cdp_session: + if session: try: - await cdp_session.detach() + await session.detach() except Exception: pass - # fallback to js to get page size info - try: - page_size_info = await self._page.evaluate("""() => { - // 1. viewport size (without scrollbar, aligned with cssLayoutViewport in CDP) - const viewportWidth = document.documentElement.clientWidth || window.innerWidth; - const viewportHeight = document.documentElement.clientHeight || window.innerHeight; - - // 2. page total size (most reliable in standard mode) - const pageWidth = document.documentElement.scrollWidth; - const pageHeight = document.documentElement.scrollHeight; - - // 3. scroll position (modern browser universal API) - const scrollX = window.scrollX || window.pageXOffset; - const scrollY = window.scrollY || window.pageYOffset; - - return { - viewport_width: viewportWidth, - viewport_height: viewportHeight, - page_width: pageWidth, - page_height: pageHeight, - scroll_x: scrollX, - scroll_y: scrollY - }; - }""") - logger.debug(f"Page size info: {page_size_info}") - - viewport_width = page_size_info.get('viewport_width', 0) - viewport_height = page_size_info.get('viewport_height', 0) - page_width = page_size_info.get('page_width', 0) - page_height = page_size_info.get('page_height', 0) - scroll_x = page_size_info.get('scroll_x', 0) - scroll_y = page_size_info.get('scroll_y', 0) + layout = metrics.get("cssLayoutViewport", {}) + content = metrics.get("cssContentSize", {}) + visual = metrics.get("cssVisualViewport", {}) + + viewport_width = layout.get("clientWidth", 0) + viewport_height = layout.get("clientHeight", 0) + page_width = content.get("width", 0) + page_height = content.get("height", 0) + scroll_x = layout.get("pageX", 0) + scroll_y = layout.get("pageY", 0) + logger.debug("Page size info via 
CDP: vp=%dx%d page=%dx%d scroll=(%d,%d)", + viewport_width, viewport_height, page_width, page_height, scroll_x, scroll_y) pixels_above = scroll_y pixels_below = max(0, page_height - viewport_height - scroll_y) @@ -2321,7 +2520,9 @@ async def get_current_page_title(self) -> Optional[str]: Optional[str] Page title, or None if no page is open. """ - return await self._page.title() if self._page else None + if not self._page: + return None + return await self._get_page_title(self._page) async def _get_page_info(self) -> Optional[PageInfo]: if not self._page: @@ -2348,23 +2549,18 @@ async def get_full_page_info(self, logger.warning("No page is open, can't get full page info") return None try: - snapshot = await self.get_snapshot( - interactive=interactive, - full_page=full_page, + snapshot, page_info = await asyncio.gather( + self.get_snapshot(interactive=interactive, full_page=full_page), + self._get_page_info(), + return_exceptions=True, ) - if snapshot is None: + if isinstance(snapshot, BaseException) or snapshot is None: logger.warning("Failed to get snapshot") return None - page_info = await self._get_page_info() - if page_info is None: + if isinstance(page_info, BaseException) or page_info is None: logger.warning("Failed to get page info") return None - full_page_info = FullPageInfo( - url=page_info.url, - title=page_info.title, - **page_info.model_dump(), - tree=snapshot.tree, - ) + full_page_info = FullPageInfo(**page_info.model_dump(), tree=snapshot.tree) return full_page_info except Exception as e: logger.debug(f"Failed to get full page info: {e}") @@ -2926,13 +3122,20 @@ async def get_snapshot_text( details={"file": file}, ) - snapshot = await self.get_snapshot( - interactive=interactive, - full_page=full_page, - ) _page = getattr(self, "_page", None) + + async def _get_title() -> str: + if not _page: + return "" + return await self._get_page_title(_page) + + snapshot, page_title = await asyncio.gather( + self.get_snapshot(interactive=interactive, 
full_page=full_page), + _get_title(), + ) + if snapshot is None: + _raise_operation_error("Failed to get snapshot") page_url = _page.url if _page else "" - page_title = await _page.title() if _page else "" header = f"[Page: {page_url} | {page_title}]\n" full_text = snapshot.tree @@ -3089,7 +3292,16 @@ async def go_back(self) -> str: if page is None: _raise_state_error("No active page available", code="NO_ACTIVE_PAGE") - await page.go_back() + if self._cdp_url and not self._cdp_context_owned and self._context: + # CDP borrowed mode: page.go_back() hangs because Playwright's + # navigation tracking relies on _mainContext() which is broken for + # pre-existing tabs. Use CDPSession to navigate directly. + await self._cdp_navigate_history(page, delta=-1) + else: + await asyncio.wait_for( + page.go_back(wait_until="domcontentloaded"), + timeout=20.0, + ) result = f"Navigated back to: {page.url}" logger.info(f"[go_back] done {result}") return result @@ -3125,7 +3337,13 @@ async def go_forward(self) -> str: page = await self.get_current_page() if page is None: _raise_state_error("No active page available", code="NO_ACTIVE_PAGE") - await page.go_forward() + if self._cdp_url and not self._cdp_context_owned and self._context: + await self._cdp_navigate_history(page, delta=+1) + else: + await asyncio.wait_for( + page.go_forward(wait_until="domcontentloaded"), + timeout=20.0, + ) result = f"Navigated forward to: {page.url}" logger.info(f"[go_forward] done {result}") return result @@ -3171,7 +3389,7 @@ async def reload_page( if timeout is not None: kwargs["timeout"] = timeout * 1000.0 await page.reload(**kwargs) - title = await page.title() + title = await self._get_page_title(page) result = f"Page reloaded: {page.url} (title: {title})" logger.info(f"[reload_page] done {result}") return result @@ -3349,7 +3567,29 @@ async def evaluate_javascript(self, code: str) -> str: if page is None: _raise_state_error("No active page available", code="NO_ACTIVE_PAGE") - result = await 
page.evaluate(code) + if self._cdp_url and not self._cdp_context_owned and self._context: + # CDP borrowed mode: page.evaluate() hangs on pre-existing tabs + # because _mainContext() never resolves. Use a raw CDPSession + # Runtime.evaluate call — Chrome responds immediately. + session = None + try: + session = await self._context.new_cdp_session(page) + raw = await asyncio.wait_for( + session.send( + "Runtime.evaluate", + {"expression": code, "returnByValue": True}, + ), + timeout=30.0, + ) + result = raw.get("result", {}).get("value") + finally: + if session: + try: + await session.detach() + except Exception: + pass + else: + result = await page.evaluate(code) if isinstance(result, bool): result_str = "True" if result else "False" @@ -3619,14 +3859,99 @@ async def browser_resize(self, width: int, height: int) -> str: async def _is_text_visible_in_any_frame( self, page: "Page", text: str, exact: bool = False, ) -> bool: - """Check whether *text* is visible in any frame (main + all iframes).""" + """Check whether *text* is visible in any frame (main + all iframes). + + In CDP borrowed mode, ``locator.count()`` and ``locator.is_visible()`` + call into Playwright's ``_mainContext()`` which never resolves for + pre-existing tabs (see :meth:`_get_page_title` for the full explanation). + We bypass this by using a raw CDPSession ``Runtime.evaluate`` call that + queries ``document.body.innerText`` directly from Chrome — no Playwright + context tracking needed. + """ + if self._cdp_url and not self._cdp_context_owned and self._context: + # Iterate every frame (main + all iframes) to match the non-CDP path. + # + # ``new_cdp_session(child_frame)`` silently fails for same-process iframes + # (same-origin / file://) because they share the page's CDP target and have + # no separate Target to attach to. Instead we use two CDP page-level calls: + # + # 1. ``Page.getFrameTree()`` — enumerate all frame IDs recursively + # 2. 
``Page.createIsolatedWorld()`` — create a JS world IN that specific + # frame (independent of Playwright's + # _mainContext() tracking) + # 3. ``Runtime.evaluate()`` with ``contextId`` — run in the frame's world + # + # This avoids the ``_mainContext()`` hang because Page/Runtime CDP commands + # do not go through Playwright's context-tracking machinery. + session = None + try: + session = await self._context.new_cdp_session(page) + # Step 1: collect all frame IDs in document order. + frame_tree_result = await asyncio.wait_for( + session.send("Page.getFrameTree"), + timeout=5.0, + ) + frame_ids: list[str] = [] + + def _collect_frame_ids(node: dict) -> None: + fid = node.get("frame", {}).get("id") + if fid: + frame_ids.append(fid) + for child in node.get("childFrames", []): + _collect_frame_ids(child) + + _collect_frame_ids(frame_tree_result.get("frameTree", {})) + + needle = json.dumps(text if exact else text.lower()) + expr = ( + "(function(){" + " var t = document.body ? document.body.innerText : '';" + + (" return t.includes(" + needle + ");}" if exact + else " return t.toLowerCase().includes(" + needle + ");}") + + ")()" + ) + # Step 2+3: for each frame, create an isolated world and evaluate. 
+ for frame_id in frame_ids: + try: + world_result = await asyncio.wait_for( + session.send("Page.createIsolatedWorld", { + "frameId": frame_id, + "worldName": "bridgic-text-search", + "grantUniversalAccess": False, + }), + timeout=5.0, + ) + ctx_id = world_result.get("executionContextId") + if ctx_id is None: + continue + result = await asyncio.wait_for( + session.send("Runtime.evaluate", { + "expression": expr, + "contextId": ctx_id, + "returnByValue": True, + }), + timeout=5.0, + ) + if bool(result.get("result", {}).get("value", False)): + return True + except Exception: + continue + except Exception: + return False + finally: + if session: + try: + await session.detach() + except Exception: + pass + return False + for frame in page.frames: try: locator = frame.get_by_text(text, exact=exact) if await locator.count() > 0 and await locator.first.is_visible(): return True except Exception: - # Frame may have been detached or navigated away. continue return False @@ -3840,35 +4165,54 @@ async def input_text_by_ref( "}" ) + _cdp_ctx = self._context if (self._cdp_url and not self._cdp_context_owned) else None + if clear: if is_vis: await locator.clear() + elif _cdp_ctx is not None: + # CDP borrowed mode: locator.evaluate() (main world) hangs. + # locator.fill("") clears via the utility world and also + # dispatches input/change events — equivalent behaviour. 
+ logger.debug("[input_text_by_ref] CDP mode + is_visible()=False; clearing via locator.fill('')") + await locator.fill("") else: logger.debug("[input_text_by_ref] is_visible()=False; clearing via JS") - await locator.evaluate( - "(el) => { if ('value' in el) el.value = ''; " - "else if (el.isContentEditable) el.textContent = ''; }" + await asyncio.wait_for( + locator.evaluate( + "(el) => { if ('value' in el) el.value = ''; " + "else if (el.isContentEditable) el.textContent = ''; }" + ), + timeout=10.0, ) if slowly: if is_vis: await locator.focus() await locator.type(text, delay=100) + elif _cdp_ctx is not None: + logger.debug("[input_text_by_ref] CDP mode + is_visible()=False; using locator.fill() (slowly unavailable)") + await locator.fill(text) else: logger.debug("[input_text_by_ref] is_visible()=False; setting value via JS (slowly mode unavailable)") - await locator.evaluate("el => el.focus()") - await locator.evaluate(_js_set_value, text) + await locator.focus() + await asyncio.wait_for(locator.evaluate(_js_set_value, text), timeout=10.0) else: if is_vis and clear: await locator.fill(text) + elif _cdp_ctx is not None: + # CDP borrowed mode: use fill() (utility world) for hidden elements too. 
+ if not is_vis: + logger.debug("[input_text_by_ref] CDP mode + is_visible()=False; using locator.fill()") + await locator.fill(text) else: if not is_vis: logger.debug("[input_text_by_ref] is_visible()=False; setting value via JS") - await locator.evaluate(_js_set_value, text) + await asyncio.wait_for(locator.evaluate(_js_set_value, text), timeout=10.0) if submit: if not is_vis: - await locator.evaluate("el => el.focus()") + await locator.focus() page = await self.get_current_page() if page: await page.keyboard.press("Enter") @@ -3930,34 +4274,34 @@ async def click_element_by_ref(self, ref: str) -> str: logger.warning(f'[click_element_by_ref] {msg}') _raise_state_error(msg, code="REF_NOT_AVAILABLE", details={"ref": ref}) - bbox = await locator.bounding_box() + bbox, is_vis = await asyncio.gather( + locator.bounding_box(), + locator.is_visible(), + ) if bbox is not None: cx = bbox["x"] + bbox["width"] / 2 cy = bbox["y"] + bbox["height"] / 2 - if not await locator.is_visible(): + if not is_vis: logger.debug( "[click_element_by_ref] element has bbox but is_visible()=False " "(likely shadow-DOM slot); using dispatch_event click" ) await locator.dispatch_event("click") else: - covered = await locator.evaluate( - f"(el) => {{ if (window.parent !== window) return false; " - f"const t = document.elementFromPoint({cx}, {cy}); " - f"return !!t && t !== el && !el.contains(t) && !t.contains(el); }}" - ) + _cdp_ctx = self._context if (self._cdp_url and not self._cdp_context_owned) else None + covered = await _check_element_covered(locator, cx, cy, cdp_context=_cdp_ctx) if covered: logger.debug("[click_element_by_ref] covered at (%.1f, %.1f), clicking intercepting element", cx, cy) page = await self.get_current_page() if page: - await page.evaluate(f"document.elementFromPoint({cx}, {cy})?.click()") + await _click_covering_element(page, locator, cx, cy, cdp_context=_cdp_ctx) else: - await locator.evaluate("el => el.click()") + await locator.dispatch_event("click") else: await 
locator.click() else: - if not await locator.is_visible(): + if not is_vis: logger.debug("[click_element_by_ref] bbox=None and is_visible()=False; using dispatch_event click") await locator.dispatch_event("click") else: @@ -4000,18 +4344,31 @@ async def get_dropdown_options_by_ref(self, ref: str) -> str: _raise_state_error('This dropdown has no options', code='ELEMENT_STATE_ERROR') # Detect currently selected option(s) - selected_values = set() - try: - selected_values = set(await locator.evaluate( - "el => el.tagName === 'SELECT' ? Array.from(el.selectedOptions).map(o => o.value) : []" - )) - except Exception: + selected_values: set = set() + _cdp_ctx = self._context if (self._cdp_url and not self._cdp_context_owned) else None + if _cdp_ctx is not None: + # CDP borrowed mode: locator.evaluate() hangs. Skip — callers + # get no [selected] markers, which is a minor cosmetic loss. pass + else: + try: + selected_values = set(await asyncio.wait_for( + locator.evaluate( + "el => el.tagName === 'SELECT' ? Array.from(el.selectedOptions).map(o => o.value) : []" + ), + timeout=10.0, + )) + except Exception: + pass option_texts = [] - for i, option in enumerate(options): - text = await option.text_content() - value = await option.get_attribute("value") + # Fetch text and value for all options in parallel (two awaits per + # option reduced to one asyncio.gather per option). + _text_value_pairs = await asyncio.gather( + *(asyncio.gather(option.text_content(), option.get_attribute("value")) + for option in options) + ) + for i, (text, value) in enumerate(_text_value_pairs): if text: line = f"{i + 1}. 
{text.strip()}" + (f" (value: {value})" if value else "") if value in selected_values: @@ -4075,7 +4432,30 @@ async def select_dropdown_option_by_ref(self, ref: str, text: str) -> str: logger.warning(f'[select_dropdown_option_by_ref] {msg}') _raise_state_error(msg, code="REF_NOT_AVAILABLE", details={"ref": ref}) - tag_name = await locator.evaluate("el => el.tagName.toLowerCase()") + _cdp_ctx = self._context if (self._cdp_url and not self._cdp_context_owned) else None + if _cdp_ctx is not None: + # CDP borrowed mode: locator.evaluate() (main world) hangs. + # locator.select_option() uses the utility world and works correctly. + # Try it first; if the element is not a native ; fall through to custom path + else: + try: + tag_name = await asyncio.wait_for( + locator.evaluate("el => el.tagName.toLowerCase()"), + timeout=10.0, + ) + except Exception: + tag_name = "" if tag_name == "select": try: @@ -4157,12 +4537,15 @@ async def hover_element_by_ref(self, ref: str) -> str: logger.warning(f'[hover_element_by_ref] {msg}') _raise_state_error(msg, code="REF_NOT_AVAILABLE", details={"ref": ref}) - bbox = await locator.bounding_box() + bbox, is_vis = await asyncio.gather( + locator.bounding_box(), + locator.is_visible(), + ) if bbox is not None: cx = bbox["x"] + bbox["width"] / 2 cy = bbox["y"] + bbox["height"] / 2 - if not await locator.is_visible(): + if not is_vis: logger.debug( "[hover_element_by_ref] element has bbox but is_visible()=False " "(likely shadow-DOM slot); moving mouse to coordinates directly" @@ -4173,11 +4556,8 @@ async def hover_element_by_ref(self, ref: str) -> str: else: await locator.hover(force=True) else: - covered = await locator.evaluate( - f"(el) => {{ if (window.parent !== window) return false; " - f"const t = document.elementFromPoint({cx}, {cy}); " - f"return !!t && t !== el && !el.contains(t) && !t.contains(el); }}" - ) + _cdp_ctx = self._context if (self._cdp_url and not self._cdp_context_owned) else None + covered = await 
_check_element_covered(locator, cx, cy, cdp_context=_cdp_ctx) if covered: logger.debug("[hover_element_by_ref] covered at (%.1f, %.1f), moving mouse to coordinates", cx, cy) page = await self.get_current_page() @@ -4188,7 +4568,7 @@ async def hover_element_by_ref(self, ref: str) -> str: else: await locator.hover() else: - if not await locator.is_visible(): + if not is_vis: msg = ( f'Could not hover element {ref}: element is not visible and has ' 'no screen coordinates' @@ -4238,9 +4618,10 @@ async def focus_element_by_ref(self, ref: str) -> str: else: logger.debug( "[focus_element_by_ref] is_visible()=False (likely shadow-DOM slot); " - "using el.focus() via evaluate to properly update document.activeElement" + "using el.focus() via focus() to properly update document.activeElement" ) - await locator.evaluate("el => el.focus()") + # locator.focus() has a built-in timeout (unlike evaluate which has none). + await locator.focus() msg = f'Focused element ref {ref}' logger.info(f'[focus_element_by_ref] {msg}') @@ -4277,7 +4658,38 @@ async def evaluate_javascript_on_ref(self, ref: str, code: str) -> str: logger.warning(f'[evaluate_javascript_on_ref] {msg}') _raise_state_error(msg, code="REF_NOT_AVAILABLE", details={"ref": ref}) - result = await locator.evaluate(code) + if self._cdp_url and not self._cdp_context_owned and self._context: + # CDP borrowed mode: try native evaluate first (works on pages + # navigated via page.goto() — including iframe elements). + # Falls back to CDPSession bypass only on truly pre-existing + # tabs where _mainContext() hangs. 
+ try: + result = await asyncio.wait_for(locator.evaluate(code), timeout=5.0) + except (asyncio.TimeoutError, Exception) as native_err: + if isinstance(native_err, asyncio.TimeoutError): + logger.debug( + f'[evaluate_javascript_on_ref] native evaluate timed out ' + f'(pre-existing tab?), falling back to CDPSession bypass' + ) + else: + logger.debug( + f'[evaluate_javascript_on_ref] native evaluate failed: ' + f'{type(native_err).__name__}: {native_err}, ' + f'falling back to CDPSession bypass' + ) + ref_data = self._last_snapshot.refs.get(ref) if self._last_snapshot else None + if ref_data is not None and ref_data.frame_path: + _raise_operation_error( + f"eval-on does not support iframe elements on pre-existing " + f"CDP tabs (ref={ref}, frame_path={ref_data.frame_path}). " + f"Navigate to the page first with 'open', or use 'eval' with " + f"contentDocument.querySelector() as a workaround.", + code="IFRAME_EVAL_NOT_SUPPORTED", + ) + page = await self.get_current_page() + result = await _cdp_evaluate_on_element(self._context, page, locator, code) + else: + result = await asyncio.wait_for(locator.evaluate(code), timeout=30.0) if result is None: result_str = "null" @@ -4323,8 +4735,29 @@ async def upload_file_by_ref(self, ref: str, file_path: str) -> str: logger.warning(f'[upload_file_by_ref] {msg}') _raise_state_error(msg, code="REF_NOT_AVAILABLE", details={"ref": ref}) - tag_name = await locator.evaluate("el => el.tagName.toLowerCase()") - input_type = await locator.get_attribute("type") if tag_name == "input" else None + # Determine tag and type to verify this is a file input. + # In CDP borrowed mode use get_attribute() (utility world) instead of + # locator.evaluate() which hangs. get_attribute("type") works reliably + # because Playwright's attribute queries use the utility world. + if self._cdp_url and not self._cdp_context_owned: + # get_attribute returns None for elements that don't have the attribute, + # and '' for elements that have it but with no value. 
A file input + # always has an explicit type="file" so a None/non-"file" result means + # this isn't a direct file input — fall through to nested-search path. + input_type_attr = await locator.get_attribute("type") + if input_type_attr and input_type_attr.lower() == "file": + tag_name, input_type = "input", "file" + else: + tag_name, input_type = "", None + else: + try: + tag_name = await asyncio.wait_for( + locator.evaluate("el => el.tagName.toLowerCase()"), + timeout=10.0, + ) + except Exception: + tag_name = "" + input_type = await locator.get_attribute("type") if tag_name == "input" else None if tag_name != "input" or input_type != "file": nested = locator.locator("input[type='file']") if await nested.count() > 0: @@ -4450,42 +4883,43 @@ async def check_checkbox_or_radio_by_ref(self, ref: str) -> str: logger.info(f'[check_checkbox_or_radio_by_ref] {msg}') return msg - bbox = await locator.bounding_box() + bbox, is_vis = await asyncio.gather( + locator.bounding_box(), + locator.is_visible(), + ) if is_native: if bbox is not None: cx = bbox["x"] + bbox["width"] / 2 cy = bbox["y"] + bbox["height"] / 2 - if not await locator.is_visible(): + if not is_vis: logger.debug( "[check_checkbox_or_radio_by_ref] native input has bbox but is_visible()=False; " "using dispatch_event click" ) await locator.dispatch_event("click") else: - covered = await locator.evaluate( - f"(el) => {{ if (window.parent !== window) return false; " - f"const t = document.elementFromPoint({cx}, {cy}); " - f"return !!t && t !== el && !el.contains(t) && !t.contains(el); }}" - ) + _cdp_ctx = self._context if (self._cdp_url and not self._cdp_context_owned) else None + covered = await _check_element_covered(locator, cx, cy, cdp_context=_cdp_ctx) if covered: logger.debug("[check_checkbox_or_radio_by_ref] covered at (%.1f, %.1f), clicking intercepting element", cx, cy) page = await self.get_current_page() if page: - await page.evaluate(f"document.elementFromPoint({cx}, {cy})?.click()") + await 
_click_covering_element(page, locator, cx, cy, cdp_context=_cdp_ctx) else: await locator.check(force=True) else: await locator.check() else: - if not await locator.is_visible(): + if not is_vis: logger.debug("[check_checkbox_or_radio_by_ref] native input bbox=None and is_visible()=False; using dispatch_event click") await locator.dispatch_event("click") else: await locator.check() else: + _cdp_ctx = self._context if (self._cdp_url and not self._cdp_context_owned) else None page = await self.get_current_page() - await _click_checkable_target(page, locator, bbox) + await _click_checkable_target(page, locator, bbox, cdp_context=_cdp_ctx) if not await _is_checked(locator): msg = f'Failed to check element {ref}: state is still unchecked' @@ -4547,42 +4981,43 @@ async def uncheck_checkbox_by_ref(self, ref: str) -> str: logger.info(f'[uncheck_checkbox_by_ref] {msg}') return msg - bbox = await locator.bounding_box() + bbox, is_vis = await asyncio.gather( + locator.bounding_box(), + locator.is_visible(), + ) if is_native: if bbox is not None: cx = bbox["x"] + bbox["width"] / 2 cy = bbox["y"] + bbox["height"] / 2 - if not await locator.is_visible(): + if not is_vis: logger.debug( "[uncheck_checkbox_by_ref] native input has bbox but is_visible()=False; " "using dispatch_event click" ) await locator.dispatch_event("click") else: - covered = await locator.evaluate( - f"(el) => {{ if (window.parent !== window) return false; " - f"const t = document.elementFromPoint({cx}, {cy}); " - f"return !!t && t !== el && !el.contains(t) && !t.contains(el); }}" - ) + _cdp_ctx = self._context if (self._cdp_url and not self._cdp_context_owned) else None + covered = await _check_element_covered(locator, cx, cy, cdp_context=_cdp_ctx) if covered: logger.debug("[uncheck_checkbox_by_ref] covered at (%.1f, %.1f), clicking intercepting element", cx, cy) page = await self.get_current_page() if page: - await page.evaluate(f"document.elementFromPoint({cx}, {cy})?.click()") + await 
_click_covering_element(page, locator, cx, cy, cdp_context=_cdp_ctx) else: await locator.uncheck(force=True) else: await locator.uncheck() else: - if not await locator.is_visible(): + if not is_vis: logger.debug("[uncheck_checkbox_by_ref] native input bbox=None and is_visible()=False; using dispatch_event click") await locator.dispatch_event("click") else: await locator.uncheck() else: + _cdp_ctx = self._context if (self._cdp_url and not self._cdp_context_owned) else None page = await self.get_current_page() - await _click_checkable_target(page, locator, bbox) + await _click_checkable_target(page, locator, bbox, cdp_context=_cdp_ctx) is_native_radio = is_native and (await locator.get_attribute("type") or "").strip().lower() == "radio" if not is_native_radio and await _is_checked(locator): @@ -4638,39 +5073,63 @@ async def double_click_element_by_ref(self, ref: str) -> str: logger.warning(f'[double_click_element_by_ref] {msg}') _raise_state_error(msg, code="REF_NOT_AVAILABLE", details={"ref": ref}) - bbox = await locator.bounding_box() + bbox, is_vis = await asyncio.gather( + locator.bounding_box(), + locator.is_visible(), + ) if bbox is not None: cx = bbox["x"] + bbox["width"] / 2 cy = bbox["y"] + bbox["height"] / 2 - if not await locator.is_visible(): + if not is_vis: logger.debug( "[double_click_element_by_ref] element has bbox but is_visible()=False " "(likely shadow-DOM slot); using dispatch_event dblclick" ) await locator.dispatch_event("dblclick") else: - covered = await locator.evaluate( - f"(el) => {{ if (window.parent !== window) return false; " - f"const t = document.elementFromPoint({cx}, {cy}); " - f"return !!t && t !== el && !el.contains(t) && !t.contains(el); }}" - ) + _cdp_ctx = self._context if (self._cdp_url and not self._cdp_context_owned) else None + covered = await _check_element_covered(locator, cx, cy, cdp_context=_cdp_ctx) if covered: logger.debug("[double_click_element_by_ref] covered at (%.1f, %.1f), dispatching dblclick on intercepting 
element", cx, cy) page = await self.get_current_page() if page: - await page.evaluate( + dblclick_expr = ( f"(function(){{" f"const el=document.elementFromPoint({cx},{cy});" f"if(el)el.dispatchEvent(new MouseEvent('dblclick',{{bubbles:true,cancelable:true,view:window}}));" f"}})()" ) + if _cdp_ctx is not None: + session = None + try: + session = await _cdp_ctx.new_cdp_session(page) + await asyncio.wait_for( + session.send("Runtime.evaluate", {"expression": dblclick_expr}), + timeout=5.0, + ) + except Exception: + await locator.dispatch_event("dblclick") + finally: + if session: + try: + await session.detach() + except Exception: + pass + else: + try: + await asyncio.wait_for( + page.evaluate(dblclick_expr), + timeout=10.0, + ) + except Exception: + await locator.dispatch_event("dblclick") else: await locator.dblclick(force=True) else: await locator.dblclick() else: - if not await locator.is_visible(): + if not is_vis: logger.debug("[double_click_element_by_ref] bbox=None and is_visible()=False; using dispatch_event dblclick") await locator.dispatch_event("dblclick") else: @@ -6097,21 +6556,66 @@ async def restore_storage_state(self, filename: str) -> str: if cookies: await context.add_cookies(cookies) + _skipped_ls_items: list[str] = [] origins = state.get("origins", []) for origin_data in origins: origin = origin_data.get("origin", "") local_storage = origin_data.get("localStorage", []) if local_storage and origin: - for item in local_storage: - name = item.get("name", "") - value = item.get("value", "") - if name: - await page.evaluate( - f"localStorage.setItem({json.dumps(name)}, {json.dumps(value)})" - ) + if self._cdp_url and not self._cdp_context_owned and self._context: + # CDP borrowed mode: page.evaluate() hangs. Use DOMStorage CDP protocol. + # DOMStorage.setDOMStorageItem may fail with "Frame not found" when + # the target origin has no active frame — this is expected in CDP + # borrowed mode. 
Collect failures and warn rather than hard-fail, + # because cookies have already been restored successfully. + session = await self._context.new_cdp_session(page) + try: + storage_id = {"storageId": {"securityOrigin": origin, "isLocalStorage": True}} + for item in local_storage: + name = item.get("name", "") + value = item.get("value", "") + if name: + try: + await asyncio.wait_for( + session.send("DOMStorage.setDOMStorageItem", { + **storage_id, + "key": name, + "value": value, + }), + timeout=5.0, + ) + except Exception as _ls_err: + logger.debug( + "[restore_storage_state] localStorage item skipped " + "(origin=%s key=%s): %s", + origin, name, _ls_err, + ) + _skipped_ls_items.append(f"{origin}/{name}") + finally: + try: + await session.detach() + except Exception: + pass + else: + for item in local_storage: + name = item.get("name", "") + value = item.get("value", "") + if name: + await asyncio.wait_for( + page.evaluate( + f"localStorage.setItem({json.dumps(name)}, {json.dumps(value)})" + ), + timeout=10.0, + ) result = f"Storage state restored from: {filename} ({len(cookies)} cookies)" + if _skipped_ls_items: + result += ( + f". Warning: {len(_skipped_ls_items)} localStorage item(s) could not be" + " restored in CDP borrowed mode (navigate to the target origin first," + " then call storage-load again to apply localStorage)" + ) logger.info(f"[restore_storage_state] done {result}") return result except BridgicBrowserError: @@ -6711,7 +7215,7 @@ async def verify_title(self, expected_title: str, exact: bool = False) -> str: if page is None: _raise_state_error("No active page available", code="NO_ACTIVE_PAGE") - actual_title = await page.title() + actual_title = await self._get_page_title(page) if exact: matches = actual_title == expected_title @@ -6871,52 +7375,44 @@ def _allocate_video_temp_path() -> str: pass return path - async def _start_page_video_recorder(self, page: Page) -> None: - """Start a VideoRecorder for one page within the active session. 
- - Idempotent — a no-op if the session is inactive or the page is - already being recorded. Mirrors Playwright CLI's - ``Context._startPageVideo`` (``tools/backend/context.ts``). - - In CDP mode all pages in the context are recorded. - """ - if self._video_session is None: + async def _switch_video_to_page(self, new_page: "Page") -> None: + """If recording active, switch screencast to *new_page*. No-op otherwise.""" + if self._video_recorder is None or self._video_session is None: return - if page in self._video_recorders: + if self._video_recorder.current_page == new_page: return - if page.is_closed(): + if new_page.is_closed(): return + try: + await self._video_recorder.switch_page(new_page) + except Exception as e: + logger.warning("[video] switch_page failed: %s", e) + async def _start_single_video_recorder(self, page: "Page") -> None: + """Start the single-stream recorder targeting *page*.""" + if self._video_session is None or page.is_closed(): + return output_path = self._allocate_video_temp_path() w = int(self._video_session["width"]) h = int(self._video_session["height"]) recorder = _video_recorder_mod.VideoRecorder( page.context, page, output_path, (w, h), ) - try: - await recorder.start() - except Exception as e: - logger.warning( - "[start_video] failed to start recorder on page %s: %s", page, e, - ) - return - self._video_recorders[page] = recorder - logger.info("[start_video] recording page → %s", output_path) + await recorder.start() + self._video_recorder = recorder + logger.info("[start_video] recording active tab → %s", output_path) async def start_video( self, width: Optional[int] = None, height: Optional[int] = None, ) -> str: - """Start video recording on ALL pages in the context. + """Start single-stream video recording on the active tab. 
- Mirrors the Playwright CLI behaviour - (``packages/playwright-core/src/tools/backend/context.ts`` — - ``startVideoRecording``): a single call records every page in the - browser context, and any page opened afterwards is auto-recorded - to its own .webm file. Uses CDP ``Page.startScreencast`` to - capture frames and pipes them to ffmpeg for VP8/WebM encoding — - no Playwright RPC streaming needed. + One ffmpeg process records the currently active page. When the + user switches tabs (via ``switch_tab``, ``new_tab``, etc.) the + CDP screencast source is hot-swapped to the new page — ffmpeg + stays alive and the output is a single continuous .webm file. Parameters ---------- @@ -6931,8 +7427,7 @@ async def start_video( Returns ------- str - "Video recording started" (plus the number of pages being - recorded). + "Video recording started (recording active tab)". """ logger.info(f"[start_video] start width={width} height={height}") @@ -6979,16 +7474,27 @@ async def start_video( viewport_width = 1280 viewport_height = 720 try: - dims = await page.evaluate( - "() => ({w: window.innerWidth, h: window.innerHeight})" - ) - qw = int(dims.get("w") or 0) - qh = int(dims.get("h") or 0) + # Use CDP Page.getLayoutMetrics instead of page.evaluate() — avoids the + # Playwright _mainContext() hang on pre-existing tabs in CDP borrowed mode. + _session = await self._context.new_cdp_session(page) + try: + _metrics = await asyncio.wait_for( + _session.send("Page.getLayoutMetrics"), + timeout=5.0, + ) + finally: + try: + await _session.detach() + except Exception: + pass + _vp = _metrics.get("cssVisualViewport", {}) + qw = int(_vp.get("clientWidth") or 0) + qh = int(_vp.get("clientHeight") or 0) if qw > 0 and qh > 0: viewport_width = qw viewport_height = qh else: - raise ValueError(f"non-positive dimensions: {dims}") + raise ValueError(f"non-positive dimensions from CDP: {_vp}") except Exception as exc: # Fall back to viewport_size, then the hard default above. 
Logged # but non-fatal so a hardened CSP page can still record. @@ -7004,7 +7510,7 @@ async def start_video( w = (width or viewport_width) & ~1 h = (height or viewport_height) & ~1 - # Build the session record up front so _start_page_video_recorder + # Build the session record up front so _start_single_video_recorder # picks up the parameters. From this point on, any failure must # roll back the partially-set-up session state. self._video_session = { @@ -7013,57 +7519,43 @@ async def start_video( "context": context, "page_listener": None, } - self._video_recorders = {} + self._video_recorder = None self._video_state[context_key] = True try: - # Start a recorder on every existing page. - # Mirrors Playwright CLI's context.ts ``startVideoRecording``: - # for (const page of browserContext.pages()) - # await this._startPageVideo(page); - # - # CDP borrowed context: all pages in the context are recorded, - # including the user's pre-existing tabs. - existing_pages = [p for p in context.pages if not p.is_closed()] - for p in existing_pages: - try: - await self._start_page_video_recorder(p) - except Exception as e: - logger.warning("[start_video] page start failed: %s", e) - - # Subscribe to future pages so newly opened tabs are also - # recorded. Mirrors Playwright CLI's context.ts - # ``_onPageCreated`` → ``_startPageVideo``. Playwright - # Python's context.on("page") calls the handler synchronously - # with the new Page, so async work has to be scheduled as a - # task. + # Single-stream: start one recorder on the active page. + await self._start_single_video_recorder(page) + if self._video_recorder is None: + raise RuntimeError("Failed to start video recorder on active page") + + # Subscribe to future pages so newly opened tabs auto-switch + # the screencast source to the new page. 
def _on_page_created(new_page: Page) -> None: try: asyncio.get_running_loop().create_task( - self._start_page_video_recorder(new_page), + self._switch_video_to_page(new_page), ) except RuntimeError: logger.warning( - "[start_video] no running loop to record new page", + "[start_video] no running loop to switch video to new page", ) context.on("page", _on_page_created) self._video_session["page_listener"] = _on_page_created - count = len(self._video_recorders) - result = f"Video recording started ({count} page{'s' if count != 1 else ''})" + result = "Video recording started (recording active tab)" logger.info("[start_video] %s", result) return result except Exception as e: # Rollback the session state we set up above so future # start_video() calls are not blocked by a phantom session. self._video_session = None - for _rec in list(self._video_recorders.values()): + if self._video_recorder is not None: try: - await _rec.stop() + await self._video_recorder.stop() except Exception: pass - self._video_recorders.clear() + self._video_recorder = None self._video_state.pop(context_key, None) if isinstance(e, BridgicBrowserError): raise @@ -7169,28 +7661,22 @@ def _resolve_multi_video_dests( return [base_abs if i == 0 else f"{stem}-{i}{ext}" for i in range(count)] async def stop_video(self, filename: Optional[str] = None) -> str: - """Stop video recording on all pages and save the files. + """Stop video recording and save the file. Files are saved immediately — no need to wait for browser close. - Mirrors Playwright CLI context.ts ``stopVideoRecording``: returns - one .webm file per page that was being recorded. Parameters ---------- filename : Optional[str], optional - Destination for the video files. Accepts a file path + Destination for the video file. Accepts a file path (``./videos/demo.webm``) or a directory (``./videos/``). The ``.webm`` extension is added automatically when missing. 
- When multiple pages are recorded and ``filename`` is a single - file path, the first file uses the given name and subsequent - files get a ``-1``, ``-2``, … suffix inserted before the - extension. If not provided, the files stay in the temporary - directory. + If not provided, the file stays in the temporary directory. Returns ------- str - Confirmation with the saved file path(s). + Confirmation with the saved file path. """ try: logger.info(f"[stop_video] start filename={filename}") @@ -7199,14 +7685,14 @@ async def stop_video(self, filename: Optional[str] = None) -> str: _raise_state_error("No context is open", code="NO_CONTEXT") context_key = _get_context_key(self._context) - if self._video_session is None and not self._video_recorders: + if self._video_session is None and self._video_recorder is None: _raise_state_error( "No active video recording. Use video-start first.", code="NO_ACTIVE_RECORDING", ) # Detach page-creation listener so stopping recording in - # parallel with a tab open doesn't race into a new recorder. + # parallel with a tab open doesn't race into a switch. if self._video_session is not None: listener = self._video_session.get("page_listener") if listener is not None: @@ -7215,54 +7701,41 @@ async def stop_video(self, filename: Optional[str] = None) -> str: except Exception: pass - # Snap the recorder dict to a local list first so a concurrent - # close() won't also try to stop them. - recorders = list(self._video_recorders.items()) - self._video_recorders = {} + # Snap the recorder to a local var so a concurrent close() + # won't also try to stop it. + recorder = self._video_recorder + self._video_recorder = None self._video_session = None self._video_state[context_key] = False - if not recorders: - return "Video recording stopped (no pages were recorded)" + if recorder is None: + return "Video recording stopped (no recorder was active)" - # Stop every recorder; preserve order of pages. 
- async def _stop_one( - rec: "_video_recorder_mod.VideoRecorder", - ) -> Optional[str]: - try: - return await rec.stop() - except Exception as exc: - logger.warning("[stop_video] recorder stop failed: %s", exc) - return None - - temp_paths: List[Optional[str]] = [] - for _page_ref, _rec in recorders: - temp_paths.append(await _stop_one(_rec)) - good_paths = [p for p in temp_paths if p] + # Stop the single recorder. + try: + temp_path: str = await asyncio.wait_for( + recorder.stop(), timeout=30.0, + ) + except Exception as exc: + logger.warning("[stop_video] recorder stop failed: %s", exc) + return "Video recording stopped (file may be incomplete)" - if not good_paths: - return "Video recording stopped (no files were produced)" + if not temp_path or not os.path.isfile(temp_path): + return "Video recording stopped (no file was produced)" - dests = self._resolve_multi_video_dests(filename, len(good_paths)) - if dests is None: - saved = list(good_paths) - else: - saved = [] - for src, dest in zip(good_paths, dests): - try: - self._move_video_local(Path(src), dest) - saved.append(dest) - except Exception as move_err: - logger.error( - "[stop_video] move failed, file stays at: %s (%s)", - src, move_err, - ) - saved.append(src) + # Move to user destination if requested. 
+ if filename is not None: + dest = self._resolve_video_dest(filename) + try: + self._move_video_local(Path(temp_path), dest) + temp_path = dest + except Exception as move_err: + logger.error( + "[stop_video] move failed, file stays at: %s (%s)", + temp_path, move_err, + ) - if len(saved) == 1: - result = f"Video saved to: {saved[0]}" - else: - result = "Video files saved:\n" + "\n".join(saved) + result = f"Video saved to: {temp_path}" logger.info(f"[stop_video] done: {result}") return result except BridgicBrowserError: diff --git a/bridgic/browser/session/_video_recorder.py b/bridgic/browser/session/_video_recorder.py index 6723f8b..ef95fdf 100644 --- a/bridgic/browser/session/_video_recorder.py +++ b/bridgic/browser/session/_video_recorder.py @@ -340,23 +340,29 @@ async def start(self) -> None: self._width, self._height, self._output_path, ) - async def stop(self) -> str: - """Stop recording and return the output path. + async def prepare_stop(self) -> None: + """Phase 1: stop screencast, pad frames, detach CDP session. - On return the file is fully written. The shutdown sequence mirrors - ``stop()`` in Playwright's videoRecorder.ts (lines 130-155). + Fast (~milliseconds). Must be called while Chrome is still alive + so that ``Page.stopScreencast`` can reach Chrome. After this + method returns, the CDP session is detached and Chrome resources + are released — ``finalize()`` can run even after Chrome exits. + + Idempotent: calling twice is safe (second call is a no-op). """ if self._is_stopped: - return self._output_path + return + logger.debug("[VideoRecorder] prepare_stop step1: stopScreencast %s", self._output_path) # Step 1: tell Chrome to stop pushing frames. # Reference: CDP Page.stopScreencast. 
if self._cdp_session: try: await self._cdp_session.send("Page.stopScreencast") - except Exception: - pass + except Exception as _e: + logger.debug("[VideoRecorder] stopScreencast err: %s(%s)", type(_e).__name__, _e) + logger.debug("[VideoRecorder] prepare_stop step2: ensure frame %s", self._output_path) # Step 2: make sure at least one frame has been written. ffmpeg # refuses to produce a valid container with an empty input stream. # Reference: videoRecorder.ts lines 136-138. @@ -364,6 +370,7 @@ async def stop(self) -> str: white = _create_white_jpeg(self._width, self._height) self._write_frame(white, time.monotonic()) + logger.debug("[VideoRecorder] prepare_stop step3: pad tail %s", self._output_path) # Step 3: pad the tail with ≥1 second of the last frame so the # output never ends abruptly. Sending an empty frame (b"") is the # sentinel that tells _write_frame to advance the frame counter @@ -374,6 +381,36 @@ async def stop(self) -> str: self._is_stopped = True + logger.debug("[VideoRecorder] prepare_stop step6: detach CDP %s", self._output_path) + # Step 6 (moved here from old stop()): detach the CDP session + # early so Chrome resources are released before finalize(). + if self._cdp_session: + try: + await self._cdp_session.detach() + except Exception: + pass + self._cdp_session = None + + logger.debug("[VideoRecorder] prepare_stop done: %s", self._output_path) + + async def finalize(self) -> str: + """Phase 2: flush frames to ffmpeg, close stdin, wait for exit. + + Returns the output file path. Chrome can be dead — this method + only needs the ffmpeg subprocess. If ``prepare_stop()`` was not + called beforehand, it is called automatically as a safety fallback. 
+ """ + if not self._is_stopped: + try: + await asyncio.wait_for(self.prepare_stop(), timeout=10.0) + except asyncio.TimeoutError: + logger.warning( + "[VideoRecorder] finalize: prepare_stop fallback timed out, " + "force-marking stopped: %s", self._output_path, + ) + self._is_stopped = True + self._cdp_session = None + # Step 4: drain any frames still queued for ffmpeg's stdin. await self._flush_queue() @@ -385,21 +422,71 @@ async def stop(self) -> str: except Exception: pass try: - await asyncio.wait_for(self._ffmpeg.wait(), timeout=10.0) + await asyncio.wait_for(self._ffmpeg.wait(), timeout=15.0) except asyncio.TimeoutError: self._ffmpeg.kill() logger.warning("[VideoRecorder] ffmpeg killed after timeout") - # Step 6: detach the CDP session. + logger.debug("[VideoRecorder] finalize done → %s", self._output_path) + return self._output_path + + async def stop(self) -> str: + """Full stop (convenience): ``prepare_stop()`` + ``finalize()``. + + On return the file is fully written. The shutdown sequence mirrors + ``stop()`` in Playwright's videoRecorder.ts (lines 130-155). + + For the two-phase flow used by ``Browser.close()`` — where Chrome + must exit between the two phases — call ``prepare_stop()`` and + ``finalize()`` separately instead. + """ + await self.prepare_stop() + return await self.finalize() + + async def switch_page(self, new_page: Any) -> None: + """Switch screencast source to a different page. 
ffmpeg stays alive.""" + if self._is_stopped: + return + if new_page == self._page: + return + + # Tear down old screencast if self._cdp_session: + try: + await self._cdp_session.send("Page.stopScreencast") + except Exception: + pass + try: + self._cdp_session.remove_listener( + "Page.screencastFrame", self._on_screencast_frame, + ) + except Exception: + pass try: await self._cdp_session.detach() except Exception: pass + + # Set up new screencast on the new page + self._page = new_page + try: + self._cdp_session = await self._context.new_cdp_session(new_page) + self._cdp_session.on("Page.screencastFrame", self._on_screencast_frame) + await self._cdp_session.send("Page.startScreencast", { + "format": "jpeg", + "quality": 95, + "maxWidth": self._width, + "maxHeight": self._height, + }) + logger.debug("[VideoRecorder] switched screencast to new page") + except Exception as e: + logger.warning("[VideoRecorder] switch_page CDP setup failed: %s", e) self._cdp_session = None - logger.debug("[VideoRecorder] stopped → %s", self._output_path) - return self._output_path + @property + def current_page(self) -> Any: + """The page currently being recorded.""" + return self._page @property def output_path(self) -> str: diff --git a/tests/conftest.py b/tests/conftest.py index ef295f2..6b7115e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -83,14 +83,27 @@ def mock_page() -> MagicMock: @pytest.fixture -def mock_context(mock_page: MagicMock) -> MagicMock: +def mock_cdp_session() -> MagicMock: + """Create a mock Playwright CDPSession with a default Page.getLayoutMetrics response.""" + session = MagicMock() + session.send = AsyncMock(return_value={ + "cssLayoutViewport": {"clientWidth": 1920, "clientHeight": 1080, "pageX": 0, "pageY": 0}, + "cssContentSize": {"width": 1920, "height": 3000}, + "cssVisualViewport": {"clientWidth": 1920, "clientHeight": 1080}, + }) + session.detach = AsyncMock() + return session + + +@pytest.fixture +def mock_context(mock_page: MagicMock, 
mock_cdp_session: MagicMock) -> MagicMock: """Create a mock Playwright BrowserContext object.""" context = MagicMock() context.pages = [mock_page] context.new_page = AsyncMock(return_value=mock_page) context.close = AsyncMock() context.browser = None # Playwright persistent contexts return None for .browser - context.new_cdp_session = AsyncMock() + context.new_cdp_session = AsyncMock(return_value=mock_cdp_session) context.add_init_script = AsyncMock() return context diff --git a/tests/unit/test_browser.py b/tests/unit/test_browser.py index 585ed10..0ac484b 100644 --- a/tests/unit/test_browser.py +++ b/tests/unit/test_browser.py @@ -562,8 +562,10 @@ async def test_stop_auto_saves_active_trace_and_video(self, mock_playwright): _tmp_video_fd, _tmp_video_path = tempfile.mkstemp(suffix=".webm") os.close(_tmp_video_fd) mock_recorder = MagicMock() + mock_recorder.prepare_stop = AsyncMock() + mock_recorder.finalize = AsyncMock(return_value=_tmp_video_path) mock_recorder.stop = AsyncMock(return_value=_tmp_video_path) - browser._video_recorders = {page: mock_recorder} + browser._video_recorder = mock_recorder browser._video_session = { "width": 800, "height": 600, "context": context, "page_listener": lambda *_: None, @@ -581,7 +583,8 @@ async def test_stop_auto_saves_active_trace_and_video(self, mock_playwright): assert "close-" in trace_path assert trace_path.endswith("trace.zip") - mock_recorder.stop.assert_awaited_once() + mock_recorder.prepare_stop.assert_awaited_once() + mock_recorder.finalize.assert_awaited_once() assert browser._last_shutdown_artifacts["trace"] == [os.path.abspath(trace_path)] assert len(browser._last_shutdown_artifacts["video"]) == 1 video_path = browser._last_shutdown_artifacts["video"][0] @@ -615,8 +618,10 @@ async def test_stop_reports_auto_saved_paths(self, mock_playwright): _tmp_video_fd, _tmp_video_path = tempfile.mkstemp(suffix=".webm") os.close(_tmp_video_fd) mock_recorder = MagicMock() + mock_recorder.prepare_stop = AsyncMock() + 
mock_recorder.finalize = AsyncMock(return_value=_tmp_video_path) mock_recorder.stop = AsyncMock(return_value=_tmp_video_path) - browser._video_recorders = {page: mock_recorder} + browser._video_recorder = mock_recorder browser._video_session = { "width": 800, "height": 600, "context": context, "page_listener": lambda *_: None, @@ -655,8 +660,10 @@ async def test_close_auto_stops_cdp_recorder(self, mock_playwright): _tmp_fd, _tmp_path = tempfile.mkstemp(suffix=".webm") os.close(_tmp_fd) mock_recorder = MagicMock() + mock_recorder.prepare_stop = AsyncMock() + mock_recorder.finalize = AsyncMock(return_value=_tmp_path) mock_recorder.stop = AsyncMock(return_value=_tmp_path) - browser._video_recorders = {page: mock_recorder} + browser._video_recorder = mock_recorder browser._video_session = { "width": 800, "height": 600, "context": context, "page_listener": lambda *_: None, @@ -667,15 +674,16 @@ async def test_close_auto_stops_cdp_recorder(self, mock_playwright): await browser.close() - mock_recorder.stop.assert_awaited_once() - assert browser._video_recorders == {} + mock_recorder.prepare_stop.assert_awaited_once() + mock_recorder.finalize.assert_awaited_once() + assert browser._video_recorder is None assert browser._video_session is None assert len(browser._last_shutdown_artifacts["video"]) == 1 assert context_key not in browser._video_state @pytest.mark.asyncio - async def test_close_page_auto_stops_recorder(self, mock_playwright): - """_close_page() should auto-stop the recorder when closing the recorded page.""" + async def test_close_page_switches_recorder_to_remaining_tab(self, mock_playwright): + """_close_page() should switch the recorder to a remaining page, not stop it.""" from bridgic.browser.session import _browser as browser_module with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: @@ -694,13 +702,15 @@ async def test_close_page_auto_stops_recorder(self, mock_playwright): second_page.url = "https://example.com/2" second_page.title = 
AsyncMock(return_value="Page 2") second_page.close = AsyncMock() + second_page.is_closed = MagicMock(return_value=False) context.pages = [page, second_page] - # Set up mock recorder on the current page only (second page - # is not being recorded in this scenario) + # Set up mock recorder recording the current page mock_recorder = MagicMock() - mock_recorder.stop = AsyncMock(return_value="/tmp/rec.webm") - browser._video_recorders = {page: mock_recorder} + mock_recorder.is_stopped = False + mock_recorder.current_page = page + mock_recorder.switch_page = AsyncMock() + browser._video_recorder = mock_recorder browser._video_session = { "width": 800, "height": 600, "context": context, "page_listener": lambda *_: None, @@ -709,16 +719,15 @@ async def test_close_page_auto_stops_recorder(self, mock_playwright): context_key = browser_module._get_context_key(context) browser._video_state[context_key] = True - # Close the page that has the recorder + # Close the page that is being recorded success, msg = await browser._close_page(page) assert success - # Recorder for the closed page should have been stopped and removed - mock_recorder.stop.assert_awaited_once() - assert page not in browser._video_recorders - # Session stays active since other tabs may still be recording + # Recorder should have been switched to the remaining page, not stopped + mock_recorder.switch_page.assert_awaited_once_with(second_page) + # Recorder is still active + assert browser._video_recorder is mock_recorder assert browser._video_session is not None - assert browser._video_state.get(context_key) is True @pytest.mark.asyncio async def test_stop_warns_on_trace_finalize_failure(self, mock_playwright): @@ -859,8 +868,10 @@ async def test_last_close_properties_populated_after_trace_video_close(self, moc _tmp_video_fd, _tmp_video_path = tempfile.mkstemp(suffix=".webm") os.close(_tmp_video_fd) mock_recorder = MagicMock() + mock_recorder.prepare_stop = AsyncMock() + mock_recorder.finalize = 
AsyncMock(return_value=_tmp_video_path) mock_recorder.stop = AsyncMock(return_value=_tmp_video_path) - browser._video_recorders = {page: mock_recorder} + browser._video_recorder = mock_recorder browser._video_session = { "width": 800, "height": 600, "context": context, "page_listener": lambda *_: None, @@ -981,6 +992,136 @@ async def test_launch_mode_close_records_page_close_failure(self, mock_playwrigh assert browser._context is None +class TestSingleVideoRecorderClose: + """Tests verifying single-stream video recorder lifecycle during close(). + + close() uses a two-phase shutdown: + Phase 1: prepare_stop() the single recorder (fast, while Chrome alive) + Phase 2: finalize() the single recorder (slow, after Chrome exits) + """ + + @pytest.mark.asyncio + async def test_close_finalize_success(self, mock_playwright): + """close() must finalize the single recorder and move the video file.""" + import tempfile as _tempfile + from bridgic.browser.session import _browser as browser_module + + with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_playwright) + + browser = Browser(stealth=False) + await browser._start() + + context = browser._context + assert context is not None + context.remove_listener = MagicMock() + + _fd, _temp_path = _tempfile.mkstemp(suffix=".webm") + os.close(_fd) + + page = MagicMock() + page.close = AsyncMock() + rec = MagicMock() + rec.prepare_stop = AsyncMock() + rec.finalize = AsyncMock(return_value=_temp_path) + + context.pages = [page] + browser._video_recorder = rec + browser._video_session = { + "width": 800, "height": 600, "context": context, + "page_listener": lambda *_: None, + } + context_key = browser_module._get_context_key(context) + browser._video_state[context_key] = True + + await browser.close() + + rec.prepare_stop.assert_awaited_once() + rec.finalize.assert_awaited_once() + assert len(browser._last_shutdown_artifacts["video"]) == 1 + + 
@pytest.mark.asyncio + async def test_close_collects_finalize_timeout_error(self, mock_playwright): + """close() must collect the timeout error from finalize, not raise.""" + from bridgic.browser.session import _browser as browser_module + + with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_playwright) + + browser = Browser(stealth=False) + await browser._start() + + context = browser._context + assert context is not None + context.remove_listener = MagicMock() + + async def timeout_finalize(): + raise asyncio.TimeoutError() + + page = MagicMock() + page.close = AsyncMock() + rec = MagicMock() + rec.prepare_stop = AsyncMock() + rec.finalize = timeout_finalize + + context.pages = [page] + browser._video_recorder = rec + browser._video_session = { + "width": 800, "height": 600, "context": context, + "page_listener": lambda *_: None, + } + context_key = browser_module._get_context_key(context) + browser._video_state[context_key] = True + + await browser.close() # must not raise + + timeout_errors = [ + e for e in browser._last_shutdown_errors + if "video_recorder.finalize: timeout" in e + ] + assert len(timeout_errors) == 1 + + @pytest.mark.asyncio + async def test_close_re_raises_cancelled_error_from_recorder(self, mock_playwright): + """CancelledError from finalize is stored and re-raised after cleanup.""" + from bridgic.browser.session import _browser as browser_module + + with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_playwright) + + browser = Browser(stealth=False) + await browser._start() + + context = browser._context + assert context is not None + context.remove_listener = MagicMock() + + async def cancelling_finalize() -> str: + raise asyncio.CancelledError("simulated task cancellation") + + page = MagicMock() + page.close = AsyncMock() + rec = MagicMock() + rec.prepare_stop = AsyncMock() 
+ rec.finalize = cancelling_finalize + + context.pages = [page] + browser._video_recorder = rec + browser._video_session = { + "width": 800, "height": 600, "context": context, + "page_listener": lambda *_: None, + } + context_key = browser_module._get_context_key(context) + browser._video_state[context_key] = True + + with pytest.raises(asyncio.CancelledError): + await browser.close() + + assert any( + "video_recorder.finalize" in e for e in browser._last_shutdown_errors + ) + + class TestBrowserNavigation: """Tests for Browser navigation methods.""" @@ -2020,7 +2161,6 @@ def test_find_cdp_url_localhost_returns_connection_error_when_port_dead(self): # Error message must mention the port and not look like a proxy error. msg = str(exc_info.value) assert str(dead_port) in msg, f"Expected port {dead_port} in error: {msg}" - assert "502" not in msg, f"Error must not mention 502 Bad Gateway: {msg}" assert "Bad Gateway" not in msg, f"Error must not mention Bad Gateway: {msg}" @@ -2046,3 +2186,187 @@ def test_in_all(self): import bridgic.browser as pkg assert "find_cdp_url" in pkg.__all__ assert "resolve_cdp_input" in pkg.__all__ + + +# ───────────────────────────────────────────────────────────────────────────── +# get_page_size_info +# ───────────────────────────────────────────────────────────────────────────── + +class TestGetPageSizeInfo: + """Tests for Browser.get_page_size_info (CDP Page.getLayoutMetrics path).""" + + @pytest.mark.asyncio + async def test_returns_page_size_info_from_cdp(self, mock_playwright, mock_page, mock_context, mock_cdp_session): + """Successful CDP Page.getLayoutMetrics returns a populated PageSizeInfo.""" + from bridgic.browser.session._browser_model import PageSizeInfo + + mock_cdp_session.send = AsyncMock(return_value={ + "cssLayoutViewport": {"clientWidth": 1280, "clientHeight": 800, "pageX": 0, "pageY": 200}, + "cssContentSize": {"width": 1280, "height": 4000}, + "cssVisualViewport": {"clientWidth": 1280, "clientHeight": 800}, + }) + + 
with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_playwright) + browser = Browser(stealth=False) + await browser._start() + + result = await browser.get_page_size_info() + + assert isinstance(result, PageSizeInfo) + assert result.viewport_width == 1280 + assert result.viewport_height == 800 + assert result.page_height == 4000 + assert result.scroll_y == 200 + assert result.pixels_above == 200 + assert result.pixels_below == 4000 - 800 - 200 + + @pytest.mark.asyncio + async def test_returns_none_when_no_page(self): + """Returns None immediately when no page is open.""" + browser = Browser(stealth=False) + assert browser._page is None + result = await browser.get_page_size_info() + assert result is None + + @pytest.mark.asyncio + async def test_returns_none_when_evaluate_raises(self, mock_playwright, mock_page, mock_context, mock_cdp_session): + """Returns None gracefully when CDP session send fails.""" + mock_cdp_session.send = AsyncMock(side_effect=RuntimeError("cdp failed")) + + with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_playwright) + browser = Browser(stealth=False) + await browser._start() + + result = await browser.get_page_size_info() + + assert result is None + + @pytest.mark.asyncio + async def test_cdp_session_created_and_detached(self, mock_playwright, mock_page, mock_context, mock_cdp_session): + """Verify CDP session is opened for Page.getLayoutMetrics and detached afterwards.""" + with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_playwright) + browser = Browser(stealth=False) + await browser._start() + + await browser.get_page_size_info() + + mock_context.new_cdp_session.assert_called_once_with(mock_page) + mock_cdp_session.send.assert_called_once_with("Page.getLayoutMetrics") + 
mock_cdp_session.detach.assert_called_once() + + +# ───────────────────────────────────────────────────────────────────────────── +# get_full_page_info +# ───────────────────────────────────────────────────────────────────────────── + +class TestGetFullPageInfo: + """Tests for Browser.get_full_page_info concurrent fetch behavior.""" + + @pytest.mark.asyncio + async def test_returns_full_page_info_on_success(self, mock_playwright, mock_page, mock_context): + """Returns FullPageInfo combining snapshot tree and page size data.""" + from bridgic.browser.session._browser_model import FullPageInfo + + fake_snapshot = MagicMock() + fake_snapshot.tree = "- button \"Go\" [ref=abc]" + + with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_playwright) + browser = Browser(stealth=False) + await browser._start() + browser.get_snapshot = AsyncMock(return_value=fake_snapshot) + + result = await browser.get_full_page_info() + + assert isinstance(result, FullPageInfo) + assert result.tree == fake_snapshot.tree + + @pytest.mark.asyncio + async def test_returns_none_when_no_page(self): + """Returns None immediately when no page is open.""" + browser = Browser(stealth=False) + assert browser._page is None + result = await browser.get_full_page_info() + assert result is None + + @pytest.mark.asyncio + async def test_returns_none_when_snapshot_raises(self, mock_playwright, mock_page): + """Returns None when get_snapshot raises.""" + with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_playwright) + browser = Browser(stealth=False) + await browser._start() + browser.get_snapshot = AsyncMock(side_effect=RuntimeError("snap failed")) + + result = await browser.get_full_page_info() + + assert result is None + + @pytest.mark.asyncio + async def test_returns_none_when_page_info_fails(self, mock_playwright, mock_page, mock_context, 
mock_cdp_session): + """Returns None when get_page_size_info returns None (CDP send failed).""" + fake_snapshot = MagicMock() + fake_snapshot.tree = "- heading \"Hi\"" + + mock_cdp_session.send = AsyncMock(side_effect=RuntimeError("cdp error")) + + with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_playwright) + browser = Browser(stealth=False) + await browser._start() + browser.get_snapshot = AsyncMock(return_value=fake_snapshot) + + result = await browser.get_full_page_info() + + assert result is None + + @pytest.mark.asyncio + async def test_snapshot_and_page_info_run_concurrently(self, mock_playwright, mock_page, mock_context, mock_cdp_session): + """get_snapshot and get_page_size_info must overlap in time (asyncio.gather).""" + call_log: list[str] = [] + snapshot_started = asyncio.Event() + page_info_started = asyncio.Event() + + async def _slow_snapshot(*args, **kwargs): + call_log.append("snapshot:start") + snapshot_started.set() + await asyncio.sleep(0) # yield to let get_page_size_info start + await page_info_started.wait() + call_log.append("snapshot:end") + snap = MagicMock() + snap.tree = "- button" + return snap + + async def _slow_cdp_send(*args, **kwargs): + call_log.append("page_info:start") + page_info_started.set() + await snapshot_started.wait() + return { + "cssLayoutViewport": {"clientWidth": 1280, "clientHeight": 800, "pageX": 0, "pageY": 0}, + "cssContentSize": {"width": 1280, "height": 2000}, + "cssVisualViewport": {"clientWidth": 1280, "clientHeight": 800}, + } + + mock_cdp_session.send = AsyncMock(side_effect=_slow_cdp_send) + + with patch("bridgic.browser.session._browser.async_playwright") as mock_ap: + mock_ap.return_value.start = AsyncMock(return_value=mock_playwright) + browser = Browser(stealth=False) + await browser._start() + browser.get_snapshot = AsyncMock(side_effect=_slow_snapshot) + + result = await browser.get_full_page_info() + + assert 
result is not None + # Both must have started before either finished — proving concurrency. + assert "snapshot:start" in call_log + assert "page_info:start" in call_log + snapshot_end_idx = call_log.index("snapshot:end") + page_info_start_idx = call_log.index("page_info:start") + # page_info started before snapshot finished → they overlapped + assert page_info_start_idx < snapshot_end_idx, ( + "page_info should have started before snapshot finished (concurrent)" + ) diff --git a/tests/unit/test_browser_methods.py b/tests/unit/test_browser_methods.py index 379d787..6ab0057 100644 --- a/tests/unit/test_browser_methods.py +++ b/tests/unit/test_browser_methods.py @@ -82,7 +82,7 @@ def _make_browser_with_mock_page() -> tuple: browser._dialog_handlers = {} browser._tracing_state = {} browser._video_state = {} - browser._video_recorders = {} + browser._video_recorder = None browser._video_session = None # CDP-mode attributes — required by start_video / get_pages / _close_page # which inspect them to decide whether to filter out user tabs. Tests in @@ -130,9 +130,8 @@ async def test_stop_tracing_guard(): @pytest.mark.asyncio async def test_start_video_uses_window_inner_dimensions_not_viewport_size(): - """Regression: start_video() must derive its recording size from - ``window.innerWidth/innerHeight`` (queried via JS), NOT from - ``page.viewport_size``. + """Regression: start_video() must derive its recording size from CDP + Page.getLayoutMetrics, NOT from ``page.viewport_size``. In CDP attach mode bridgic never calls ``setViewportSize`` on the foreign Chrome, so ``page.viewport_size`` returns ``None`` and the @@ -141,7 +140,7 @@ async def test_start_video_uses_window_inner_dimensions_not_viewport_size(): within 800×600, which: 1. blurred the page (37% downscale) 2. left a gray strip at the bottom from ffmpeg's pad filter - Querying the page directly avoids both. + Querying via CDP avoids both. 
""" browser = _make_browser_with_mock_page() @@ -154,18 +153,28 @@ async def test_start_video_uses_window_inner_dimensions_not_viewport_size(): # Simulate CDP attach mode: viewport_size is None. fake_page.viewport_size = None fake_page.is_closed = MagicMock(return_value=False) - # window.innerWidth/innerHeight reports the real window — 16:9, much - # larger than the old 800×600 fallback. - fake_page.evaluate = AsyncMock(return_value={"w": 1366, "h": 768}) browser.get_current_page = AsyncMock(return_value=fake_page) + # Mock CDPSession on the browser's context so Page.getLayoutMetrics returns real dims. + fake_cdp_session = MagicMock() + fake_cdp_session.send = AsyncMock(return_value={ + "cssLayoutViewport": {"clientWidth": 1366, "clientHeight": 768, "pageX": 0, "pageY": 0}, + "cssContentSize": {"width": 1366, "height": 768}, + "cssVisualViewport": {"clientWidth": 1366, "clientHeight": 768}, + }) + fake_cdp_session.detach = AsyncMock() + browser._context.new_cdp_session = AsyncMock(return_value=fake_cdp_session) + + # Mock the recorder startup — this test only verifies dimension computation. + async def _fake_start(page): + browser._video_recorder = MagicMock() + browser._start_single_video_recorder = _fake_start # type: ignore[method-assign] + await browser.start_video() - # JS query was performed. - fake_page.evaluate.assert_awaited_once() - call_arg = fake_page.evaluate.await_args.args[0] - assert "innerWidth" in call_arg - assert "innerHeight" in call_arg + # CDP session was used to query dimensions. + browser._context.new_cdp_session.assert_awaited_once() + fake_cdp_session.send.assert_awaited_once_with("Page.getLayoutMetrics") # Recording size matches the queried dimensions, NOT the 800×600 # fallback. (& ~1 rounds to even, both are already even here.) 
@@ -181,9 +190,8 @@ async def test_start_video_uses_window_inner_dimensions_not_viewport_size(): @pytest.mark.asyncio async def test_start_video_falls_back_to_viewport_size_when_evaluate_fails(): - """If ``page.evaluate`` raises (hardened CSP, page closed mid-call, - etc.), start_video() should fall back to ``page.viewport_size`` - instead of crashing.""" + """If CDP session send raises (e.g. session unavailable), start_video() + should fall back to ``page.viewport_size`` instead of crashing.""" browser = _make_browser_with_mock_page() fake_context = MagicMock() @@ -194,9 +202,19 @@ async def test_start_video_falls_back_to_viewport_size_when_evaluate_fails(): fake_page.context = fake_context fake_page.viewport_size = {"width": 1280, "height": 800} fake_page.is_closed = MagicMock(return_value=False) - fake_page.evaluate = AsyncMock(side_effect=RuntimeError("CSP blocked")) browser.get_current_page = AsyncMock(return_value=fake_page) + # Make CDP session fail so it falls back to viewport_size. + fake_cdp_session = MagicMock() + fake_cdp_session.send = AsyncMock(side_effect=RuntimeError("CDP unavailable")) + fake_cdp_session.detach = AsyncMock() + browser._context.new_cdp_session = AsyncMock(return_value=fake_cdp_session) + + # Mock the recorder startup — this test only verifies dimension fallback. 
+ async def _fake_start(page): + browser._video_recorder = MagicMock() + browser._start_single_video_recorder = _fake_start # type: ignore[method-assign] + await browser.start_video() session = browser._video_session @@ -205,6 +223,7 @@ async def test_start_video_falls_back_to_viewport_size_when_evaluate_fails(): assert session["height"] == 800 browser._video_session = None + browser._video_recorder = None browser._video_state.clear() @@ -230,6 +249,11 @@ async def test_start_video_already_active_does_not_destroy_existing_session(): fake_page.is_closed = MagicMock(return_value=False) browser.get_current_page = AsyncMock(return_value=fake_page) + # Mock recorder startup so first call succeeds. + async def _fake_start(page): + browser._video_recorder = MagicMock() + browser._start_single_video_recorder = _fake_start # type: ignore[method-assign] + # First call: sets up a session. await browser.start_video() sentinel_session = browser._video_session @@ -293,10 +317,9 @@ async def test_close_page_switches_to_remaining_tab_in_cdp_borrowed_mode(): @pytest.mark.asyncio -async def test_start_video_records_all_tabs_in_cdp_borrowed_mode(): - """start_video() MUST install a recorder on every page (including the - user's existing tabs) when bridgic is a guest on a borrowed CDP context. 
- """ +async def test_start_video_records_only_active_tab_in_cdp_borrowed_mode(): + """start_video() in single-stream mode MUST start only one recorder on the + active page, even in CDP borrowed mode with multiple tabs.""" owned = MagicMock(name="bridgic_tab") owned.is_closed = MagicMock(return_value=False) @@ -316,21 +339,30 @@ async def test_start_video_records_all_tabs_in_cdp_borrowed_mode(): fake_context.on = MagicMock() browser._context = fake_context - started: list = [] + started_page = None async def _fake_starter(page): - started.append(page) + nonlocal started_page + started_page = page - browser._start_page_video_recorder = _fake_starter # type: ignore[method-assign] + browser._start_single_video_recorder = _fake_starter # type: ignore[method-assign] browser.get_current_page = AsyncMock(return_value=owned) owned.evaluate = AsyncMock(return_value={"w": 1280, "h": 720}) + # Make _start_single_video_recorder set _video_recorder so the post-check passes. + async def _fake_starter_with_recorder(page): + nonlocal started_page + started_page = page + browser._video_recorder = MagicMock() # simulate recorder created + + browser._start_single_video_recorder = _fake_starter_with_recorder # type: ignore[method-assign] + await browser.start_video() - # Both bridgic-owned tab AND the user's pre-existing tab must be recorded. - assert owned in started - assert user in started + # Only the active (owned) tab should have been started. + assert started_page is owned - # Cleanup: avoid leaking the fake session into other tests. + # Cleanup. 
browser._video_session = None + browser._video_recorder = None browser._video_state.clear() diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index 6de2dc8..0e2176e 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -721,11 +721,19 @@ def test_wait_seconds(self): def test_wait_text_appear(self): _, sc = invoke(["wait", "Done"]) - sc.assert_called_once_with("wait", {"text": "Done"}, start_if_needed=False) + sc.assert_called_once_with("wait", {"text": "Done", "timeout": 30.0}, start_if_needed=False) + + def test_wait_text_appear_custom_timeout(self): + _, sc = invoke(["wait", "--timeout", "5", "Done"]) + sc.assert_called_once_with("wait", {"text": "Done", "timeout": 5.0}, start_if_needed=False) def test_wait_text_gone(self): _, sc = invoke(["wait", "--gone", "Loading"]) - sc.assert_called_once_with("wait", {"text_gone": "Loading"}, start_if_needed=False) + sc.assert_called_once_with("wait", {"text_gone": "Loading", "timeout": 30.0}, start_if_needed=False) + + def test_wait_text_gone_custom_timeout(self): + _, sc = invoke(["wait", "--gone", "--timeout", "10", "Spinner"]) + sc.assert_called_once_with("wait", {"text_gone": "Spinner", "timeout": 10.0}, start_if_needed=False) # ── Tabs ────────────────────────────────────────────────────────────────── diff --git a/tests/unit/test_tools.py b/tests/unit/test_tools.py index e4e24b2..81d19e9 100644 --- a/tests/unit/test_tools.py +++ b/tests/unit/test_tools.py @@ -59,6 +59,7 @@ def mock_browser(): browser._new_page = AsyncMock() browser.get_snapshot = AsyncMock() browser.get_element_by_ref = AsyncMock() + browser._get_page_title = AsyncMock(return_value="Test Page") # Browser tool methods (all async) browser.search = AsyncMock(return_value="Searched on Duckduckgo for 'test query'") @@ -685,7 +686,9 @@ async def test_check_checkbox_by_ref(self, mock_browser): mock_locator.check = AsyncMock() mock_locator.bounding_box = AsyncMock(return_value=None) mock_locator.is_visible = 
AsyncMock(return_value=True) - mock_locator.evaluate = AsyncMock(side_effect=["input", False, True]) + # is_checked: False before (proceed), True after (confirmed) + mock_locator.is_checked = AsyncMock(side_effect=[False, True]) + # get_attribute("type") → "checkbox" → is_native; get_attribute("aria-checked") unused mock_locator.get_attribute = AsyncMock(return_value="checkbox") mock_browser.get_element_by_ref.return_value = mock_locator @@ -702,7 +705,8 @@ async def test_uncheck_checkbox_by_ref(self, mock_browser): mock_locator.uncheck = AsyncMock() mock_locator.bounding_box = AsyncMock(return_value=None) mock_locator.is_visible = AsyncMock(return_value=True) - mock_locator.evaluate = AsyncMock(side_effect=["input", True, False]) + # is_checked: True before (proceed), False after (confirmed) + mock_locator.is_checked = AsyncMock(side_effect=[True, False]) mock_locator.get_attribute = AsyncMock(return_value="checkbox") mock_browser.get_element_by_ref.return_value = mock_locator @@ -722,7 +726,10 @@ async def test_uncheck_checkbox_by_ref_covered_uses_elementFromPoint(self, mock_ mock_locator.uncheck = AsyncMock() mock_locator.bounding_box = AsyncMock(return_value={"x": 10, "y": 20, "width": 100, "height": 40}) mock_locator.is_visible = AsyncMock(return_value=True) - mock_locator.evaluate = AsyncMock(side_effect=["input", True, True, False]) + # is_checked: True before (proceed), False after (confirmed) + mock_locator.is_checked = AsyncMock(side_effect=[True, False]) + # locator.evaluate used only by _check_element_covered → return True (element is covered) + mock_locator.evaluate = AsyncMock(return_value=True) mock_locator.get_attribute = AsyncMock(return_value="checkbox") mock_browser.get_element_by_ref.return_value = mock_locator @@ -742,7 +749,9 @@ async def test_check_custom_checkbox_uses_click_instead_of_check(self, mock_brow mock_locator.dispatch_event = AsyncMock() mock_locator.bounding_box = AsyncMock(return_value=None) mock_locator.is_visible = 
AsyncMock(return_value=True) - mock_locator.evaluate = AsyncMock(side_effect=["div", False, True]) + # is_checked: False before (proceed), True after (confirmed) + mock_locator.is_checked = AsyncMock(side_effect=[False, True]) + # get_attribute("type") → None → is_native=False (custom element) mock_locator.get_attribute = AsyncMock(return_value=None) mock_browser.get_element_by_ref.return_value = mock_locator @@ -762,7 +771,8 @@ async def test_uncheck_custom_checkbox_uses_click_instead_of_uncheck(self, mock_ mock_locator.dispatch_event = AsyncMock() mock_locator.bounding_box = AsyncMock(return_value=None) mock_locator.is_visible = AsyncMock(return_value=True) - mock_locator.evaluate = AsyncMock(side_effect=["div", True, False]) + # is_checked: True before (proceed), False after (confirmed) + mock_locator.is_checked = AsyncMock(side_effect=[True, False]) mock_locator.get_attribute = AsyncMock(return_value=None) mock_browser.get_element_by_ref.return_value = mock_locator @@ -781,8 +791,8 @@ async def test_check_custom_checkbox_reports_failure_when_state_not_changed(self mock_locator.dispatch_event = AsyncMock() mock_locator.bounding_box = AsyncMock(return_value=None) mock_locator.is_visible = AsyncMock(return_value=True) - # tag=input? 
no, custom div; initially unchecked -> still unchecked after click - mock_locator.evaluate = AsyncMock(side_effect=["div", False, False]) + # is_native=False (custom div); initially unchecked → still unchecked after click + mock_locator.is_checked = AsyncMock(side_effect=[False, False]) mock_locator.get_attribute = AsyncMock(return_value=None) mock_browser.get_element_by_ref.return_value = mock_locator @@ -800,8 +810,8 @@ async def test_uncheck_custom_checkbox_reports_failure_when_state_not_changed(se mock_locator.dispatch_event = AsyncMock() mock_locator.bounding_box = AsyncMock(return_value=None) mock_locator.is_visible = AsyncMock(return_value=True) - # custom div; initially checked -> still checked after click - mock_locator.evaluate = AsyncMock(side_effect=["div", True, True]) + # custom div; initially checked → still checked after click + mock_locator.is_checked = AsyncMock(side_effect=[True, True]) mock_locator.get_attribute = AsyncMock(return_value=None) mock_browser.get_element_by_ref.return_value = mock_locator @@ -1512,9 +1522,9 @@ async def test_stop_tracing(self, mock_browser, temp_dir): @pytest.mark.asyncio async def test_start_video(self, mock_browser): - """Test starting video recording — multi-page: all existing - pages in the context get a per-page recorder, and context.on('page') - is subscribed so future pages auto-record too. + """Test starting video recording — single-stream: one recorder on + the active page, and context.on('page') is subscribed so new pages + auto-switch the screencast source. """ import types @@ -1525,11 +1535,10 @@ async def test_start_video(self, mock_browser): mock_context.pages = [page] mock_context.on = MagicMock() mock_browser._video_state = {} - mock_browser._video_recorders = {} + mock_browser._video_recorder = None mock_browser._video_session = None - # Bind the real helper so start_video can drive _start_page_video_recorder. 
- mock_browser._start_page_video_recorder = types.MethodType( - Browser._start_page_video_recorder, mock_browser, + mock_browser._start_single_video_recorder = types.MethodType( + Browser._start_single_video_recorder, mock_browser, ) mock_recorder = MagicMock() @@ -1538,18 +1547,17 @@ async def test_start_video(self, mock_browser): result = await Browser.start_video(mock_browser) assert "Video recording started" in result - assert "1 page" in result - assert mock_browser._video_recorders[page] is mock_recorder + assert "active tab" in result + assert mock_browser._video_recorder is mock_recorder assert mock_browser._video_session is not None - # context.on('page', handler) must be registered for auto-recording - # of newly opened tabs. + # context.on('page', handler) must be registered for auto-switching. assert mock_context.on.called assert mock_context.on.call_args.args[0] == "page" @pytest.mark.asyncio async def test_stop_video(self, mock_browser): """Test stopping video recording when no session is active.""" - mock_browser._video_recorders = {} + mock_browser._video_recorder = None mock_browser._video_session = None mock_browser._video_state = {} with pytest.raises(StateError) as exc_info: @@ -1557,8 +1565,8 @@ async def test_stop_video(self, mock_browser): assert exc_info.value.code == "NO_ACTIVE_RECORDING" @pytest.mark.asyncio - async def test_start_video_records_all_pages(self, mock_browser): - """start_video should record every existing page, not just current.""" + async def test_start_video_single_stream_only_records_active_page(self, mock_browser): + """start_video in single-stream mode records only the active page.""" import types page1 = mock_browser._page @@ -1574,59 +1582,43 @@ async def test_start_video_records_all_pages(self, mock_browser): mock_context.pages = [page1, page2] mock_context.on = MagicMock() mock_browser._video_state = {} - mock_browser._video_recorders = {} + mock_browser._video_recorder = None mock_browser._video_session = None - 
mock_browser._start_page_video_recorder = types.MethodType( - Browser._start_page_video_recorder, mock_browser, + mock_browser._start_single_video_recorder = types.MethodType( + Browser._start_single_video_recorder, mock_browser, ) - created_recorders = [] - - def _factory(context, page, output_path, size): - rec = MagicMock() - rec.start = AsyncMock() - rec.output_path = output_path - created_recorders.append(rec) - return rec - + mock_recorder = MagicMock() + mock_recorder.start = AsyncMock() with patch( "bridgic.browser.session._browser._video_recorder_mod.VideoRecorder", - side_effect=_factory, + return_value=mock_recorder, ): result = await Browser.start_video(mock_browser) - assert "2 pages" in result - assert len(mock_browser._video_recorders) == 2 - assert page1 in mock_browser._video_recorders - assert page2 in mock_browser._video_recorders - assert len(created_recorders) == 2 - for rec in created_recorders: - rec.start.assert_awaited_once() + assert "active tab" in result + # Only one recorder for the active page (page1), not both. + assert mock_browser._video_recorder is mock_recorder + mock_recorder.start.assert_awaited_once() @pytest.mark.asyncio - async def test_stop_video_returns_multiple_paths(self, mock_browser, tmp_path): - """stop_video should stop all page recorders and return all paths.""" + async def test_stop_video_returns_single_path(self, mock_browser, tmp_path): + """stop_video should stop the single recorder and return its path.""" from bridgic.browser.session import _browser as browser_module mock_browser._context.remove_listener = MagicMock() context_key = browser_module._get_context_key(mock_browser._context) mock_browser._video_state = {context_key: True} - # Bind real static helpers — otherwise `self._resolve_multi_video_dests` - # on a MagicMock returns another MagicMock (truthy) and the code - # takes the wrong branch. 
- mock_browser._resolve_multi_video_dests = Browser._resolve_multi_video_dests + mock_browser._resolve_video_dest = Browser._resolve_video_dest mock_browser._move_video_local = Browser._move_video_local - page1 = MagicMock() - page2 = MagicMock() - rec1 = MagicMock() - rec1.stop = AsyncMock(return_value=str(tmp_path / "a.webm")) - rec2 = MagicMock() - rec2.stop = AsyncMock(return_value=str(tmp_path / "b.webm")) - (tmp_path / "a.webm").write_bytes(b"") - (tmp_path / "b.webm").write_bytes(b"") - - mock_browser._video_recorders = {page1: rec1, page2: rec2} + video_path = str(tmp_path / "video.webm") + (tmp_path / "video.webm").write_bytes(b"") + + rec = MagicMock() + rec.stop = AsyncMock(return_value=video_path) + + mock_browser._video_recorder = rec mock_browser._video_session = { "width": 800, "height": 600, "context": mock_browser._context, "page_listener": lambda *_: None, @@ -1634,14 +1626,36 @@ async def test_stop_video_returns_multiple_paths(self, mock_browser, tmp_path): result = await Browser.stop_video(mock_browser) - rec1.stop.assert_awaited_once() - rec2.stop.assert_awaited_once() - assert "Video files saved" in result - assert str(tmp_path / "a.webm") in result - assert str(tmp_path / "b.webm") in result - assert mock_browser._video_recorders == {} + rec.stop.assert_awaited_once() + assert "Video saved to" in result + assert video_path in result + assert mock_browser._video_recorder is None assert mock_browser._video_session is None + @pytest.mark.asyncio + async def test_stop_video_handles_recorder_failure(self, mock_browser): + """stop_video() should handle recorder stop failure gracefully.""" + from bridgic.browser.session import _browser as browser_module + + mock_browser._context.remove_listener = MagicMock() + context_key = browser_module._get_context_key(mock_browser._context) + mock_browser._video_state = {context_key: True} + + rec = MagicMock() + rec.stop = AsyncMock(side_effect=RuntimeError("encoder crashed")) + + mock_browser._video_recorder = 
rec + mock_browser._video_session = { + "width": 800, "height": 600, "context": mock_browser._context, + "page_listener": lambda *_: None, + } + + result = await Browser.stop_video(mock_browser) + + assert "incomplete" in result + assert mock_browser._video_recorder is None + + # ==================== State Tools Tests ==================== class TestStateTools: @@ -1721,7 +1735,7 @@ async def test_get_snapshot_text_snapshot_failed(self, mock_browser): mock_browser.get_snapshot.return_value = None with pytest.raises(OperationError) as exc_info: await Browser.get_snapshot_text(mock_browser) - assert "Failed to get interface information" in exc_info.value.message + assert "Failed to get snapshot" in exc_info.value.message # ==================== BrowserToolSetBuilder Tests ==================== diff --git a/tests/unit/test_video_recorder.py b/tests/unit/test_video_recorder.py index 799e23b..0f39c6a 100644 --- a/tests/unit/test_video_recorder.py +++ b/tests/unit/test_video_recorder.py @@ -246,3 +246,84 @@ async def fake_create(*args, **kwargs): assert captured.get("stderr") == asyncio.subprocess.DEVNULL # stdin must remain PIPE — bridgic feeds JPEG bytes into it. 
assert captured.get("stdin") == asyncio.subprocess.PIPE + + +# --------------------------------------------------------------------------- +# switch_page() +# --------------------------------------------------------------------------- + +class TestSwitchPage: + """Tests for VideoRecorder.switch_page() — hot-swap screencast source.""" + + def _make_recorder(self, tmp_path: Path) -> VideoRecorder: + ctx = MagicMock() + page = MagicMock() + output = str(tmp_path / "test.webm") + return VideoRecorder(ctx, page, output, (800, 600)) + + @pytest.mark.asyncio + async def test_noop_when_stopped(self, tmp_path: Path) -> None: + rec = self._make_recorder(tmp_path) + rec._is_stopped = True + old_page = rec._page + new_page = MagicMock() + await rec.switch_page(new_page) + assert rec._page is old_page # unchanged + + @pytest.mark.asyncio + async def test_noop_same_page(self, tmp_path: Path) -> None: + rec = self._make_recorder(tmp_path) + old_page = rec._page + rec._context.new_cdp_session = AsyncMock() + await rec.switch_page(old_page) + # No CDP calls should have been made + rec._context.new_cdp_session.assert_not_awaited() + + @pytest.mark.asyncio + async def test_tears_down_old_sets_up_new(self, tmp_path: Path) -> None: + rec = self._make_recorder(tmp_path) + old_cdp = MagicMock() + old_cdp.send = AsyncMock() + old_cdp.remove_listener = MagicMock() + old_cdp.detach = AsyncMock() + rec._cdp_session = old_cdp + + new_page = MagicMock() + new_cdp = MagicMock() + new_cdp.on = MagicMock() + new_cdp.send = AsyncMock() + rec._context.new_cdp_session = AsyncMock(return_value=new_cdp) + + await rec.switch_page(new_page) + + # Old CDP torn down + old_cdp.send.assert_awaited_once_with("Page.stopScreencast") + old_cdp.remove_listener.assert_called_once() + old_cdp.detach.assert_awaited_once() + + # New CDP set up + rec._context.new_cdp_session.assert_awaited_once_with(new_page) + new_cdp.on.assert_called_once() + new_cdp.send.assert_awaited_once() + assert rec._page is new_page + 
assert rec._cdp_session is new_cdp + + @pytest.mark.asyncio + async def test_survives_cdp_failure(self, tmp_path: Path) -> None: + """If CDP setup fails on the new page, recorder degrades gracefully.""" + rec = self._make_recorder(tmp_path) + rec._cdp_session = None # no old session + + new_page = MagicMock() + rec._context.new_cdp_session = AsyncMock( + side_effect=RuntimeError("CDP unavailable"), + ) + + await rec.switch_page(new_page) # must not raise + + assert rec._page is new_page + assert rec._cdp_session is None # degraded + + def test_current_page_property(self, tmp_path: Path) -> None: + rec = self._make_recorder(tmp_path) + assert rec.current_page is rec._page From e2246bfa2e0680a689ba1bc84827185e525dd661 Mon Sep 17 00:00:00 2001 From: NiceCode666 Date: Fri, 10 Apr 2026 11:42:41 +0800 Subject: [PATCH 12/72] feat: enhance search command with CDP connection option - Added a new `--cdp` option to the `search` command, allowing users to connect to an existing browser session instead of launching a new one. - Updated the command's implementation to handle the new `cdp` parameter and resolve the input correctly. - Modified CLI command descriptions to reflect the addition of the `--cdp` option. - Adjusted related tests to verify the correct behavior of the updated `search` command with the new option. 
--- bridgic/browser/_cli_catalog.py | 2 +- bridgic/browser/cli/_commands.py | 20 +++++++- bridgic/browser/cli/_daemon.py | 10 ++++ bridgic/browser/session/_browser.py | 57 ++++++++++++---------- bridgic/browser/session/_video_recorder.py | 31 +++++++++++- tests/unit/test_cli.py | 8 +-- tests/unit/test_video_recorder.py | 3 +- 7 files changed, 94 insertions(+), 37 deletions(-) diff --git a/bridgic/browser/_cli_catalog.py b/bridgic/browser/_cli_catalog.py index 98cd5b0..4a9ed53 100644 --- a/bridgic/browser/_cli_catalog.py +++ b/bridgic/browser/_cli_catalog.py @@ -82,7 +82,7 @@ "back": (ToolCategory.NAVIGATION, "Go back to the previous page"), "forward": (ToolCategory.NAVIGATION, "Go forward to the next page"), "reload": (ToolCategory.NAVIGATION, "Reload the current page"), - "search": (ToolCategory.NAVIGATION, "Search the web using a search engine (starts a browser session if needed) [--headed] [--clear-user-data] [--engine duckduckgo|google|bing]"), + "search": (ToolCategory.NAVIGATION, "Search the web using a search engine (starts a browser session if needed) [--headed] [--clear-user-data] [--cdp PORT_OR_URL] [--engine duckduckgo|google|bing]"), "info": (ToolCategory.NAVIGATION, "Show current page URL, title, viewport, scroll position"), "snapshot": (ToolCategory.SNAPSHOT, "Get accessibility tree of the current page (full-page by default) with refs [-i] [-F viewport-only] [-l LIMIT] [-s FILE]"), "click": (ToolCategory.ELEMENT_INTERACTION, "Click an element by ref (@80365bf7 or 80365bf7)"), diff --git a/bridgic/browser/cli/_commands.py b/bridgic/browser/cli/_commands.py index e844599..e1a2913 100644 --- a/bridgic/browser/cli/_commands.py +++ b/bridgic/browser/cli/_commands.py @@ -186,10 +186,26 @@ def cmd_reload() -> None: help="Launch the browser in headed (visible) mode.") @click.option("--clear-user-data", is_flag=True, default=False, help="Start with a fresh browser profile (no persistent user data). 
Ignored if a session is already running.") -def cmd_search(query: str, engine: str, headed: bool, clear_user_data: bool) -> None: +@click.option( + "--cdp", default=None, metavar="PORT_OR_URL", + help=( + "Connect to a running browser instead of launching a new one. " + "Accepts: port number (9222), ws:// or wss:// URL, http://host:port, " + "or 'auto' to scan local Chrome/Chromium/Brave (+ Canary variants) profiles." + ), +) +def cmd_search(query: str, engine: str, headed: bool, clear_user_data: bool, cdp: str | None) -> None: """Search the web using a search engine (starts a browser session if needed).""" + cdp_url: str | None = None + if cdp: + from bridgic.browser.session._browser import resolve_cdp_input + try: + cdp_url = resolve_cdp_input(cdp) + except Exception as exc: + _err(exc) + return try: - _ok(send_command("search", {"query": query, "engine": engine}, headed=headed, clear_user_data=clear_user_data)) + _ok(send_command("search", {"query": query, "engine": engine}, headed=headed, clear_user_data=clear_user_data, cdp_url=cdp_url)) except Exception as exc: _err(exc) diff --git a/bridgic/browser/cli/_daemon.py b/bridgic/browser/cli/_daemon.py index 2040a26..6a538f4 100644 --- a/bridgic/browser/cli/_daemon.py +++ b/bridgic/browser/cli/_daemon.py @@ -580,6 +580,11 @@ async def _cdp_reconnect(browser: "Browser") -> bool: Returns True if the reconnect succeeded, False otherwise. After a successful reconnect the browser is at about:blank (new session). + + Implementation note: calls ``browser._start()`` (private) because there + is no public ``reconnect()`` API. This is intentional — reconnect is a + daemon-only concern. If ``_start()``'s preconditions change, this + function must be updated accordingly. """ try: await browser.close() @@ -671,6 +676,11 @@ async def _dispatch(browser: "Browser", command: str, args: Dict[str, Any]) -> D _READ_TIMEOUT = 60.0 # seconds to wait for a command line from the client +# Global safety-net timeout for browser.close(). 
The large value (300s) +# accommodates worst-case video finalization (ffmpeg encoding). In practice +# individual cleanup steps have their own shorter timeouts (video finalize +# 30s, context close 15s, etc.), so the full 300s is never reached during +# normal operation. try: _DAEMON_STOP_TIMEOUT = float(os.environ.get("BRIDGIC_DAEMON_STOP_TIMEOUT", "300")) except (ValueError, TypeError): diff --git a/bridgic/browser/session/_browser.py b/bridgic/browser/session/_browser.py index 12c1b5e..1ec0cbd 100644 --- a/bridgic/browser/session/_browser.py +++ b/bridgic/browser/session/_browser.py @@ -6,7 +6,7 @@ import signal import sys import tempfile -from urllib.parse import urlparse +from urllib.parse import urlparse, urlunparse from pathlib import Path from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Sequence, Union, NoReturn @@ -169,13 +169,15 @@ def find_cdp_url( except (KeyError, json.JSONDecodeError) as exc: raise ValueError(f"Failed to parse /json/version response: {exc}") from exc # Chrome always reports localhost in the URL; replace with the actual - # host when the user passed a remote address. Compare against the - # *normalized* host_lower so callers passing "LOCALHOST" or other - # mixed-case loopback variants still get a clean lowercase URL - # rather than ws://LOCALHOST:9222/... For IPv6 hosts we substitute - # the bracketed form so the resulting URL is parseable. + # host when the user passed a remote address. Use urlparse to + # precisely swap only the hostname component (a naive string replace + # could match "localhost" inside a path or query parameter). 
if host_lower != "localhost": - ws_url = ws_url.replace("localhost", host_in_url, 1) + _parsed_ws = urlparse(ws_url) + # Rebuild netloc: bracketed IPv6 host + original port + _ws_port = _parsed_ws.port or port + _new_netloc = f"{host_in_url}:{_ws_port}" + ws_url = urlunparse(_parsed_ws._replace(netloc=_new_netloc)) return ws_url if mode == "scan": @@ -508,7 +510,9 @@ async def _is_native_checkbox_or_radio(locator) -> bool: Uses ``get_attribute("type")`` instead of ``evaluate()`` to avoid Playwright's ``_mainContext()`` hang on pre-existing CDP tabs. Only ``<input>`` elements carry those type values, so - the tagName check is redundant. + the tagName check is redundant. A custom element with an explicit + ``type="checkbox"`` attribute would be misidentified, but this is + vanishingly rare in practice. """ try: input_type = (await locator.get_attribute("type") or "").strip().lower() @@ -543,6 +547,10 @@ async def _cdp_evaluate_on_element(cdp_context, page, locator, code: str) -> Any Resolves the element via bounding-box coordinates + ``document.elementFromPoint`` so it bypasses Playwright's ``_mainContext()`` which hangs on pre-existing CDP-borrowed tabs. Raises on any failure (caller must handle). + + Note: assumes no concurrent scroll between ``bounding_box()`` and the CDP + ``Runtime.evaluate`` call — if the page scrolls in between, the coordinates + may resolve to a different element. """ bbox = await locator.bounding_box() if bbox is None: @@ -1790,9 +1798,10 @@ async def close(self) -> str: errors: List[str] = [] shutdown_artifacts: Dict[str, List[str]] = {"trace": [], "video": []} context_key: Optional[str] = None - # Recorders whose prepare_stop() has run but finalize() is deferred + # Recorder whose prepare_stop() has run but finalize() is deferred # until after Chrome exits (two-phase video shutdown). - _deferred_recorders: list = [] + # Currently only one single-stream recorder is supported.
+ _deferred_recorder: Optional[Any] = None # Deferred re-raise: if CancelledError / KeyboardInterrupt arrives during any # cleanup await we record it here, finish ALL cleanup steps, then re-raise at # the very end. This ensures no Playwright/Chromium process is left orphaned @@ -1905,7 +1914,7 @@ async def close(self) -> str: _pending_cancel = _pr # Stash for Phase 2 (runs after Chrome exits). - _deferred_recorders = [("single", _recorder)] + _deferred_recorder = _recorder logger.debug("[close] Phase 1 done, clearing page state") # Always clear page-scoped listeners/caches for every context page. @@ -2020,15 +2029,14 @@ async def close(self) -> str: if _pending_cancel is None: _pending_cancel = e - # Phase 2: finalize() deferred video recorders. + # Phase 2: finalize() the deferred video recorder. # Chrome is dead, user_data_dir is released. Now flush the ffmpeg - # frame queues with a semaphore to bound CPU usage. - if _deferred_recorders: + # frame queue. + if _deferred_recorder is not None: logger.info("[close] Phase 2: finalize single recorder") - _, _rec_to_finalize = _deferred_recorders[0] try: rec_path: str = await asyncio.wait_for( - _rec_to_finalize.finalize(), + _deferred_recorder.finalize(), timeout=self._VIDEO_FINALIZE_TIMEOUT, ) if self._close_session_dir: @@ -2423,16 +2431,7 @@ async def _close_page(self, page: Page | str) -> tuple[bool, str]: logger.debug("[_close_page] video switch error: %s", e) else: # Last page — stop screencast but keep ffmpeg alive for finalize. 
- if self._video_recorder._cdp_session: - try: - await self._video_recorder._cdp_session.send("Page.stopScreencast") - except Exception: - pass - try: - await self._video_recorder._cdp_session.detach() - except Exception: - pass - self._video_recorder._cdp_session = None + await self._video_recorder.detach_screencast() await page.close() @@ -2474,7 +2473,6 @@ async def get_page_size_info(self) -> Optional[PageSizeInfo]: layout = metrics.get("cssLayoutViewport", {}) content = metrics.get("cssContentSize", {}) - visual = metrics.get("cssVisualViewport", {}) viewport_width = layout.get("clientWidth", 0) viewport_height = layout.get("clientHeight", 0) @@ -7487,6 +7485,11 @@ async def start_video( await _session.detach() except Exception: pass + # Use cssVisualViewport (not cssLayoutViewport) because it + # represents the actual visible pixel area after pinch-zoom, + # matching what Chrome's screencast captures. + # get_page_size_info() uses cssLayoutViewport for scroll + # reporting — different purpose, both choices are intentional. 
_vp = _metrics.get("cssVisualViewport", {}) qw = int(_vp.get("clientWidth") or 0) qh = int(_vp.get("clientHeight") or 0) diff --git a/bridgic/browser/session/_video_recorder.py b/bridgic/browser/session/_video_recorder.py index ef95fdf..1eabebf 100644 --- a/bridgic/browser/session/_video_recorder.py +++ b/bridgic/browser/session/_video_recorder.py @@ -44,6 +44,7 @@ import re import shutil import time +from collections import deque from pathlib import Path from typing import Any, List, Optional, Tuple @@ -219,7 +220,7 @@ def __init__( self._first_frame_ts: float = 0.0 # timestamp of the first frame; used to compute frame numbers self._last_frame: Optional[Tuple[bytes, float, int]] = None # (jpeg_bytes, timestamp, frame_number) self._last_write_time: float = 0.0 # monotonic time of the last write_frame() call - self._frame_queue: List[bytes] = [] # frames waiting to be written to ffmpeg's stdin + self._frame_queue: deque[bytes] = deque() # frames waiting to be written to ffmpeg's stdin self._is_stopped = False self._write_lock = asyncio.Lock() # serializes writes to ffmpeg's stdin self._flush_pending = False # dedup flag: avoid scheduling a flush task per frame @@ -376,6 +377,13 @@ async def prepare_stop(self) -> None: # sentinel that tells _write_frame to advance the frame counter # without replacing the cached JPEG bytes. # Reference: videoRecorder.ts lines 140-144. + # + # Note: _last_write_time is monotonic while _last_frame[1] is + # wall-clock (Chrome's metadata.timestamp). Mixing clocks is safe + # here because add_time is a *duration* (not an absolute + # timestamp) — both clocks advance at the same rate, so the + # delta is valid. monotonic is preferred for the duration to + # avoid NTP jump artifacts. 
add_time = max(time.monotonic() - self._last_write_time, 1.0) self._write_frame(b"", self._last_frame[1] + add_time) # type: ignore[index] @@ -443,6 +451,25 @@ async def stop(self) -> str: await self.prepare_stop() return await self.finalize() + async def detach_screencast(self) -> None: + """Stop the CDP screencast and detach the session without stopping ffmpeg. + + Used when the last recorded page is about to close — the CDP session + is bound to that page and will die with it, but ffmpeg must stay alive + for a later ``finalize()`` call. Idempotent: safe to call when the + session is already detached. + """ + if self._cdp_session: + try: + await self._cdp_session.send("Page.stopScreencast") + except Exception: + pass + try: + await self._cdp_session.detach() + except Exception: + pass + self._cdp_session = None + async def switch_page(self, new_page: Any) -> None: """Switch screencast source to a different page. ffmpeg stays alive.""" if self._is_stopped: @@ -603,7 +630,7 @@ async def _flush_queue(self) -> None: """Drain the frame queue into ffmpeg's stdin under a write lock.""" async with self._write_lock: while self._frame_queue: - frame_data = self._frame_queue.pop(0) + frame_data = self._frame_queue.popleft() await self._send_frame(frame_data) async def _flush_and_reset(self) -> None: diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index 0e2176e..c516299 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -506,19 +506,19 @@ def test_reload(self): def test_search_default_engine(self): _, sc = invoke(["search", "python async"]) - sc.assert_called_once_with("search", {"query": "python async", "engine": "duckduckgo"}, headed=False, clear_user_data=False) + sc.assert_called_once_with("search", {"query": "python async", "engine": "duckduckgo"}, headed=False, clear_user_data=False, cdp_url=None) def test_search_custom_engine(self): _, sc = invoke(["search", "query", "--engine", "google"]) - sc.assert_called_once_with("search", {"query": 
"query", "engine": "google"}, headed=False, clear_user_data=False) + sc.assert_called_once_with("search", {"query": "query", "engine": "google"}, headed=False, clear_user_data=False, cdp_url=None) def test_search_headed(self): _, sc = invoke(["search", "--headed", "python async"]) - sc.assert_called_once_with("search", {"query": "python async", "engine": "duckduckgo"}, headed=True, clear_user_data=False) + sc.assert_called_once_with("search", {"query": "python async", "engine": "duckduckgo"}, headed=True, clear_user_data=False, cdp_url=None) def test_search_clear_user_data(self): _, sc = invoke(["search", "--clear-user-data", "python async"]) - sc.assert_called_once_with("search", {"query": "python async", "engine": "duckduckgo"}, headed=False, clear_user_data=True) + sc.assert_called_once_with("search", {"query": "python async", "engine": "duckduckgo"}, headed=False, clear_user_data=True, cdp_url=None) def test_info(self): _, sc = invoke(["info"]) diff --git a/tests/unit/test_video_recorder.py b/tests/unit/test_video_recorder.py index 0f39c6a..0c95bd2 100644 --- a/tests/unit/test_video_recorder.py +++ b/tests/unit/test_video_recorder.py @@ -173,7 +173,8 @@ async def test_flush_queue_writes_to_ffmpeg(self, tmp_path: Path) -> None: mock_proc.stdin = mock_stdin rec._ffmpeg = mock_proc - rec._frame_queue = [b"a", b"b", b"c"] + from collections import deque + rec._frame_queue = deque([b"a", b"b", b"c"]) await rec._flush_queue() assert mock_stdin.write.call_count == 3 From 2d7fb8774a0f6362294f3784ad5ded429dcfade4 Mon Sep 17 00:00:00 2001 From: NiceCode666 Date: Fri, 10 Apr 2026 12:06:14 +0800 Subject: [PATCH 13/72] fix: improve browser detection in test for CDP URL retrieval - Enhanced the `fake_read` function in the `TestFindCdpUrl` class to improve browser detection by checking for variations of "chrome" in a case-insensitive manner. 
- Added conditions to exclude "canary", "unstable", and "beta" versions of Chrome to ensure accurate URL retrieval for the standard Chrome browser. --- tests/unit/test_cli.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index c516299..04a7234 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -2485,7 +2485,11 @@ def test_scan_mode_returns_first_active(self): chrome_url = "ws://localhost:9222/devtools/browser/chrome-uuid" def fake_read(base): - if "Chrome" in base and "Canary" not in base and "Beta" not in base: + base_lower = base.lower() + if ("chrome" in base_lower or "google-chrome" in base_lower) \ + and "canary" not in base_lower \ + and "unstable" not in base_lower \ + and "beta" not in base_lower: return chrome_url return None From e5629f7e2420358cb26d60b63aed8c08d22382b4 Mon Sep 17 00:00:00 2001 From: NiceCode666 Date: Fri, 10 Apr 2026 16:02:57 +0800 Subject: [PATCH 14/72] feat: add video recording functionality for active tab - Introduced a new `VideoRecorder` class to handle single-stream video recording on the active tab, allowing seamless switching between tabs while maintaining a continuous output file. - Updated CLI commands to reflect the new video recording behavior, including enhanced descriptions for `video-start` and `video-stop`. - Adjusted documentation to clarify that only the active tab is recorded, improving user understanding of the video recording process. - Enhanced tests to validate the new video recording features and ensure proper functionality across different scenarios. 
--- .github/workflows/windows-cli-test.yml | 309 +++++++- CLAUDE.md | 441 +----------- README.md | 2 +- README_zh.md | 2 +- bridgic/browser/_cli_catalog.py | 4 +- bridgic/browser/cli/_commands.py | 22 +- bridgic/browser/session/_browser.py | 70 +- bridgic/browser/session/_video_recorder.py | 33 +- docs/API.md | 2 + docs/BROWSER_TOOLS_GUIDE.md | 2 +- docs/CDP_MODE.md | 12 +- docs/INTERNALS.md | 384 ++++++++++ docs/SNAPSHOT_AND_STATE.md | 1 + pyproject.toml | 2 +- .../bridgic-browser/references/cli-guide.md | 6 +- .../references/cli-sdk-api-mapping.md | 2 +- skills/bridgic-browser/references/env-vars.md | 6 +- .../bridgic-browser/references/sdk-guide.md | 4 +- tests/integration/_chrome_utils.py | 159 +++++ tests/integration/test_cdp_borrowed_mode.py | 573 +++++++++++++++ tests/integration/test_cdp_cli_full.py | 671 ++++++++++++++++++ tests/integration/test_tools.py | 16 +- tests/unit/test_browser_methods.py | 4 +- tests/unit/test_tools.py | 12 +- uv.lock | 2 +- 25 files changed, 2177 insertions(+), 564 deletions(-) create mode 100644 docs/INTERNALS.md create mode 100644 tests/integration/_chrome_utils.py create mode 100644 tests/integration/test_cdp_borrowed_mode.py create mode 100644 tests/integration/test_cdp_cli_full.py diff --git a/.github/workflows/windows-cli-test.yml b/.github/workflows/windows-cli-test.yml index 58f5b5e..66223aa 100644 --- a/.github/workflows/windows-cli-test.yml +++ b/.github/workflows/windows-cli-test.yml @@ -14,13 +14,57 @@ on: workflow_dispatch: jobs: + # ── Unit tests (pytest) on Windows ─────────────────────────────────────── + unit-test-windows: + name: Unit Test (Windows, Python ${{ matrix.python-version }}) + runs-on: windows-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.11", "3.12", "3.13"] + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + 
- name: Install uv + uses: astral-sh/setup-uv@v4 + with: + version: "latest" + enable-cache: true + + - name: Cache uv dependencies + uses: actions/cache@v4 + with: + path: | + ~\AppData\Local\uv\cache + .venv + key: windows-unit-py${{ matrix.python-version }}-${{ hashFiles('uv.lock') }} + restore-keys: | + windows-unit-py${{ matrix.python-version }}- + + - name: Install dependencies + run: uv sync --group dev + + - name: Install Playwright browsers + run: uv run playwright install chromium + + - name: Run unit tests + run: uv run pytest tests/ --tb=short --verbose -m "not integration" + + # ── CLI functional tests on Windows ────────────────────────────────────── cli-windows: name: CLI Test (Windows, Python ${{ matrix.python-version }}) runs-on: windows-latest strategy: fail-fast: false matrix: - python-version: ["3.10", "3.11", "3.12"] + python-version: ["3.10", "3.11", "3.12", "3.13"] steps: - name: Checkout repository @@ -74,6 +118,49 @@ jobs: shell: pwsh run: uv run bridgic-browser reload + # ── Verify ───────────────────────────────────────────────────────────── + - name: CLI — verify-url + shell: pwsh + run: | + $out = uv run bridgic-browser verify-url example.com + Write-Output $out + $outText = ($out | Out-String) + if ($outText -notmatch 'PASS') { + Write-Error "verify-url did not PASS" + exit 1 + } + + - name: CLI — verify-title + shell: pwsh + run: | + $out = uv run bridgic-browser verify-title Example + Write-Output $out + $outText = ($out | Out-String) + if ($outText -notmatch 'PASS') { + Write-Error "verify-title did not PASS" + exit 1 + } + + - name: CLI — verify-text + shell: pwsh + run: | + $out = uv run bridgic-browser verify-text "Example Domain" + Write-Output $out + $outText = ($out | Out-String) + if ($outText -notmatch 'PASS') { + Write-Error "verify-text did not PASS" + exit 1 + } + + # ── Wait ─────────────────────────────────────────────────────────────── + - name: CLI — wait (text appears) + shell: pwsh + run: uv run bridgic-browser wait 
"Example Domain" + + - name: CLI — wait (time) + shell: pwsh + run: uv run bridgic-browser wait 1 + # ── Snapshot ─────────────────────────────────────────────────────────── - name: CLI — snapshot (full page) shell: pwsh @@ -97,6 +184,39 @@ jobs: run: uv run bridgic-browser snapshot -F # ── Element interaction via ref ──────────────────────────────────────── + - name: CLI — hover first interactive ref + shell: pwsh + run: | + $snap = uv run bridgic-browser snapshot -i + $snapText = ($snap | Out-String) + $refMatch = [regex]::Match($snapText, '\[ref=([^\]\r\n]+)\]') + if ($refMatch.Success) { + $ref = $refMatch.Groups[1].Value + Write-Output "Hovering ref: $ref" + uv run bridgic-browser hover $ref + } else { + Write-Output "No interactive refs found, skipping hover" + } + + - name: CLI — eval-on (ref) + shell: pwsh + run: | + $snap = uv run bridgic-browser snapshot -i + $snapText = ($snap | Out-String) + $refMatch = [regex]::Match($snapText, '\[ref=([^\]\r\n]+)\]') + if ($refMatch.Success) { + $ref = $refMatch.Groups[1].Value + Write-Output "eval-on ref: $ref" + $result = uv run bridgic-browser eval-on $ref "(el) => el.tagName" + Write-Output "Tag: $result" + if ([string]::IsNullOrWhiteSpace(($result | Out-String))) { + Write-Error "eval-on returned empty" + exit 1 + } + } else { + Write-Output "No interactive refs found, skipping eval-on" + } + - name: CLI — click first link ref shell: pwsh run: | @@ -112,6 +232,11 @@ jobs: Write-Output "No interactive refs found, skipping click" } + # ── Scroll ───────────────────────────────────────────────────────────── + - name: CLI — scroll + shell: pwsh + run: uv run bridgic-browser scroll --dy 300 + # ── Navigation history ───────────────────────────────────────────────── - name: CLI — back shell: pwsh @@ -121,10 +246,10 @@ jobs: shell: pwsh run: uv run bridgic-browser forward - # ── Wait ─────────────────────────────────────────────────────────────── - - name: CLI — wait (time) + # ── Keyboard 
─────────────────────────────────────────────────────────── + - name: CLI — press (keyboard) shell: pwsh - run: uv run bridgic-browser wait 1 + run: uv run bridgic-browser press Tab # ── JavaScript eval ──────────────────────────────────────────────────── - name: CLI — eval @@ -163,20 +288,42 @@ jobs: shell: pwsh run: uv run bridgic-browser tabs - - name: CLI — new-tab and close-tab + - name: CLI — new-tab, switch-tab, close-tab shell: pwsh run: | + # Create a new tab $out = uv run bridgic-browser new-tab https://example.com - Write-Output $out - # page_id is always an integer; allow page_id: 2, page_id=2, or "page_id": 2. - $outText = ($out | Out-String) - $pidMatch = [regex]::Match($outText, '(?m)\b"?page_id"?\b\s*[:=]\s*(\d+)') - if ($pidMatch.Success) { - $tabId = $pidMatch.Groups[1].Value - Write-Output "Closing tab: $tabId" - uv run bridgic-browser close-tab $tabId + Write-Output "new-tab: $out" + + # List tabs and extract page_ids + $tabsOut = uv run bridgic-browser tabs + Write-Output "tabs: $tabsOut" + $tabsText = ($tabsOut | Out-String) + + # Extract all page_ids + $allPids = [regex]::Matches($tabsText, '\bpage_(\d+)\b') + if ($allPids.Count -ge 2) { + # Switch to the first tab + $firstPid = $allPids[0].Value + Write-Output "Switching to: $firstPid" + uv run bridgic-browser switch-tab $firstPid + + # Switch back to the second tab + $secondPid = $allPids[1].Value + Write-Output "Switching to: $secondPid" + uv run bridgic-browser switch-tab $secondPid + + # Close the second tab + $pidMatch = [regex]::Match(($out | Out-String), '(?m)\b"?page_id"?\b\s*[:=]\s*(\d+)') + if ($pidMatch.Success) { + $tabId = $pidMatch.Groups[1].Value + Write-Output "Closing tab: $tabId" + uv run bridgic-browser close-tab $tabId + } else { + Write-Output "Could not extract page_id from new-tab output, skipping close-tab" + } } else { - Write-Output "Could not extract page_id, skipping close-tab" + Write-Output "Less than 2 tabs found, skipping switch-tab" } # ── Storage 
──────────────────────────────────────────────────────────── @@ -194,6 +341,62 @@ jobs: } uv run bridgic-browser storage-load state.json + # ── Form interaction (httpbin) ───────────────────────────────────────── + - name: CLI — open httpbin form + shell: pwsh + run: uv run bridgic-browser open https://httpbin.org/forms/post + + - name: CLI — fill textbox on httpbin form + shell: pwsh + run: | + $snap = uv run bridgic-browser snapshot -i + Write-Output $snap + $snapText = ($snap | Out-String) + # Find a textbox ref + $textboxMatch = [regex]::Match($snapText, '(?i)textbox[^\[]*\[ref=([^\]\r\n]+)\]') + if ($textboxMatch.Success) { + $ref = $textboxMatch.Groups[1].Value + Write-Output "Filling textbox ref: $ref" + uv run bridgic-browser fill $ref "CI Test" + } else { + Write-Error "No textbox found in httpbin interactive snapshot" + exit 1 + } + + - name: CLI — type text (into focused field) + shell: pwsh + run: uv run bridgic-browser type " appended" + + - name: CLI — focus textbox on httpbin form + shell: pwsh + run: | + $snap = uv run bridgic-browser snapshot -i + $snapText = ($snap | Out-String) + $textboxMatch = [regex]::Match($snapText, '(?i)textbox[^\[]*\[ref=([^\]\r\n]+)\]') + if ($textboxMatch.Success) { + $ref = $textboxMatch.Groups[1].Value + Write-Output "Focusing ref: $ref" + uv run bridgic-browser focus $ref + } else { + Write-Output "No textbox found, skipping focus" + } + + - name: CLI — check and uncheck checkbox on httpbin form + shell: pwsh + run: | + $snap = uv run bridgic-browser snapshot -i + $snapText = ($snap | Out-String) + $checkMatch = [regex]::Match($snapText, '(?i)checkbox[^\[]*\[ref=([^\]\r\n]+)\]') + if ($checkMatch.Success) { + $ref = $checkMatch.Groups[1].Value + Write-Output "Checking ref: $ref" + uv run bridgic-browser check $ref + Write-Output "Unchecking ref: $ref" + uv run bridgic-browser uncheck $ref + } else { + Write-Output "No checkbox found in httpbin form, skipping check/uncheck" + } + # ── Shutdown 
─────────────────────────────────────────────────────────── - name: CLI — close daemon shell: pwsh @@ -216,3 +419,81 @@ jobs: exit 1 } Write-Output "OK: daemon cleaned up correctly (${elapsed}s)" + + # ── Integration tests (pytest) on Windows ───────────────────────────────── + integration-test-windows: + name: Integration Test (Windows) + runs-on: windows-latest + needs: unit-test-windows + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Setup Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install uv + uses: astral-sh/setup-uv@v4 + with: + version: "latest" + enable-cache: true + + - name: Cache uv dependencies + uses: actions/cache@v4 + with: + path: | + ~\AppData\Local\uv\cache + .venv + key: windows-integ-py3.11-${{ hashFiles('uv.lock') }} + restore-keys: | + windows-integ-py3.11- + + - name: Cache Playwright browsers + uses: actions/cache@v4 + with: + path: ~\AppData\Local\ms-playwright + key: windows-playwright-${{ hashFiles('uv.lock') }} + restore-keys: | + windows-playwright- + + - name: Install dependencies + run: uv sync --group dev + + - name: Install Playwright browsers + run: uv run playwright install chromium + + - name: Run integration tests + run: uv run pytest tests/ --tb=short --verbose -m integration + + # ── Summary ────────────────────────────────────────────────────────────── + windows-test-summary: + name: Windows Test Summary + runs-on: windows-latest + needs: [unit-test-windows, cli-windows, integration-test-windows] + if: always() + + steps: + - name: Check results + shell: pwsh + run: | + Write-Output "Unit tests: ${{ needs.unit-test-windows.result }}" + Write-Output "CLI tests: ${{ needs.cli-windows.result }}" + Write-Output "Integration tests: ${{ needs.integration-test-windows.result }}" + + if ("${{ needs.unit-test-windows.result }}" -ne "success") { + Write-Error "Unit tests failed" + exit 1 + } + + if ("${{ needs.cli-windows.result }}" -ne "success") { + 
Write-Output "WARNING: CLI functional tests failed (non-blocking)" + } + + if ("${{ needs.integration-test-windows.result }}" -ne "success") { + Write-Output "WARNING: Integration tests failed (non-blocking)" + } + + Write-Output "All required Windows tests passed" diff --git a/CLAUDE.md b/CLAUDE.md index 50df904..cca38f4 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -49,6 +49,7 @@ bridgic/browser/ │ ├── _snapshot.py # SnapshotGenerator + EnhancedSnapshot + RefData │ ├── _stealth.py # StealthConfig + StealthArgsBuilder (50+ Chrome args) │ ├── _download.py # DownloadManager +│ ├── _video_recorder.py # VideoRecorder (CDP screencast → ffmpeg) │ └── _browser_model.py # Data models ├── tools/ # 67 automation tools (all implemented in _browser.py) │ ├── _browser_tool_set_builder.py # BrowserToolSetBuilder (category/name selection) @@ -74,31 +75,17 @@ bridgic/browser/ 4. **Tools** are bound async methods on the `Browser` class. Pass them to an LLM agent via `BrowserToolSetBuilder`. -### Element reference system - -Refs (like `1f79fe5e`, `8d4b03a9`, …) are generated during snapshot and stored in `EnhancedSnapshot.refs`. They are the stable, accessibility-aware identifiers used by all `*_by_ref` tools. When a page changes, call `get_snapshot()` again to refresh refs. 
- ### Tool selection -`BrowserToolSetBuilder` supports multiple selection strategies: +`BrowserToolSetBuilder` selects tools by category or name (combinable): ```python -# By category builder = BrowserToolSetBuilder.for_categories(browser, "navigation", "element_interaction") tools = builder.build()["tool_specs"] - -# By tool name -builder = BrowserToolSetBuilder.for_tool_names( - browser, "click_element_by_ref", "input_text_by_ref" -) -tools = builder.build()["tool_specs"] - -# Combine multiple for_* selections -builder1 = BrowserToolSetBuilder.for_categories(browser, "navigation", "element_interaction", "capture") -builder2 = BrowserToolSetBuilder.for_tool_names(browser, "verify_url") -tools = [*builder1.build()["tool_specs"], *builder2.build()["tool_specs"]] ``` +Also available: `for_tool_names(browser, "click_element_by_ref", ...)` and combining multiple builders. See `docs/BROWSER_TOOLS_GUIDE.md` for full examples. + ### Snapshot modes `get_snapshot(interactive=False, full_page=True)`: @@ -110,67 +97,13 @@ tools = [*builder1.build()["tool_specs"], *builder2.build()["tool_specs"]] `StealthConfig` (default enabled) applies Chrome arguments and a JS init script to evade bot detection. The strategy is **mode-aware**: headless mode uses a full 50+ flag set; headed mode uses a minimal ~11 flag set to match real Chrome user behavior. 
-Key options: -- `use_new_headless=True` (default) — use full Chromium binary with `--headless=new` instead of headless-shell (see below) -- `docker_mode=True` for container environments - -**New headless mode (`use_new_headless=True`, default)**: -When `headless=True` (default) and stealth is enabled, bridgic redirects Playwright to the full Chromium binary to avoid headless-shell's detectable fingerprint differences: - -``` -self._headless=True (user intent) → Playwright receives headless=False → full Chromium binary - build_args() adds --headless=new → no visible window -``` - -Key distinction: -- `Browser._headless` — **user's intent** (hide the window?) -- `options["headless"]` passed to Playwright — **binary selection** (which binary to pick?) - -`StealthArgsBuilder.build_args(headless_intent=True, locale=None)`: -- `headless_intent=True` (default, headless mode): uses `CHROME_STEALTH_ARGS` (50+ flags) + `CHROME_DISABLED_COMPONENTS` (28 features). Injects `--headless=new`, `--hide-scrollbars`, `--mute-audio`, and `--blink-settings=...` explicitly (Playwright normally adds these when `headless=True`, but since we pass `headless=False`, we add them manually). -- `headless_intent=False` (headed mode): uses `CHROME_STEALTH_ARGS_HEADED` (~11 flags) + `CHROME_DISABLED_COMPONENTS_HEADED` (3 features). Uses `--lang={locale}` (not hardcoded `en-US`). Never adds `--headless=new`. Goal: fingerprint indistinguishable from a real Chrome user — excessive disable-* flags in headed mode create a detectable anomaly and can break Cloudflare Turnstile's AJAX challenge requests. 
- -This redirect is skipped when: -- `StealthConfig.use_new_headless=False` (opt-out to restore old headless-shell) -- System Chrome is used (`channel` or `executable_path` set) — system Chrome manages its own binary - -**Headed mode auto-switches to system Chrome**: -Playwright's bundled "Google Chrome for Testing" binary is blocked by Google OAuth (login rejected as "unsafe browser") and shows a "test" label in the macOS Dock. In headed mode, when stealth is enabled and system Chrome is detected (`_detect_system_chrome()`), bridgic automatically sets `channel="chrome"` to use the real system Chrome binary. This is transparent to the user. The headed stealth args (~11 flags) are still applied; `--test-type=` is added to suppress Chrome's "unsupported flag" warning banner for `--disable-blink-features=AutomationControlled`. If system Chrome is not installed, bridgic falls back to Chrome for Testing. - -**JS init script** (`_STEALTH_INIT_SCRIPT_TEMPLATE` in `_stealth.py`) — **headless mode only**. Skipped entirely in headed mode (`self._headless=False`) because `context.add_init_script()` runs in ALL frames including Cloudflare Turnstile's challenge iframe; patching `window.chrome` (`configurable:false`), `navigator.permissions.query`, and WebGL prototype inside the iframe causes detectable API inconsistencies that fail the challenge. Playwright CLI injects nothing and passes Turnstile; bridgic matches that behaviour in headed mode. +Key decisions and constraints: +- **New headless redirect** (`use_new_headless=True`, default): bridgic passes `headless=False` to Playwright (selecting the full Chromium binary) and manually adds `--headless=new` + scrollbar/audio/blink flags. `Browser._headless` = user's intent; `options["headless"]` = binary selection. +- **Headed mode auto-switches to system Chrome**: Playwright's bundled "Chrome for Testing" is blocked by Google OAuth. 
When stealth is enabled in headed mode and system Chrome is detected, bridgic sets `channel="chrome"` automatically. `--test-type=` suppresses the "unsupported flag" warning banner. +- **JS init script is headless-only**: skipped in headed mode because `add_init_script()` runs in ALL frames including Cloudflare Turnstile's challenge iframe — patching `window.chrome`/`navigator.permissions.query`/WebGL inside it causes detectable inconsistencies that fail the challenge. +- **Anti-toString (`_mkNative`)**: all patched functions return `"function name() { [native code] }"` via intercepted `Function.prototype.toString` to defeat DataDome/PerimeterX/Cloudflare `.toString()` probing. -When active (headless mode), patches these navigator/window properties before any page script runs: - -**Anti-toString-detection (`_mkNative` framework)**: -All patched functions are registered in a `WeakSet` (`_nativeFns`) via `_mkNative(fn, name)`. `Function.prototype.toString` is itself intercepted to return `"function foo() { [native code] }"` for any registered function. This closes the entire class of "call `.toString()` on a function to detect monkey-patching" attacks used by DataDome, PerimeterX, and Cloudflare bot detectors. - -```javascript -const _nativeFns = new WeakSet(); -const _nativeFnNames = new WeakMap(); -const _mkNative = (fn, name) => { _nativeFns.add(fn); _nativeFnNames.set(fn, name); return fn; }; -Function.prototype.toString = _mkNative(function toString() { - if (_nativeFns.has(this)) return `function ${_nativeFnNames.get(this) ?? this.name}() { [native code] }`; - return _origFnToString.call(this); -}, 'toString'); -``` - -**Patched properties**: -- `navigator.webdriver` → **conditionally** `undefined`; checks `Object.getOwnPropertyDescriptor(Navigator.prototype, 'webdriver')` first and patches the prototype descriptor. Falls back to instance property only if the prototype has no descriptor but the value is non-undefined. 
Avoids creating an own-property (which makes `'webdriver' in navigator` = true — detectable in real Chrome where the property is absent). -- `navigator.plugins` / `navigator.mimeTypes` → realistic PDF Viewer entries (5 plugins, 2 MIME types); each plugin holds its own per-plugin mime copies so `enabledPlugin` refs are correct -- `navigator.languages` → derived from `Browser(locale=...)` to keep `navigator.language === navigator.languages[0]` (e.g. `["zh-CN", "zh", "en"]` for `locale="zh-CN"`); defaults to `["en-US", "en"]` -- `window.chrome` → complete object with `runtime`, `csi()`, `loadTimes()` (all wrapped with `_mkNative`) -- `navigator.permissions.query` → returns `"default"` for notifications (not `"denied"`); wrapped with `_mkNative` -- `window.outerWidth/Height` → matches `innerWidth/Height` when zero (guard for edge cases; with `--headless=new` + `screen` context option these are already correctly set by Chrome) -- `navigator.deviceMemory` → `8` (headless environments may return `undefined`) -- `navigator.hardwareConcurrency` → `8` when value is 0 or 1 (headless may report fewer cores) -- `navigator.connection` → `{ effectiveType: '4g', downlink: 10, rtt: 100, saveData: false }` when absent -- `WebGLRenderingContext` / `WebGL2RenderingContext` → `getParameter(37445/37446)` **conditionally** returns `'Intel Inc.'` / `'Intel Iris OpenGL Engine'` only when the real vendor contains `'Google'` or `'SwiftShader'` (masks SwiftShader which is a well-known bot signal). On headed Apple Silicon Mac the real `'Apple Inc.'` value is preserved so the WebGL fingerprint stays consistent with DPI, Canvas, and font rendering signals. `getParameter` is wrapped with `_mkNative`. 
-- `document.hasFocus()` → always returns `true` (headless tabs return `false` by default; Cloudflare and DataDome probe this); wrapped with `_mkNative` -- `document.hidden` → always `false` (via `Object.defineProperty`) -- `document.visibilityState` → always `'visible'` (via `Object.defineProperty`); headless tabs default to `'hidden'` which is a strong bot signal -- `Notification.permission` → guarded: only patched if `Notification` exists and its permission is `'denied'`; returns `'default'` - -`get_init_script(locale=None)` accepts the locale and performs the `__BRIDGIC_LANGS__` substitution before returning the script. Called from `_browser.py:_start()` with `self._locale` only when `self._headless=True`. +For the full list of patched navigator/window properties, see [`docs/INTERNALS.md` — Stealth JS Init Script](docs/INTERNALS.md#stealth-js-init-script--patched-properties). ### CLI architecture @@ -186,339 +119,26 @@ bridgic-browser click @8d4b03a9 │◄─── JSON response ──────────────────────────── dispatch → tool fn() ``` -Key implementation details: -- **`_client.py`**: `send_command()` auto-starts the daemon if no socket exists. `_spawn_daemon()` uses `select.select()` + `os.read()` for the 30-second ready timeout (avoids blocking `proc.stdout.read()`). `start_if_needed=False` prevents auto-start for the `close` command. -- **`_daemon.py`**: `run_daemon()` creates a `Browser()` instance directly (lazy start — Playwright does **not** launch immediately; `Browser.__init__` auto-loads config from `_config.py`), writes `BRIDGIC_DAEMON_READY` to stdout, and serves one JSON command per connection. The browser's Playwright process starts on the first command that calls `_ensure_started()` (e.g. `navigate_to`). `asyncio.wait_for(reader.readline(), timeout=60)` prevents hanging on idle connections. Signal handling uses `loop.add_signal_handler()` (asyncio-safe). -- **`_commands.py`**: 67 Click commands in 15 sections via `SectionedGroup`. 
`scroll` uses `--dy`/`--dx` options (not positional) to support negative values. `screenshot`/`pdf`/`upload`/`storage-save`/`storage-load`/`trace-stop` call `os.path.abspath()` on the client side before sending (daemon cwd may differ). `snapshot` supports `-i`/`--interactive`, `-f/-F`/`--full-page/--no-full-page`, `-l`/`--limit` (default 10000), and `-s`/`--file` (overflow file path); it delegates to `browser.get_snapshot_text()`. When content exceeds limit or `--file` is provided, full snapshot is saved to a file (auto-generated under `~/.bridgic/bridgic-browser/snapshot/` when over limit, or the specified path). - - **`wait`**: argument is named `SECONDS_OR_TEXT`. When the argument parses as a float it always takes the time-wait path (`wait_seconds`); when it is a string it takes the text-wait path (`text` or `text_gone` with `--gone`). The `--gone` flag is **only** meaningful with a string argument — a numeric argument with `--gone` is ignored (number always → time). Unit is **seconds**, not milliseconds. This is documented explicitly in the command docstring and in `_cli_catalog.py` to prevent LLM confusion. Text search traverses **all frames** (main + iframes) via polling, so text inside iframes is detectable. - - **`type`**: docstring explicitly states the text goes into the **currently focused element** and that the user must `click` or `focus` the target first. - - **`mouse-move` / `mouse-click` / `mouse-drag`**: coordinates are **viewport pixels from the top-left corner**; documented in both docstrings and `_cli_catalog.py`. - - **`eval-on`**: CODE must be an arrow function or named function that receives the element as its argument (e.g. `"(el) => el.textContent"`); this calling convention is documented in the docstring with examples. -- **Config loading**: `Browser.__init__` auto-loads config via `_config.py:_load_config_sources()`. The `--headed` CLI flag merges `{"headless": false}` into `BRIDGIC_BROWSER_JSON` before spawning the daemon. 
The `--clear-user-data` CLI flag merges `{"clear_user_data": true}` into `BRIDGIC_BROWSER_JSON`. The `--cdp` CLI flag resolves the CDP input (port/url/auto) via `resolve_cdp_input()` on the client side, then passes the resolved `ws://` URL to the daemon via the `BRIDGIC_CDP` env var (overriding any inherited shell value). `run_daemon()` reads `BRIDGIC_CDP` and passes `cdp_url=...` to `Browser()` after running it through `resolve_cdp_input()` (a no-op on `ws://`/`wss://` inputs). CDP resolution failure raises `RuntimeError` — no silent fallback to launch mode. -- **`close` command fast-path**: the daemon calls `browser.inspect_pending_close_artifacts()` to pre-allocate a session dir, trace path, and video paths (all grouped under `~/.bridgic/bridgic-browser/tmp/close--/`), responds to the client immediately with those paths, then sets `stop_event`. Actual `browser.close()` runs after the client disconnects. After close, `_write_close_report()` writes `close-report.json` in the session dir with status (`"success"`, `"success_with_timeouts"`, `"error"`, or `"timeout"`), artifact paths, and any errors. -- **Daemon cleanup ownership guard**: after `browser.close()` finishes, `run_daemon()` reads the run-info file and compares its `pid` field to `os.getpid()` before calling `transport.cleanup()` / `remove_run_info()`. This prevents the outgoing daemon from deleting the new daemon's socket when a `close` is followed immediately by a new command (which starts a new daemon before the old one's shutdown completes). If the run-info is gone (`None`) the old daemon is still the owner and cleans up normally. - -Socket path: `BRIDGIC_SOCKET` env var (default `~/.bridgic/bridgic-browser/run/bridgic-browser.sock`). -The directory is created with `0o700` permissions on first use. Users upgrading from an older version that used `/tmp/bridgic-browser.sock` should stop any running daemon first (`bridgic-browser close`) before upgrading. 
- -Snapshot overflow: `get_snapshot_text(limit=10000, file=None, ...)` — when content exceeds `limit` or `file` is explicitly provided, full snapshot is written to `file` (auto-generated if `None` and over limit) and only a notice with the file path is returned. `limit` must be ≥ 1. `file` is validated: empty/whitespace-only paths, null bytes, and existing directories raise `InvalidInputError`. - -## Key Implementation Details & Playwright Internals - -### Two Co-existing Ref Systems (Foundation for Understanding the Entire Chain) - -bridgic has **two distinct ref systems** that must not be confused: - -| | bridgic ref | playwright_ref | -|---|---|---| -| Example | `"8d4b03a9"` | `"e369"` / `"f1e5"` | -| Generated in | `_snapshot.py:_compute_stable_ref()` | Playwright injected script `computeAriaRef()` | -| Format | SHA-256(namespace+role+name+frame_path+nth) first 4 bytes hex | `{refPrefix}e{lastRef}` incrementing integer | -| Stability | **Stable across snapshots** (same element, same ref) | **Resets after each snapshot** (valid only within current snapshotForAI) | -| Purpose | Exposed to LLM / tool calls / CLI | O(1) DOM pointer lookup for aria-ref fast path | -| Stored in | `EnhancedSnapshot.refs: Dict[str, RefData]` | `RefData.playwright_ref` | - ---- - -### Playwright Source: Ref Generation Rules - -All source paths are under `.venv/lib/python3.10/site-packages/playwright/driver/package/lib/`. - -#### 1. 
`lastRef` Counter and `computeAriaRef()` -**File**: `generated/injectedScriptSource.js` (this script is injected into each frame; each frame has its own independent instance) - -```javascript -// injectedScriptSource.js — module-level variable in injected script (independent per frame) -var lastRef = 0; - -function computeAriaRef(ariaNode, options) { - if (options.refs === "none") return; - // when mode="ai", refs="interactable" — only assigns refs to visible elements that receive pointer events - if (options.refs === "interactable" && (!ariaNode.box.visible || !ariaNode.receivesPointerEvents)) - return; - - let ariaRef = ariaNode.element._ariaRef; // cache on the DOM element - if (!ariaRef || ariaRef.role !== ariaNode.role || ariaRef.name !== ariaNode.name) { - // cache miss (first time / role or name changed) → generate new ref - ariaRef = { - role: ariaNode.role, - name: ariaNode.name, - ref: (options.refPrefix ?? "") + "e" + ++lastRef // ← core format - }; - ariaNode.element._ariaRef = ariaRef; // write back to DOM element - } - ariaNode.ref = ariaRef.ref; -} -``` - -**Key rules**: -- `lastRef` is a module-level integer that **monotonically increases throughout the lifetime of the injected script instance for the same frame and is never reset** -- If role+name is unchanged for the same element, **the previous ref is reused** (`element._ariaRef` cache), `lastRef` is not incremented -- Ref format: `{refPrefix}e{lastRef}`, e.g. `"e1"`, `"e5"`, `"f1e3"`, `"f2e7"` -- `refPrefix` is passed by the caller (see next section) - -#### 2. Source of `refPrefix`: frame.seq -**File**: `server/page.js:825` (`snapshotFrameForAI` function) - -```javascript -// page.js — snapshotFrameForAI() -injectedScript.evaluate((injected, options) => { - return injected.incrementalAriaSnapshot(node, { mode: "ai", ...options }); -}, { - refPrefix: frame.seq ? 
"f" + frame.seq : "", // ← main frame seq=0 → "", child frame seq=N → "fN" - track: options.track -}); -``` - -**File**: `server/frames.js:368` (Frame constructor) - -```javascript -// frames.js — Frame constructor -this.seq = page.frameManager.nextFrameSeq(); -// main frame seq=0; subsequent frames increment: 1, 2, 3... -// seq is not "the Nth iframe" — it is a globally unique sequence number -``` - -**Format summary**: -- Main frame (seq=0): `refPrefix=""` → refs are `"e1"`, `"e2"`, … -- Child frame (seq=1): `refPrefix="f1"` → refs are `"f1e1"`, `"f1e2"`, … -- Child frame (seq=2): `refPrefix="f2"` → refs are `"f2e1"`, `"f2e3"`, … -- **Note**: seq is a page-level global counter, unrelated to iframe position in the DOM - -#### 3. Building the `snapshot.elements` Map -**File**: `generated/injectedScriptSource.js` (the `visit` callback inside `generateAriaTree`) - -```javascript -// injectedScriptSource.js — generateAriaTree > visit() -if (childAriaNode.ref) { - snapshot.elements.set(childAriaNode.ref, element); // ref → DOM Element - snapshot.refs.set(element, childAriaNode.ref); // DOM Element → ref (reverse mapping) - if (childAriaNode.role === "iframe") - snapshot.iframeRefs.push(childAriaNode.ref); // iframes collected separately for recursive child snapshots -} -``` - -#### 4. Writing to `_lastAriaSnapshotForQuery` -**File**: `generated/injectedScriptSource.js` (`InjectedScript.incrementalAriaSnapshot()` method) - -```javascript -// injectedScriptSource.js — InjectedScript class -incrementalAriaSnapshot(node, options) { - const ariaSnapshot = generateAriaTree(node, options); - // ... - this._lastAriaSnapshotForQuery = ariaSnapshot; // ← overwritten after each snapshot - return { full, incremental, iframeRefs: ariaSnapshot.iframeRefs }; -} -``` - -**Key**: `_lastAriaSnapshotForQuery` is a property on each frame's injected script instance and is **completely independent per frame**. 
The L1 frame's injected script only holds L1's `elements` Map (with keys like `"f1e1"`). - ---- - -### Playwright Source: Ref Lookup Rules - -#### 5. aria-ref Engine: `_createAriaRefEngine()` -**File**: `generated/injectedScriptSource.js` (registered in the `InjectedScript` constructor) - -```javascript -// injectedScriptSource.js — _createAriaRefEngine() -_createAriaRefEngine() { - const queryAll = (root, selector) => { - const result = this._lastAriaSnapshotForQuery?.elements?.get(selector); - // selector = the raw string after "aria-ref=", e.g. "e369" or "f1e5" - return result && result.isConnected ? [result] : []; - // isConnected check: returns empty if element has been removed from DOM (stale case) - }; - return { queryAll }; -} -``` - -O(1) Map lookup; `isConnected` ensures stale refs return empty instead of throwing. - -#### 6. `_jumpToAriaRefFrameIfNeeded()`: Cross-frame Routing -**File**: `server/frameSelectors.js:85` - -```javascript -// frameSelectors.js — FrameSelectors class -_jumpToAriaRefFrameIfNeeded(selector, info, frame) { - if (info.parsed.parts[0].name !== "aria-ref") return frame; - const body = info.parsed.parts[0].body; // "f1e5" or "e369" - const match = body.match(/^f(\d+)e\d+$/); // only matches child frame refs (with "f" prefix) - if (!match) return frame; // main frame ref → no jump - const frameSeq = +match[1]; // extract seq number - const jumptToFrame = this.frame._page.frameManager.frames() - .find(frame2 => frame2.seq === frameSeq); // global linear search - if (!jumptToFrame) - throw new InvalidSelectorError(...); - return jumptToFrame; -} -``` - -**Important**: `_jumpToAriaRefFrameIfNeeded` switches the execution target frame **before** running `queryAll`, so the query runs in the correct frame's injected script context (which holds the corresponding key in its `_lastAriaSnapshotForQuery`). 
- -**This means**: from an element resolution perspective, both `page.locator("aria-ref=f1e5")` and `frame_locator("iframe").nth(0).locator("aria-ref=f1e5")` correctly find the L1 frame element, because `_jumpToAriaRefFrameIfNeeded` auto-routes. However, `locator.evaluate()`'s JS execution context is **not affected** — it always runs in the frame that **owns the locator's scope** (see below). - ---- - -### bridgic Source: Ref Generation Rules - -#### 7. Generating the bridgic ref (stable ID) -**File**: `bridgic/browser/session/_snapshot.py` - -```python -# _snapshot.py:394 -_REF_NAMESPACE = "bridgic-browser-v1" - -# _snapshot.py:422 — _compute_stable_ref() -@staticmethod -def _compute_stable_ref(role, name, frame_path, nth) -> str: - frame_str = ",".join(str(x) for x in frame_path) if frame_path else "" - raw = f"{_REF_NAMESPACE}\x1f{role}\x1f{name or ''}\x1f{frame_str}\x1f{nth}" - # \x1f (ASCII Unit Separator) used as field delimiter — cannot appear in HTML accessible names - digest = hashlib.sha256(raw.encode("utf-8")).digest() - return digest[:4].hex() # 8 hex characters, e.g. "8d4b03a9" -``` - -**Stability guarantee**: as long as the four fields role, name, frame_path, and nth remain unchanged, the same element always gets the same ref ID across snapshots — the LLM can use it persistently across snapshots. - -#### 8. Extracting and Storing `playwright_ref` -**File**: `bridgic/browser/session/_snapshot.py` - -```python -# _snapshot.py:374 -_REF_EXTRACT_PATTERN = re.compile(r'\[ref=([a-zA-Z0-9]+)\]') - -# _snapshot.py:1400-1491 — _process_page_snapshot_for_ai() parsing loop -# Extract before clean_suffix removes [ref=...]: -_pw_ref_match = ref_extract_pattern.search(suffix) if suffix else None -playwright_ref_for_element = _pw_ref_match.group(1) if _pw_ref_match else None - -# Store in RefData: -refs[ref] = RefData( - ... 
- playwright_ref=playwright_ref_for_element, # Playwright's "e369" / "f1e5" -) -``` - -`playwright_ref` is extracted from the `[ref=...]` suffix in Playwright's snapshot text lines and is only valid for the lifetime of the current `snapshotForAI` call. - -#### 9. Generating `frame_path` -**File**: `bridgic/browser/session/_snapshot.py:1229` (parsing loop) - -```python -# _snapshot.py — _process_page_snapshot_for_ai() -_iframe_local_counters: Dict[tuple, int] = {} # key=parent path tuple, value=number of child iframes seen so far -# ... -# When an iframe node is encountered: -parent_path = tuple(iframe_stack[-1][1]) if iframe_stack else () -local_idx = _iframe_local_counters.get(parent_path, 0) -_iframe_local_counters[parent_path] = local_idx + 1 -iframe_stack.append((original_depth, list(parent_path) + [local_idx])) -``` - -`frame_path` records **the per-level local indices from the main frame to the target iframe** (same-level iframes start from index 0), and is unrelated to `frame.seq`. - ---- - -### bridgic Source: Ref Lookup Rules - -#### 10. Two-phase Lookup in `get_element_by_ref()` -**File**: `bridgic/browser/session/_browser.py` - -``` -Input: bridgic ref (e.g. "8d4b03a9") - ↓ -self._last_snapshot.refs.get(ref) → RefData - ↓ -Phase 1: aria-ref fast path (O(1)) - Condition: ref_data.playwright_ref is non-empty (i.e. 
no re-navigation since last snapshot) - Implementation: - scope = page - for nth in ref_data.frame_path: # build scope chain following frame_path - scope = scope.frame_locator("iframe").nth(nth) - locator = scope.locator(f"aria-ref={ref_data.playwright_ref}") - count = await locator.count() - count == 1 → return directly (Playwright's _jumpToAriaRefFrameIfNeeded guarantees routing) - count == 0 → stale, fall through - Exception → engine unavailable, fall through - -Phase 2: CSS rebuild path (get_locator_from_ref_async) - Location: _snapshot.py:1830 - Strategy priority (by signal strength): - 1) get_by_role(role, name=name, exact=True) ← most elements - 2) get_by_role(role).filter(has_text=...) ← ROLE_TEXT_MATCH_ROLES - 3) get_by_text(text, exact=True) ← TEXT_LEAF_ROLES (text pseudo-role) - 4) STRUCTURAL_NOISE_ROLES with match_text ← CSS-scoped + filter(has_text) + nth - 5) STRUCTURAL_NOISE_ROLES child-anchor path ← unnamed noise with no text - 6) get_by_role(role) ← bare role fallback when no name - scope: chain frame_locator("iframe").nth(n) per frame_path level first - nth: applied only when locator key space matches role:name key space (excluding STRUCTURAL_NOISE/TEXT_LEAF) - -STRUCTURAL_NOISE child-anchor path (strategy 5) detail: - Applies to: unnamed generic/group/none/presentation with no stored text - Sub-strategies (tried in order): - a) Find text-leaf child (role='text', parent_ref==ref) → CSS-scoped container locator (STRUCTURAL_NOISE_CSS) - b) Find named STRUCTURAL_NOISE child (parent_ref==ref, role in STRUCTURAL_NOISE_ROLES, name non-empty) - → scope.locator(STRUCTURAL_NOISE_CSS_NAMED).filter(has_text=name).locator('..') - Note: locator('..') is auto-detected as XPath parent by Playwright (selectorParser.js:159) - Note: STRUCTURAL_NOISE_CSS_NAMED adds span:not([role]) vs STRUCTURAL_NOISE_CSS because - the child may be a <span> that Playwright maps to 'generic' role. - nth is NOT applied; the parent is located structurally via the child. 
- c) fallback: get_by_role(role) (returns 0 results for implicit generic — last resort) -``` - ---- - -### Covered-element Check - -**6 locations**: `_click_checkable_target` (`_browser.py:239`), `click_element_by_ref` (`~3151`), `hover_element_by_ref` (`~3393`), `check_checkbox_or_radio_by_ref` (`~3645`), `uncheck_checkbox_by_ref` (`~3751`), `double_click_element_by_ref` (`~3847`) - -```javascript -(el) => { - if (window.parent !== window) return false; // ← skip directly for iframe elements - const t = document.elementFromPoint(cx, cy); - return !!t && t !== el && !el.contains(t) && !t.contains(el); -} -``` - -**Do not change to `window.frameElement !== null`**: Chrome returns `null` for `window.frameElement` inside iframes under the `file://` protocol (security policy), causing false positives. `window.parent !== window` is a pure object comparison that is reliable across all protocols and origins. - -**Why iframe elements must be skipped**: `bounding_box()` returns main-viewport coordinates, while `document.elementFromPoint(cx, cy)` inside the iframe JS context uses iframe-local coordinates. The coordinate systems differ, so `elementFromPoint` finds the wrong element (typically the child iframe node), triggering a false "covered" report. After skipping, `locator.click()` lets Playwright handle coordinate transformation internally. 
- ---- - -### Nested iframes and frame_path - -`RefData.frame_path: Optional[List[int]]`: -- `None` → main frame -- `[0]` → first top-level iframe (local index 0) -- `[0, 1]` → second iframe inside the first top-level iframe - -All three locator-building code paths (aria-ref fast path, `get_locator_from_ref_async`, recovery path) use the same chained call: -```python -scope = page -for local_nth in frame_path: - scope = scope.frame_locator("iframe").nth(local_nth) -``` - -`_iframe_local_counters: Dict[tuple, int]` (`_snapshot.py:1229`) tracks the iframe count under each parent path, ensuring per-level nth values are independent across multiple nesting levels. - ---- - -### Interactive Element Detection — Small Icon Rule - -`_is_element_interactive()` (`_snapshot.py`) rule 9: small icon (10–50 px) is treated as interactive only when it carries **strong semantic signals**: +Key behaviors: +- **Lazy start**: daemon creates `Browser()` but Playwright doesn't launch until the first command that needs a page (e.g. `navigate_to`). +- **Config flags**: `--headed` merges `{"headless": false}` into `BRIDGIC_BROWSER_JSON`; `--clear-user-data` merges `{"clear_user_data": true}`; `--cdp` resolves CDP input via `resolve_cdp_input()` on the client side and passes the `ws://` URL to the daemon via `BRIDGIC_CDP` env var. +- **Close fast-path**: daemon pre-allocates artifact paths, responds immediately, then runs `browser.close()` after the client disconnects. `close-report.json` records status and artifact paths. +- **Cleanup ownership guard**: after close, the daemon compares the run-info `pid` to `os.getpid()` before deleting the socket — prevents a new daemon's socket from being deleted by an old daemon still shutting down. +- **Socket path**: `BRIDGIC_SOCKET` env var (default `~/.bridgic/bridgic-browser/run/bridgic-browser.sock`), directory created with `0o700` permissions. 
+For detailed implementation notes on client/daemon/commands, see [`docs/INTERNALS.md` — CLI Architecture](docs/INTERNALS.md#cli-architecture--detailed-implementation). -- `data-action` attribute → explicit author intent -- `aria-label` → screen-reader accessible name +## Ref System Internals -**`classAndId` is intentionally excluded**: almost every element carries a CSS class, so including it causes false positives for purely decorative elements (badges, avatars, dividers) that happen to be small. `cursor=pointer` is covered by rule 10 (separate check) and is a stronger signal. +bridgic has **two co-existing ref systems**: the stable bridgic ref (`"8d4b03a9"`, SHA-256 based, stable across snapshots) and the ephemeral playwright_ref (`"e369"`, per-snapshot incrementing integer, used for O(1) DOM lookup). `get_element_by_ref()` uses a **two-phase lookup**: first tries the aria-ref fast path (O(1) Map lookup via playwright_ref), then falls back to a CSS rebuild path with 6 strategy tiers. All paths chain `frame_locator("iframe").nth(n)` per `frame_path` level for iframe support. -Impact on `get_snapshot(interactive=True)`: a small icon with only a CSS class (no `data-action`, no `aria-label`, no `cursor:pointer`) will **not** appear in the interactive snapshot. If an icon is missing, add `data-action` or `aria-label` to the element. +Key constraints: +- `frame_path` (per-level local indices) is unrelated to Playwright's `frame.seq` (page-level global counter). +- **Covered-element check** uses `window.parent !== window` (not `window.frameElement !== null`) to detect iframes — the latter returns `null` under `file://` protocol. Iframe elements skip the check entirely because `bounding_box()` returns main-viewport coordinates while `elementFromPoint()` uses iframe-local coordinates. +- **Small icon rule**: icons 10–50 px are interactive only with `data-action` or `aria-label` (not `classAndId` — too many false positives). 
---- +For complete source-level documentation of Playwright internals, ref generation, lookup strategies, and iframe handling, see [`docs/INTERNALS.md`](docs/INTERNALS.md). -### Debug Logging +## Debug Logging ```bash BRIDGIC_LOG_LEVEL=DEBUG bridgic-browser snapshot -i @@ -526,14 +146,9 @@ BRIDGIC_LOG_LEVEL=DEBUG bridgic-browser click ``` Key DEBUG log points (`_browser.py`): -- `[get_element_by_ref] aria-ref fast-path hit: ref=... playwright_ref=... frame_path=...` -- `[get_element_by_ref] aria-ref stale (count=N), falling through to CSS: ...` -- `[get_element_by_ref] aria-ref exception (...), falling through to CSS: ...` -- `[get_element_by_ref] CSS path: ref=... role=... name=... nth=... frame_path=...` -- `[click_element_by_ref] covered at (x, y), clicking intercepting element` -- `_click_checkable_target: covered at (x, y), clicking intercepting element` - ---- +- `[get_element_by_ref] aria-ref fast-path hit/stale/exception` — ref lookup phase transitions +- `[get_element_by_ref] CSS path: ref=... role=... name=... nth=... 
frame_path=...` — fallback strategy +- `[click_element_by_ref] covered at (x, y), clicking intercepting element` — covered-element redirect ## Testing notes diff --git a/README.md b/README.md index 78da96a..e0644c7 100644 --- a/README.md +++ b/README.md @@ -231,7 +231,7 @@ bridgic-browser open https://example.com --cdp auto | `9222` | Bare port number -- queries `localhost:9222/json/version` to discover the WebSocket URL | | `ws://...` / `wss://...` | Direct WebSocket URL (raw CDP or Playwright WS protocol), passed through as-is | | `http://host:port` | HTTP discovery endpoint -- queries `/json/version` on that host | -| `auto` | Auto-scan local Chrome/Chromium/Brave/Edge/Arc profile directories (+ Canary variants) for an active `DevToolsActivePort` file | +| `auto` | Auto-scan local Chrome/Chromium/Brave profile directories (+ Canary variants) for an active `DevToolsActivePort` file | **Closing behavior:** `bridgic-browser close` disconnects from the remote browser but does **not** terminate the Chrome process. The browser keeps running and can be reconnected. 
diff --git a/README_zh.md b/README_zh.md index 8f117bc..ed33e30 100644 --- a/README_zh.md +++ b/README_zh.md @@ -230,7 +230,7 @@ bridgic-browser open https://example.com --cdp auto | `9222` | 端口号 -- 向 `localhost:9222/json/version` 查询 WebSocket URL | | `ws://...` / `wss://...` | 直接 WebSocket URL(原始 CDP 或 Playwright WS 协议),原样传递 | | `http://host:port` | HTTP 发现端点 -- 向该主机的 `/json/version` 查询 | -| `auto` | 自动扫描本地 Chrome/Chromium/Brave/Edge/Arc 配置目录(含 Canary 变体),查找活跃的 `DevToolsActivePort` 文件 | +| `auto` | 自动扫描本地 Chrome/Chromium/Brave 配置目录(含 Canary 变体),查找活跃的 `DevToolsActivePort` 文件 | **关闭行为:** `bridgic-browser close` 会断开与远程浏览器的连接,但**不会**终止 Chrome 进程。浏览器继续运行,可以重新连接。 diff --git a/bridgic/browser/_cli_catalog.py b/bridgic/browser/_cli_catalog.py index 4a9ed53..09e3445 100644 --- a/bridgic/browser/_cli_catalog.py +++ b/bridgic/browser/_cli_catalog.py @@ -144,8 +144,8 @@ "trace-start": (ToolCategory.DEVELOPER, "Start browser tracing [--no-screenshots] [--no-snapshots]"), "trace-stop": (ToolCategory.DEVELOPER, "Stop tracing and save to PATH (.zip)"), "trace-chunk": (ToolCategory.DEVELOPER, "Add a named chunk marker to the current trace"), - "video-start": (ToolCategory.DEVELOPER, "Start video recording [--width W] [--height H]"), - "video-stop": (ToolCategory.DEVELOPER, "Stop video recording [PATH]"), + "video-start": (ToolCategory.DEVELOPER, "Start single-stream video recording on the active tab [--width W] [--height H]"), + "video-stop": (ToolCategory.DEVELOPER, "Stop video recording and save one .webm [PATH]"), "close": (ToolCategory.LIFECYCLE, "Close the browser session"), "resize": (ToolCategory.LIFECYCLE, "Resize the browser viewport to WIDTH x HEIGHT"), } diff --git a/bridgic/browser/cli/_commands.py b/bridgic/browser/cli/_commands.py index e1a2913..b020a70 100644 --- a/bridgic/browser/cli/_commands.py +++ b/bridgic/browser/cli/_commands.py @@ -985,11 +985,11 @@ def cmd_trace_chunk(title: str) -> None: @click.option("--width", default=None, type=int, help="Video width in 
pixels.") @click.option("--height", default=None, type=int, help="Video height in pixels.") def cmd_video_start(width: int | None, height: int | None) -> None: - """Start video recording on ALL pages in the context. + """Start single-stream video recording on the active tab. - Mirrors the Playwright CLI: one start call records every tab, - including tabs opened afterwards. Each page gets its own .webm file - returned by ``video-stop``. + Only one recorder is created. When the active tab changes, bridgic + hot-switches the CDP screencast source and keeps writing to the same + continuous ``.webm`` file. """ try: _ok(send_command("video_start", {"width": width, "height": height}, start_if_needed=False)) @@ -1000,14 +1000,12 @@ def cmd_video_start(width: int | None, height: int | None) -> None: @cli.command("video-stop", context_settings=CONTEXT_SETTINGS) @click.argument("path", required=False, default=None) def cmd_video_stop(path: str | None) -> None: - """Stop video recording and save files. - - PATH is optional. When omitted, recorded files stay in the temp dir. - When given: - * a directory → each recording is saved inside it - * a file path → first recording uses that exact path; extra - recordings from additional tabs get a ``-1``, ``-2`` … suffix - inserted before the ``.webm`` extension. + """Stop video recording and save one ``.webm`` file. + + PATH is optional. When omitted, the recorded file stays in the temp + dir. 
When given, PATH may be either: + * a directory → bridgic auto-generates a single filename inside it + * a file path → bridgic saves exactly one recording to that path """ try: abs_path = os.path.abspath(path) if path else None diff --git a/bridgic/browser/session/_browser.py b/bridgic/browser/session/_browser.py index 1ec0cbd..584e135 100644 --- a/bridgic/browser/session/_browser.py +++ b/bridgic/browser/session/_browser.py @@ -1575,7 +1575,7 @@ async def _ensure_started(self) -> None: # Timeout (seconds) applied to individual page.close() calls during # shutdown so that a hung beforeunload handler cannot block forever. _PAGE_CLOSE_TIMEOUT = 5.0 - _TRACE_STOP_TIMEOUT = 10.0 + _TRACE_STOP_TIMEOUT = 30.0 _CONTEXT_CLOSE_TIMEOUT = 15.0 _BROWSER_CLOSE_TIMEOUT = 15.0 _PLAYWRIGHT_STOP_TIMEOUT = 15.0 @@ -4092,9 +4092,9 @@ async def input_text_by_ref( """Input text into a specific element identified by its snapshot ref. This is the primary text-input tool for interacting with form fields by - ref. Unlike :meth:`type_text` and :meth:`insert_text` which type into - the currently focused element, this method targets the element directly - via its ref and handles both visible and hidden (shadow-DOM) inputs. + ref. Unlike :meth:`type_text` which types into the currently focused + element, this method targets the element directly via its ref and + handles both visible and hidden (shadow-DOM) inputs. Comparison: @@ -4103,8 +4103,6 @@ async def input_text_by_ref( - :meth:`type_text` — no ref; types into focused element character-by-character via ``keyboard.press``; triggers per-character ``keydown``/``keyup`` events (needed for autocomplete widgets). - - :meth:`insert_text` — no ref; pastes into focused element in one shot - without key events; fastest for long strings. 
Parameters ---------- @@ -5448,8 +5446,7 @@ async def type_text(self, text: str, submit: bool = False) -> str: Each character fires ``keydown``, ``keypress``, and ``keyup`` events, which is required for fields with per-keystroke handlers such as - autocomplete widgets. This is slower than :meth:`insert_text` for - long strings. + autocomplete widgets. An element must already be focused before calling this method (e.g. via :meth:`focus_element_by_ref` or by clicking a field first). @@ -5460,8 +5457,6 @@ async def type_text(self, text: str, submit: bool = False) -> str: hidden inputs; **preferred** for form filling. - ``type_text`` — no ref; requires a pre-focused element; fires per- character key events; use when those events are needed. - - :meth:`insert_text` — no ref; pastes in one shot without key events; - fastest for long strings. Parameters ---------- @@ -5670,57 +5665,6 @@ async def fill_form( logger.error(f"[fill_form] {error_msg}") _raise_operation_error(error_msg) - async def insert_text(self, text: str) -> str: - """Insert text at the current cursor position without per-character key events. - - Pastes the text directly into the currently focused element. Unlike - :meth:`type_text`, no ``keydown``/``keyup`` events are fired per character, - so it is significantly faster for long strings but will not trigger - handlers that listen to individual keystrokes (e.g., autocomplete widgets - that react to ``onkeydown``). - - An element must already be focused before calling this method (e.g. - via :meth:`focus_element_by_ref`). - - Use :meth:`input_text_by_ref` to target a specific element by ref. - Use :meth:`type_text` when per-character key events must fire. - - Parameters - ---------- - text : str - Text to insert at the current cursor position. Requires an element - to already be focused (e.g., via :meth:`focus_element_by_ref`). - - Returns - ------- - str - "Inserted text ( characters)". - - Raises - ------ - StateError - If no active page is available. 
- OperationError - If insertion fails. - """ - try: - logger.info(f"[insert_text] start text_len={len(text)}") - - page = await self.get_current_page() - if page is None: - _raise_state_error("No active page available", code="NO_ACTIVE_PAGE") - - await page.keyboard.insert_text(text) - result = f"Inserted text ({len(text)} characters)" - logger.info(f"[insert_text] done {result}") - return result - except BridgicBrowserError: - raise - except Exception as e: - error_msg = f"Failed to insert text: {str(e)}" - logger.error(f"[insert_text] {error_msg}") - _raise_operation_error(error_msg) - # ==================== Screenshot and PDF Tools ==================== async def take_screenshot( @@ -7463,8 +7407,8 @@ async def start_video( # bridgic never called ``setViewportSize`` on the foreign Chrome. # Falling back to a hard-coded ``800×600`` is almost always wrong: # the real window is wider (typically 16:9), so Chrome downsamples - # to fit within 800×600 and ffmpeg's ``pad`` filter adds a gray - # strip at the bottom to make up the difference. Querying + # to fit within 800×600 and ffmpeg's ``scale`` filter stretches + # the frame to the target size. Querying # ``window.innerWidth/innerHeight`` returns the true visible area # for any of the three modes. # ``& ~1``: round down to an even number — VP8 requires even diff --git a/bridgic/browser/session/_video_recorder.py b/bridgic/browser/session/_video_recorder.py index 1eabebf..8c1f44c 100644 --- a/bridgic/browser/session/_video_recorder.py +++ b/bridgic/browser/session/_video_recorder.py @@ -122,10 +122,9 @@ def _find_ffmpeg() -> str: # A baked 1×1 white JPEG. Used in the rare case where no real frame arrived # before stop() (e.g. start_video → immediate stop_video). 
ffmpeg refuses to # produce a valid WebM when its input pipe is empty, so we feed it this single -# byte sequence; the ``pad=W:H:0:0:gray`` filter then expands it to the target -# resolution by adding gray padding (the original 1×1 white pixel ends up in -# the top-left corner). The resulting frame is intentionally minimal — the -# only goal is "produce a playable file", not "produce a meaningful frame". +# byte sequence; the ``scale=W:H`` filter stretches it to the target resolution. +# The resulting frame is intentionally minimal — the only goal is "produce a +# playable file", not "produce a meaningful frame". # Playwright's videoRecorder.ts has an analogous fallback in writeFrame(). _FALLBACK_WHITE_JPEG_1X1 = ( b"\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00" @@ -145,7 +144,7 @@ def _create_white_jpeg(width: int, height: int) -> bytes: If Pillow is available we render a true ``width × height`` white JPEG; otherwise we return the baked 1×1 fallback above and rely on ffmpeg's - ``pad`` filter to expand it. The fallback path is taken in production + ``scale`` filter to stretch it. The fallback path is taken in production because Pillow is not a project dependency — see the comment on ``_FALLBACK_WHITE_JPEG_1X1`` for the implications. @@ -260,8 +259,17 @@ async def start(self) -> None: # -speed 2 slower preset (valid 0-5 with deadline=good) # -threads 2 extra worker to keep up with the slower preset # Filters: - # pad={w}:{h}:0:0:gray pad smaller frames with a gray border - # crop={w}:{h}:0:0 crop to exact target size + # scale={w}:{h} scale frames to exact target dimensions. + # + # Why scale instead of pad: Chrome's Page.startScreencast honours + # maxWidth/maxHeight as an *aspect-preserving clamp* — it never + # upsamples. When the viewport's aspect ratio differs from W:H + # (e.g. viewport 1710×856 vs target 1280×720), Chrome produces + # frames that fit within W×H but are shorter/narrower than the + # target. 
``pad`` would fill the gap with a visible gray border; + # ``scale`` stretches the frame to the exact target size instead. + # The distortion is negligible (typically < 12 %) and eliminates + # the gray bar entirely. args = [ ffmpeg_path, "-loglevel", "error", @@ -281,7 +289,7 @@ async def start(self) -> None: "-speed", "2", "-b:v", "5M", "-threads", "2", - "-vf", f"pad={w}:{h}:0:0:gray,crop={w}:{h}:0:0", + "-vf", f"scale={w}:{h}", self._output_path, ] # stdout/stderr → DEVNULL: ffmpeg launches with `-loglevel error`, so @@ -322,11 +330,10 @@ async def start(self) -> None: "quality": 95, # maxWidth/maxHeight is a *clamp*, not a target: Chrome # downsamples (preserving aspect) to fit within these bounds - # but never upsamples. They MUST equal the actual viewport - # the caller computed, otherwise an aspect-ratio mismatch - # makes Chrome ship a frame smaller than (self._width, - # self._height) and ffmpeg's pad filter fills the gap with - # gray. See bridgic/browser/session/_browser.py + # but never upsamples. When the viewport aspect ratio differs + # from W:H, Chrome produces smaller frames and ffmpeg's scale + # filter stretches them to the exact target size. + # See bridgic/browser/session/_browser.py # ``start_video()`` for the dimension-resolution comment. "maxWidth": self._width, "maxHeight": self._height, diff --git a/docs/API.md b/docs/API.md index 5e89417..4dcf14e 100644 --- a/docs/API.md +++ b/docs/API.md @@ -7,6 +7,8 @@ Short reference for the main session and download APIs. For tool lists and selec | Method / property | Description | |------------------|-------------| | `Browser(...)` | Constructor. Key args: `headless`, `viewport`, `user_data_dir`, `clear_user_data`, `stealth`, `cdp_url`, `channel`, `proxy`, `downloads_path`, etc. When `cdp_url` is set, connects to an existing Chrome via CDP (`connect_over_cdp`) instead of launching a new browser. | +| `find_cdp_url(mode, port, host, ...)` | Resolve a Chrome CDP WebSocket URL. 
`mode`: `"port"` (HTTP `/json/version`), `"file"` (read `DevToolsActivePort`), `"scan"` (auto-discover running Chrome/Chromium/Brave), `"service"` (return `ws_endpoint` as-is). Returns `ws://` URL. | +| `resolve_cdp_input(value)` | Normalize user-supplied CDP input to a `ws://` URL. Accepts: bare port (`"9222"`), `ws://`/`wss://` URL, `http://host:port`, or `"auto"`/`"scan"`. | | `await browser._start()` | Launch browser and create context. Called automatically by `navigate_to` / `search` (lazy start); call directly only when you need explicit startup before any navigation. | | `await browser.close()` | Stop the browser, auto-cleans active capture listeners. No-op if never started. | | `await browser.navigate_to(url, wait_until="domcontentloaded", timeout=None)` | Navigate to URL with optional auto-prefix when missing protocol. `wait_until`: `"domcontentloaded"` (default), `"load"`, `"networkidle"`, or `"commit"`. `timeout` in seconds. | diff --git a/docs/BROWSER_TOOLS_GUIDE.md b/docs/BROWSER_TOOLS_GUIDE.md index 20e7d49..88cf1ce 100644 --- a/docs/BROWSER_TOOLS_GUIDE.md +++ b/docs/BROWSER_TOOLS_GUIDE.md @@ -168,7 +168,7 @@ await browser.type_text("hello world") | Method | Speed | Events | Use Case | |--------|-------|--------|----------| -| `input_text_by_ref` | Fast | input, change | Standard forms | +| `input_text_by_ref` | Fast | focus, input, change | Standard forms | | `input_text_by_ref(slowly=True)` | Slow | All keyboard | Autocomplete | | `type_text` | Medium | All keyboard | At cursor (no ref) | diff --git a/docs/CDP_MODE.md b/docs/CDP_MODE.md index 7c908fd..13382fc 100644 --- a/docs/CDP_MODE.md +++ b/docs/CDP_MODE.md @@ -8,10 +8,11 @@ from bridgic.browser import Browser # SDK browser = Browser(cdp_url="ws://localhost:9222/devtools/browser/abc") -# CLI +# CLI (both open and search support --cdp) bridgic-browser open https://example.com --cdp 9222 bridgic-browser open https://example.com --cdp auto bridgic-browser open https://example.com --cdp 
"ws://localhost:9222/..." +bridgic-browser search "query" --cdp 9222 ``` ## How it works @@ -74,12 +75,13 @@ When connecting via CDP, bridgic borrows the browser's existing default context If the remote Chrome was not started with stealth flags, bridgic's JS patches can cover some fingerprints (navigator, webdriver, plugins) but cannot modify signals that require launch arguments (e.g., Blink feature disabling). -### Video recording covers all tabs in the context +### Video recording (single-stream, active tab) bridgic records video via Chrome's CDP `Page.startScreencast` (piped to ffmpeg), **not** Playwright's `record_video` context option — so video recording works on borrowed contexts. -- **All pages are recorded.** `start_video()` starts a screencast session for every open page in the borrowed context, including the user's pre-existing tabs and any pages opened after recording starts. Each page is saved to its own `.webm` file. -- **Recording stops cleanly without touching user tabs.** `stop_video()` finalizes every screencast session and saves the files; no page is closed or navigated. +- **Only the active tab is recorded.** `start_video()` starts a single screencast session on the currently active page. When you switch tabs (via `switch_tab`, `new_tab`, etc.), the CDP screencast source is hot-swapped to the new page — ffmpeg stays alive and the output is a single continuous `.webm` file. +- **`stop_video()` saves the file immediately.** The `.webm` is written as soon as the recorder stops; no page close is needed. +- **Recording stops cleanly without touching user tabs.** No page is closed or navigated. **Tracing is not affected** — `tracing.stop()` works at any time without closing pages or contexts. 
@@ -94,7 +96,7 @@ bridgic records video via Chrome's CDP `Page.startScreencast` (piped to ffmpeg), | `context.close()` | Yes | **Skipped** | | `browser.close()` | Kills process | **Disconnects only** | | Save tracing artifacts | Yes | Yes | -| Save video artifacts | Yes | Yes (all tabs in context) | +| Save video artifacts | Yes | Yes (active tab recording) | After `close()`, the remote Chrome continues running with all tabs intact. diff --git a/docs/INTERNALS.md b/docs/INTERNALS.md new file mode 100644 index 0000000..5be6969 --- /dev/null +++ b/docs/INTERNALS.md @@ -0,0 +1,384 @@ +# Implementation Internals + +Deep implementation details for bridgic-browser. Read this when debugging ref lookup, iframe handling, stealth detection, or CLI daemon issues. For everyday development, see [CLAUDE.md](../CLAUDE.md). + +## Two Co-existing Ref Systems (Foundation for Understanding the Entire Chain) + +bridgic has **two distinct ref systems** that must not be confused: + +| | bridgic ref | playwright_ref | +|---|---|---| +| Example | `"8d4b03a9"` | `"e369"` / `"f1e5"` | +| Generated in | `_snapshot.py:_compute_stable_ref()` | Playwright injected script `computeAriaRef()` | +| Format | SHA-256(namespace+role+name+frame_path+nth) first 4 bytes hex | `{refPrefix}e{lastRef}` incrementing integer | +| Stability | **Stable across snapshots** (same element, same ref) | **Resets after each snapshot** (valid only within current snapshotForAI) | +| Purpose | Exposed to LLM / tool calls / CLI | O(1) DOM pointer lookup for aria-ref fast path | +| Stored in | `EnhancedSnapshot.refs: Dict[str, RefData]` | `RefData.playwright_ref` | + +--- + +## Playwright Source: Ref Generation Rules + +All source paths are under `.venv/lib/python3.10/site-packages/playwright/driver/package/lib/`. + +### 1. 
`lastRef` Counter and `computeAriaRef()` +**File**: `generated/injectedScriptSource.js` (this script is injected into each frame; each frame has its own independent instance) + +```javascript +// injectedScriptSource.js — module-level variable in injected script (independent per frame) +var lastRef = 0; + +function computeAriaRef(ariaNode, options) { + if (options.refs === "none") return; + // when mode="ai", refs="interactable" — only assigns refs to visible elements that receive pointer events + if (options.refs === "interactable" && (!ariaNode.box.visible || !ariaNode.receivesPointerEvents)) + return; + + let ariaRef = ariaNode.element._ariaRef; // cache on the DOM element + if (!ariaRef || ariaRef.role !== ariaNode.role || ariaRef.name !== ariaNode.name) { + // cache miss (first time / role or name changed) → generate new ref + ariaRef = { + role: ariaNode.role, + name: ariaNode.name, + ref: (options.refPrefix ?? "") + "e" + ++lastRef // ← core format + }; + ariaNode.element._ariaRef = ariaRef; // write back to DOM element + } + ariaNode.ref = ariaRef.ref; +} +``` + +**Key rules**: +- `lastRef` is a module-level integer that **monotonically increases throughout the lifetime of the injected script instance for the same frame and is never reset** +- If role+name is unchanged for the same element, **the previous ref is reused** (`element._ariaRef` cache), `lastRef` is not incremented +- Ref format: `{refPrefix}e{lastRef}`, e.g. `"e1"`, `"e5"`, `"f1e3"`, `"f2e7"` +- `refPrefix` is passed by the caller (see next section) + +### 2. Source of `refPrefix`: frame.seq +**File**: `server/page.js:825` (`snapshotFrameForAI` function) + +```javascript +// page.js — snapshotFrameForAI() +injectedScript.evaluate((injected, options) => { + return injected.incrementalAriaSnapshot(node, { mode: "ai", ...options }); +}, { + refPrefix: frame.seq ? 
"f" + frame.seq : "", // ← main frame seq=0 → "", child frame seq=N → "fN" + track: options.track +}); +``` + +**File**: `server/frames.js:368` (Frame constructor) + +```javascript +// frames.js — Frame constructor +this.seq = page.frameManager.nextFrameSeq(); +// main frame seq=0; subsequent frames increment: 1, 2, 3... +// seq is not "the Nth iframe" — it is a globally unique sequence number +``` + +**Format summary**: +- Main frame (seq=0): `refPrefix=""` → refs are `"e1"`, `"e2"`, … +- Child frame (seq=1): `refPrefix="f1"` → refs are `"f1e1"`, `"f1e2"`, … +- Child frame (seq=2): `refPrefix="f2"` → refs are `"f2e1"`, `"f2e3"`, … +- **Note**: seq is a page-level global counter, unrelated to iframe position in the DOM + +### 3. Building the `snapshot.elements` Map +**File**: `generated/injectedScriptSource.js` (the `visit` callback inside `generateAriaTree`) + +```javascript +// injectedScriptSource.js — generateAriaTree > visit() +if (childAriaNode.ref) { + snapshot.elements.set(childAriaNode.ref, element); // ref → DOM Element + snapshot.refs.set(element, childAriaNode.ref); // DOM Element → ref (reverse mapping) + if (childAriaNode.role === "iframe") + snapshot.iframeRefs.push(childAriaNode.ref); // iframes collected separately for recursive child snapshots +} +``` + +### 4. Writing to `_lastAriaSnapshotForQuery` +**File**: `generated/injectedScriptSource.js` (`InjectedScript.incrementalAriaSnapshot()` method) + +```javascript +// injectedScriptSource.js — InjectedScript class +incrementalAriaSnapshot(node, options) { + const ariaSnapshot = generateAriaTree(node, options); + // ... + this._lastAriaSnapshotForQuery = ariaSnapshot; // ← overwritten after each snapshot + return { full, incremental, iframeRefs: ariaSnapshot.iframeRefs }; +} +``` + +**Key**: `_lastAriaSnapshotForQuery` is a property on each frame's injected script instance and is **completely independent per frame**. 
The L1 frame's injected script only holds L1's `elements` Map (with keys like `"f1e1"`). + +--- + +## Playwright Source: Ref Lookup Rules + +### 5. aria-ref Engine: `_createAriaRefEngine()` +**File**: `generated/injectedScriptSource.js` (registered in the `InjectedScript` constructor) + +```javascript +// injectedScriptSource.js — _createAriaRefEngine() +_createAriaRefEngine() { + const queryAll = (root, selector) => { + const result = this._lastAriaSnapshotForQuery?.elements?.get(selector); + // selector = the raw string after "aria-ref=", e.g. "e369" or "f1e5" + return result && result.isConnected ? [result] : []; + // isConnected check: returns empty if element has been removed from DOM (stale case) + }; + return { queryAll }; +} +``` + +O(1) Map lookup; `isConnected` ensures stale refs return empty instead of throwing. + +### 6. `_jumpToAriaRefFrameIfNeeded()`: Cross-frame Routing +**File**: `server/frameSelectors.js:85` + +```javascript +// frameSelectors.js — FrameSelectors class +_jumpToAriaRefFrameIfNeeded(selector, info, frame) { + if (info.parsed.parts[0].name !== "aria-ref") return frame; + const body = info.parsed.parts[0].body; // "f1e5" or "e369" + const match = body.match(/^f(\d+)e\d+$/); // only matches child frame refs (with "f" prefix) + if (!match) return frame; // main frame ref → no jump + const frameSeq = +match[1]; // extract seq number + const jumptToFrame = this.frame._page.frameManager.frames() + .find(frame2 => frame2.seq === frameSeq); // global linear search + if (!jumptToFrame) + throw new InvalidSelectorError(...); + return jumptToFrame; +} +``` + +**Important**: `_jumpToAriaRefFrameIfNeeded` switches the execution target frame **before** running `queryAll`, so the query runs in the correct frame's injected script context (which holds the corresponding key in its `_lastAriaSnapshotForQuery`). 
+ +**This means**: from an element resolution perspective, both `page.locator("aria-ref=f1e5")` and `frame_locator("iframe").nth(0).locator("aria-ref=f1e5")` correctly find the L1 frame element, because `_jumpToAriaRefFrameIfNeeded` auto-routes. However, `locator.evaluate()`'s JS execution context is **not affected** — it always runs in the frame that **owns the locator's scope** (see below). + +--- + +## bridgic Source: Ref Generation Rules + +### 7. Generating the bridgic ref (stable ID) +**File**: `bridgic/browser/session/_snapshot.py` + +```python +# _snapshot.py:394 +_REF_NAMESPACE = "bridgic-browser-v1" + +# _snapshot.py:422 — _compute_stable_ref() +@staticmethod +def _compute_stable_ref(role, name, frame_path, nth) -> str: + frame_str = ",".join(str(x) for x in frame_path) if frame_path else "" + raw = f"{_REF_NAMESPACE}\x1f{role}\x1f{name or ''}\x1f{frame_str}\x1f{nth}" + # \x1f (ASCII Unit Separator) used as field delimiter — cannot appear in HTML accessible names + digest = hashlib.sha256(raw.encode("utf-8")).digest() + return digest[:4].hex() # 8 hex characters, e.g. "8d4b03a9" +``` + +**Stability guarantee**: as long as the four fields role, name, frame_path, and nth remain unchanged, the same element always gets the same ref ID across snapshots — the LLM can use it persistently across snapshots. + +### 8. Extracting and Storing `playwright_ref` +**File**: `bridgic/browser/session/_snapshot.py` + +```python +# _snapshot.py:374 +_REF_EXTRACT_PATTERN = re.compile(r'\[ref=([a-zA-Z0-9]+)\]') + +# _snapshot.py:1400-1491 — _process_page_snapshot_for_ai() parsing loop +# Extract before clean_suffix removes [ref=...]: +_pw_ref_match = ref_extract_pattern.search(suffix) if suffix else None +playwright_ref_for_element = _pw_ref_match.group(1) if _pw_ref_match else None + +# Store in RefData: +refs[ref] = RefData( + ... 
+ playwright_ref=playwright_ref_for_element, # Playwright's "e369" / "f1e5" +) +``` + +`playwright_ref` is extracted from the `[ref=...]` suffix in Playwright's snapshot text lines and is only valid for the lifetime of the current `snapshotForAI` call. + +### 9. Generating `frame_path` +**File**: `bridgic/browser/session/_snapshot.py:1229` (parsing loop) + +```python +# _snapshot.py — _process_page_snapshot_for_ai() +_iframe_local_counters: Dict[tuple, int] = {} # key=parent path tuple, value=number of child iframes seen so far +# ... +# When an iframe node is encountered: +parent_path = tuple(iframe_stack[-1][1]) if iframe_stack else () +local_idx = _iframe_local_counters.get(parent_path, 0) +_iframe_local_counters[parent_path] = local_idx + 1 +iframe_stack.append((original_depth, list(parent_path) + [local_idx])) +``` + +`frame_path` records **the per-level local indices from the main frame to the target iframe** (same-level iframes start from index 0), and is unrelated to `frame.seq`. + +--- + +## bridgic Source: Ref Lookup Rules + +### 10. Two-phase Lookup in `get_element_by_ref()` +**File**: `bridgic/browser/session/_browser.py` + +``` +Input: bridgic ref (e.g. "8d4b03a9") + ↓ +self._last_snapshot.refs.get(ref) → RefData + ↓ +Phase 1: aria-ref fast path (O(1)) + Condition: ref_data.playwright_ref is non-empty (i.e. 
no re-navigation since last snapshot) + Implementation: + scope = page + for nth in ref_data.frame_path: # build scope chain following frame_path + scope = scope.frame_locator("iframe").nth(nth) + locator = scope.locator(f"aria-ref={ref_data.playwright_ref}") + count = await locator.count() + count == 1 → return directly (Playwright's _jumpToAriaRefFrameIfNeeded guarantees routing) + count == 0 → stale, fall through + Exception → engine unavailable, fall through + +Phase 2: CSS rebuild path (get_locator_from_ref_async) + Location: _snapshot.py:1830 + Strategy priority (by signal strength): + 1) get_by_role(role, name=name, exact=True) ← most elements + 2) get_by_role(role).filter(has_text=...) ← ROLE_TEXT_MATCH_ROLES + 3) get_by_text(text, exact=True) ← TEXT_LEAF_ROLES (text pseudo-role) + 4) STRUCTURAL_NOISE_ROLES with match_text ← CSS-scoped + filter(has_text) + nth + 5) STRUCTURAL_NOISE_ROLES child-anchor path ← unnamed noise with no text + 6) get_by_role(role) ← bare role fallback when no name + scope: chain frame_locator("iframe").nth(n) per frame_path level first + nth: applied only when locator key space matches role:name key space (excluding STRUCTURAL_NOISE/TEXT_LEAF) + +STRUCTURAL_NOISE child-anchor path (strategy 5) detail: + Applies to: unnamed generic/group/none/presentation with no stored text + Sub-strategies (tried in order): + a) Find text-leaf child (role='text', parent_ref==ref) → CSS-scoped container locator (STRUCTURAL_NOISE_CSS) + b) Find named STRUCTURAL_NOISE child (parent_ref==ref, role in STRUCTURAL_NOISE_ROLES, name non-empty) + → scope.locator(STRUCTURAL_NOISE_CSS_NAMED).filter(has_text=name).locator('..') + Note: locator('..') is auto-detected as XPath parent by Playwright (selectorParser.js:159) + Note: STRUCTURAL_NOISE_CSS_NAMED adds span:not([role]) vs STRUCTURAL_NOISE_CSS because + the child may be a <span> that Playwright maps to 'generic' role. + nth is NOT applied; the parent is located structurally via the child.
+ c) fallback: get_by_role(role) (returns 0 results for implicit generic — last resort) +``` + +--- + +## Covered-element Check + +**6 locations**: `_click_checkable_target` (`_browser.py:239`), `click_element_by_ref` (`~3151`), `hover_element_by_ref` (`~3393`), `check_checkbox_or_radio_by_ref` (`~3645`), `uncheck_checkbox_by_ref` (`~3751`), `double_click_element_by_ref` (`~3847`) + +```javascript +(el) => { + if (window.parent !== window) return false; // ← skip directly for iframe elements + const t = document.elementFromPoint(cx, cy); + return !!t && t !== el && !el.contains(t) && !t.contains(el); +} +``` + +**Do not change to `window.frameElement !== null`**: Chrome returns `null` for `window.frameElement` inside iframes under the `file://` protocol (security policy), causing false positives. `window.parent !== window` is a pure object comparison that is reliable across all protocols and origins. + +**Why iframe elements must be skipped**: `bounding_box()` returns main-viewport coordinates, while `document.elementFromPoint(cx, cy)` inside the iframe JS context uses iframe-local coordinates. The coordinate systems differ, so `elementFromPoint` finds the wrong element (typically the child iframe node), triggering a false "covered" report. After skipping, `locator.click()` lets Playwright handle coordinate transformation internally. 
+ +--- + +## Nested iframes and frame_path + +`RefData.frame_path: Optional[List[int]]`: +- `None` → main frame +- `[0]` → first top-level iframe (local index 0) +- `[0, 1]` → second iframe inside the first top-level iframe + +All three locator-building code paths (aria-ref fast path, `get_locator_from_ref_async`, recovery path) use the same chained call: +```python +scope = page +for local_nth in frame_path: + scope = scope.frame_locator("iframe").nth(local_nth) +``` + +`_iframe_local_counters: Dict[tuple, int]` (`_snapshot.py:1229`) tracks the iframe count under each parent path, ensuring per-level nth values are independent across multiple nesting levels. + +--- + +## Interactive Element Detection — Small Icon Rule + +`_is_element_interactive()` (`_snapshot.py`) rule 9: small icon (10–50 px) is treated as interactive only when it carries **strong semantic signals**: + +- `data-action` attribute → explicit author intent +- `aria-label` → screen-reader accessible name + +**`classAndId` is intentionally excluded**: almost every element carries a CSS class, so including it causes false positives for purely decorative elements (badges, avatars, dividers) that happen to be small. `cursor=pointer` is covered by rule 10 (separate check) and is a stronger signal. + +Impact on `get_snapshot(interactive=True)`: a small icon with only a CSS class (no `data-action`, no `aria-label`, no `cursor:pointer`) will **not** appear in the interactive snapshot. If an icon is missing, add `data-action` or `aria-label` to the element. + +--- + +## Stealth JS Init Script — Patched Properties + +`_STEALTH_INIT_SCRIPT_TEMPLATE` in `_stealth.py` — **headless mode only**. Skipped entirely in headed mode because `context.add_init_script()` runs in ALL frames including Cloudflare Turnstile's challenge iframe; patching `window.chrome` (`configurable:false`), `navigator.permissions.query`, and WebGL prototype inside the iframe causes detectable API inconsistencies that fail the challenge. 
+ +### Anti-toString-detection (`_mkNative` framework) + +All patched functions are registered in a `WeakSet` (`_nativeFns`) via `_mkNative(fn, name)`. `Function.prototype.toString` is itself intercepted to return `"function foo() { [native code] }"` for any registered function. This closes the entire class of "call `.toString()` on a function to detect monkey-patching" attacks used by DataDome, PerimeterX, and Cloudflare bot detectors. + +```javascript +const _nativeFns = new WeakSet(); +const _nativeFnNames = new WeakMap(); +const _mkNative = (fn, name) => { _nativeFns.add(fn); _nativeFnNames.set(fn, name); return fn; }; +Function.prototype.toString = _mkNative(function toString() { + if (_nativeFns.has(this)) return `function ${_nativeFnNames.get(this) ?? this.name}() { [native code] }`; + return _origFnToString.call(this); +}, 'toString'); +``` + +### Patched properties + +- `navigator.webdriver` → **conditionally** `undefined`; checks `Object.getOwnPropertyDescriptor(Navigator.prototype, 'webdriver')` first and patches the prototype descriptor. Falls back to instance property only if the prototype has no descriptor but the value is non-undefined. Avoids creating an own-property (which makes `'webdriver' in navigator` = true — detectable in real Chrome where the property is absent). +- `navigator.plugins` / `navigator.mimeTypes` → realistic PDF Viewer entries (5 plugins, 2 MIME types); each plugin holds its own per-plugin mime copies so `enabledPlugin` refs are correct +- `navigator.languages` → derived from `Browser(locale=...)` to keep `navigator.language === navigator.languages[0]` (e.g. 
`["zh-CN", "zh", "en"]` for `locale="zh-CN"`); defaults to `["en-US", "en"]` +- `window.chrome` → complete object with `runtime`, `csi()`, `loadTimes()` (all wrapped with `_mkNative`) +- `navigator.permissions.query` → returns `"default"` for notifications (not `"denied"`); wrapped with `_mkNative` +- `window.outerWidth/Height` → matches `innerWidth/Height` when zero (guard for edge cases; with `--headless=new` + `screen` context option these are already correctly set by Chrome) +- `navigator.deviceMemory` → `8` (headless environments may return `undefined`) +- `navigator.hardwareConcurrency` → `8` when value is 0 or 1 (headless may report fewer cores) +- `navigator.connection` → `{ effectiveType: '4g', downlink: 10, rtt: 100, saveData: false }` when absent +- `WebGLRenderingContext` / `WebGL2RenderingContext` → `getParameter(37445/37446)` **conditionally** returns `'Intel Inc.'` / `'Intel Iris OpenGL Engine'` only when the real vendor contains `'Google'` or `'SwiftShader'` (masks SwiftShader which is a well-known bot signal). On headed Apple Silicon Mac the real `'Apple Inc.'` value is preserved so the WebGL fingerprint stays consistent with DPI, Canvas, and font rendering signals. `getParameter` is wrapped with `_mkNative`. +- `document.hasFocus()` → always returns `true` (headless tabs return `false` by default; Cloudflare and DataDome probe this); wrapped with `_mkNative` +- `document.hidden` → always `false` (via `Object.defineProperty`) +- `document.visibilityState` → always `'visible'` (via `Object.defineProperty`); headless tabs default to `'hidden'` which is a strong bot signal +- `Notification.permission` → guarded: only patched if `Notification` exists and its permission is `'denied'`; returns `'default'` + +`get_init_script(locale=None)` accepts the locale and performs the `__BRIDGIC_LANGS__` substitution before returning the script. Called from `_browser.py:_start()` with `self._locale` only when `self._headless=True`. 
+ +--- + +## CLI Architecture — Detailed Implementation + +### Client (`_client.py`) +- `send_command()` auto-starts the daemon if no socket exists. +- `_spawn_daemon()` uses `select.select()` + `os.read()` for the 30-second ready timeout (avoids blocking `proc.stdout.read()`). +- `start_if_needed=False` prevents auto-start for the `close` command. + +### Daemon (`_daemon.py`) +- `run_daemon()` creates a `Browser()` instance directly (lazy start — Playwright does **not** launch immediately; `Browser.__init__` auto-loads config from `_config.py`), writes `BRIDGIC_DAEMON_READY` to stdout, and serves one JSON command per connection. +- The browser's Playwright process starts on the first command that calls `_ensure_started()` (e.g. `navigate_to`). +- `asyncio.wait_for(reader.readline(), timeout=60)` prevents hanging on idle connections. +- Signal handling uses `loop.add_signal_handler()` (asyncio-safe). + +### Commands (`_commands.py`) +- 67 Click commands in 15 sections via `SectionedGroup`. +- `scroll` uses `--dy`/`--dx` options (not positional) to support negative values. +- `screenshot`/`pdf`/`upload`/`storage-save`/`storage-load`/`trace-stop` call `os.path.abspath()` on the client side before sending (daemon cwd may differ). +- `snapshot` supports `-i`/`--interactive`, `-f/-F`/`--full-page/--no-full-page`, `-l`/`--limit` (default 10000), and `-s`/`--file` (overflow file path); it delegates to `browser.get_snapshot_text()`. +- **`wait`**: argument is named `SECONDS_OR_TEXT`. When the argument parses as a float it always takes the time-wait path (`wait_seconds`); when it is a string it takes the text-wait path (`text` or `text_gone` with `--gone`). The `--gone` flag is **only** meaningful with a string argument — a numeric argument with `--gone` is ignored (number always → time). Unit is **seconds**, not milliseconds. Text search traverses **all frames** (main + iframes) via polling. 
+- **`type`**: text goes into the **currently focused element**; the user must `click` or `focus` the target first. +- **`mouse-move` / `mouse-click` / `mouse-drag`**: coordinates are **viewport pixels from the top-left corner**. +- **`eval-on`**: CODE must be an arrow function or named function that receives the element as its argument (e.g. `"(el) => el.textContent"`). + +### Close command fast-path +The daemon calls `browser.inspect_pending_close_artifacts()` to pre-allocate a session dir, trace path, and video paths (all grouped under `~/.bridgic/bridgic-browser/tmp/close--/`), responds to the client immediately with those paths, then sets `stop_event`. Actual `browser.close()` runs after the client disconnects. After close, `_write_close_report()` writes `close-report.json` in the session dir with status (`"success"`, `"success_with_timeouts"`, `"error"`, or `"timeout"`), artifact paths, and any errors. + +### Daemon cleanup ownership guard +After `browser.close()` finishes, `run_daemon()` reads the run-info file and compares its `pid` field to `os.getpid()` before calling `transport.cleanup()` / `remove_run_info()`. This prevents the outgoing daemon from deleting the new daemon's socket when a `close` is followed immediately by a new command (which starts a new daemon before the old one's shutdown completes). If the run-info is gone (`None`) the old daemon is still the owner and cleans up normally. diff --git a/docs/SNAPSHOT_AND_STATE.md b/docs/SNAPSHOT_AND_STATE.md index 648c9d3..f0ebed3 100644 --- a/docs/SNAPSHOT_AND_STATE.md +++ b/docs/SNAPSHOT_AND_STATE.md @@ -54,6 +54,7 @@ Stored in `EnhancedSnapshot.refs`. Used internally to build a Playwright locator | `text_content` | str, optional | Text content snippet. | | `parent_ref` | str, optional | Ref of the nearest ancestor element that has a ref. | | `frame_path` | List[int], optional | Per-level local iframe indices for nested iframes. 
`None` = main frame; `[0]` = 1st top-level iframe; `[0, 0]` = 1st iframe inside the 1st iframe. Used to build the `frame_locator(...).nth(n)` chain in `get_element_by_ref`. | +| `playwright_ref` | str, optional | Playwright's ephemeral aria-ref ID (e.g. `"e369"`, `"f1e5"`). Valid only for the lifetime of the current snapshot — used by the aria-ref fast path in `get_element_by_ref` for O(1) DOM lookup. | You normally do not need to use `RefData` directly; `get_element_by_ref(ref)` uses it under the hood. diff --git a/pyproject.toml b/pyproject.toml index f8b3cc8..9f2bf23 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "bridgic-browser" -version = "0.0.4.dev1" +version = "0.0.4.dev3" license = {text = "MIT"} classifiers = [ "Programming Language :: Python :: 3.10", diff --git a/skills/bridgic-browser/references/cli-guide.md b/skills/bridgic-browser/references/cli-guide.md index b65c88b..0123dd5 100644 --- a/skills/bridgic-browser/references/cli-guide.md +++ b/skills/bridgic-browser/references/cli-guide.md @@ -139,7 +139,7 @@ bridgic-browser open https://example.com --cdp ws://localhost:9222/devtools/brow # Connect to cloud service bridgic-browser open https://example.com --cdp wss://cloud.example.com/chromium?token=... -# Auto-scan local Chrome/Brave/Edge/Arc profiles +# Auto-scan local Chrome/Chromium/Brave profiles (+ Canary variants) bridgic-browser open https://example.com --cdp auto ``` @@ -148,7 +148,7 @@ bridgic-browser open https://example.com --cdp auto | `9222` | Bare port -- queries `localhost:9222/json/version` | | `ws://...` / `wss://...` | Direct WebSocket URL, passed through as-is | | `http://host:port` | HTTP discovery endpoint | -| `auto` | Scan local browser profiles for `DevToolsActivePort` | +| `auto` | Scan local Chrome/Chromium/Brave profiles (+ Canary) for `DevToolsActivePort` | `close` disconnects from the remote browser but does **not** kill the Chrome process. 
@@ -158,7 +158,7 @@ bridgic-browser open https://example.com --cdp auto - When `snapshot` output exceeds `-l <limit>`, or `-s <file>` is provided, full content is saved to a file (auto-generated under `~/.bridgic/bridgic-browser/snapshot/` or the specified path). - `snapshot -i` returns only clickable/editable elements — use for action selection, not full-page inspection. - CLI uses a persistent daemon/browser. State survives across commands until `close`. -- **`open` and `search` accept `--headed` and `--clear-user-data`** (startup flags only — ignored when a daemon is already running): +- **`open` and `search` accept `--headed`, `--clear-user-data`, and `--cdp`** (startup flags only — ignored when a daemon is already running): - `bridgic-browser open --headed https://example.com` — start in headed mode - `bridgic-browser open --clear-user-data https://example.com` — start with ephemeral session (no persistent profile) - By default (no `--clear-user-data`), the browser uses a persistent profile saved at `~/.bridgic/bridgic-browser/user_data/`.
diff --git a/skills/bridgic-browser/references/cli-sdk-api-mapping.md b/skills/bridgic-browser/references/cli-sdk-api-mapping.md index d18afdc..3ecdfcc 100644 --- a/skills/bridgic-browser/references/cli-sdk-api-mapping.md +++ b/skills/bridgic-browser/references/cli-sdk-api-mapping.md @@ -195,7 +195,7 @@ These CLI behaviors have no direct SDK equivalent or work differently: | `scroll` argument style | `--dy`/`--dx` flag options (not positional) to allow negative values | `mouse_wheel(delta_x=X, delta_y=Y)` keyword args | | `fill-form` input format | JSON string on command line | Python list of dicts | | `take_screenshot` return value | CLI always writes to a file path | SDK: `filename=None` returns base64 data URL; `filename="path.png"` writes file | -| Video file write timing | `video-stop` registers path; file is written when daemon/browser closes | Same for SDK: `.webm` is written when page closes via `close()` or `close_tab()` | +| Video file write timing | `video-stop` stops the recorder and saves the `.webm` file immediately | Same for SDK: `stop_video()` saves the file immediately — no page close needed | ## Practical Rule for Mixed Tasks diff --git a/skills/bridgic-browser/references/env-vars.md b/skills/bridgic-browser/references/env-vars.md index c0573ea..05ea782 100644 --- a/skills/bridgic-browser/references/env-vars.md +++ b/skills/bridgic-browser/references/env-vars.md @@ -11,14 +11,14 @@ Use this reference when the task needs environment variable behavior or login st | `BRIDGIC_CDP` | CLI daemon | unset | Connect to an existing Chrome via CDP. Accepts: port (`9222`), `ws://`/`wss://` URL, `http://host:port`, or `auto` (scan local profiles). Resolved at daemon startup. Also set internally by the CLI client (as an already-resolved `ws://` URL) when `--cdp` is passed, so the flag overrides any value inherited from the shell. | | `BRIDGIC_SOCKET` | CLI (Unix only) | platform default | Override Unix socket path for the daemon client/transport. 
| | `BRIDGIC_DAEMON_RESPONSE_TIMEOUT` | CLI client | `90` | Seconds to wait for a daemon response. | -| `BRIDGIC_DAEMON_STOP_TIMEOUT` | CLI daemon | `45` | Seconds to wait for daemon shutdown. | +| `BRIDGIC_DAEMON_STOP_TIMEOUT` | CLI daemon | `300` | Seconds to wait for daemon shutdown (safety net; individual cleanup steps have shorter timeouts). | | `SKIP_BROWSER_TESTS` | Tests | unset | If `1/true/yes`, skip browser tests. | Notes: - Config file precedence (SDK + CLI, lowest -> highest): defaults, `~/.bridgic/bridgic-browser/bridgic-browser.json`, `./bridgic-browser.json`, `BRIDGIC_BROWSER_JSON`. - To start the daemon in headed mode, pass `--headed` to `bridgic-browser open` / `bridgic-browser search`, or set `{"headless": false}` in `BRIDGIC_BROWSER_JSON`. - To start with an ephemeral (no persistent profile) session, pass `--clear-user-data` to `bridgic-browser open` / `bridgic-browser search`, or set `{"clear_user_data": true}` in `BRIDGIC_BROWSER_JSON`. These flags are only meaningful when starting a new daemon; they are ignored if a session is already running. -- To connect to an existing Chrome via CDP, pass `--cdp` to `bridgic-browser open`, or set the `BRIDGIC_CDP` env var. The `--cdp` flag accepts a port number, `ws://`/`wss://` URL, `http://host:port`, or `auto`. +- To connect to an existing Chrome via CDP, pass `--cdp` to `bridgic-browser open` or `bridgic-browser search`, or set the `BRIDGIC_CDP` env var. The `--cdp` flag accepts a port number, `ws://`/`wss://` URL, `http://host:port`, or `auto`. - When `headless=false` (headed mode) with stealth enabled and neither `channel` nor `executable_path` is specified, the daemon **auto-switches to system Chrome** (`channel=”chrome”`) if detected on the machine. This avoids Playwright’s bundled “Chrome for Testing” which is blocked by Google OAuth and shows a “test” label in the macOS Dock. If system Chrome is not installed, it falls back to Chrome for Testing. 
### Config Files and `BRIDGIC_BROWSER_JSON` Values @@ -33,7 +33,7 @@ Notes: | `viewport` | `{ "width": int, "height": int }` or `null` | Default `1600x900` when `no_viewport` is not set. | | `user_data_dir` | string (path) | Custom path for persistent profile. Ignored when `clear_user_data=true`. | | `clear_user_data` | `true | false` | Default `false`. If `true`, use ephemeral session (`launch`+`new_context`, no profile saved). If `false`, use persistent profile (defaults to `~/.bridgic/bridgic-browser/user_data/`). | -| `cdp_url` | string (ws:// URL) | Connect to existing Chrome via CDP instead of launching. Set via `BRIDGIC_CDP` env var or `--cdp` CLI flag (not via config JSON). | +| `cdp_url` | string | Connect to existing Chrome via CDP instead of launching. Accepts any format supported by `resolve_cdp_input()` (port, `ws://`/`wss://` URL, `http://host:port`, `auto`); non-WebSocket values are auto-resolved at startup. Can be set via config JSON, `BRIDGIC_CDP` env var, or `--cdp` CLI flag. | | `stealth` | `true | false` or object | Object uses the StealthConfig keys below. | | `channel` | string | Examples: `"chrome"`, `"msedge"`, `"chromium"`. | | `executable_path` | string (path) | Custom browser binary path. | diff --git a/skills/bridgic-browser/references/sdk-guide.md b/skills/bridgic-browser/references/sdk-guide.md index 1d3b815..99cf2c9 100644 --- a/skills/bridgic-browser/references/sdk-guide.md +++ b/skills/bridgic-browser/references/sdk-guide.md @@ -138,7 +138,7 @@ Use `resolve_cdp_input()` to convert user-friendly formats (port, URL, `"auto"`) from bridgic.browser import resolve_cdp_input ws_url = resolve_cdp_input("9222") # queries localhost:9222/json/version -ws_url = resolve_cdp_input("auto") # scans local Chrome/Brave/Edge profiles +ws_url = resolve_cdp_input("auto") # scans local Chrome/Chromium/Brave profiles browser = Browser(cdp_url=ws_url) ``` @@ -156,7 +156,7 @@ Notes: - `take_screenshot(filename=None)` returns base64 data URL string. 
- `take_screenshot(filename="path.png")` writes file and returns a status string. - `verify_element_visible` uses `(role, accessible_name)` rather than ref. -- `start_video` must run before `stop_video`; `stop_video` registers the destination path but does **not** close any pages. The actual `.webm` file is written by Playwright when pages close (via `close()` or `close_tab()`). +- `start_video` must run before `stop_video`; `stop_video` stops the recorder and saves the `.webm` file immediately — no page close is needed. ## SDK Error Handling diff --git a/tests/integration/_chrome_utils.py b/tests/integration/_chrome_utils.py new file mode 100644 index 0000000..a252f3e --- /dev/null +++ b/tests/integration/_chrome_utils.py @@ -0,0 +1,159 @@ +"""Cross-platform Chrome / Chromium binary discovery for CDP integration tests. + +Search order +------------ +1. ``CHROME_BIN`` environment variable (explicit override for CI or local dev) +2. Platform-specific well-known system paths +3. ``shutil.which()`` for common executable names +4. Playwright's bundled Chromium (always available after ``playwright install chromium``) +""" + +from __future__ import annotations + +import os +import platform +import shutil + + +def find_chrome_binary() -> str | None: + """Return the path to a Chrome / Chromium binary, or *None* if not found. + + The returned binary must support ``--remote-debugging-port``. + """ + # 1. Explicit override ───────────────────────────────────────────────── + env_bin = os.environ.get("CHROME_BIN") + if env_bin and os.path.isfile(env_bin): + return env_bin + + system = platform.system() + + # 2. 
Platform-specific known paths ───────────────────────────────────── + candidates: list[str] = [] + if system == "Darwin": + candidates = [ + "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", + "/Applications/Chromium.app/Contents/MacOS/Chromium", + ] + elif system == "Linux": + candidates = [ + "/usr/bin/google-chrome-stable", + "/usr/bin/google-chrome", + "/usr/bin/chromium-browser", + "/usr/bin/chromium", + ] + elif system == "Windows": + for env_key in ("PROGRAMFILES", "PROGRAMFILES(X86)", "LOCALAPPDATA"): + base = os.environ.get(env_key, "") + if base: + candidates.append( + os.path.join(base, "Google", "Chrome", "Application", "chrome.exe") + ) + + for path in candidates: + if os.path.isfile(path): + return path + + # 3. shutil.which() fallback ─────────────────────────────────────────── + names = ( + ["chrome.exe", "chromium.exe"] + if system == "Windows" + else ["google-chrome-stable", "google-chrome", "chromium-browser", "chromium"] + ) + for name in names: + found = shutil.which(name) + if found: + return found + + # 4. Playwright's bundled Chromium ───────────────────────────────────── + return _find_playwright_chromium(system) + + +# ── internal ────────────────────────────────────────────────────────────────── + + +def _find_playwright_chromium(system: str | None = None) -> str | None: + """Locate Playwright's bundled Chromium binary in its cache directory. + + Handles multiple Playwright versions and architectures: + - Newer Playwright uses ``Google Chrome for Testing.app`` (macOS) and + architecture-suffixed directories (``chrome-mac-arm64``, ``chrome-linux64``). + - Older Playwright uses ``Chromium.app`` and ``chrome-mac`` / ``chrome-linux``. 
+ """ + if system is None: + system = platform.system() + + browsers_path = os.environ.get("PLAYWRIGHT_BROWSERS_PATH") + if browsers_path: + cache_dir = browsers_path + elif system == "Linux": + cache_dir = os.path.expanduser("~/.cache/ms-playwright") + elif system == "Darwin": + cache_dir = os.path.expanduser("~/Library/Caches/ms-playwright") + elif system == "Windows": + local_app = os.environ.get("LOCALAPPDATA", "") + cache_dir = os.path.join(local_app, "ms-playwright") if local_app else "" + else: + return None + + if not cache_dir or not os.path.isdir(cache_dir): + return None + + # Scan for chromium-* dirs (newest first, skip chromium_headless_shell-*) + try: + chromium_dirs = sorted( + (d for d in os.listdir(cache_dir) + if d.startswith("chromium-") and "headless" not in d), + reverse=True, + ) + except OSError: + return None + + for dirname in chromium_dirs: + base = os.path.join(cache_dir, dirname) + for exe in _chromium_exe_candidates(base, system): + if os.path.isfile(exe): + return exe + + return None + + +def _chromium_exe_candidates(base: str, system: str) -> list[str]: + """Return possible executable paths within a Playwright chromium-XXXX dir.""" + results: list[str] = [] + + if system == "Darwin": + # Scan for chrome-mac* subdirectories (chrome-mac, chrome-mac-arm64, …) + _scan_mac_app_bundles(base, results) + elif system == "Linux": + # chrome-linux64/chrome, chrome-linux/chrome + for sub in ("chrome-linux64", "chrome-linux"): + results.append(os.path.join(base, sub, "chrome")) + elif system == "Windows": + # chrome-win64/chrome.exe, chrome-win/chrome.exe + for sub in ("chrome-win64", "chrome-win"): + results.append(os.path.join(base, sub, "chrome.exe")) + + return results + + +def _scan_mac_app_bundles(base: str, results: list[str]) -> None: + """Append all possible macOS .app bundle executables under *base*.""" + try: + subdirs = [d for d in os.listdir(base) if d.startswith("chrome-mac")] + except OSError: + return + + # Prefer arm64 directory 
on Apple Silicon + subdirs.sort(key=lambda d: ("arm64" not in d, d)) + + for sub in subdirs: + sub_path = os.path.join(base, sub) + # Newer Playwright: "Google Chrome for Testing.app" + results.append(os.path.join( + sub_path, "Google Chrome for Testing.app", + "Contents", "MacOS", "Google Chrome for Testing", + )) + # Older Playwright: "Chromium.app" + results.append(os.path.join( + sub_path, "Chromium.app", "Contents", "MacOS", "Chromium", + )) diff --git a/tests/integration/test_cdp_borrowed_mode.py b/tests/integration/test_cdp_borrowed_mode.py new file mode 100644 index 0000000..2a94be1 --- /dev/null +++ b/tests/integration/test_cdp_borrowed_mode.py @@ -0,0 +1,573 @@ +""" +Comprehensive integration tests for CDP borrowed mode. + +Scenario: a real Chrome is already running with pre-existing tabs opened +BEFORE bridgic connects. These tabs are the exact ones that trigger the +Playwright _mainContext() race condition — Runtime.executionContextCreated +events arrive before Playwright registers its handlers, so page.evaluate() +/ page.title() would hang forever without our CDPSession bypass. 
+ +Setup: + - Launches system Chrome with --remote-debugging-port=9229 + - Opens 3 pre-existing tabs (Wikipedia, httpbin.org, example.com) via CDP + - Then connects bridgic via cdp_url + +Coverage: + - tabs listing (all 3 pre-existing + 1 bridgic-owned tab) + - switch-tab to each pre-existing tab + - info (uses _get_page_title + get_page_size_info via CDPSession) + - snapshot (aria tree on pre-existing tab) + - reload (uses _get_page_title) + - evaluate_javascript (asyncio.wait_for timeout guard) + - evaluate_javascript_on_ref (asyncio.wait_for timeout guard) + - get_dropdown_options_by_ref + - focus_element_by_ref + - input_text_by_ref / click_element_by_ref + - verify_title / verify_url + - get_page_size_info (CDP Page.getLayoutMetrics path) + - get_current_page_info (combined snapshot + size) +""" + +import asyncio +import json +import os +import subprocess +import tempfile +import time +import urllib.request + +import pytest +import pytest_asyncio + +from bridgic.browser.session import Browser + +from ._chrome_utils import find_chrome_binary + + +# ───────────────────────────────────────────────────────────────────────────── +# Helpers +# ───────────────────────────────────────────────────────────────────────────── + +CDP_PORT = 9229 +CDP_HOST = "localhost" +CHROME_BIN: str | None = find_chrome_binary() + +# Public pages used as "pre-existing tabs" opened before bridgic attaches. +# Chosen for stability, low JS complexity, and HTTPS. 
+PREOPENED_URLS = [ + "https://example.com", + "https://httpbin.org/forms/post", # has a form with select + inputs + checkbox + "https://en.wikipedia.org/wiki/Browser_automation", +] + + +def _open_tab_via_cdp(url: str) -> None: + """Open a new tab in the already-running Chrome via the /json/new endpoint.""" + req = urllib.request.Request( + f"http://{CDP_HOST}:{CDP_PORT}/json/new?{url}", + method="PUT", + ) + with urllib.request.urlopen(req, timeout=5): + pass + + +def _list_tabs_via_cdp() -> list: + with urllib.request.urlopen( + f"http://{CDP_HOST}:{CDP_PORT}/json/list", timeout=5 + ) as resp: + return json.loads(resp.read()) + + +def _wait_for_chrome(timeout: float = 15.0) -> None: + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + try: + _list_tabs_via_cdp() + return + except Exception: + time.sleep(0.3) + raise RuntimeError(f"Chrome did not start debugging interface on port {CDP_PORT}") + + +# ───────────────────────────────────────────────────────────────────────────── +# Session-scoped Chrome fixture +# ───────────────────────────────────────────────────────────────────────────── + +@pytest.fixture(scope="module") +def chrome_with_preopened_tabs(): + """ + Start a real Chrome process with remote debugging and 3 pre-existing tabs. + + The tabs are opened BEFORE bridgic attaches — this is the exact scenario + that triggers the Playwright _mainContext() race condition. + + Yields the WebSocket debugger URL for the browser. 
+ """ + if CHROME_BIN is None: + pytest.skip("Chrome/Chromium not found on this system") + + tmpdir = tempfile.mkdtemp(prefix="bridgic_cdp_test_") + proc = subprocess.Popen( + [ + CHROME_BIN, + f"--remote-debugging-port={CDP_PORT}", + f"--user-data-dir={tmpdir}", + "--no-first-run", + "--no-default-browser-check", + "--disable-extensions", + "--disable-sync", + "--headless=new", + "about:blank", # initial tab + ], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + + try: + _wait_for_chrome(timeout=20.0) + + # Open pre-existing tabs BEFORE bridgic attaches + for url in PREOPENED_URLS: + _open_tab_via_cdp(url) + # Give pages a moment to start loading + time.sleep(2.0) + + # Get the WS URL + tabs = _list_tabs_via_cdp() + browser_tab = next( + (t for t in tabs if t.get("type") == "browser"), None + ) + # Fall back to /json/version for the browser WS URL + with urllib.request.urlopen( + f"http://{CDP_HOST}:{CDP_PORT}/json/version", timeout=5 + ) as resp: + info = json.loads(resp.read()) + ws_url = info["webSocketDebuggerUrl"] + + yield ws_url + + finally: + proc.terminate() + try: + proc.wait(timeout=5) + except subprocess.TimeoutExpired: + proc.kill() + import shutil + shutil.rmtree(tmpdir, ignore_errors=True) + + +# ───────────────────────────────────────────────────────────────────────────── +# Browser fixture that attaches via CDP +# ───────────────────────────────────────────────────────────────────────────── + +@pytest_asyncio.fixture +async def cdp_browser(chrome_with_preopened_tabs): + """Attach bridgic to the running Chrome via CDP (borrowed mode).""" + ws_url = chrome_with_preopened_tabs + browser = Browser(cdp_url=ws_url, stealth=False, headless=True) + await browser._start() + yield browser + await browser.close() + + +# ───────────────────────────────────────────────────────────────────────────── +# Helpers for common assertions +# ───────────────────────────────────────────────────────────────────────────── + +async def _switch_to_url(browser: 
Browser, url_fragment: str) -> str: + """Switch to the tab whose URL contains url_fragment and return the page_id.""" + descs = await browser.get_all_page_descs() + match = next((d for d in descs if url_fragment in d.url), None) + assert match is not None, ( + f"No tab with URL containing {url_fragment!r}. Available: " + + str([d.url for d in descs]) + ) + result = await browser.switch_to_page(match.page_id) + assert result[0], f"switch_to_page failed: {result[1]}" + return match.page_id + + +# ───────────────────────────────────────────────────────────────────────────── +# Tests +# ───────────────────────────────────────────────────────────────────────────── + +@pytest.mark.integration +@pytest.mark.asyncio +async def test_cdp_tabs_lists_all_preopened_pages(cdp_browser): + """tabs / get_all_page_descs must include all pre-existing tabs. + + Root cause guarded: get_page_desc() calls _get_page_title() which used + page.title() — hangs forever on pre-existing CDP tabs. + """ + descs = await asyncio.wait_for( + cdp_browser.get_all_page_descs(), timeout=15.0 + ) + urls = [d.url for d in descs] + print(f"\n[tabs] found {len(descs)} tabs: {urls}") + + # All three pre-opened tabs must appear + assert any("example.com" in u for u in urls), f"example.com missing: {urls}" + assert any("httpbin.org" in u for u in urls), f"httpbin.org missing: {urls}" + assert any("wikipedia.org" in u for u in urls), f"wikipedia.org missing: {urls}" + + # Every tab must have a non-empty page_id + for d in descs: + assert d.page_id, f"Missing page_id in {d}" + + +@pytest.mark.integration +@pytest.mark.asyncio +async def test_cdp_switch_tab_to_preopened_example(cdp_browser): + """switch_to_page on a pre-existing tab must not hang. + + Root cause guarded: switch_to_page called _get_page_title() which hung. 
+ """ + page_id = await asyncio.wait_for( + _switch_to_url(cdp_browser, "example.com"), timeout=15.0 + ) + print(f"\n[switch-tab] switched to {page_id}") + assert page_id.startswith("page_") + + +@pytest.mark.integration +@pytest.mark.asyncio +async def test_cdp_info_on_preopened_tab(cdp_browser): + """get_current_page_info on a pre-existing tab must return URL + title + size. + + Root cause guarded: _get_page_title() and get_page_size_info() both hung. + """ + await _switch_to_url(cdp_browser, "example.com") + + info = await asyncio.wait_for( + cdp_browser.get_current_page_info(), timeout=20.0 + ) + print(f"\n[info] {info}") + assert "example.com" in info.lower() or "example" in info.lower() + + +@pytest.mark.integration +@pytest.mark.asyncio +async def test_cdp_page_size_info_on_preopened_tab(cdp_browser): + """get_page_size_info must use CDP Page.getLayoutMetrics (not page.evaluate). + + Root cause guarded: page.evaluate() hung indefinitely. + """ + await _switch_to_url(cdp_browser, "example.com") + + size = await asyncio.wait_for( + cdp_browser.get_page_size_info(), timeout=10.0 + ) + print(f"\n[size] {size}") + assert size is not None + assert size.viewport_width > 0, "viewport_width must be positive" + assert size.viewport_height > 0, "viewport_height must be positive" + assert size.page_width > 0, "page_width must be positive" + assert size.page_height > 0, "page_height must be positive" + + +@pytest.mark.integration +@pytest.mark.asyncio +async def test_cdp_snapshot_on_preopened_example(cdp_browser): + """get_snapshot on pre-existing tab must return an accessibility tree. + + The snapshot uses snapshotForAI (Playwright's own CDP, 30 s timeout) — + it works because Playwright's snapshot path is different from evaluate(). 
+ """ + await _switch_to_url(cdp_browser, "example.com") + + snapshot = await asyncio.wait_for( + cdp_browser.get_snapshot(), timeout=30.0 + ) + print(f"\n[snapshot] tree length={len(snapshot.tree)} chars") + assert snapshot is not None + assert len(snapshot.tree) > 50, "Accessibility tree too short" + assert "heading" in snapshot.tree or "link" in snapshot.tree, ( + "Expected heading or link in example.com tree" + ) + + +@pytest.mark.integration +@pytest.mark.asyncio +async def test_cdp_snapshot_interactive_on_preopened_httpbin(cdp_browser): + """Interactive snapshot on httpbin.org/forms/post — form inputs must appear.""" + await _switch_to_url(cdp_browser, "httpbin.org") + + snapshot = await asyncio.wait_for( + cdp_browser.get_snapshot(interactive=True), timeout=30.0 + ) + print(f"\n[snapshot-interactive] {snapshot.tree[:500]}") + assert snapshot is not None + # httpbin form has text inputs and a submit button + assert len(snapshot.refs) > 0, "No refs found — interactive snapshot empty" + + +@pytest.mark.integration +@pytest.mark.asyncio +async def test_cdp_reload_on_preopened_tab(cdp_browser): + """reload_page on pre-existing tab must complete (uses _get_page_title).""" + await _switch_to_url(cdp_browser, "example.com") + + result = await asyncio.wait_for( + cdp_browser.reload_page(), timeout=30.0 + ) + print(f"\n[reload] {result}") + assert "reloaded" in result.lower() + + +@pytest.mark.integration +@pytest.mark.asyncio +async def test_cdp_evaluate_javascript_on_preopened_tab(cdp_browser): + """evaluate_javascript on pre-existing tab must complete (asyncio.wait_for guard).""" + await _switch_to_url(cdp_browser, "example.com") + + result = await asyncio.wait_for( + cdp_browser.evaluate_javascript("document.title"), + timeout=15.0, + ) + print(f"\n[eval] result={result!r}") + assert result is not None + # Should return the page title + assert "example" in str(result).lower() or isinstance(result, str) + + +@pytest.mark.integration +@pytest.mark.asyncio +async def 
test_cdp_verify_url_on_preopened_tab(cdp_browser): + """verify_url on pre-existing tab must work without hanging.""" + await _switch_to_url(cdp_browser, "example.com") + + result = await asyncio.wait_for( + cdp_browser.verify_url("example.com"), + timeout=15.0, + ) + print(f"\n[verify-url] {result}") + assert "PASS" in result + + +@pytest.mark.integration +@pytest.mark.asyncio +async def test_cdp_verify_title_on_preopened_tab(cdp_browser): + """verify_title on pre-existing tab uses _get_page_title (CDPSession bypass).""" + await _switch_to_url(cdp_browser, "example.com") + + result = await asyncio.wait_for( + cdp_browser.verify_title("Example"), + timeout=15.0, + ) + print(f"\n[verify-title] {result}") + assert "PASS" in result + + +@pytest.mark.integration +@pytest.mark.asyncio +async def test_cdp_screenshot_on_preopened_tab(cdp_browser): + """take_screenshot must work on a pre-existing tab.""" + await _switch_to_url(cdp_browser, "example.com") + + data = await asyncio.wait_for( + cdp_browser.take_screenshot(), timeout=15.0 + ) + print(f"\n[screenshot] {len(data)} bytes") + assert len(data) > 1000, "Screenshot data too small" + + +@pytest.mark.integration +@pytest.mark.asyncio +async def test_cdp_click_element_on_preopened_example(cdp_browser): + """click_element_by_ref on pre-existing tab must not hang. + + Root cause guarded: covered-element check used locator.evaluate() without timeout. 
+ """ + await _switch_to_url(cdp_browser, "example.com") + snapshot = await asyncio.wait_for( + cdp_browser.get_snapshot(interactive=True), timeout=30.0 + ) + print(f"\n[click] refs available: {list(snapshot.refs.keys())[:5]}") + + # Find any link ref to click (prefer refs with a non-empty name) + link_ref = next( + (ref for ref, rd in snapshot.refs.items() if rd.role == "link" and rd.name), + None, + ) or next( + (ref for ref, rd in snapshot.refs.items() if rd.role == "link"), + None, + ) + if link_ref is None: + pytest.skip("No link found in interactive snapshot") + + result = await asyncio.wait_for( + cdp_browser.click_element_by_ref(link_ref), + timeout=15.0, + ) + print(f"\n[click] {result}") + assert "clicked" in result.lower() or "click" in result.lower() + + +@pytest.mark.integration +@pytest.mark.asyncio +async def test_cdp_input_text_on_preopened_httpbin(cdp_browser): + """input_text_by_ref on a pre-existing tab's form input must not hang. + + Root cause guarded: hidden element path used locator.evaluate() without timeout; + focus() calls used locator.evaluate("el.focus()") instead of locator.focus(). 
+ """ + await _switch_to_url(cdp_browser, "httpbin.org") + snapshot = await asyncio.wait_for( + cdp_browser.get_snapshot(interactive=True), timeout=30.0 + ) + + # Find a textbox input ref (prefer named ones) + input_ref = next( + (ref for ref, rd in snapshot.refs.items() if rd.role == "textbox"), + None, + ) or next( + (ref for ref, rd in snapshot.refs.items() + if rd.role in {"spinbutton", "searchbox"}), + None, + ) + if input_ref is None: + pytest.skip("No text input found in httpbin interactive snapshot") + + result = await asyncio.wait_for( + cdp_browser.input_text_by_ref(input_ref, "hello world"), + timeout=15.0, + ) + print(f"\n[input] {result}") + assert "input" in result.lower() or "text" in result.lower() + + +@pytest.mark.integration +@pytest.mark.asyncio +async def test_cdp_focus_element_on_preopened_tab(cdp_browser): + """focus_element_by_ref must use locator.focus() not locator.evaluate('el.focus()').""" + await _switch_to_url(cdp_browser, "httpbin.org") + snapshot = await asyncio.wait_for( + cdp_browser.get_snapshot(interactive=True), timeout=30.0 + ) + + input_ref = next( + (ref for ref, rd in snapshot.refs.items() + if rd.role in {"textbox", "spinbutton", "searchbox"}), + None, + ) + if input_ref is None: + pytest.skip("No text input found in httpbin interactive snapshot") + + result = await asyncio.wait_for( + cdp_browser.focus_element_by_ref(input_ref), + timeout=10.0, + ) + print(f"\n[focus] {result}") + assert "focused" in result.lower() or "focus" in result.lower() + + +@pytest.mark.integration +@pytest.mark.asyncio +async def test_cdp_get_dropdown_options_on_preopened_httpbin(cdp_browser): + """get_dropdown_options_by_ref on pre-existing tab — asyncio.wait_for guard.""" + await _switch_to_url(cdp_browser, "httpbin.org") + snapshot = await asyncio.wait_for( + cdp_browser.get_snapshot(interactive=True), timeout=30.0 + ) + + select_ref = next( + (ref for ref, rd in snapshot.refs.items() + if rd.role == "combobox" and rd.name), + None, + ) or 
next( + (ref for ref, rd in snapshot.refs.items() if rd.role == "combobox"), + None, + ) + if select_ref is None: + pytest.skip("No options for custom comboboxes Previously _get_dropdown_option_locators tried locator.locator("option") first, which matched the hidden native + + + + + + + + @@ -119,6 +162,30 @@

Section 3: React + antd Select

logMsg('portal-select → ' + label + ' (value=' + value + ')'); } +/* --- Shadow-select portal dropdown helpers (Section 4) --- */ +function toggleShadowSelectDropdown(trigger) { + var lb = document.getElementById('status-listbox'); + var expanded = trigger.getAttribute('aria-expanded') === 'true'; + lb.style.display = expanded ? 'none' : 'block'; + trigger.setAttribute('aria-expanded', expanded ? 'false' : 'true'); + if (!expanded) { + var rect = trigger.getBoundingClientRect(); + lb.style.left = rect.left + 'px'; + lb.style.top = (rect.bottom + 2) + 'px'; + } +} +function selectShadowOption(optionEl, value, label) { + document.getElementById('status-display').textContent = label; + document.getElementById('status-listbox').style.display = 'none'; + var trigger = document.querySelector('[aria-controls="status-listbox"]'); + trigger.setAttribute('aria-expanded', 'false'); + trigger.dataset.selectedValue = value; + // Keep shadow `` + inside the trigger (for a11y / form posting) while rendering the real + visible options in a portalized ``[role='listbox']``. + + Previously ``_get_dropdown_option_locators`` preferred ``locator.locator("option")`` + first, hitting the hidden shadow ``