From eeb27c9ba758b13a39759a8aa7a3a0f64aa1f065 Mon Sep 17 00:00:00 2001 From: NiceCode666 Date: Tue, 19 May 2026 12:34:19 +0800 Subject: [PATCH 1/7] fix: tighten download-event surface after PR #28 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-ups to PR #28's always-on DownloadManager + new wait/list APIs. - _browser.py: wait_for_next_download / get_downloaded_files_text now return a mode-specific explanation in CDP-borrowed mode instead of blocking on a queue that will never receive a completion (DownloadManager is intentionally not attached in that mode — CdpDownloadRenamer handles downloads there). Without this guard the wait silently times out and the caller can't tell "no downloads" apart from "wrong mode". - _download.py: documented the two coexisting wait APIs (one-shot Future via wait_for_download vs persistent FIFO queue via wait_for_next_download) in the DownloadManager class docstring so callers pick the right one. - CLAUDE.md: bumped tool/command counts 67 → 69 (4 stale occurrences PR #28 missed). - tests/unit/test_download.py: regression test for clear_history() draining _completed_queue, covering the invariant introduced in the merge commit. - tests/unit/test_browser.py: TestBrowserDownloadMethods covers the CDP-borrowed unsupported branch on both new methods plus the empty-session fallback path. make test-quick: 1253 passed (was 1249, +4 new). --- CLAUDE.md | 8 ++--- bridgic/browser/session/_browser.py | 25 +++++++++++++- bridgic/browser/session/_download.py | 20 ++++++++++++ tests/unit/test_browser.py | 49 ++++++++++++++++++++++++++++ tests/unit/test_download.py | 25 ++++++++++++++ 5 files changed, 122 insertions(+), 5 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index afb88a1..1900b92 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Project Overview -**Bridgic Browser** is an LLM-driven browser automation library built on Playwright with built-in stealth mode. It provides 67 browser tools organized into categories, an accessibility tree-based snapshot system, a stable element reference system (refs like "1f79fe5e", "8d4b03a9", …) designed for reliable AI agent interactions, and a `bridgic-browser` CLI tool backed by a persistent daemon. +**Bridgic Browser** is an LLM-driven browser automation library built on Playwright with built-in stealth mode. It provides 69 browser tools organized into categories, an accessibility tree-based snapshot system, a stable element reference system (refs like "1f79fe5e", "8d4b03a9", …) designed for reliable AI agent interactions, and a `bridgic-browser` CLI tool backed by a persistent daemon. ## Commands @@ -57,7 +57,7 @@ bridgic/browser/ ├── _redact.py # Log redaction helpers ├── errors.py # Public BridgicBrowserError hierarchy ├── session/ # Core browser session -│ ├── _browser.py # Browser class – main entry point (all 67 tool methods live here) +│ ├── _browser.py # Browser class – main entry point (all 69 tool methods live here) │ ├── _browser_model.py # Data models │ ├── _snapshot.py # SnapshotGenerator + EnhancedSnapshot + RefData │ ├── _stealth.py # StealthConfig + StealthArgsBuilder (50+ Chrome args) @@ -67,12 +67,12 @@ bridgic/browser/ │ ├── _launch.py # launch-mode helpers (retriable_launch, etc.) │ ├── _locator_utils.py # _click_checkable_target and other locator helpers │ └── _errors.py # session-internal error types -├── tools/ # 67 automation tools (all implemented in _browser.py) +├── tools/ # 69 automation tools (all implemented in _browser.py) │ ├── _browser_tool_set_builder.py # BrowserToolSetBuilder (category/name selection) │ └── _browser_tool_spec.py # BrowserToolSpec (wraps tool for agents) └── cli/ # CLI tool (bridgic-browser command) ├── __init__.py # Exports main() - ├── _commands.py # Click command definitions (67 commands, SectionedGroup) + ├── _commands.py # Click command definitions (69 commands, SectionedGroup) ├── _client.py # Socket client: send_command(), ensure_daemon_running() ├── _daemon.py # Daemon: asyncio Unix socket server + Browser instance └── _transport.py # Unix-socket transport layer (used by client and daemon) diff --git a/bridgic/browser/session/_browser.py b/bridgic/browser/session/_browser.py index d281b2e..326db55 100644 --- a/bridgic/browser/session/_browser.py +++ b/bridgic/browser/session/_browser.py @@ -725,7 +725,20 @@ def downloaded_files(self) -> List[DownloadedFile]: return [] async def get_downloaded_files_text(self) -> str: - """Return a human-readable summary of all downloads in this session.""" + """Return a human-readable summary of all downloads in this session. + + Returns an explanatory message in CDP-borrowed mode (where downloads + are renamed in place by ``CdpDownloadRenamer`` and never enter + ``DownloadManager``'s tracking list), so callers can tell "no downloads + happened" apart from "this mode doesn't track downloads". + """ + if self._is_cdp_borrowed: + return ( + "Download tracking is not available in CDP-borrowed mode. " + "Downloads in this mode are renamed in place by CdpDownloadRenamer " + "and do not pass through DownloadManager. Inspect downloads_path " + "directly to find the saved files." + ) files = self.downloaded_files if not files: return "No downloads in this session." @@ -740,7 +753,17 @@ async def wait_for_next_download(self, timeout: float = 30.0) -> str: """Wait up to *timeout* seconds for the next download to complete. Returns a one-line summary of the downloaded file, or a timeout message. + Returns an explanatory message in CDP-borrowed mode — DownloadManager + is intentionally not attached there (downloads are handled by + ``CdpDownloadRenamer`` instead), so blocking on the completion queue + would always time out and confuse callers. """ + if self._is_cdp_borrowed: + return ( + "wait_for_next_download is not supported in CDP-borrowed mode. " + "Downloads in this mode are renamed in place by CdpDownloadRenamer " + "and do not go through DownloadManager." + ) if not self._download_manager: return "Download manager not available." file = await self._download_manager.wait_for_next_download(timeout=timeout) diff --git a/bridgic/browser/session/_download.py b/bridgic/browser/session/_download.py index c404524..efd6603 100644 --- a/bridgic/browser/session/_download.py +++ b/bridgic/browser/session/_download.py @@ -91,6 +91,26 @@ class DownloadManager: 2. When a download starts, waits for it to complete 3. Uses `download.suggested_filename` to get the original name 4. Saves the file using `download.save_as()` with the correct name + + Wait APIs (two distinct mechanisms — pick the one that matches your call + site, they are not interchangeable) + -------------------------------------------------------------------------- + * ``wait_for_download(page, action, timeout)`` — **one-shot Future**, bound + to a specific trigger ``action`` you pass in. Concurrent callers each + get their own Future and are fulfilled FIFO. Times out silently and + leaves no residue. Use when you control both the click and the wait in + the same async scope. + + * ``wait_for_next_download(timeout)`` — **persistent FIFO queue**, + independent of any trigger. Every successful download is pushed onto + the queue regardless of who (if anyone) is waiting; callers drain it + one item at a time. Cleared by :meth:`clear_history`. Use when the + trigger and the wait happen in separate calls (e.g. a CLI command + issues the click, a later CLI command waits for completion). + + Both APIs coexist: a single completed download wakes the oldest + ``wait_for_download`` Future *and* lands on ``_completed_queue``. That is + intentional — the two surfaces have different lifetimes and audiences. """ def __init__( diff --git a/tests/unit/test_browser.py b/tests/unit/test_browser.py index ee57677..c132ffe 100644 --- a/tests/unit/test_browser.py +++ b/tests/unit/test_browser.py @@ -185,6 +185,55 @@ def test_get_config(self): assert config["use_persistent_context"] is True +class TestBrowserDownloadMethods: + """Tests for Browser.wait_for_next_download / get_downloaded_files_text + surface — in particular the explicit unsupported-in-CDP-borrowed-mode + behavior so callers can tell the modes apart instead of blocking until + timeout.""" + + @pytest.mark.asyncio + async def test_wait_for_next_download_in_cdp_borrowed_mode_returns_explanation( + self, + ): + browser = Browser() + # Force the CDP-borrowed state without actually starting Playwright: + # _is_cdp_borrowed == bool(_cdp_resolved) and not _cdp_context_owned. + browser._cdp_resolved = "ws://localhost:9222/devtools/browser/test" + assert browser._is_cdp_borrowed is True + + result = await browser.wait_for_next_download(timeout=30.0) + + assert "not supported in CDP-borrowed mode" in result + # The completion queue must NOT have been awaited (no blocking). + assert browser.download_manager._completed_queue.qsize() == 0 + + @pytest.mark.asyncio + async def test_get_downloaded_files_text_in_cdp_borrowed_mode_returns_explanation( + self, + ): + browser = Browser() + browser._cdp_resolved = "ws://localhost:9222/devtools/browser/test" + assert browser._is_cdp_borrowed is True + + result = await browser.get_downloaded_files_text() + + assert "not available in CDP-borrowed mode" in result + assert "CdpDownloadRenamer" in result + + @pytest.mark.asyncio + async def test_get_downloaded_files_text_empty_session_message(self): + """Non-CDP-borrowed mode with zero downloads still uses the generic + 'No downloads in this session.' message — the CDP-borrowed branch + must not short-circuit it.""" + browser = Browser() + # Default fresh Browser: _cdp_resolved is None → _is_cdp_borrowed False. + assert browser._is_cdp_borrowed is False + + result = await browser.get_downloaded_files_text() + + assert result == "No downloads in this session." + + class TestBrowserLaunchOptions: """Tests for Browser launch options generation.""" diff --git a/tests/unit/test_download.py b/tests/unit/test_download.py index 3539dcb..e8de7cc 100644 --- a/tests/unit/test_download.py +++ b/tests/unit/test_download.py @@ -162,6 +162,31 @@ def test_clear_history(self, temp_downloads_dir): assert len(manager.downloaded_files) == 0 + @pytest.mark.asyncio + async def test_clear_history_drains_completed_queue(self, temp_downloads_dir): + """clear_history() must also drain _completed_queue so a subsequent + wait_for_next_download() does not immediately yield a stale completion + from before the clear.""" + manager = DownloadManager(downloads_path=temp_downloads_dir) + + # Seed the completion queue as if a download had finished earlier. + manager._completed_queue.put_nowait( + DownloadedFile( + url="https://example.com/old.pdf", + path="/test/old.pdf", + file_name="old.pdf", + file_size=1, + ) + ) + assert manager._completed_queue.qsize() == 1 + + manager.clear_history() + + # Queue is empty → the next wait must time out, not return the stale file. + assert manager._completed_queue.qsize() == 0 + result = await manager.wait_for_next_download(timeout=0.05) + assert result is None + def test_get_downloads_by_type(self, temp_downloads_dir): """Test filtering downloads by file type.""" manager = DownloadManager(downloads_path=temp_downloads_dir) From a44383bfc0f7af49cd0bc19114e744b47edaa3d4 Mon Sep 17 00:00:00 2001 From: NiceCode666 Date: Tue, 19 May 2026 14:32:40 +0800 Subject: [PATCH 2/7] test(integration): adapt popup-follow download bookkeeping test to PR #28 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #28 made `Browser._download_manager` always non-None (defaulting to ~/Downloads when downloads_path is unset). The original setup in `test_download_manager_reattached_on_popup_follow` was gated on `_download_manager is None` to inject a fresh manager AND attach it to bridgic's page. After PR #28 the inject branch never runs, but production code in CDP-borrowed mode intentionally does NOT attach DownloadManager at _start() (downloads route through CdpDownloadRenamer there) — so `_page_handlers` was empty and the bookkeeping assertion failed. Rewrote the setup to be idempotent: keep the optional manager-creation path, then always ensure `self._page` is attached. The test verifies the *migration* logic in `_switch_self_page_to`, not the production decision to skip the initial attach. No production code change. Only this integration test was affected. --- tests/integration/test_owned_pages.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_owned_pages.py b/tests/integration/test_owned_pages.py index 9f09171..51e884b 100644 --- a/tests/integration/test_owned_pages.py +++ b/tests/integration/test_owned_pages.py @@ -443,11 +443,20 @@ async def test_download_manager_reattached_on_popup_follow(cdp_browser, tmp_path We don't trigger an actual download (data: URLs cannot drive Chrome's download path); instead we verify the attachment bookkeeping in the DownloadManager moved with self._page.""" - # Inject a fresh DownloadManager so the assertion below has something to - # observe. The fixture's `Browser(downloads_path=None)` skips creating one. + # Wire up DownloadManager → bridgic's own page so the popup-follow + # re-attach logic in `_switch_self_page_to` has a state to migrate. + # + # Production code in CDP-borrowed mode intentionally does NOT attach + # DownloadManager at `_start()` (downloads in this mode go through + # CdpDownloadRenamer instead — see `_browser.py:_start()` comments). + # Since PR #28 a fresh `_download_manager` is always created, but + # since it's never wired to a page in this mode we attach manually + # here. The test verifies the *bookkeeping migration*, not the + # production decision to skip the initial attach. from bridgic.browser.session._download import DownloadManager if cdp_browser._download_manager is None: cdp_browser._download_manager = DownloadManager(downloads_path=str(tmp_path)) + if str(id(cdp_browser._page)) not in cdp_browser._download_manager._page_handlers: cdp_browser._download_manager.attach_to_page(cdp_browser._page) dm = cdp_browser._download_manager From 5dfd1067427581b8025c6b4b18f0e2dbdc0eb986 Mon Sep 17 00:00:00 2001 From: NiceCode666 Date: Tue, 19 May 2026 14:43:00 +0800 Subject: [PATCH 3/7] fix: attach DownloadManager page-scoped in CDP-borrowed mode at _start() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Align DownloadManager attach timing between _start() and _switch_self_page_to. Before: _start() in CDP-borrowed mode intentionally skipped attaching the DownloadManager ("Borrowed context: NOT attached"), but _switch_self_page_to already did a detach-old + attach-new on popup follow, implicitly assuming the old page was already attached. The two pieces of code disagreed: the first popup follow's detach was a no-op and its attach was actually a first-time attach, leaving the bridgic-initial-page's bookkeeping in _page_handlers permanently empty. After: _start() in CDP-borrowed mode now attaches the DownloadManager page-scoped to bridgic's own tab (`self._page`). The registered handler remains effectively dormant — Playwright doesn't fire `download` events in CDP-borrowed mode because CdpDownloadRenamer routed the path out of artifactsDir — but `_page_handlers` is now in sync with `self._page` from the first moment, giving _switch_self_page_to a well-defined initial state. Context (does NOT change): - Owned-context CDP: still attach_to_context (all pages belong to bridgic). - Borrowed-context: NOT attach_to_context (would hijack user tabs — privacy boundary). Only attach_to_page on bridgic's own tab. Updated test_download_manager_NOT_attached_in_borrowed_context → renamed to test_download_manager_page_scoped_attach_in_borrowed_context to reflect the new invariant (attach_to_context NOT called, attach_to_page called exactly once with bridgic's tab). make test-quick: 1253 passed. --- bridgic/browser/session/_browser.py | 25 ++++++++++++++++--------- tests/unit/test_browser.py | 19 ++++++++++++------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/bridgic/browser/session/_browser.py b/bridgic/browser/session/_browser.py index 326db55..dd01dd5 100644 --- a/bridgic/browser/session/_browser.py +++ b/bridgic/browser/session/_browser.py @@ -1712,15 +1712,22 @@ async def _start(self) -> None: # Playwright's per-context setDownloadBehavior(allowAndName) # still routes downloads through the artifactsDir, so # DownloadManager.save_as() can copy files to downloads_path. - # - Borrowed context: NOT attached. Our L1 override took the - # default context to allow + downloadPath, so Chrome writes - # directly to the final path; bridgic is not in the - # file-transfer loop. Trying to `save_as` here would block - # forever (Playwright no longer receives - # Browser.downloadProgress(completed) once the path moved - # out of artifactsDir). - if self._download_manager and self._cdp_context_owned: - self._download_manager.attach_to_context(self._context) + # - Borrowed context: page-scoped attach to bridgic's own tab + # only. Attaching to the whole borrowed context would hijack + # the user's other tabs — disallowed by the borrowed-mode + # privacy boundary. The L1 override above routed downloads + # on bridgic's tab through CdpDownloadRenamer, so Playwright + # does NOT fire `download` events here (the path moved out + # of artifactsDir) and the registered handler stays dormant. + # We attach anyway so `_page_handlers` is kept in sync with + # `self._page` and the popup-follow migration in + # `_switch_self_page_to` has a well-defined initial state + # to detach from. + if self._download_manager: + if self._cdp_context_owned: + self._download_manager.attach_to_context(self._context) + else: + self._download_manager.attach_to_page(self._page) logger.info("Playwright started (mode=cdp, stealth_js=%s)", self.stealth_enabled) return diff --git a/tests/unit/test_browser.py b/tests/unit/test_browser.py index c132ffe..02ecd7f 100644 --- a/tests/unit/test_browser.py +++ b/tests/unit/test_browser.py @@ -2201,13 +2201,18 @@ async def test_cdp_new_page_called_unconditionally(self): assert browser._page is mock_pg @pytest.mark.asyncio - async def test_download_manager_NOT_attached_in_borrowed_context(self, tmp_path): + async def test_download_manager_page_scoped_attach_in_borrowed_context( + self, tmp_path + ): """In CDP borrowed-context mode the download manager must NOT attach - anywhere. L1's revoke restored Chrome's native download path, so - Playwright never receives `Browser.downloadProgress(completed)`. A - page-scoped attach would leak a hung `save_as()` task per download. - Chrome handles downloads natively (potentially via its 'Save As' - dialog); programmatic capture requires owned mode.""" + to the whole context (that would hijack the user's other tabs — + disallowed by the borrowed-mode privacy boundary), but it SHOULD + attach page-scoped to bridgic's own tab. The registered handler is + effectively dormant — Playwright never fires `download` events in + CDP-borrowed mode (downloads route through CdpDownloadRenamer + instead) — but keeping `_page_handlers` in sync with `self._page` + gives the popup-follow migration in `_switch_self_page_to` a + well-defined initial state to detach from.""" mock_pw, _, mock_ctx, mock_pg = self._make_cdp_mocks() downloads_dir = tmp_path / "dl" downloads_dir.mkdir() @@ -2222,7 +2227,7 @@ async def test_download_manager_NOT_attached_in_borrowed_context(self, tmp_path) patch.object(browser._download_manager, "attach_to_page") as mock_attach_pg: await browser._start() mock_attach_ctx.assert_not_called() - mock_attach_pg.assert_not_called() + mock_attach_pg.assert_called_once_with(mock_pg) @pytest.mark.asyncio async def test_download_manager_attached_owned_context(self, tmp_path): From 96910c307ec00e84928b267a3925317a5a1ae2de Mon Sep 17 00:00:00 2001 From: NiceCode666 Date: Tue, 19 May 2026 15:58:32 +0800 Subject: [PATCH 4/7] Revert "fix: attach DownloadManager page-scoped in CDP-borrowed mode at _start()" This reverts commit 5dfd1067427581b8025c6b4b18f0e2dbdc0eb986. --- bridgic/browser/session/_browser.py | 25 +++++++++---------------- tests/unit/test_browser.py | 19 +++++++------------ 2 files changed, 16 insertions(+), 28 deletions(-) diff --git a/bridgic/browser/session/_browser.py b/bridgic/browser/session/_browser.py index dd01dd5..326db55 100644 --- a/bridgic/browser/session/_browser.py +++ b/bridgic/browser/session/_browser.py @@ -1712,22 +1712,15 @@ async def _start(self) -> None: # Playwright's per-context setDownloadBehavior(allowAndName) # still routes downloads through the artifactsDir, so # DownloadManager.save_as() can copy files to downloads_path. - # - Borrowed context: page-scoped attach to bridgic's own tab - # only. Attaching to the whole borrowed context would hijack - # the user's other tabs — disallowed by the borrowed-mode - # privacy boundary. The L1 override above routed downloads - # on bridgic's tab through CdpDownloadRenamer, so Playwright - # does NOT fire `download` events here (the path moved out - # of artifactsDir) and the registered handler stays dormant. - # We attach anyway so `_page_handlers` is kept in sync with - # `self._page` and the popup-follow migration in - # `_switch_self_page_to` has a well-defined initial state - # to detach from. - if self._download_manager: - if self._cdp_context_owned: - self._download_manager.attach_to_context(self._context) - else: - self._download_manager.attach_to_page(self._page) + # - Borrowed context: NOT attached. Our L1 override took the + # default context to allow + downloadPath, so Chrome writes + # directly to the final path; bridgic is not in the + # file-transfer loop. Trying to `save_as` here would block + # forever (Playwright no longer receives + # Browser.downloadProgress(completed) once the path moved + # out of artifactsDir). + if self._download_manager and self._cdp_context_owned: + self._download_manager.attach_to_context(self._context) logger.info("Playwright started (mode=cdp, stealth_js=%s)", self.stealth_enabled) return diff --git a/tests/unit/test_browser.py b/tests/unit/test_browser.py index 02ecd7f..c132ffe 100644 --- a/tests/unit/test_browser.py +++ b/tests/unit/test_browser.py @@ -2201,18 +2201,13 @@ async def test_cdp_new_page_called_unconditionally(self): assert browser._page is mock_pg @pytest.mark.asyncio - async def test_download_manager_page_scoped_attach_in_borrowed_context( - self, tmp_path - ): + async def test_download_manager_NOT_attached_in_borrowed_context(self, tmp_path): """In CDP borrowed-context mode the download manager must NOT attach - to the whole context (that would hijack the user's other tabs — - disallowed by the borrowed-mode privacy boundary), but it SHOULD - attach page-scoped to bridgic's own tab. The registered handler is - effectively dormant — Playwright never fires `download` events in - CDP-borrowed mode (downloads route through CdpDownloadRenamer - instead) — but keeping `_page_handlers` in sync with `self._page` - gives the popup-follow migration in `_switch_self_page_to` a - well-defined initial state to detach from.""" + anywhere. L1's revoke restored Chrome's native download path, so + Playwright never receives `Browser.downloadProgress(completed)`. A + page-scoped attach would leak a hung `save_as()` task per download. + Chrome handles downloads natively (potentially via its 'Save As' + dialog); programmatic capture requires owned mode.""" mock_pw, _, mock_ctx, mock_pg = self._make_cdp_mocks() downloads_dir = tmp_path / "dl" downloads_dir.mkdir() @@ -2227,7 +2222,7 @@ async def test_download_manager_page_scoped_attach_in_borrowed_context( patch.object(browser._download_manager, "attach_to_page") as mock_attach_pg: await browser._start() mock_attach_ctx.assert_not_called() - mock_attach_pg.assert_called_once_with(mock_pg) + mock_attach_pg.assert_not_called() @pytest.mark.asyncio async def test_download_manager_attached_owned_context(self, tmp_path): From f60b0ce22b6a02a9c9d86ce5cd60b75e1054191a Mon Sep 17 00:00:00 2001 From: NiceCode666 Date: Tue, 19 May 2026 16:02:34 +0800 Subject: [PATCH 5/7] feat(download): bounded completion queue + external-download record API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DownloadManager (`_download.py`) gains two capabilities that the rest of PR #31's surface depends on: 1. **Sliding-window `_completed_queue`** — bounded at `maxsize=256` so a long-lived daemon session with many downloads but no `wait-download` consumer no longer accumulates `DownloadedFile` objects indefinitely. `_enqueue_completed()` is the single push point: it evicts the oldest entry on `QueueFull` so the queue stays a fresh window. Full history is still available via `_downloaded_files`. `_handle_download` switched to the new helper. 2. **`record_external_download()`** — public entry for non-Playwright pipelines (specifically `CdpDownloadRenamer` in CDP-borrowed mode, see the next commit) to inject a completion record. Mirrors the post-save bookkeeping of `_handle_download`: appends to `_downloaded_files`, calls `_enqueue_completed`, and wakes the oldest pending Future waiter. Lets `wait_for_next_download` / `downloaded_files` / `get_downloaded_files_text` work uniformly across all pipelines. Class docstring also documents the two coexisting wait APIs (`wait_for_download` one-shot Future bound to a trigger vs `wait_for_next_download` persistent FIFO queue) so callers pick the right one. Tests: `test_completed_queue_evicts_oldest_when_full` covers the sliding-window eviction behavior. --- bridgic/browser/session/_download.py | 51 ++++++++++++++++++++++++++-- tests/unit/test_download.py | 26 ++++++++++++++ 2 files changed, 74 insertions(+), 3 deletions(-) diff --git a/bridgic/browser/session/_download.py b/bridgic/browser/session/_download.py index efd6603..f863cb8 100644 --- a/bridgic/browser/session/_download.py +++ b/bridgic/browser/session/_download.py @@ -13,6 +13,7 @@ import re import tempfile import time +from contextlib import suppress from dataclasses import dataclass, field from pathlib import Path from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional @@ -167,11 +168,17 @@ def __init__( # own Future; _handle_download fulfils the oldest pending waiter on # completion so callers do not stomp on each other's callbacks. self._pending_waiters: List[asyncio.Future[DownloadedFile]] = [] - # Persistent FIFO of all successfully saved downloads. Drained by + # Sliding-window FIFO of recently saved downloads. Drained by # wait_for_next_download() — independent from _pending_waiters so the # CLI's `wait-download` command can pick up completions that already # happened before the agent asked. - self._completed_queue: asyncio.Queue[DownloadedFile] = asyncio.Queue() + # + # Bounded (maxsize=256) so long-lived daemon sessions with many + # downloads but no `wait-download` consumer don't accumulate + # DownloadedFile objects indefinitely. When full, the oldest entry + # is evicted via `_enqueue_completed`. Full history is still + # available via `_downloaded_files`. + self._completed_queue: asyncio.Queue[DownloadedFile] = asyncio.Queue(maxsize=256) @property def downloads_path(self) -> Path: @@ -373,7 +380,7 @@ async def _handle_download(self, download: "Download") -> None: ) self._downloaded_files.append(downloaded_file) - self._completed_queue.put_nowait(downloaded_file) + self._enqueue_completed(downloaded_file) logger.info(f"Download saved: {target_path} ({file_size} bytes)") # Call complete callback if configured @@ -572,6 +579,44 @@ async def wait_for_download( if not waiter.done(): waiter.cancel() + def _enqueue_completed(self, downloaded_file: DownloadedFile) -> None: + """Push to the sliding-window completion queue. + + If the queue is at maxsize (no consumer is draining it), drop the + oldest entry to keep memory bounded. Full history remains in + :attr:`_downloaded_files` — only the *recent-completions* surface + used by :meth:`wait_for_next_download` shrinks. + """ + try: + self._completed_queue.put_nowait(downloaded_file) + except asyncio.QueueFull: + with suppress(asyncio.QueueEmpty): + self._completed_queue.get_nowait() + self._completed_queue.put_nowait(downloaded_file) + + def record_external_download(self, downloaded_file: DownloadedFile) -> None: + """Inject a completion record from a non-Playwright pipeline. + + Used in CDP-borrowed mode where :class:`CdpDownloadRenamer` handles + the actual rename (Playwright never sees these downloads because the + path moved out of ``artifactsDir``). The renamer calls this method + after a successful rename so the unified surfaces + (``downloaded_files`` / :meth:`wait_for_next_download` / + :meth:`wait_for_download`) work the same way as in non-CDP / CDP-owned + modes. + + Mirrors the post-save bookkeeping of :meth:`_handle_download`: + appends to ``_downloaded_files``, enqueues into the sliding window, + and wakes the oldest pending Future waiter (if any). + """ + self._downloaded_files.append(downloaded_file) + self._enqueue_completed(downloaded_file) + while self._pending_waiters: + waiter = self._pending_waiters.pop(0) + if not waiter.done(): + waiter.set_result(downloaded_file) + break + async def wait_for_next_download(self, timeout: float = 30.0) -> Optional[DownloadedFile]: """Wait up to *timeout* seconds for the next download to complete. diff --git a/tests/unit/test_download.py b/tests/unit/test_download.py index e8de7cc..eb89736 100644 --- a/tests/unit/test_download.py +++ b/tests/unit/test_download.py @@ -187,6 +187,32 @@ async def test_clear_history_drains_completed_queue(self, temp_downloads_dir): result = await manager.wait_for_next_download(timeout=0.05) assert result is None + @pytest.mark.asyncio + async def test_completed_queue_evicts_oldest_when_full(self, temp_downloads_dir): + """_enqueue_completed drops the oldest entry when the queue is at + maxsize, rather than blocking the download handler task. Production + uses maxsize=256; shrink it here so the test is fast.""" + manager = DownloadManager(downloads_path=temp_downloads_dir) + manager._completed_queue = asyncio.Queue(maxsize=2) + + def _mk(name): + return DownloadedFile( + url=f"https://example.com/{name}", + path=f"/tmp/{name}", + file_name=name, + file_size=1, + ) + + manager._enqueue_completed(_mk("a")) + manager._enqueue_completed(_mk("b")) + manager._enqueue_completed(_mk("c")) # evicts "a" + + assert manager._completed_queue.qsize() == 2 + first = await manager.wait_for_next_download(timeout=0.05) + second = await manager.wait_for_next_download(timeout=0.05) + assert first is not None and second is not None + assert [first.file_name, second.file_name] == ["b", "c"] + def test_get_downloads_by_type(self, temp_downloads_dir): """Test filtering downloads by file type.""" manager = DownloadManager(downloads_path=temp_downloads_dir) From aaa60e45f632ffb630814be218b2cf5048d1b7aa Mon Sep 17 00:00:00 2001 From: NiceCode666 Date: Tue, 19 May 2026 16:03:01 +0800 Subject: [PATCH 6/7] feat(cdp-borrowed): pipe CdpDownloadRenamer completions into DownloadManager MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #31 followup: surface CDP-borrowed downloads through the unified DownloadManager APIs (`wait_for_next_download`, `get_downloaded_files_text`, `downloaded_files`) instead of returning "not supported in CDP-borrowed mode" placeholder messages. Before this change, agents using `--cdp auto` saw the explanatory string and had no way to observe download completions via the standard CLI / SDK surface. **`CdpDownloadRenamer` (`_cdp_download_renamer.py`)** - Optional `on_completed: Callable[[DownloadedFile], None]` constructor arg. - `_Pending` gains a `url` field captured from `Browser.downloadWillBegin` so the callback can build a fully-populated `DownloadedFile`. - `_finalize_rename()` calls the callback after a successful rename with filename, real path, byte count from `final.stat().st_size`, file_type from extension, and the original suggestedFilename. Callback exceptions are caught and warning-logged so a buggy consumer cannot corrupt the rename pipeline. **`Browser` (`_browser.py`)** - `_start()` constructs the renamer with `on_completed=self._download_manager.record_external_download`. CDP-borrowed completions now flow into DM and populate the unified surface. - DM is still NOT attached to the borrowed context (would hijack user tabs) or page-scoped to bridgic's own tab (the latter was tried in the reverted commit and produced 0-byte placeholder files because Playwright still fires `download` events in CDP-borrowed mode even when `setDownloadBehavior(allowAndName)` routes the file away from `artifactsDir`). The comment block documents this so the lesson is not re-learned. - `_switch_self_page_to` no longer touches DM attach/detach — the legacy CDP-borrowed migration block was tied to the reverted page-scoped attach and is no longer needed. - `wait_for_next_download` / `get_downloaded_files_text` lose their `_is_cdp_borrowed` short-circuits — they now serve the unified surface. - `_format_file_size()` extracted as a static helper, used by both methods (was duplicated inline). **Tests** - `tests/unit/test_browser.py::TestBrowserDownloadMethods` reworked: drop the two "CDP-borrowed unsupported" assertions; add `test_wait_for_next_download_cdp_borrowed_via_record_external`, `test_get_downloaded_files_text_cdp_borrowed_via_record_external`, and a non-CDP happy-path summary-string test. - `tests/unit/test_browser.py::test_download_manager_NOT_attached_in_borrowed_context` restored: both `attach_to_context` and `attach_to_page` must NOT be called for the borrowed context. - `tests/unit/test_cdp_download_renamer.py::TestOnCompletedCallback`: three tests for the new callback wiring (populated DownloadedFile delivered, no-callback default still renames, callback exception does not break the rename pipeline). - `tests/integration/test_owned_pages.py::test_popup_follow_does_not_attach_download_manager` replaces the legacy popup-follow bookkeeping test (which assumed the reverted page-scoped attach). New invariant: `_page_handlers` stays empty before and after popup follow in CDP-borrowed mode. Real-world verification: clicked the bridgic 0.0.5 release `.whl` from `https://github.com/bitsky-tech/bridgic-browser/releases/tag/v0.0.5` in `--cdp auto` mode. `wait-download 30` returned `"Download complete: bridgic_browser-0.0.5-py3-none-any.whl — 242.6 KB — /...whl"`, `downloads` listed exactly one entry, and no 0-byte placeholder appeared in `~/Downloads`. `make test-quick`: 1258 passed. --- bridgic/browser/session/_browser.py | 111 +++++++-------- .../browser/session/_cdp_download_renamer.py | 52 ++++++- tests/integration/test_owned_pages.py | 47 +++---- tests/unit/test_browser.py | 127 +++++++++++++----- tests/unit/test_cdp_download_renamer.py | 93 +++++++++++++ 5 files changed, 313 insertions(+), 117 deletions(-) diff --git a/bridgic/browser/session/_browser.py b/bridgic/browser/session/_browser.py index 326db55..f9b5422 100644 --- a/bridgic/browser/session/_browser.py +++ b/bridgic/browser/session/_browser.py @@ -724,54 +724,49 @@ def downloaded_files(self) -> List[DownloadedFile]: return self._download_manager.downloaded_files return [] + @staticmethod + def _format_file_size(size_bytes: int) -> str: + """Format byte count as KB or MB with appropriate precision.""" + size_kb = size_bytes / 1024 + if size_kb >= 1024: + return f"{size_kb / 1024:.2f} MB" + return f"{size_kb:.1f} KB" + async def get_downloaded_files_text(self) -> str: """Return a human-readable summary of all downloads in this session. - Returns an explanatory message in CDP-borrowed mode (where downloads - are renamed in place by ``CdpDownloadRenamer`` and never enter - ``DownloadManager``'s tracking list), so callers can tell "no downloads - happened" apart from "this mode doesn't track downloads". + Works across all pipelines: + - non-CDP / CDP-owned: entries come from Playwright's `download` + event via ``DownloadManager._handle_download``. + - CDP-borrowed: entries come from ``CdpDownloadRenamer`` via + ``DownloadManager.record_external_download``. """ - if self._is_cdp_borrowed: - return ( - "Download tracking is not available in CDP-borrowed mode. " - "Downloads in this mode are renamed in place by CdpDownloadRenamer " - "and do not pass through DownloadManager. Inspect downloads_path " - "directly to find the saved files." - ) files = self.downloaded_files if not files: return "No downloads in this session." - lines = [] - for i, f in enumerate(files, 1): - size_kb = f.file_size / 1024 - size_str = f"{size_kb / 1024:.2f} MB" if size_kb >= 1024 else f"{size_kb:.1f} KB" - lines.append(f"[{i}] {f.file_name} — {size_str} — {f.path}") + lines = [ + f"[{i}] {f.file_name} — {self._format_file_size(f.file_size)} — {f.path}" + for i, f in enumerate(files, 1) + ] return "\n".join(lines) async def wait_for_next_download(self, timeout: float = 30.0) -> str: """Wait up to *timeout* seconds for the next download to complete. - Returns a one-line summary of the downloaded file, or a timeout message. - Returns an explanatory message in CDP-borrowed mode — DownloadManager - is intentionally not attached there (downloads are handled by - ``CdpDownloadRenamer`` instead), so blocking on the completion queue - would always time out and confuse callers. + Returns a one-line summary of the downloaded file, or a timeout + message. Works across all pipelines — in CDP-borrowed mode the + completion record arrives via ``CdpDownloadRenamer`` → + ``DownloadManager.record_external_download`` after the rename. """ - if self._is_cdp_borrowed: - return ( - "wait_for_next_download is not supported in CDP-borrowed mode. " - "Downloads in this mode are renamed in place by CdpDownloadRenamer " - "and do not go through DownloadManager." - ) if not self._download_manager: return "Download manager not available." file = await self._download_manager.wait_for_next_download(timeout=timeout) if file is None: return f"No download completed within {timeout:.0f}s timeout." - size_kb = file.file_size / 1024 - size_str = f"{size_kb / 1024:.2f} MB" if size_kb >= 1024 else f"{size_kb:.1f} KB" - return f"Download complete: {file.file_name} — {size_str} — {file.path}" + return ( + f"Download complete: {file.file_name} — " + f"{self._format_file_size(file.file_size)} — {file.path}" + ) @property def headless(self) -> bool: @@ -1687,8 +1682,18 @@ async def _start(self) -> None: if override_ok: self._current_cdp_download_path = take_over_path try: + # Pipe successful renames into DownloadManager + # so CDP-borrowed downloads surface through the + # same downloaded_files / wait_for_next_download + # API as non-CDP / CDP-owned modes. + on_completed = ( + self._download_manager.record_external_download + if self._download_manager is not None + else None + ) self._cdp_download_renamer = CdpDownloadRenamer( - default_dir=take_over_path + default_dir=take_over_path, + on_completed=on_completed, ) await self._cdp_download_renamer.attach( self._cdp_download_session @@ -1712,13 +1717,19 @@ async def _start(self) -> None: # Playwright's per-context setDownloadBehavior(allowAndName) # still routes downloads through the artifactsDir, so # DownloadManager.save_as() can copy files to downloads_path. - # - Borrowed context: NOT attached. Our L1 override took the - # default context to allow + downloadPath, so Chrome writes - # directly to the final path; bridgic is not in the - # file-transfer loop. Trying to `save_as` here would block - # forever (Playwright no longer receives - # Browser.downloadProgress(completed) once the path moved - # out of artifactsDir). + # - Borrowed context: NOT attached anywhere. Attaching to the + # whole context would hijack user tabs (privacy boundary); + # attaching page-scoped to bridgic's own tab was empirically + # verified to cause duplicate-record bugs — Playwright STILL + # fires `download` events in CDP-borrowed mode (the + # assumption that "path moved out of artifactsDir suppresses + # the event" is wrong with `setDownloadBehavior(allowAndName)` + # on a page session), and `_handle_download.save_as` writes a + # 0-byte placeholder to the DM default dir. Instead, + # `CdpDownloadRenamer.on_completed` pipes the real completion + # back into DM via `record_external_download`, which + # populates `downloaded_files` / `_completed_queue` / + # `_pending_waiters` without going through `_handle_download`. if self._download_manager and self._cdp_context_owned: self._download_manager.attach_to_context(self._context) @@ -2981,21 +2992,15 @@ async def _switch_self_page_to(self, new_page: Page) -> None: await self._switch_video_to_page(new_page) except Exception as e: logger.debug("[_switch_self_page_to] video switch failed: %s", e) - # CDP-borrowed mode attaches DownloadManager per-page (not per-context) - # to avoid hijacking the user's private downloads. Migrate handlers so - # downloads triggered from the followed popup still land in bridgic's - # downloads_path. - if self._is_cdp_borrowed and self._download_manager and old is not None: - try: - self._download_manager.detach_from_page(old) - except Exception: - pass - try: - self._download_manager.attach_to_page(new_page) - except Exception as e: - logger.debug( - "[_switch_self_page_to] download manager re-attach failed: %s", e - ) + # No DownloadManager migration here: in CDP-borrowed mode the manager + # is intentionally NOT attached to any page (see `_start()` comments — + # attaching causes duplicate-record bugs because Playwright still + # fires `download` events even when allowAndName routes the file + # away from artifactsDir). CdpDownloadRenamer handles bridgic's + # primary tab via `record_external_download`; popup-triggered + # CDP-borrowed downloads are out of scope (see KNOWN_LIMITATIONS). + # In non-CDP / CDP-owned modes the global `attach_to_context` is the + # active pipeline, so no per-page migration is needed either. async def _select_fallback_page(self, closed_page: Page) -> Optional[Page]: """Pick the next `self._page` after `closed_page` is closed. diff --git a/bridgic/browser/session/_cdp_download_renamer.py b/bridgic/browser/session/_cdp_download_renamer.py index f169a98..6b10329 100644 --- a/bridgic/browser/session/_cdp_download_renamer.py +++ b/bridgic/browser/session/_cdp_download_renamer.py @@ -28,6 +28,8 @@ from pathlib import Path from typing import Any, Callable, Dict, Optional, Tuple +from bridgic.browser.session._download import DownloadedFile + logger = logging.getLogger(__name__) @@ -115,11 +117,14 @@ class _Pending: """Per-download state captured at ``downloadWillBegin`` time. ``target_dir`` is snapshotted so that a hot ``set_default_dir`` call does - not retarget downloads already in flight. + not retarget downloads already in flight. ``url`` is captured so the + completion callback can construct a fully-populated ``DownloadedFile`` + record. """ sanitized_name: str target_dir: Path + url: str = "" class CdpDownloadRenamer: @@ -128,14 +133,28 @@ class CdpDownloadRenamer: Lifecycle: ``attach`` → ``set_default_dir`` (as many as needed) → ``detach``. Thread-safety: events arrive on the asyncio loop thread; all state mutation happens in the same thread, no locking required. + + The optional ``on_completed`` callback is invoked once per successfully + renamed download with a fully-populated :class:`DownloadedFile`. This is + how CDP-borrowed-mode completions are surfaced to ``DownloadManager``'s + unified tracking lists (``downloaded_files`` / ``wait_for_next_download``) + — Playwright never fires ``download`` events in CDP-borrowed mode (the + path moved out of ``artifactsDir``) so this is the only available hook. + Callback exceptions are caught and logged; they cannot corrupt the + rename pipeline. """ - def __init__(self, default_dir: Path) -> None: + def __init__( + self, + default_dir: Path, + on_completed: Optional[Callable[[DownloadedFile], None]] = None, + ) -> None: self._default_dir: Path = Path(default_dir) self._pending: Dict[str, _Pending] = {} self._session: Optional[Any] = None # playwright CDPSession self._handlers: Dict[str, Callable[[dict], None]] = {} self._attached: bool = False + self._on_completed = on_completed @property def default_dir(self) -> Path: @@ -204,9 +223,11 @@ def _on_will_begin(self, params: dict) -> None: return suggested = params.get("suggestedFilename") or "" sanitized = sanitize_filename(str(suggested)) + url = str(params.get("url") or "") self._pending[guid] = _Pending( sanitized_name=sanitized, target_dir=self._default_dir, + url=url, ) def _on_progress(self, params: dict) -> None: @@ -259,3 +280,30 @@ def _finalize_rename(self, guid: str, pending: _Pending) -> None: "(file left at its GUID path)", src, final, exc, ) + return + + # Surface the completion to any registered consumer (typically + # `DownloadManager.record_external_download` so the unified + # downloaded_files / wait_for_next_download surface works in + # CDP-borrowed mode too). Callback exceptions are swallowed. + if self._on_completed is not None: + try: + try: + file_size = final.stat().st_size + except OSError: + file_size = 0 + ext = final.suffix.lstrip(".").lower() or None + downloaded_file = DownloadedFile( + url=pending.url, + path=str(final), + file_name=final.name, + file_size=file_size, + file_type=ext, + suggested_filename=pending.sanitized_name, + ) + self._on_completed(downloaded_file) + except Exception as exc: + logger.warning( + "[CdpDownloadRenamer] on_completed callback failed: %s", + exc, + ) diff --git a/tests/integration/test_owned_pages.py b/tests/integration/test_owned_pages.py index 51e884b..1b87f47 100644 --- a/tests/integration/test_owned_pages.py +++ b/tests/integration/test_owned_pages.py @@ -431,38 +431,27 @@ async def test_close_owner_then_child_falls_back_to_focus_stack(cdp_browser): # ───────────────────────────────────────────────────────────────────────────── -# I10 — DownloadManager follows the popup (CDP borrowed only) +# I10 — popup adoption in CDP-borrowed mode (no DownloadManager migration) # ───────────────────────────────────────────────────────────────────────────── @pytest.mark.integration @pytest.mark.asyncio -async def test_download_manager_reattached_on_popup_follow(cdp_browser, tmp_path): - """When `_switch_self_page_to` moves `self._page` to a popup in - CDP-borrowed mode, the (page-scoped) DownloadManager must re-attach. - - We don't trigger an actual download (data: URLs cannot drive Chrome's - download path); instead we verify the attachment bookkeeping in the - DownloadManager moved with self._page.""" - # Wire up DownloadManager → bridgic's own page so the popup-follow - # re-attach logic in `_switch_self_page_to` has a state to migrate. - # - # Production code in CDP-borrowed mode intentionally does NOT attach - # DownloadManager at `_start()` (downloads in this mode go through - # CdpDownloadRenamer instead — see `_browser.py:_start()` comments). - # Since PR #28 a fresh `_download_manager` is always created, but - # since it's never wired to a page in this mode we attach manually - # here. The test verifies the *bookkeeping migration*, not the - # production decision to skip the initial attach. - from bridgic.browser.session._download import DownloadManager - if cdp_browser._download_manager is None: - cdp_browser._download_manager = DownloadManager(downloads_path=str(tmp_path)) - if str(id(cdp_browser._page)) not in cdp_browser._download_manager._page_handlers: - cdp_browser._download_manager.attach_to_page(cdp_browser._page) - +async def test_popup_follow_does_not_attach_download_manager(cdp_browser): + """In CDP-borrowed mode the DownloadManager is intentionally NOT attached + to any page — `_start()` doesn't attach (avoiding Playwright `download` + event duplicate-record bugs), and `_switch_self_page_to` does not + attach on popup follow either. CdpDownloadRenamer handles bridgic's + primary tab via `record_external_download`; popup-triggered + CDP-borrowed downloads are a known limitation (see + docs/KNOWN_LIMITATIONS.md). + + This test guards that invariant: after a popup is auto-followed, no + page in `_page_handlers` belongs to the DownloadManager.""" dm = cdp_browser._download_manager old_page = cdp_browser._page - # Attachment is tracked internally via `_page_handlers` keyed by str(id(page)). - assert str(id(old_page)) in dm._page_handlers + + # Initial state: DM has no page-scoped attachments in CDP-borrowed mode. + assert dm._page_handlers == {} await old_page.goto(LINK_TARGET_BLANK, wait_until="domcontentloaded") async with cdp_browser._context.expect_page() as info: @@ -476,9 +465,9 @@ async def test_download_manager_reattached_on_popup_follow(cdp_browser, tmp_path await asyncio.sleep(0.05) assert cdp_browser._page is popup - # New attachment: popup is now handled; old attachment was detached. - assert str(id(popup)) in dm._page_handlers - assert str(id(old_page)) not in dm._page_handlers + # After follow: still no attachments. The DM remains a pure record + # sink populated only by CdpDownloadRenamer.record_external_download. + assert dm._page_handlers == {} # ───────────────────────────────────────────────────────────────────────────── diff --git a/tests/unit/test_browser.py b/tests/unit/test_browser.py index c132ffe..05abde9 100644 --- a/tests/unit/test_browser.py +++ b/tests/unit/test_browser.py @@ -186,52 +186,109 @@ def test_get_config(self): class TestBrowserDownloadMethods: - """Tests for Browser.wait_for_next_download / get_downloaded_files_text - surface — in particular the explicit unsupported-in-CDP-borrowed-mode - behavior so callers can tell the modes apart instead of blocking until - timeout.""" + """Tests for Browser.wait_for_next_download / get_downloaded_files_text. + + Both APIs work uniformly across pipelines: non-CDP / CDP-owned downloads + arrive via Playwright's `download` event into DownloadManager; + CDP-borrowed downloads arrive via CdpDownloadRenamer → + DownloadManager.record_external_download. The Browser methods themselves + only see DownloadManager's unified state.""" @pytest.mark.asyncio - async def test_wait_for_next_download_in_cdp_borrowed_mode_returns_explanation( - self, - ): + async def test_get_downloaded_files_text_empty_session_message(self): + """Zero downloads → generic message regardless of mode.""" browser = Browser() - # Force the CDP-borrowed state without actually starting Playwright: - # _is_cdp_borrowed == bool(_cdp_resolved) and not _cdp_context_owned. - browser._cdp_resolved = "ws://localhost:9222/devtools/browser/test" - assert browser._is_cdp_borrowed is True + assert browser._is_cdp_borrowed is False - result = await browser.wait_for_next_download(timeout=30.0) + result = await browser.get_downloaded_files_text() - assert "not supported in CDP-borrowed mode" in result - # The completion queue must NOT have been awaited (no blocking). - assert browser.download_manager._completed_queue.qsize() == 0 + assert result == "No downloads in this session." @pytest.mark.asyncio - async def test_get_downloaded_files_text_in_cdp_borrowed_mode_returns_explanation( - self, - ): + async def test_wait_for_next_download_returns_summary_string(self): + """Happy path: when a DownloadedFile is already on the completion + queue, wait_for_next_download returns the formatted 'Download + complete: ...' summary string (filename, KB/MB size, path).""" + from bridgic.browser.session._download import DownloadedFile + browser = Browser() + + browser.download_manager._completed_queue.put_nowait( + DownloadedFile( + url="https://example.com/r.pdf", + path="/tmp/r.pdf", + file_name="r.pdf", + file_size=261_000, # → 254.9 KB + ) + ) + + result = await browser.wait_for_next_download(timeout=1.0) + + assert result.startswith("Download complete:") + assert "r.pdf" in result + assert "/tmp/r.pdf" in result + assert "KB" in result + + @pytest.mark.asyncio + async def test_wait_for_next_download_cdp_borrowed_via_record_external(self): + """CDP-borrowed mode now surfaces downloads via + DownloadManager.record_external_download. wait_for_next_download must + return the formatted summary just like in non-CDP modes — no + 'not supported' fallback.""" + from bridgic.browser.session._download import DownloadedFile browser = Browser() + # Force CDP-borrowed state without launching Playwright. browser._cdp_resolved = "ws://localhost:9222/devtools/browser/test" assert browser._is_cdp_borrowed is True - result = await browser.get_downloaded_files_text() + # Simulate what CdpDownloadRenamer would do after a successful rename. + browser.download_manager.record_external_download( + DownloadedFile( + url="https://example.com/cdp.pdf", + path="/tmp/cdp.pdf", + file_name="cdp.pdf", + file_size=2048, # 2 KB + ) + ) + + result = await browser.wait_for_next_download(timeout=1.0) - assert "not available in CDP-borrowed mode" in result - assert "CdpDownloadRenamer" in result + assert result.startswith("Download complete:") + assert "cdp.pdf" in result + assert "/tmp/cdp.pdf" in result @pytest.mark.asyncio - async def test_get_downloaded_files_text_empty_session_message(self): - """Non-CDP-borrowed mode with zero downloads still uses the generic - 'No downloads in this session.' message — the CDP-borrowed branch - must not short-circuit it.""" + async def test_get_downloaded_files_text_cdp_borrowed_via_record_external(self): + """In CDP-borrowed mode, downloads injected via + record_external_download are listed by get_downloaded_files_text the + same way as Playwright-tracked downloads in other modes.""" + from bridgic.browser.session._download import DownloadedFile browser = Browser() - # Default fresh Browser: _cdp_resolved is None → _is_cdp_borrowed False. - assert browser._is_cdp_borrowed is False + browser._cdp_resolved = "ws://localhost:9222/devtools/browser/test" + assert browser._is_cdp_borrowed is True + + browser.download_manager.record_external_download( + DownloadedFile( + url="https://example.com/a.pdf", + path="/tmp/a.pdf", + file_name="a.pdf", + file_size=1024, + ) + ) + browser.download_manager.record_external_download( + DownloadedFile( + url="https://example.com/b.zip", + path="/tmp/b.zip", + file_name="b.zip", + file_size=5120, + ) + ) result = await browser.get_downloaded_files_text() - assert result == "No downloads in this session." + assert "[1] a.pdf" in result + assert "[2] b.zip" in result + assert "/tmp/a.pdf" in result + assert "/tmp/b.zip" in result class TestBrowserLaunchOptions: @@ -2203,11 +2260,15 @@ async def test_cdp_new_page_called_unconditionally(self): @pytest.mark.asyncio async def test_download_manager_NOT_attached_in_borrowed_context(self, tmp_path): """In CDP borrowed-context mode the download manager must NOT attach - anywhere. L1's revoke restored Chrome's native download path, so - Playwright never receives `Browser.downloadProgress(completed)`. A - page-scoped attach would leak a hung `save_as()` task per download. - Chrome handles downloads natively (potentially via its 'Save As' - dialog); programmatic capture requires owned mode.""" + anywhere — neither to the borrowed context (would hijack user tabs) + nor page-scoped to bridgic's own tab. An empirically-verified + regression with page-scoped attach: Playwright STILL fires `download` + events in CDP-borrowed mode (`setDownloadBehavior(allowAndName)` on a + page session does not suppress them), so a registered handler runs + `save_as` and writes a 0-byte placeholder while the real file is + already produced by `CdpDownloadRenamer`. The renamer pipes real + completions back into DM via `record_external_download` instead — the + single, correct surface.""" mock_pw, _, mock_ctx, mock_pg = self._make_cdp_mocks() downloads_dir = tmp_path / "dl" downloads_dir.mkdir() diff --git a/tests/unit/test_cdp_download_renamer.py b/tests/unit/test_cdp_download_renamer.py index 5c834a2..1f0ea67 100644 --- a/tests/unit/test_cdp_download_renamer.py +++ b/tests/unit/test_cdp_download_renamer.py @@ -329,3 +329,96 @@ async def test_path_traversal_filename_is_neutralized( landed = [p for p in tmp_downloads.iterdir() if p.name != guid] assert len(landed) == 1 assert tmp_downloads in landed[0].parents or landed[0].parent == tmp_downloads + + +@pytest.mark.asyncio +class TestOnCompletedCallback: + """The on_completed callback wires CdpDownloadRenamer into + DownloadManager so CDP-borrowed downloads surface uniformly through + downloaded_files / wait_for_next_download in the rest of bridgic.""" + + async def test_callback_receives_populated_downloaded_file( + self, tmp_downloads: Path + ) -> None: + captured: list = [] + renamer = CdpDownloadRenamer( + default_dir=tmp_downloads, + on_completed=lambda df: captured.append(df), + ) + session = FakeCDPSession() + await renamer.attach(session) # type: ignore[arg-type] + + guid = "abc-123" + (tmp_downloads / guid).write_bytes(b"some-pdf-content-here") + session.fire( + "Browser.downloadWillBegin", + { + "guid": guid, + "suggestedFilename": "report.pdf", + "url": "https://example.com/report.pdf", + }, + ) + session.fire( + "Browser.downloadProgress", + {"guid": guid, "state": "completed"}, + ) + + assert len(captured) == 1 + df = captured[0] + assert df.file_name == "report.pdf" + assert df.path.endswith("report.pdf") + assert df.url == "https://example.com/report.pdf" + assert df.file_size == len(b"some-pdf-content-here") + assert df.file_type == "pdf" + assert df.suggested_filename == "report.pdf" + + async def test_no_callback_when_not_registered(self, tmp_downloads: Path) -> None: + """Renamer constructed without on_completed must still rename the + file — only the callback step is skipped.""" + renamer = CdpDownloadRenamer(default_dir=tmp_downloads) # no callback + session = FakeCDPSession() + await renamer.attach(session) # type: ignore[arg-type] + + guid = "no-cb" + (tmp_downloads / guid).write_bytes(b"x") + session.fire( + "Browser.downloadWillBegin", + {"guid": guid, "suggestedFilename": "x.bin"}, + ) + session.fire( + "Browser.downloadProgress", + {"guid": guid, "state": "completed"}, + ) + + # Rename still succeeded + assert (tmp_downloads / "x.bin").exists() + + async def test_callback_exception_does_not_break_renamer( + self, tmp_downloads: Path + ) -> None: + """A buggy on_completed callback must not corrupt the rename + pipeline — exception is swallowed and logged.""" + def bad_callback(_df): + raise RuntimeError("simulated downstream failure") + + renamer = CdpDownloadRenamer( + default_dir=tmp_downloads, on_completed=bad_callback + ) + session = FakeCDPSession() + await renamer.attach(session) # type: ignore[arg-type] + + guid = "boom" + (tmp_downloads / guid).write_bytes(b"payload") + session.fire( + "Browser.downloadWillBegin", + {"guid": guid, "suggestedFilename": "ok.bin"}, + ) + # Must not raise. + session.fire( + "Browser.downloadProgress", + {"guid": guid, "state": "completed"}, + ) + + # Rename completed despite callback exception. + assert (tmp_downloads / "ok.bin").read_bytes() == b"payload" + assert not (tmp_downloads / guid).exists() From 9c2936d7b6b0d972c873aa111ad7737f9433599b Mon Sep 17 00:00:00 2001 From: NiceCode666 Date: Tue, 19 May 2026 16:03:20 +0800 Subject: [PATCH 7/7] docs: align download docs with unified surface + popup limitation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Documentation pass to make all download-related references consistent with the post-PR-#31 reality: - **Unified surface across modes**: `wait_for_next_download` / `get_downloaded_files_text` / `downloaded_files` work the same in non-CDP, CDP-owned, and CDP-borrowed modes (CDP-borrowed entries flow in via `CdpDownloadRenamer` → `DownloadManager.record_external_download`). Removed the older "wait_for_next_download is unsupported in CDP-borrowed mode" wording from `README.md`, `README_zh.md`, `docs/API.md`, `CLAUDE.md`, and `skills/bridgic-browser/references/ cli-sdk-api-mapping.md`. The action-bound `wait_for_download(page, action, timeout)` variant remains unsupported in CDP-borrowed mode (still depends on Playwright's `download` event firing on bridgic's tab) and is documented as such. - **PR #28 corrections**: `CLAUDE.md` table row "If `downloads_path` is unset … files are lost" no longer matches the implementation (PR #28 defaults DM to `~/Downloads`); rewritten to reflect the auto-default. Tool/command counts bumped 67 → 69 across CLAUDE.md's package-tree section. - **`docs/INTERNALS.md` `_switch_self_page_to` section**: rewrote the legacy "no-op safety net" description. The DM migration is gone (see the previous commit) — INTERNALS now states the actual invariant: in CDP-borrowed mode DM is intentionally not attached anywhere, popup follow doesn't touch attach/detach, and popup-triggered downloads are a known limitation rather than a wiring bug. - **`docs/API.md` reorganization**: section title "DownloadManager (non-CDP / CDP-owned modes)" → "DownloadManager (unified across all modes)" with an intro listing how each pipeline populates the same surface. `download_manager` and `downloaded_files` row descriptions updated. - **`docs/KNOWN_LIMITATIONS.md` new entry**: "Popup-triggered Downloads in CDP-borrowed Mode Are Not Captured" with symptom, root cause (page-level `setDownloadBehavior` only governs the originating target — popup gets its own session), three workarounds, and a verification pointer to the new `test_popup_follow_does_not_attach_ download_manager` integration test. - **`_download.py` class docstring** (this commit only touches docs inside it, not code) documents the two coexisting wait APIs so future readers don't mistake them for aliases. No code changes in this commit. --- CLAUDE.md | 5 ++- README.md | 13 ++++++- README_zh.md | 12 +++++- docs/API.md | 20 ++++++---- docs/INTERNALS.md | 4 +- docs/KNOWN_LIMITATIONS.md | 38 +++++++++++++++++++ .../references/cli-sdk-api-mapping.md | 2 +- 7 files changed, 79 insertions(+), 15 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 1900b92..5f92d1d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -108,7 +108,7 @@ bridgic has two independent download pipelines, picked by mode: | Mode | Pipeline | Notes | |---|---|---| -| non-CDP (launch / persistent_context) | Playwright's per-context `setDownloadBehavior(allowAndName, downloadPath=)` → `download` events fire → `DownloadManager.save_as()` copies to `downloads_path` with the real filename. | Files land at the real filename in `downloads_path`. If `downloads_path` is unset, DownloadManager is not attached and files are lost when Playwright deletes `artifactsDir` on close. | +| non-CDP (launch / persistent_context) | Playwright's per-context `setDownloadBehavior(allowAndName, downloadPath=)` → `download` events fire → `DownloadManager.save_as()` copies to `downloads_path` with the real filename. | Files land at the real filename in `downloads_path`. If `downloads_path` is unset, `Browser.__init__` defaults the manager to `~/Downloads` (PR #28), so downloads are still captured — no more silently-lost files. | | CDP-owned (bridgic creates its own context on the remote Chrome) | Same as non-CDP: Playwright's per-context `allowAndName` routes through `artifactsDir`, DownloadManager copies. | Per-context override targets bridgic's own context, doesn't touch the user. | | **CDP-borrowed** (`Browser(cdp=...)` against a user's running Chrome) | bridgic's own override on bridgic's tab: `Browser.setDownloadBehavior(allowAndName, downloadPath=, eventsEnabled=true)` sent **via the page CDP session** (`BrowserContext.new_cdp_session(self._page)`). `CdpDownloadRenamer` subscribes to `Browser.downloadWillBegin/downloadProgress` on the same session and renames `/` → `/` on completion. | Page-session routing is the *only* form Chrome 138+ honors when the user has "Ask where to save each file" enabled — `Browser.setDownloadBehavior` over a browser-level session and `Page.setDownloadBehavior(allow, ...)` both still pop the dialog. See [empirically-tried alternatives](#empirically-tried-alternatives-for-cdp-borrowed-downloads) below. | @@ -159,7 +159,8 @@ agent-browser's `Some(session_id)` argument is the same trick — page-level CDP #### Caveats - **bridgic's tab gets the override; user's tabs keep their normal Chrome UX** (intentional — the page-session scope is bridgic's tab only). User-initiated downloads in their other tabs still go to their Chrome's configured directory and obey their "Ask where to save" pref. This is by design and matches the "I gave you full control of *my agent's* tab via `--cdp`" semantics — user's private workspace is untouched. -- **DownloadManager is not attached in CDP-borrowed mode.** Chrome writes directly to the final path; Playwright's per-context `download` event doesn't fire when the file is routed away from `artifactsDir`. `wait_for_download()` is correspondingly **unsupported in CDP-borrowed mode** — use CDP-owned or non-CDP for that. +- **DownloadManager in CDP-borrowed mode is NOT attached to any page or context.** Attaching to the borrowed context would hijack user tabs (privacy boundary); attaching page-scoped to bridgic's own tab was empirically verified to cause duplicate-record bugs — Playwright STILL fires `download` events in CDP-borrowed mode (`setDownloadBehavior(allowAndName)` on a page session does not suppress them), and `_handle_download.save_as` writes a 0-byte placeholder while the real file is already produced by `CdpDownloadRenamer`. Instead, `CdpDownloadRenamer.on_completed → DownloadManager.record_external_download` pipes real completions back into DM, populating `downloaded_files` / `_completed_queue` / `_pending_waiters`. `downloaded_files` / `wait_for_next_download` / `get_downloaded_files_text` therefore work for **downloads triggered on bridgic's primary tab** across all modes. The action-bound `wait_for_download()` variant (which still needs Playwright's `download` event to fire on bridgic's tab) **remains unsupported in CDP-borrowed mode** — use `wait_for_next_download()` instead. +- **Popup-triggered downloads in CDP-borrowed mode are not captured by the renamer.** `setDownloadBehavior(allowAndName, ...)` was sent on a single page-level CDP session bound to bridgic's original `self._page`; that override only scopes the originating target. Downloads triggered from an auto-followed popup fall back to Chrome's native UX (the user's "Ask where to save each file" preference governs) and `wait_for_next_download` will time out. See [docs/KNOWN_LIMITATIONS.md](docs/KNOWN_LIMITATIONS.md). - **The renamer is best-effort.** If a CDP event is missed or the OS rename fails (cross-FS, permission, etc.) the file stays at its GUID path with a warning logged. It never deletes content. - **`last_close_artifacts()`** exposes a `rescued_downloads` list when L2 actually moved anything. - **"Show in Folder"** in Chrome's download bubble is broken whenever `setDownloadBehavior(allowAndName, eventsEnabled=true)` is active. This is a Chromium bug (`#324282051`) affecting all CDP-using tools. See [docs/KNOWN_LIMITATIONS.md](docs/KNOWN_LIMITATIONS.md). diff --git a/README.md b/README.md index 787204b..3f39098 100644 --- a/README.md +++ b/README.md @@ -741,8 +741,17 @@ for f in browser.download_manager.downloaded_files: print(f"Downloaded: {f.file_name} ({f.file_size} bytes)") # CDP-borrowed (CdpDownloadRenamer pipeline; downloads land at downloads_path -# with real filenames; download_manager is None — wait_for_download / -# wait_for_next_download are unsupported here). +# with real filenames). CdpDownloadRenamer pipes completions back into +# DownloadManager via record_external_download, so wait_for_next_download / +# get_downloaded_files_text / downloaded_files work the same way as above +# for downloads triggered on bridgic's primary tab. wait_for_download (the +# action-bound, Playwright-event variant) remains unsupported in +# CDP-borrowed mode — use wait_for_next_download. +# +# Edge case: downloads triggered from an auto-followed popup are NOT +# routed through the renamer (setDownloadBehavior was bound to bridgic's +# original page session, not the popup), so wait_for_next_download will +# time out on those. See docs/KNOWN_LIMITATIONS.md. browser = Browser(cdp="auto", downloads_path="./downloads") ``` diff --git a/README_zh.md b/README_zh.md index 5d27f08..5fa6b99 100644 --- a/README_zh.md +++ b/README_zh.md @@ -734,8 +734,16 @@ print(await browser.get_downloaded_files_text()) for f in browser.download_manager.downloaded_files: print(f"已下载:{f.file_name}({f.file_size} 字节)") -# CDP-borrowed(CdpDownloadRenamer 流水线;文件以真名落到 downloads_path, -# download_manager 为 None —— wait_for_download / wait_for_next_download 在此模式不支持) +# CDP-borrowed(CdpDownloadRenamer 流水线;文件以真名落到 downloads_path) +# CdpDownloadRenamer 通过 record_external_download 把完成事件回灌到 +# DownloadManager,所以 wait_for_next_download / get_downloaded_files_text / +# downloaded_files 在 bridgic 主 tab 触发的下载场景下与非 CDP 模式一致。 +# wait_for_download(基于触发动作的 Playwright 事件版本)在此模式仍不支持 —— +# 请改用 wait_for_next_download。 +# +# 边界:从 auto-follow popup 上触发的下载不走 renamer(setDownloadBehavior +# 只绑定在 bridgic 原 page 的 CDP session),所以 wait_for_next_download 会 +# 超时。详见 docs/KNOWN_LIMITATIONS.md。 browser = Browser(cdp="auto", downloads_path="./downloads") ``` diff --git a/docs/API.md b/docs/API.md index c7bc25b..2286980 100644 --- a/docs/API.md +++ b/docs/API.md @@ -18,10 +18,10 @@ Short reference for the main session and download APIs. For tool lists and selec | `await browser.get_current_page_title()` | Get current page title string, or `None` if no page is open. | | `browser.get_current_page_url()` | Get current page URL string, or `None` if no page is open. (sync) | | `browser.get_config()` | Return dict of all current browser configuration options. | -| `await browser.get_downloaded_files_text()` | Numbered list of all files downloaded in this session, or `"No downloads in this session."` if none. Each line: `[N] filename — size — /path/to/file`. | -| `await browser.wait_for_next_download(timeout=30.0)` | Block until the next download completes and return a one-line summary, or a timeout message. **Call immediately after the action that triggers the download.** Timeout unit is seconds. Not supported in CDP-borrowed mode (the manager isn't attached there). | -| `browser.download_manager` | Always-created `DownloadManager` instance. Defaults to `~/Downloads` when `downloads_path` is not configured. In **CDP-borrowed** mode the manager is intentionally not attached to the borrowed context — `CdpDownloadRenamer` handles downloads instead. See [CdpDownloadRenamer](#cdp-borrowed-downloads-cdpdownloadrenamer). | -| `browser.downloaded_files` | Shortcut for `browser.download_manager.downloaded_files`. Returns `[]` in CDP-borrowed mode because the manager isn't attached there. | +| `await browser.get_downloaded_files_text()` | Numbered list of all files downloaded in this session (across all modes), or `"No downloads in this session."` if none. Each line: `[N] filename — size — /path/to/file`. | +| `await browser.wait_for_next_download(timeout=30.0)` | Block until the next download completes and return a one-line summary, or a timeout message. Timeout unit is seconds. Works for downloads triggered on bridgic's primary tab across all modes — in CDP-borrowed mode the completion is piped in by `CdpDownloadRenamer` after the rename. **Caveat**: in CDP-borrowed mode, downloads triggered from an auto-followed popup are NOT routed through the renamer and will time out here — see [docs/KNOWN_LIMITATIONS.md](KNOWN_LIMITATIONS.md). | +| `browser.download_manager` | Always-created `DownloadManager` instance. Defaults to `~/Downloads` when `downloads_path` is not configured. In **CDP-borrowed** mode the manager is NOT attached to any page or context — attaching would either hijack user tabs or cause duplicate-record bugs (Playwright still fires `download` events). Completions from `CdpDownloadRenamer` are pushed in via `record_external_download` so the tracking surface is unified. | +| `browser.downloaded_files` | Shortcut for `browser.download_manager.downloaded_files`. Populated across all modes for primary-tab downloads (CDP-borrowed entries arrive via `record_external_download`); popup-triggered CDP-borrowed downloads are not included — see [docs/KNOWN_LIMITATIONS.md](KNOWN_LIMITATIONS.md). | | `browser.headless` | `bool` — whether the browser runs in headless mode. | | `browser.viewport` | `dict` or `None` — current viewport size configuration. | | `browser.channel` | `str` or `None` — browser distribution channel. | @@ -43,17 +43,23 @@ bridgic has two download pipelines, picked by mode. The download path resolution | CDP-owned (rare — `Browser(cdp=...)` against a remote Chrome that has no contexts yet) | Same as non-CDP | Same as non-CDP. | | CDP-borrowed (`Browser(cdp=...)` against a user's running Chrome) | `CdpDownloadRenamer` (rename GUID → real name post-completion) via a page-level CDP session | `~/Downloads`. In daemon mode the CLI client's CWD (`os.getcwd()`) takes priority over the `~/Downloads` fallback — see [CLAUDE.md → Downloads](../CLAUDE.md#downloads). | -### DownloadManager (non-CDP / CDP-owned modes) +### DownloadManager (unified across all modes) `Browser` always creates a `DownloadManager`. It saves files to the configured `downloads_path`, or `~/Downloads` by default. Access it via `browser.download_manager` after the browser has started. +The pipeline that *populates* it differs by mode: + +- **Non-CDP / CDP-owned**: Playwright's `download` event → `DownloadManager._handle_download` → `download.save_as()` copies the file to `downloads_path` with its real name. +- **CDP-borrowed (bridgic's primary tab)**: `CdpDownloadRenamer` renames `` → real filename on the page CDP session, then calls back into `DownloadManager.record_external_download` to populate the same surfaces (`downloaded_files`, `_completed_queue`, `_pending_waiters`). +- **CDP-borrowed (popup-triggered)**: Not captured — see [KNOWN_LIMITATIONS.md](KNOWN_LIMITATIONS.md). + | Method / property | Description | |------------------|-------------| -| `browser.download_manager` | The `DownloadManager` instance. Always non-`None` after `__init__`. In CDP-borrowed mode it is created but not attached to the context; downloads in that mode go through `CdpDownloadRenamer` instead. | +| `browser.download_manager` | The `DownloadManager` instance. Always non-`None` after `__init__`. In CDP-borrowed mode it is NOT attached anywhere (attach to context would hijack user tabs; attach to bridgic's page would cause duplicate-record bugs). CDP-borrowed completions flow in via `record_external_download` from `CdpDownloadRenamer`, so this instance is the single tracking surface across all modes. | | `browser.download_manager.downloaded_files` | List of `DownloadedFile` (`.url`, `.path`, `.file_name`, `.file_size`). | | `await browser.download_manager.wait_for_next_download(timeout=30.0)` | Wait up to *timeout* seconds for the next download to complete. Returns `DownloadedFile` or `None` on timeout. | -`browser.wait_for_download(...)` is **not supported in CDP-borrowed mode** — Playwright's per-context `download` event does not fire when the file is routed away from `artifactsDir` by the page-session override. +`browser.wait_for_download(page, action, timeout)` (the action-bound, Playwright-event variant) is **not supported in CDP-borrowed mode** — Playwright's per-context `download` event does not fire when the file is routed away from `artifactsDir` by the page-session override. Use `wait_for_next_download(timeout)` instead, which works uniformly across all modes (CDP-borrowed completions arrive via `CdpDownloadRenamer` → `DownloadManager.record_external_download`). ### CDP-borrowed downloads (CdpDownloadRenamer) diff --git a/docs/INTERNALS.md b/docs/INTERNALS.md index ac3a6cc..18e3fbe 100644 --- a/docs/INTERNALS.md +++ b/docs/INTERNALS.md @@ -567,7 +567,9 @@ When `self._page` is closed, `_select_fallback_page` returns the first match fro - `_mark_owned(page)` is idempotent: it adds to set + stack, then registers a `page.on("close", self._on_owned_page_close)` callback for automatic pruning. - `_on_owned_page_close` only prunes bookkeeping; it deliberately does NOT touch `self._page` — the `_close_page` flow handles that, and double-handling would cause double video/download swaps. -- `_switch_self_page_to(new_page)` consolidates "move self._page" side effects: focus stack push, `_invalidate_page_state()`, `_switch_video_to_page()`. A legacy `DownloadManager.detach_from_page(old) + attach_to_page(new)` swap is kept as a no-op safety net — DownloadManager is not the active pipeline in CDP-borrowed mode; `CdpDownloadRenamer` is, and it stays bound to the original `self._page`'s CDP session. Downloads triggered from a followed popup therefore are **not** subject to the renamer; they fall back to Playwright's per-context override (artifactsDir) and rely on the L2 rescue net on close. See [CLAUDE.md → Downloads](../CLAUDE.md#downloads). +- `_switch_self_page_to(new_page)` consolidates "move self._page" side effects: focus stack push, `_invalidate_page_state()`, `_switch_video_to_page()`. It does NOT touch `DownloadManager.attach_to_page` / `detach_from_page` — in CDP-borrowed mode the DM is intentionally not attached anywhere (page-scoped attach was tried and caused duplicate-record bugs: Playwright still fires `download` events even when `allowAndName` routes the file away from `artifactsDir`, so `_handle_download.save_as` writes a 0-byte placeholder while the real file is already produced by `CdpDownloadRenamer`); in non-CDP modes the global `attach_to_context` handles all pages. + + **Popup edge case in CDP-borrowed mode**: `CdpDownloadRenamer` stays bound to the original `self._page`'s CDP session — `setDownloadBehavior(allowAndName, ...)` was sent on that single page session and only governs *that target*. Downloads triggered from a followed popup are NOT routed through the renamer; they fall back to Chrome's native "Save As" UX (the user's preference governs) and `wait_for_next_download` will time out. The L2 rescue net captures these on close if they ended up in Playwright's artifactsDir. See [CLAUDE.md → Downloads](../CLAUDE.md#downloads) and [docs/KNOWN_LIMITATIONS.md](KNOWN_LIMITATIONS.md). ### Adoption truth table (CDP borrowed mode) diff --git a/docs/KNOWN_LIMITATIONS.md b/docs/KNOWN_LIMITATIONS.md index 556b993..d086bee 100644 --- a/docs/KNOWN_LIMITATIONS.md +++ b/docs/KNOWN_LIMITATIONS.md @@ -73,6 +73,44 @@ triggered by the CDP `setDownloadBehavior` command, not a bridgic-browser issue. --- +## Popup-triggered Downloads in CDP-borrowed Mode Are Not Captured + +### Symptom + +In **CDP-borrowed** mode (`Browser(cdp=...)` against a user's running Chrome), if bridgic opens a popup (e.g. a `` click that auto-follows, or a `window.open()` flow) and the download is triggered **from that popup**, then: + +- `await browser.wait_for_next_download(timeout=N)` times out and returns the "No download completed within Ns timeout." message. +- `await browser.get_downloaded_files_text()` does not list the file. +- `browser.downloaded_files` does not contain the file. +- Chrome may show its native "Save As" dialog (depending on the user's "Ask where to save each file" preference) or save silently to Chrome's configured Downloads directory. + +Downloads triggered from **bridgic's primary tab** (`self._page`) work normally and surface through all the APIs above. + +### Root Cause + +In CDP-borrowed mode bridgic takes over downloads by sending +`Browser.setDownloadBehavior(behavior="allowAndName", downloadPath=..., eventsEnabled=true)` over a **page-level CDP session** attached to `self._page`. This is the only CDP form that bypasses Chrome's "Ask where to save each file" user preference (Chrome 138+); a browser-level session still pops the dialog. + +The trade-off: a page-level `setDownloadBehavior` scopes to **that target only**. When bridgic auto-follows a popup, `self._page` moves to the popup, but the CDP session and `CdpDownloadRenamer` stay bound to the original page session — they cannot observe download events fired on the popup target. + +### Workarounds + +1. **Avoid auto-follow**: trigger the download from a regular link in bridgic's primary tab, not a `target="_blank"` link. +2. **Pre-arm the popup**: if you control the page, change `target="_blank"` to a same-page link, or download via `fetch()` + `URL.createObjectURL(...)` from bridgic's primary tab. +3. **Switch modes**: use **CDP-owned** (`Browser(cdp=...)` against a remote Chrome with no contexts yet) or non-CDP mode — both attach `DownloadManager` to the whole context, so any tab's downloads are captured. + +### Verification + +`tests/integration/test_owned_pages.py::test_popup_follow_does_not_attach_download_manager` guards the invariant that DM stays unattached after popup follow in CDP-borrowed mode — confirming the limitation is by design, not an accidental wiring issue. (Earlier versions of the code attempted a page-scoped attach + migration; that approach was reverted after empirical testing showed it produced 0-byte placeholder files via Playwright `download` events that still fire in CDP-borrowed mode.) + +### References + +- [bridgic CLAUDE.md → Downloads](../CLAUDE.md#downloads) — full design including empirically-tried alternatives for CDP-borrowed downloads. +- `bridgic/browser/session/_browser.py::_start()` — CDP-borrowed L1 override is sent on `await self._context.new_cdp_session(self._page)`, the page-scoped session. +- `bridgic/browser/session/_cdp_download_renamer.py` — the renamer attaches to that single page session. + +--- + ## Stealth: TLS Fingerprint Cannot Be Matched in Headless Mode ### Symptom diff --git a/skills/bridgic-browser/references/cli-sdk-api-mapping.md b/skills/bridgic-browser/references/cli-sdk-api-mapping.md index a6afe79..ac7d7f8 100644 --- a/skills/bridgic-browser/references/cli-sdk-api-mapping.md +++ b/skills/bridgic-browser/references/cli-sdk-api-mapping.md @@ -140,7 +140,7 @@ This model is the foundation of all correspondence in this guide. - SDK-only params: `display_header_footer`, `print_background`, `scale`, `paper_width`, `paper_height`, `margin_top`, `margin_bottom`, `margin_left`, `margin_right`, `landscape` - `video-stop path.webm` -> `stop_video(filename="path.webm")` - `downloads` -> `get_downloaded_files_text()` — returns a numbered human-readable list of all files downloaded in the session, or `"No downloads in this session."` if none -- `wait-download [SECONDS]` -> `wait_for_next_download(timeout=30.0)` — **unit is SECONDS** (default 30); blocks until the next download completes and returns a one-line summary (`"Download complete: filename — size — path"`), or a timeout message if no download arrives within the limit. Call immediately after the action that triggers the download. +- `wait-download [SECONDS]` -> `wait_for_next_download(timeout=30.0)` — **unit is SECONDS** (default 30); blocks until the next download completes and returns a one-line summary (`"Download complete: filename — size — path"`), or a timeout message if no download arrives within the limit. Call immediately after the action that triggers the download. **Works across all modes** (non-CDP / CDP-owned / CDP-borrowed); CDP-borrowed completions arrive via `CdpDownloadRenamer.on_completed → record_external_download`. **Caveat**: in CDP-borrowed mode, downloads triggered from an auto-followed popup are NOT captured (the renamer is bound to bridgic's primary page session only) — `wait-download` will time out on those. See `docs/KNOWN_LIMITATIONS.md`. - `type TEXT [--submit]` -> `type_text(text, submit=False)` — **requires a focused element**; call `focus_element_by_ref` or `click_element_by_ref` on the target before `type` - `eval-on REF CODE` -> `evaluate_javascript_on_ref(ref, code)` — **CODE must be an arrow or named function** that accepts the element: - `"(el) => el.textContent"` ✓