diff --git a/examples/cloud_browser.py b/examples/cloud_browser.py index f641ea6..f0e7ed3 100644 --- a/examples/cloud_browser.py +++ b/examples/cloud_browser.py @@ -13,15 +13,15 @@ async def main() -> None: session_timeout=3600, # Optional: session timeout in seconds ) - # Run a task in this browser window. - response = await window.agent( - prompt=( - 'Search for "LLM Compiler" on Google and open the first arXiv paper on the results ' - "page, then tell me who the authors are." - ) + # Run a task in this browser window. + response = await window.agent( + prompt=( + 'Search for "LLM Compiler" on Google and open the first arXiv paper on the results ' + "page, then tell me who the authors are." ) + ) - print("Response:", response.model_dump_json(indent=2)) + print("Response:", response.model_dump_json(indent=2)) # The cloud session is still running after exiting the context manager. # You can save the session ID for later reconnection or management. @@ -50,6 +50,10 @@ async def main() -> None: ) await remote_window.close() # This will stop the cloud session. + # Get files downloaded during the session + downloaded_files = await window.get_downloaded_files() + print(f"Downloaded files {downloaded_files}") + ############################################################################ # IMPORTANT: The cloud browser continues accruing costs until the session # # is stopped or times out. To avoid unexpected costs, make sure to stop # diff --git a/packages/narada/src/narada/client.py b/packages/narada/src/narada/client.py index 695d27c..b25c252 100644 --- a/packages/narada/src/narada/client.py +++ b/packages/narada/src/narada/client.py @@ -261,14 +261,6 @@ async def _initialize_cloud_browser_window( logging.info("Waiting for Narada extension to be installed...") await asyncio.sleep(1) - # TODO: consider this - # Get side panel page - # side_panel_url = create_side_panel_url(config, browser_window_id) - # side_panel_page = next( - # (p for p in context.pages if p.url == side_panel_url), None - # ) - # await self._fix_download_behavior(side_panel_page) - cloud_window = CloudBrowserWindow( browser_window_id=browser_window_id, session_id=session_id, diff --git a/packages/narada/src/narada/window.py b/packages/narada/src/narada/window.py index 4ea9050..a02576a 100644 --- a/packages/narada/src/narada/window.py +++ b/packages/narada/src/narada/window.py @@ -3,6 +3,7 @@ import os import time from abc import ABC +from dataclasses import dataclass from http import HTTPStatus from pathlib import Path from typing import IO, Any, TypeVar, overload, override @@ -69,6 +70,15 @@ class _PresignedPost(BaseModel): fields: dict[str, Any] +@dataclass +class SessionDownloadItem: + """A file downloaded during a cloud browser session (file name, size, presigned GET URL).""" + + file_name: str + size: int + download_url: str + + class BaseBrowserWindow(ABC): _auth_headers: dict[str, str] _base_url: str @@ -639,6 +649,18 @@ async def close(self, *, timeout: int | None = None) -> None: timeout=timeout, ) + async def get_downloaded_files(self) -> list[SessionDownloadItem]: + """Return files downloaded during this cloud browser session (file name, size, presigned GET URL per file).""" + if self._cloud_browser_session_id is None: + raise ValueError( + "Cloud browser session ID is required to get downloaded files" + ) + return await _get_cloud_browser_downloads( + base_url=self._base_url, + auth_headers=self._auth_headers, + session_id=self._cloud_browser_session_id, + ) + def __str__(self) -> str: return f"RemoteBrowserWindow(browser_window_id={self.browser_window_id})" @@ -687,6 +709,14 @@ async def close(self, *, timeout: int | None = None) -> None: timeout=timeout, ) + async def get_downloaded_files(self) -> list[SessionDownloadItem]: + """Return files downloaded during this cloud browser session (file name, size, presigned GET URL per file).""" + return await _get_cloud_browser_downloads( + base_url=self._base_url, + auth_headers=self._auth_headers, + session_id=self._session_id, + ) + def __str__(self) -> str: return ( "CloudBrowserWindow(" @@ -696,6 +726,70 @@ def __str__(self) -> str: ) +async def _fetch_presigned_download_url( + http_session: aiohttp.ClientSession, + *, + base_url: str, + auth_headers: dict[str, str], + session_id: str, + key: str, + timeout: aiohttp.ClientTimeout, +) -> str: + async with http_session.get( + f"{base_url}/cloud-browser/replay/download-url", + params={"session_id": session_id, "key": key}, + headers=auth_headers, + timeout=timeout, + ) as resp: + resp.raise_for_status() + data = await resp.json() + return data["presigned_url"] + + +async def _get_cloud_browser_downloads( + *, + base_url: str, + auth_headers: dict[str, str], + session_id: str, +) -> list[SessionDownloadItem]: + """GET cloud-browser session downloads and return list of SessionDownloadItem with presigned URLs.""" + timeout = aiohttp.ClientTimeout(total=60) + async with aiohttp.ClientSession() as http_session: + async with http_session.get( + f"{base_url}/cloud-browser/replay/downloads", + params={"session_id": session_id}, + headers=auth_headers, + timeout=timeout, + ) as resp: + resp.raise_for_status() + data = await resp.json() + files = data.get("downloaded_files") or [] + if not files: + return [] + + presigned_urls = await asyncio.gather( + *[ + _fetch_presigned_download_url( + http_session, + base_url=base_url, + auth_headers=auth_headers, + session_id=session_id, + key=f["key"], + timeout=timeout, + ) + for f in files + ] + ) + return [ + SessionDownloadItem( + file_name=item["file_name"], + size=item["size"], + download_url=presigned_urls[i], + ) + for i, item in enumerate(files) + ] + + async def _stop_cloud_browser_session( *, base_url: str, @@ -709,7 +803,7 @@ async def _stop_cloud_browser_session( f"{base_url}/cloud-browser/stop-cloud-browser-session", headers=auth_headers, json={"session_id": session_id}, - timeout=aiohttp.ClientTimeout(total=timeout or 10), + timeout=aiohttp.ClientTimeout(total=timeout or 40), ) as resp: if resp.ok: response_data = await resp.json()