diff --git a/examples/agentic_mouse_action.py b/examples/agentic_mouse_action.py index add01d2..50cf89c 100644 --- a/examples/agentic_mouse_action.py +++ b/examples/agentic_mouse_action.py @@ -1,14 +1,15 @@ import asyncio -from narada import Narada +from narada import Agent, BrowserEnvironment async def main() -> None: - async with Narada() as narada: - window = await narada.open_and_initialize_browser_window() + env = BrowserEnvironment() + agent = Agent(environment=env) - await window.go_to_url(url="https://www.google.com") - await window.agentic_mouse_action( + try: + await agent.go_to_url(url="https://www.google.com") + await agent.agentic_mouse_action( action={"type": "click"}, recorded_click={ "x": 500, @@ -21,7 +22,7 @@ async def main() -> None: fallback_operator_query="click on the search box", ) - await window.agentic_mouse_action( + await agent.agentic_mouse_action( action={"type": "fill", "text": "Narada AI", "press_enter": False}, recorded_click={ "x": 500, @@ -34,7 +35,7 @@ async def main() -> None: fallback_operator_query='type "Narada AI" in the search box', ) - await window.agentic_mouse_action( + await agent.agentic_mouse_action( action={"type": "scroll", "horizontal": 0, "vertical": 500}, recorded_click={ "x": 640, @@ -46,6 +47,8 @@ async def main() -> None: }, fallback_operator_query="scroll down the page", ) + finally: + await env.close() if __name__ == "__main__": diff --git a/examples/agentic_selector.py b/examples/agentic_selector.py index 4d2a32b..3b84a03 100644 --- a/examples/agentic_selector.py +++ b/examples/agentic_selector.py @@ -1,17 +1,16 @@ import asyncio -from narada import Narada +from narada import Agent, BrowserEnvironment async def main() -> None: - # Initialize the Narada client. - async with Narada() as narada: - # Open a new browser window and initialize the Narada UI agent. 
- window = await narada.open_and_initialize_browser_window() + env = BrowserEnvironment() + agent = Agent(environment=env) - await window.go_to_url(url="https://www.google.com") + try: + await agent.go_to_url(url="https://www.google.com") - await window.agentic_selector( + await agent.agentic_selector( action={"type": "fill", "value": "Narada AI"}, selectors={ "tag_name": "textarea", @@ -20,13 +19,15 @@ async def main() -> None: fallback_operator_query='type "Narada AI" in the search box', ) - await window.agentic_selector( + await agent.agentic_selector( action={"type": "click"}, selectors={ "xpath": "/html/body/div[2]/div[4]/form/div[1]/div[1]/div[2]/div[4]/div[6]/center/input[1]", }, fallback_operator_query="click on the Google Search button", ) + finally: + await env.close() if __name__ == "__main__": diff --git a/examples/agentic_selector_get_property.py b/examples/agentic_selector_get_property.py index d59f296..b284db0 100644 --- a/examples/agentic_selector_get_property.py +++ b/examples/agentic_selector_get_property.py @@ -1,29 +1,32 @@ import asyncio -from narada import Narada +from narada import Agent, BrowserEnvironment async def main() -> None: - async with Narada() as narada: - window = await narada.open_and_initialize_browser_window() + env = BrowserEnvironment() + agent = Agent(environment=env) - await window.go_to_url(url="https://app.narada.ai", timeout=60) - property_response = await window.agentic_selector( + try: + await agent.go_to_url(url="https://app.narada.ai", timeout=60) + property_response = await agent.agentic_selector( action={"type": "get_property", "property_name": "className"}, selectors={"data_testid": "create-new-agent-button"}, fallback_operator_query="get className from create button", timeout=60, ) - print(f"Class Name: {property_response['value']}") + print(f"Class Name: {property_response.value}") print("\nTest 2: Getting text content...") - text_response = await window.agentic_selector( + text_response = await 
agent.agentic_selector( action={"type": "get_text"}, selectors={"data_testid": "create-new-agent-button"}, fallback_operator_query="get text from create button", timeout=60, ) - print(f"Text: {text_response['value']}") + print(f"Text: {text_response.value}") + finally: + await env.close() if __name__ == "__main__": diff --git a/examples/attachment.py b/examples/attachment.py index ddaf929..e3a7a98 100644 --- a/examples/attachment.py +++ b/examples/attachment.py @@ -1,26 +1,26 @@ import asyncio from pathlib import Path -from narada import Agent, Narada +from narada import Agent, AgentKind, BrowserEnvironment async def main() -> None: - async with Narada() as narada: - window = await narada.open_and_initialize_browser_window() + env = BrowserEnvironment() + agent = Agent(environment=env, kind=AgentKind.CORE_AGENT) - # Upload a file to be later used as an attachment. + try: + # Pass a file-like object as an attachment. The SDK uploads it automatically + # before dispatching the request. current_dir = Path(__file__).parent - with open(current_dir / "demo_attachment_file.txt") as f: - file = await window.upload_file(file=f) - - # Ask the agent to use the attachment. - response = await window.agent( - prompt="Summarize the attached file.", - agent=Agent.CORE_AGENT, - attachment=file, - ) + with open(current_dir / "demo_attachment_file.txt", "rb") as f: + response = await agent.run( + prompt="Summarize the attached file.", + attachment=f, + ) print("Response:", response.model_dump_json(indent=2)) + finally: + await env.close() if __name__ == "__main__": diff --git a/examples/choose_agent.py b/examples/choose_agent.py index 28ccea6..b063a10 100644 --- a/examples/choose_agent.py +++ b/examples/choose_agent.py @@ -1,18 +1,19 @@ import asyncio -from narada import Agent, Narada +from narada import Agent, AgentKind, BrowserEnvironment async def main() -> None: - # Initialize the Narada client. 
- async with Narada() as narada: - # Open a new browser window and initialize the Narada UI agent. - window = await narada.open_and_initialize_browser_window() + env = BrowserEnvironment() + agent = Agent(environment=env, kind=AgentKind.CORE_AGENT) + try: # Choose a specific agent to handle the task. By default, the Operator agent is used. - response = await window.agent(prompt="Tell me a joke.", agent=Agent.CORE_AGENT) + response = await agent.run(prompt="Tell me a joke.") print("Response:", response.model_dump_json(indent=2)) + finally: + await env.close() if __name__ == "__main__": diff --git a/examples/cloud_browser.py b/examples/cloud_browser.py index f0e7ed3..5571444 100644 --- a/examples/cloud_browser.py +++ b/examples/cloud_browser.py @@ -1,58 +1,59 @@ import asyncio -from narada import Narada -from narada.window import RemoteBrowserWindow +from narada import Agent, CloudBrowserEnvironment, RemoteBrowserEnvironment async def main() -> None: - # Initialize the Narada client. - async with Narada() as narada: - # Open a cloud browser window and initialize the Narada UI agent. - window = await narada.open_and_initialize_cloud_browser_window( - session_name="my-cloud-browser-session", # Optional: label the session - session_timeout=3600, # Optional: session timeout in seconds - ) + # Create a cloud browser environment. It initializes lazily on the first action. + env = CloudBrowserEnvironment( + session_name="my-cloud-browser-session", # Optional: label the session + session_timeout=3600, # Optional: session timeout in seconds + ) + agent = Agent(environment=env) - # Run a task in this browser window. - response = await window.agent( - prompt=( - 'Search for "LLM Compiler" on Google and open the first arXiv paper on the results ' - "page, then tell me who the authors are." + cloud_browser_session_id = None + browser_window_id = None + + try: + # Run a task in this cloud browser. 
+ response = await agent.run( + prompt=( + 'Search for "LLM Compiler" on Google and open the first arXiv paper on the results ' + "page, then tell me who the authors are." + ) ) - ) - print("Response:", response.model_dump_json(indent=2)) + print("Response:", response.model_dump_json(indent=2)) - # The cloud session is still running after exiting the context manager. - # You can save the session ID for later reconnection or management. - cloud_browser_session_id = window.cloud_browser_session_id - browser_window_id = window.browser_window_id + # The cloud session keeps running until explicitly stopped or it times out. + # Save these IDs for later reconnection or management. + cloud_browser_session_id = env.cloud_browser_session_id + browser_window_id = env.browser_window_id - # Change these to test the different options below. - stop_session_now = False + # Get files downloaded during the session. + downloaded_files = await env.get_downloaded_files() + print(f"Downloaded files {downloaded_files}") - # The cloud session runs independently. If you want to stop it after the task is - # complete, you can explicitly close it. The session will also auto-expire after the - # configured session_timeout. - if stop_session_now: - print( - f"Stopping cloud session {cloud_browser_session_id} through original window" - ) - await window.close() - else: - # Create a `RemoteBrowserWindow` instance with the session ID to manage the session later. - print( - f"Stopping cloud session {cloud_browser_session_id} through RemoteBrowserWindow" - ) - remote_window = RemoteBrowserWindow( - cloud_browser_session_id=cloud_browser_session_id, - browser_window_id=browser_window_id, - ) - await remote_window.close() # This will stop the cloud session. + finally: + # Change this to test stopping through the original environment versus + # reconnecting with a remote environment. 
+ stop_session_through_original_environment = False - # Get files downloaded during the session - downloaded_files = await window.get_downloaded_files() - print(f"Downloaded files {downloaded_files}") + if cloud_browser_session_id is None or browser_window_id is None: + await env.close() + elif stop_session_through_original_environment: + print(f"Stopping cloud session {cloud_browser_session_id}") + await env.close() + else: + # Create a RemoteBrowserEnvironment with the session ID to manage the session later. + print( + f"Stopping cloud session {cloud_browser_session_id} through RemoteBrowserEnvironment" + ) + remote_env = RemoteBrowserEnvironment( + cloud_browser_session_id=cloud_browser_session_id, + browser_window_id=browser_window_id, + ) + await remote_env.close() # This will stop the cloud session. ############################################################################ # IMPORTANT: The cloud browser continues accruing costs until the session # diff --git a/examples/complex_workflow.py b/examples/complex_workflow.py index 149b045..112731b 100644 --- a/examples/complex_workflow.py +++ b/examples/complex_workflow.py @@ -1,6 +1,6 @@ import asyncio -from narada import Agent, Narada +from narada import Agent, AgentKind, BrowserEnvironment from pydantic import BaseModel @@ -14,16 +14,15 @@ class Papers(BaseModel): async def main() -> None: - # Initialize the Narada client. - async with Narada() as narada: - # Open a new browser window and initialize the Narada UI agent. 
- window = await narada.open_and_initialize_browser_window() + env = BrowserEnvironment() + core_agent = Agent(environment=env, kind=AgentKind.CORE_AGENT) + operator = Agent(environment=env) - await window.go_to_url(url="https://arxiv.org/list/cs.AI/recent") + try: + await core_agent.go_to_url(url="https://arxiv.org/list/cs.AI/recent") - resp = await window.agent( + resp = await core_agent.run( prompt="What are the top 2 AI papers based on the current page?", - agent=Agent.CORE_AGENT, output_schema=Papers, ) @@ -33,10 +32,12 @@ async def main() -> None: print("Top 2 AI papers:", papers.model_dump_json(indent=2)) for paper in papers.papers: - await window.go_to_url(url=paper.url) - await window.agent(prompt="Click 'View PDF' then download the PDF") + await operator.go_to_url(url=paper.url) + await operator.run(prompt="Click 'View PDF' then download the PDF") - await window.print_message(message="All done!") + await operator.print_message(message="All done!") + finally: + await env.close() if __name__ == "__main__": diff --git a/examples/conversation.py b/examples/conversation.py index 2611117..d1b560b 100644 --- a/examples/conversation.py +++ b/examples/conversation.py @@ -1,38 +1,32 @@ import asyncio -from narada import Agent, Narada +from narada import Agent, AgentKind, BrowserEnvironment async def main() -> None: - # Initialize the Narada client. - async with Narada() as narada: - # Open a new browser window and initialize the Narada UI agent. - window = await narada.open_and_initialize_browser_window() + env = BrowserEnvironment() + agent = Agent(environment=env, kind=AgentKind.CORE_AGENT) - resp = await window.agent( + try: + resp = await agent.run( prompt="Pick a lucky number for me between 1 and 100", - agent=Agent.CORE_AGENT, - # By default, the chat history is cleared when an agent is invoked so that the agent can - # start fresh. 
- clear_chat=True, ) print(resp.text) - resp = await window.agent( + resp = await agent.run( prompt="What did you pick again?", - agent=Agent.CORE_AGENT, - # By not clearing the chat history, we can continue the conversation. - clear_chat=False, + # Pass the previous request ID to continue from the earlier response. + previous_request_id=resp.request_id, ) print(resp.text) - resp = await window.agent( + resp = await agent.run( prompt="What's double that number?", - agent=Agent.CORE_AGENT, - # By not clearing the chat history, we can continue the conversation. - clear_chat=False, + previous_request_id=resp.request_id, ) print(resp.text) + finally: + await env.close() if __name__ == "__main__": diff --git a/examples/critic.py b/examples/critic.py index 9f80c70..372ff56 100644 --- a/examples/critic.py +++ b/examples/critic.py @@ -1,6 +1,6 @@ import asyncio -from narada import CriticConfig, Narada +from narada import Agent, BrowserEnvironment, CriticConfig from pydantic import BaseModel, Field @@ -10,10 +10,10 @@ class SearchCriticOutput(BaseModel): async def main() -> None: - # Initialize the Narada client. - async with Narada() as narada: - window = await narada.open_and_initialize_browser_window() + env = BrowserEnvironment() + agent = Agent(environment=env) + try: # Define a critic that verifies the agent completed the task and extracts # additional structured information from the agent's actions. critic = CriticConfig( @@ -26,13 +26,15 @@ async def main() -> None: # Run a task with the critic. After the main agent finishes, the critic # evaluates whether the task was completed successfully. 
- response = await window.agent( + response = await agent.run( prompt='Search Google for "Narada AI" and tell me how many results were found.', critic=critic, ) print("Agent response:", response.text) print("Critic result:", response.critic_result.validation_passed) + finally: + await env.close() if __name__ == "__main__": diff --git a/examples/custom_agent.py b/examples/custom_agent.py index 9637b0c..0d83d25 100644 --- a/examples/custom_agent.py +++ b/examples/custom_agent.py @@ -1,26 +1,25 @@ import asyncio -from narada import Narada +from narada import Agent, BrowserEnvironment async def main() -> None: - # Initialize the Narada client. - async with Narada() as narada: - # Open a new browser window and initialize the Narada UI agent. - window = await narada.open_and_initialize_browser_window() - - # Run a custom agent with a prompt (mapped to `chat_input` server-side). - # - # The definition of this demo agent can be viewed at: - # https://app.narada.ai/agent-studio/agents/e9d8vb8Q7bD2AcaSkqmRZ - custom_agent = "/demo@narada.ai/greeter-agent" - chat_input = "John Doe" - response = await window.agent( - prompt=chat_input, - agent=custom_agent, - ) + env = BrowserEnvironment() + + # Run a custom agent with a prompt (mapped to `chat_input` server-side). 
+ # + # The definition of this demo agent can be viewed at: + # https://app.narada.ai/agent-studio/agents/e9d8vb8Q7bD2AcaSkqmRZ + custom_agent = "/demo@narada.ai/greeter-agent" + agent = Agent(environment=env, kind=custom_agent) + chat_input = "John Doe" + + try: + response = await agent.run(prompt=chat_input) print("Response:", response.model_dump_json(indent=2)) + finally: + await env.close() if __name__ == "__main__": diff --git a/examples/excel_sheets.py b/examples/excel_sheets.py index 7b2aa8c..5de1d93 100644 --- a/examples/excel_sheets.py +++ b/examples/excel_sheets.py @@ -1,15 +1,16 @@ import asyncio -from narada import Narada +from narada import Agent, BrowserEnvironment async def main() -> None: - async with Narada() as narada: - window = await narada.open_and_initialize_browser_window() + env = BrowserEnvironment() + agent = Agent(environment=env) + try: # To read from an Excel workbook, connect the Microsoft account that has access # to the workbook and use the workbook URL from Excel Online. - resp = await window.read_excel_sheet( + resp = await agent.read_excel_sheet( workbook_url="https://contoso.sharepoint.com/:x:/r/sites/Team/Shared%20Documents/Workbook.xlsx", range="Sheet1!A1:D10", microsoft_account_email="person@example.com", @@ -19,12 +20,14 @@ async def main() -> None: # To write to an Excel workbook, you need to have write permission to the workbook. # You can copy the workbook URL from Excel Online. 
# - # await window.write_excel_sheet( + # await agent.write_excel_sheet( # workbook_url="WORKBOOK_URL", # range="Sheet1!A11:D12", # microsoft_account_email="person@example.com", # values=[["hello", "world", "foo", "bar"], ["1", "2", "3", "4"]], # ) + finally: + await env.close() if __name__ == "__main__": diff --git a/examples/existing_browser.py b/examples/existing_browser.py index 1a24c3d..3d6ae13 100644 --- a/examples/existing_browser.py +++ b/examples/existing_browser.py @@ -2,7 +2,7 @@ import subprocess import sys -from narada import Narada +from narada import Agent, BrowserEnvironment from narada.config import BrowserConfig @@ -49,20 +49,25 @@ async def main() -> None: # Step 2: Use Narada SDK to attach to the existing browser. print("Connecting to existing browser with Narada SDK...") - async with Narada() as narada: + env = BrowserEnvironment(config=config, attach_to_existing=True) + agent = Agent(environment=env) + + try: # Attach to the existing browser window. - window = await narada.initialize_in_existing_browser_window(config) + await env.start() - print(f"Successfully attached to browser window: {window.browser_window_id}") + print(f"Successfully attached to browser window: {env.browser_window_id}") # Run a task in this browser window - response = await window.agent( + response = await agent.run( prompt='Search for "LLM Compiler" on Google and open the first arXiv paper on the results page, then open the PDF. 
Then download the PDF of the paper.', # Optionally generate a GIF of the agent's actions generate_gif=True, ) print("Response:", response.model_dump_json(indent=2)) + finally: + await env.close() if __name__ == "__main__": diff --git a/examples/get_url.py b/examples/get_url.py index 661af3d..e53e5f8 100644 --- a/examples/get_url.py +++ b/examples/get_url.py @@ -1,15 +1,18 @@ import asyncio -from narada import Narada +from narada import Agent, BrowserEnvironment async def main() -> None: - async with Narada() as narada: - window = await narada.open_and_initialize_browser_window() + env = BrowserEnvironment() + agent = Agent(environment=env) - await window.go_to_url(url="https://www.google.com", timeout=60) - result = await window.get_url(timeout=30) + try: + await agent.go_to_url(url="https://www.google.com", timeout=60) + result = await agent.get_url(timeout=30) print(f"Current URL: {result.url}") + finally: + await env.close() if __name__ == "__main__": diff --git a/examples/google_sheets.py b/examples/google_sheets.py index 8e2ee2f..d53fbf2 100644 --- a/examples/google_sheets.py +++ b/examples/google_sheets.py @@ -1,14 +1,15 @@ import asyncio -from narada import Narada +from narada import Agent, BrowserEnvironment async def main() -> None: - async with Narada() as narada: - window = await narada.open_and_initialize_browser_window() + env = BrowserEnvironment() + agent = Agent(environment=env) + try: # Read from a public Google Sheet. - resp = await window.read_google_sheet( + resp = await agent.read_google_sheet( spreadsheet_id="1COnQZsoxb_eMKWscX3e5OuFk-xQAHWza9QN2Tw0H6sg", range="Sheet1!A1:D10", ) @@ -18,11 +19,13 @@ async def main() -> None: # the spreadsheet ID from the URL of the sheet, which looks like: # https://docs.google.com/spreadsheets/d/{SPREADSHEET_ID}/... 
# - # await window.write_google_sheet( + # await agent.write_google_sheet( # spreadsheet_id="SPREADSHEET_ID", # range="Sheet1!A11:D12", # values=[["hello", "world", "foo", "bar"], ["1", "2", "3", "4"]], # ) + finally: + await env.close() if __name__ == "__main__": diff --git a/examples/human_in_the_loop.py b/examples/human_in_the_loop.py index 96684cb..4799ac1 100644 --- a/examples/human_in_the_loop.py +++ b/examples/human_in_the_loop.py @@ -1,69 +1,67 @@ import asyncio +from narada import Agent, AgentKind, BrowserEnvironment, UserAbortedError from narada_core.actions.models import PromptForUserInputVariable -from narada import Agent, Narada, UserAbortedError - async def main() -> None: - async with Narada() as narada: - # Open a browser window where Narada can show the human-in-the-loop UI. - window = await narada.open_and_initialize_browser_window() + # Use a browser environment where Narada can show the human-in-the-loop UI. + env = BrowserEnvironment() + agent = Agent(environment=env, kind=AgentKind.CORE_AGENT) - try: - # Use prompt_for_user_input when the script needs runtime details - # that should come from a person instead of being hard-coded. - values = await window.prompt_for_user_input( - step_id="collect-research-details", - variables=[ - PromptForUserInputVariable( - name="company", - type="string", - required=True, - ), - PromptForUserInputVariable( - name="research_focus", - type="enum", - required=True, - enum_values=["pricing", "customers", "recent news"], - ), - ], - prompt_message="Tell us which company to research and what to focus on.", - ) + try: + # Use prompt_for_user_input when the script needs runtime details + # that should come from a person instead of being hard-coded. 
+ values = await agent.prompt_for_user_input( + step_id="collect-research-details", + variables=[ + PromptForUserInputVariable( + name="company", + type="string", + required=True, + ), + PromptForUserInputVariable( + name="research_focus", + type="enum", + required=True, + enum_values=["pricing", "customers", "recent news"], + ), + ], + prompt_message="Tell us which company to research and what to focus on.", + ) - company = values["company"] - research_focus = values["research_focus"] + company = values["company"] + research_focus = values["research_focus"] - # Use user_approval before an action that costs time, uses credits, - # changes external state, or depends on the user's confirmation. - approved = await window.user_approval( - step_id="approve-research-run", - prompt_message=( - f"Research {company} with a focus on {research_focus}?" - ), - approve_label="Run research", - reject_label="Cancel", - ) + # Use user_approval before an action that costs time, uses credits, + # changes external state, or depends on the user's confirmation. + approved = await agent.user_approval( + step_id="approve-research-run", + prompt_message=f"Research {company} with a focus on {research_focus}?", + approve_label="Run research", + reject_label="Cancel", + ) - if not approved: - print("The user rejected the research run.") - return + if not approved: + print("The user rejected the research run.") + return - # The agent only runs after the user has supplied the missing - # details and approved the proposed action. - response = await window.agent( - prompt=( - f"Research {company}. Focus on {research_focus}. " - "Return a concise summary with the most relevant findings." - ), - agent=Agent.CORE_AGENT, - ) + # The agent only runs after the user has supplied the missing + # details and approved the proposed action. + response = await agent.run( + prompt=( + f"Research {company}. Focus on {research_focus}. " + "Return a concise summary with the most relevant findings." 
+ ), + ) - print("Response:", response.model_dump_json(indent=2)) + print("Response:", response.model_dump_json(indent=2)) - except UserAbortedError: - # The user can also close/cancel a human-in-the-loop prompt. - print("The user cancelled the human-in-the-loop flow.") + except UserAbortedError: + # The user can also close/cancel a human-in-the-loop prompt. + print("The user cancelled the human-in-the-loop flow.") + finally: + await env.close() if __name__ == "__main__": diff --git a/examples/input_variables_file.py b/examples/input_variables_file.py index 46ee2a0..3bb18d5 100644 --- a/examples/input_variables_file.py +++ b/examples/input_variables_file.py @@ -1,13 +1,14 @@ import asyncio from io import BytesIO -from narada import Agent, Narada +from narada import Agent, AgentKind, BrowserEnvironment async def main() -> None: - async with Narada() as narada: - window = await narada.open_and_initialize_browser_window() + env = BrowserEnvironment() + agent = Agent(environment=env, kind=AgentKind.CORE_AGENT) + try: # Create an in-memory file object and pass it in input_variables. # The SDK uploads it automatically before dispatching the request. file_obj = BytesIO( @@ -15,13 +16,14 @@ async def main() -> None: ) file_obj.name = "sample_document.txt" - response = await window.agent( + response = await agent.run( prompt="Summarize {{$doc}}.", - agent=Agent.CORE_AGENT, input_variables={"doc": file_obj}, ) print("Response:", response.model_dump_json(indent=2)) + finally: + await env.close() if __name__ == "__main__": diff --git a/examples/mcp_server.py b/examples/mcp_server.py index 15aca6e..6cbe7d7 100644 --- a/examples/mcp_server.py +++ b/examples/mcp_server.py @@ -1,6 +1,6 @@ import asyncio -from narada import Narada +from narada import Agent, BrowserEnvironment from narada_core.models import ( AuthenticationNone, McpServer, @@ -8,11 +8,10 @@ async def main() -> None: - # Initialize the Narada client. 
- async with Narada() as narada: - # Open a new browser window and initialize the Narada UI agent. - window = await narada.open_and_initialize_browser_window() + env = BrowserEnvironment() + agent = Agent(environment=env) + try: # Define an MCP server configuration. # This example uses no authentication, but you can also use: # - AuthenticationBearerToken(bearerToken="your-token") @@ -29,12 +28,14 @@ async def main() -> None: # Run a task with the MCP server linked to the agent. # The agent will have access to the tools from the specified MCP server. - response = await window.agent( + response = await agent.run( prompt="Use the MCP server tools to fetch and process some data", mcp_servers=[mcp_server], ) print("Response:", response.model_dump_json(indent=2)) + finally: + await env.close() if __name__ == "__main__": diff --git a/examples/multiple_windows.py b/examples/multiple_windows.py index 4cd7331..104c3f0 100644 --- a/examples/multiple_windows.py +++ b/examples/multiple_windows.py @@ -1,31 +1,33 @@ import asyncio -from narada import Narada +from narada import Agent, BrowserEnvironment async def main() -> None: - # Initialize the Narada client. - async with Narada() as narada: - # Helper function to run a task in a new browser window. - async def run_task(prompt: str): - window = await narada.open_and_initialize_browser_window() - return await window.agent(prompt=prompt) + # Helper function to run a task in a new browser environment. + async def run_task(prompt: str): + env = BrowserEnvironment() + agent = Agent(environment=env) + try: + return await agent.run(prompt=prompt) + finally: + await env.close() - # Run multiple tasks in parallel. - responses = await asyncio.gather( - run_task( - "Search for Kurt Keutzer on Google and extract his h-index which you can find by clicking on cited by tab in google scholar" - ), - run_task( - 'Search for "LLM Compiler" on Google and open the first arXiv paper on the results page, then open the PDF. 
Then download the PDF of the paper.' - ), - run_task( - 'Search for "random number" on Google and extract the generated number from the search result page' - ), - ) + # Run multiple tasks in parallel. + responses = await asyncio.gather( + run_task( + "Search for Kurt Keutzer on Google and extract his h-index which you can find by clicking on cited by tab in google scholar" + ), + run_task( + 'Search for "LLM Compiler" on Google and open the first arXiv paper on the results page, then open the PDF. Then download the PDF of the paper.' + ), + run_task( + 'Search for "random number" on Google and extract the generated number from the search result page' + ), + ) - for i, response in enumerate(responses): - print(f"Response #{i + 1}: {response.model_dump_json(indent=2)}\n") + for i, response in enumerate(responses): + print(f"Response #{i + 1}: {response.model_dump_json(indent=2)}\n") if __name__ == "__main__": diff --git a/examples/multiple_windows_interdependencies.py b/examples/multiple_windows_interdependencies.py index 68684be..8a8046f 100644 --- a/examples/multiple_windows_interdependencies.py +++ b/examples/multiple_windows_interdependencies.py @@ -1,6 +1,6 @@ import asyncio -from narada import Narada +from narada import Agent, BrowserEnvironment from pydantic import BaseModel @@ -23,14 +23,16 @@ async def main() -> None: # # Step 3: In window 1, add a note with the h-index information. 
- async with Narada() as narada: - window_1, window_2 = await asyncio.gather( - narada.open_and_initialize_browser_window(), - narada.open_and_initialize_browser_window(), - ) + env_1 = BrowserEnvironment() + env_2 = BrowserEnvironment() + agent_1 = Agent(environment=env_1) + agent_2 = Agent(environment=env_2) + + try: + await asyncio.gather(env_1.start(), env_2.start()) # First, get the author's name from window 1 - response = await window_1.agent( + response = await agent_1.run( prompt=( 'Search for "LLM Compiler" on Google and open the first arXiv paper on the results ' "page, then extract the first author's name from the arXiv page." @@ -43,15 +45,15 @@ async def main() -> None: # Start parallel tasks: name filling in window 1, h-index search in window 2 async def fill_name_in_contact() -> None: - await window_1.go_to_url(url="https://contacts.google.com/new") - await window_1.agent( + await agent_1.go_to_url(url="https://contacts.google.com/new") + await agent_1.run( prompt=( f"Fill in the first name and last name fields for {author_name}. Do not save." ) ) async def search_h_index() -> int: - response = await window_2.agent( + response = await agent_2.run( prompt=( f"Search for {author_name} of LLM Compiler with Google and extract their " "h-index, which you can find by opening their Google Scholar profile and " @@ -71,9 +73,11 @@ async def search_h_index() -> int: # Now add a note with h-index information. print("Adding h-index note to contact...") - await window_1.agent( + await agent_1.run( prompt=(f"Add a note that their h-index is {h_index}. 
Do not click save."), ) + finally: + await asyncio.gather(env_1.close(), env_2.close()) if __name__ == "__main__": diff --git a/examples/proxy.py b/examples/proxy.py index 49fb64e..9897067 100644 --- a/examples/proxy.py +++ b/examples/proxy.py @@ -1,6 +1,6 @@ import asyncio -from narada import BrowserConfig, Narada, ProxyConfig +from narada import Agent, BrowserConfig, BrowserEnvironment, ProxyConfig async def main() -> None: @@ -14,15 +14,18 @@ async def main() -> None: config = BrowserConfig(proxy=proxy) - async with Narada() as narada: - window = await narada.open_and_initialize_browser_window(config) + env = BrowserEnvironment(config=config) + agent = Agent(environment=env) + try: # Browser traffic now routes through the proxy. - response = await window.agent( + response = await agent.run( prompt="Go to https://httpbin.org/ip and tell me what IP address is shown.", ) print("Response:", response.text) + finally: + await env.close() if __name__ == "__main__": diff --git a/examples/remote_browser.py b/examples/remote_browser.py index 2686bc2..c1c8552 100644 --- a/examples/remote_browser.py +++ b/examples/remote_browser.py @@ -1,6 +1,6 @@ import asyncio -from narada import RemoteBrowserWindow +from narada import Agent, RemoteBrowserEnvironment async def main() -> None: @@ -13,28 +13,30 @@ async def main() -> None: # enable additional management capabilities such as stopping the session: # # ``` - # win_1 = await narada.open_and_initialize_cloud_browser_window(...) + # cloud_env = CloudBrowserEnvironment(...) + # await cloud_env.start() # - # browser_window_id = win_1.browser_window_id - # cloud_browser_session_id = win_1.cloud_browser_session_id + # browser_window_id = cloud_env.browser_window_id + # cloud_browser_session_id = cloud_env.cloud_browser_session_id # # ... 
# - # win_2 = RemoteBrowserWindow( + # remote_env = RemoteBrowserEnvironment( # browser_window_id=browser_window_id, - # loud_browser_session_id=cloud_browser_session_id, + # cloud_browser_session_id=cloud_browser_session_id, # ) - # await win_2.close() # This will stop the cloud session. + # await remote_env.close() # This will stop the cloud session. # ``` cloud_browser_session_id = None - window = RemoteBrowserWindow( + env = RemoteBrowserEnvironment( browser_window_id=browser_window_id, cloud_browser_session_id=cloud_browser_session_id, ) + agent = Agent(environment=env) # Run a task on another machine. - response = await window.agent( + response = await agent.run( prompt=( 'Search for "LLM Compiler" on Google and open the first arXiv paper on the results ' "page, then tell me who the authors are." diff --git a/examples/single_window.py b/examples/single_window.py index 9d2eb78..06b57a9 100644 --- a/examples/single_window.py +++ b/examples/single_window.py @@ -2,35 +2,34 @@ import os import signal -from narada import Narada +from narada import Agent, BrowserEnvironment async def main() -> None: - # Initialize the Narada client. - async with Narada() as narada: - # Open a new browser window and initialize the Narada UI agent. - window = await narada.open_and_initialize_browser_window() - - # Run a task in this browser window. - response = await window.agent( - prompt="Search for ${paper_name} on Google and open the first arXiv paper on the results page, then open the PDF. Then download the PDF of the paper.", - # Optionally generate a GIF of the agent's actions. - generate_gif=True, - # Put sensitive information that you don't want the LLM to see in secret_variables. - # These will - # be substituted at action time after the LLM has generated its output. - secret_variables={"paper_name": "LLM Compiler"}, - ) - - print("Response:", response.model_dump_json(indent=2)) + # Create a browser environment. It initializes lazily on the first action. 
+ env = BrowserEnvironment() + agent = Agent(environment=env) + + # Run a task in this browser environment. + response = await agent.run( + prompt="Search for ${paper_name} on Google and open the first arXiv paper on the results page, then open the PDF. Then download the PDF of the paper.", + # Optionally generate a GIF of the agent's actions. + generate_gif=True, + # Put sensitive information that you don't want the LLM to see in secret_variables. + # These will + # be substituted at action time after the LLM has generated its output. + secret_variables={"paper_name": "LLM Compiler"}, + ) + + print("Response:", response.model_dump_json(indent=2)) # Change these to test the different options below. should_quit_browser = False should_close_window = False # The browser runs as an independent process. If you want to close it after the task is - # complete, you can get its process ID from the window object. - pid = window.browser_process_id + # complete, you can get its process ID from the environment object. + pid = env.browser_process_id # Process ID is only available if it was originally launched by Narada. if pid is not None and should_quit_browser: print("Killing browser process with PID:", pid) @@ -39,7 +38,7 @@ async def main() -> None: # You can also close this specific window instead of quitting the entire browser process. if should_close_window: print("Closing window...") - await window.close() + await env.close() if __name__ == "__main__": diff --git a/examples/structured_output.py b/examples/structured_output.py index eebabf1..3c7c973 100644 --- a/examples/structured_output.py +++ b/examples/structured_output.py @@ -1,7 +1,7 @@ import asyncio import rich -from narada import Narada +from narada import Agent, BrowserEnvironment from pydantic import BaseModel, Field @@ -19,13 +19,12 @@ class PaperInfo(BaseModel): async def main() -> None: - # Initialize the Narada client. 
- async with Narada() as narada: - # Open a new browser window and initialize the Narada UI agent. - window = await narada.open_and_initialize_browser_window() + env = BrowserEnvironment() + agent = Agent(environment=env) + try: # Run a task in this browser window. - response = await window.agent( + response = await agent.run( prompt=( 'Search for "LLM Compiler" on Google and open the first arXiv paper on the results ' "page. Then extract the paper info from the arXiv page in the given format." @@ -34,6 +33,8 @@ async def main() -> None: ) rich.print("Response:", response.structured_output) + finally: + await env.close() if __name__ == "__main__": diff --git a/examples/timeout_handling.py b/examples/timeout_handling.py index 8b9d98f..da83200 100644 --- a/examples/timeout_handling.py +++ b/examples/timeout_handling.py @@ -1,18 +1,17 @@ import asyncio -from narada import Narada, NaradaTimeoutError +from narada import Agent, BrowserEnvironment, NaradaTimeoutError async def main() -> None: - # Initialize the Narada client. - async with Narada() as narada: - # Open a new browser window and initialize the Narada UI agent. - window = await narada.open_and_initialize_browser_window() + env = BrowserEnvironment() + agent = Agent(environment=env) + try: max_attempts = 2 for attempt in range(max_attempts): try: - response = await window.agent( + response = await agent.run( prompt='Search for "random number between 1 and 5" on Google and extract the generated number from the search result page. Output just the number.', # Force a timeout on the first attempt to demonstrate timeout handling. timeout=3 if attempt == 0 else 120, @@ -28,7 +27,9 @@ async def main() -> None: # Reinitialize the UI agent to cancel any inflight requests. This keeps the browser # pages untouched so we don't lose any progress. 
- await window.reinitialize() + await agent.reset_agent_state() + finally: + await env.close() if __name__ == "__main__": diff --git a/packages/narada-core/pyproject.toml b/packages/narada-core/pyproject.toml index 4043d6c..c00c276 100644 --- a/packages/narada-core/pyproject.toml +++ b/packages/narada-core/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "narada-core" -version = "0.0.27" +version = "0.1.0" description = "Code shared by the `narada` and `narada-pyodide` packages." license = "Apache-2.0" readme = "README.md" diff --git a/packages/narada-core/src/narada_core/actions/critic.py b/packages/narada-core/src/narada_core/actions/critic.py index 04f5850..0ef109e 100644 --- a/packages/narada-core/src/narada_core/actions/critic.py +++ b/packages/narada-core/src/narada_core/actions/critic.py @@ -5,7 +5,7 @@ from pydantic import BaseModel, create_model from narada_core.actions.models import AgentUsage, CriticResult -from narada_core.models import Agent, CriticConfig +from narada_core.models import AgentKind, CriticConfig from narada_core.tracing.model import parse_action_trace _VALIDATION_VAR = "narada_validation_passed" @@ -38,7 +38,7 @@ async def run_critic( critic_dispatch_response = await dispatch_request( prompt=critic.get("prompt", _DEFAULT_CRITIC_PROMPT), - agent=Agent.PRODUCTIVITY, + agent=AgentKind.PRODUCTIVITY, output_schema=CriticOutputModel, critic_context={ "agentPrompt": original_prompt, diff --git a/packages/narada-core/src/narada_core/errors.py b/packages/narada-core/src/narada_core/errors.py index 055b89d..656bd96 100644 --- a/packages/narada-core/src/narada_core/errors.py +++ b/packages/narada-core/src/narada_core/errors.py @@ -12,7 +12,7 @@ class NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE(NaradaTimeoutError): def __init__(self, timeout: int) -> None: super().__init__( f"Request timed out after {timeout} seconds. " - "Try specifying a larger `timeout` value when calling `agent`." 
+ "Try specifying a larger `timeout` value when calling `Agent.run`." ) diff --git a/packages/narada-core/src/narada_core/models.py b/packages/narada-core/src/narada_core/models.py index f5d2ee3..a5ea5ad 100644 --- a/packages/narada-core/src/narada_core/models.py +++ b/packages/narada-core/src/narada_core/models.py @@ -8,25 +8,25 @@ from narada_core.actions.models import ActiveInputRequest -class Agent(Enum): +class AgentKind(Enum): PRODUCTIVITY = 1 OPERATOR = 2 CORE_AGENT = 3 def prompt_prefix(self) -> str: match self: - case Agent.PRODUCTIVITY: + case AgentKind.PRODUCTIVITY: return "" - case Agent.OPERATOR: + case AgentKind.OPERATOR: return "/Operator " - case Agent.CORE_AGENT: + case AgentKind.CORE_AGENT: return "/coreAgent " class ReasoningEffort(StrEnum): """Controls how much reasoning the Core Agent uses before responding. - Only `Agent.CORE_AGENT` supports this option; other agents raise `ValueError`. + Only `AgentKind.CORE_AGENT` supports this option; other agents raise `ValueError`. 
""" NONE = "none" diff --git a/packages/narada-pyodide/pyproject.toml b/packages/narada-pyodide/pyproject.toml index ab5fcc3..eb14d28 100644 --- a/packages/narada-pyodide/pyproject.toml +++ b/packages/narada-pyodide/pyproject.toml @@ -1,14 +1,14 @@ [project] name = "narada-pyodide" -version = "0.0.59" +version = "0.1.0" description = "Pyodide-compatible Python client SDK for Narada" license = "Apache-2.0" readme = "README.md" authors = [{ name = "Narada", email = "support@narada.ai" }] requires-python = ">=3.12" dependencies = [ - "narada-core==0.0.27", + "narada-core==0.1.0", # Must be a supported version in https://pyodide.org/en/stable/usage/packages-in-pyodide.html "packaging==24.2", ] diff --git a/packages/narada-pyodide/src/narada/__init__.py b/packages/narada-pyodide/src/narada/__init__.py index bfe0b01..33175d8 100644 --- a/packages/narada-pyodide/src/narada/__init__.py +++ b/packages/narada-pyodide/src/narada/__init__.py @@ -4,7 +4,7 @@ NaradaTimeoutError, ) from narada_core.models import ( - Agent, + AgentKind, CriticConfig, File, ReasoningEffort, @@ -12,31 +12,39 @@ ResponseContent, ) -from narada.client import Narada +from narada.agent import Agent +from narada.environment import ( + BaseBrowserEnvironment, + BrowserEnvironment, + CloudBrowserEnvironment, + Environment, + LambdaEnvironment, + RemoteBrowserEnvironment, + SessionDownloadItem, +) from narada.utils import download_file, render_html from narada.version import __version__ -from narada.window import ( - CloudBrowserWindow, - LocalBrowserWindow, - RemoteBrowserWindow, -) __all__ = [ "__version__", "ActiveInputRequest", "Agent", - "CloudBrowserWindow", + "AgentKind", + "BaseBrowserEnvironment", + "BrowserEnvironment", + "CloudBrowserEnvironment", "CriticConfig", "CriticResult", "download_file", + "Environment", "File", - "LocalBrowserWindow", - "Narada", + "LambdaEnvironment", "NaradaError", "NaradaTimeoutError", "ReasoningEffort", - "RemoteBrowserWindow", + "RemoteBrowserEnvironment", 
"render_html", "Response", "ResponseContent", + "SessionDownloadItem", ] diff --git a/packages/narada-pyodide/src/narada/_trace.py b/packages/narada-pyodide/src/narada/_trace.py index cef5ef8..460a8c2 100644 --- a/packages/narada-pyodide/src/narada/_trace.py +++ b/packages/narada-pyodide/src/narada/_trace.py @@ -7,8 +7,9 @@ observability dashboard. The module is private: user code should not import from here. The public -surface lives in ``window.py`` and ``utils.py``; instrumentation is applied -at those module boundaries by calling into this module. +surface lives in ``agent.py``, ``environment.py``, and ``utils.py``; +instrumentation is applied at those module boundaries by calling into this +module. """ from __future__ import annotations diff --git a/packages/narada-pyodide/src/narada/agent.py b/packages/narada-pyodide/src/narada/agent.py new file mode 100644 index 0000000..71429f9 --- /dev/null +++ b/packages/narada-pyodide/src/narada/agent.py @@ -0,0 +1,566 @@ +from __future__ import annotations + +from typing import IO, Any, Generic, Literal, Mapping, TypeVar, overload + +from narada_core.actions.critic import run_critic +from narada_core.actions.models import ( + DEFAULT_HITL_TIMEOUT_SECONDS, + AgenticMatchingSelectorsFinderRequest, + AgenticMatchingSelectorsFinderResponse, + AgenticMouseAction, + AgenticMouseActionRequest, + AgenticSelectorAction, + AgenticSelectorRequest, + AgenticSelectorResponse, + AgenticSelectors, + AgentResponse, + AgentUsage, + CriticResult, + GetFullHtmlRequest, + GetFullHtmlResponse, + GetScreenshotRequest, + GetScreenshotResponse, + GetSimplifiedHtmlRequest, + GetSimplifiedHtmlResponse, + GetUrlRequest, + GetUrlResponse, + GoToUrlRequest, + PrintMessageRequest, + PromptForUserInputRequest, + PromptForUserInputResponse, + PromptForUserInputVariable, + ReadExcelSheetRequest, + ReadExcelSheetResponse, + ReadGoogleSheetRequest, + ReadGoogleSheetResponse, + RecordedClick, + UserApprovalRequest, + UserApprovalResponse, + 
WaitForElementRequest, + WaitForElementResponse, + WriteExcelSheetRequest, + WriteGoogleSheetRequest, +) +from narada_core.models import ( + AgentKind, + CriticConfig, + File, + McpServer, + ReasoningEffort, + RemoteDispatchChatHistoryItem, + Response, + UserResourceCredentials, +) +from narada_core.tracing.model import parse_action_trace +from pydantic import BaseModel + +from narada.environment import ( + BaseBrowserEnvironment, + Environment, + InputRequiredCallback, +) + +from . import _trace + +_StructuredOutput = TypeVar("_StructuredOutput", bound=BaseModel) + + +class Agent(Generic[_StructuredOutput]): + def __init__( + self, + *, + environment: Environment, + kind: AgentKind | str = AgentKind.OPERATOR, + ) -> None: + self.environment = environment + self.kind = kind + + # `reasoning` is only valid with the Core Agent; these two overloads make + # that constraint type-checkable when callers construct a core-agent instance. + @overload + async def run( + self, + prompt: str, + *, + reasoning: ReasoningEffort | None = None, + clear_chat: bool | None = None, + generate_gif: bool | None = None, + output_schema: None = None, + previous_request_id: str | None = None, + chat_history: list[RemoteDispatchChatHistoryItem] | None = None, + additional_context: dict[str, str] | None = None, + attachment: File | IO[Any] | None = None, + time_zone: str = "America/Los_Angeles", + user_resource_credentials: UserResourceCredentials | None = None, + mcp_servers: list[McpServer] | None = None, + secret_variables: dict[str, str] | None = None, + input_variables: Mapping[str, Any] | None = None, + callback_url: str | None = None, + callback_secret: str | None = None, + callback_headers: Mapping[str, Any] | None = None, + on_input_required: InputRequiredCallback | None = None, + critic: CriticConfig | None = None, + timeout: int = 1000, + ) -> AgentResponse[dict[str, Any]]: ... 
+ + @overload + async def run( + self, + prompt: str, + *, + reasoning: ReasoningEffort | None = None, + clear_chat: bool | None = None, + generate_gif: bool | None = None, + output_schema: type[_StructuredOutput], + previous_request_id: str | None = None, + chat_history: list[RemoteDispatchChatHistoryItem] | None = None, + additional_context: dict[str, str] | None = None, + attachment: File | IO[Any] | None = None, + time_zone: str = "America/Los_Angeles", + user_resource_credentials: UserResourceCredentials | None = None, + mcp_servers: list[McpServer] | None = None, + secret_variables: dict[str, str] | None = None, + input_variables: Mapping[str, Any] | None = None, + callback_url: str | None = None, + callback_secret: str | None = None, + callback_headers: Mapping[str, Any] | None = None, + on_input_required: InputRequiredCallback | None = None, + critic: CriticConfig | None = None, + timeout: int = 1000, + ) -> AgentResponse[_StructuredOutput]: ... + + async def run( + self, + prompt: str, + *, + reasoning: ReasoningEffort | None = None, + clear_chat: bool | None = None, + generate_gif: bool | None = None, + output_schema: type[BaseModel] | None = None, + previous_request_id: str | None = None, + chat_history: list[RemoteDispatchChatHistoryItem] | None = None, + additional_context: dict[str, str] | None = None, + attachment: File | IO[Any] | None = None, + time_zone: str = "America/Los_Angeles", + user_resource_credentials: UserResourceCredentials | None = None, + mcp_servers: list[McpServer] | None = None, + secret_variables: dict[str, str] | None = None, + input_variables: Mapping[str, Any] | None = None, + callback_url: str | None = None, + callback_secret: str | None = None, + callback_headers: Mapping[str, Any] | None = None, + on_input_required: InputRequiredCallback | None = None, + critic: CriticConfig | None = None, + timeout: int = 1000, + ) -> AgentResponse: + """Invokes an agent in the bound Narada environment.""" + remote_dispatch_response = await 
self._dispatch_request( + prompt=prompt, + clear_chat=clear_chat, + generate_gif=generate_gif, + output_schema=output_schema, + previous_request_id=previous_request_id, + chat_history=chat_history, + additional_context=additional_context, + attachment=attachment, + time_zone=time_zone, + user_resource_credentials=user_resource_credentials, + mcp_servers=mcp_servers, + secret_variables=secret_variables, + input_variables=input_variables, + callback_url=callback_url, + callback_secret=callback_secret, + callback_headers=callback_headers, + on_input_required=on_input_required, + reasoning=reasoning, + timeout=timeout, + ) + response_content = remote_dispatch_response["response"] + assert response_content is not None + + action_trace_raw = response_content.get("actionTrace") + action_trace = ( + parse_action_trace(action_trace_raw) + if action_trace_raw is not None + else None + ) + workflow_trace = response_content.get("workflowTrace") + parent_request_id = self.environment._current_parent_request_id() + # Preserve the response contract for direct callers, but avoid adding a second + # child node when the backend will stitch the child request into the parent row. 
+ if workflow_trace is not None and parent_request_id is None: + _trace.emit_sub_workflow(workflow_trace=workflow_trace) + + critic_result: CriticResult | None = None + if critic is not None: + critic_result = await run_critic( + dispatch_request=self._dispatch_request, + original_prompt=prompt, + response_content=response_content, + action_trace_raw=action_trace_raw, + critic=critic, + time_zone=time_zone, + timeout=timeout, + ) + + return AgentResponse( + request_id=remote_dispatch_response["requestId"], + status=remote_dispatch_response["status"], + text=response_content["text"], + output=response_content.get("output"), + structured_output=response_content.get("structuredOutput"), + usage=AgentUsage.model_validate(remote_dispatch_response["usage"]), + action_trace=action_trace, + workflow_trace=workflow_trace, + critic_result=critic_result, + ) + + async def _dispatch_request( + self, + *, + prompt: str, + agent: AgentKind | str | None = None, + reasoning: ReasoningEffort | None = None, + clear_chat: bool | None = None, + generate_gif: bool | None = None, + output_schema: type[BaseModel] | None = None, + previous_request_id: str | None = None, + chat_history: list[RemoteDispatchChatHistoryItem] | None = None, + additional_context: dict[str, str] | None = None, + attachment: File | IO[Any] | None = None, + time_zone: str = "America/Los_Angeles", + user_resource_credentials: UserResourceCredentials | None = None, + mcp_servers: list[McpServer] | None = None, + secret_variables: dict[str, str] | None = None, + input_variables: Mapping[str, Any] | None = None, + callback_url: str | None = None, + callback_secret: str | None = None, + callback_headers: Mapping[str, Any] | None = None, + on_input_required: InputRequiredCallback | None = None, + critic_context: dict[str, Any] | None = None, + timeout: int = 1000, + ) -> Response: + dispatch_agent = self.kind if agent is None else agent + # Branch on `reasoning` so each call site binds a single, typed overload + # of 
`_dispatch_request`. The validation also lives in `_dispatch_request` + # itself (defense in depth + reachable when callers go straight to the + # low-level API), so the redundancy here is intentional. + if reasoning is None: + remote_dispatch_response = await self.environment._dispatch_request( + prompt=prompt, + agent=dispatch_agent, + clear_chat=clear_chat, + generate_gif=generate_gif, + output_schema=output_schema, + previous_request_id=previous_request_id, + chat_history=chat_history, + additional_context=additional_context, + attachment=attachment, + time_zone=time_zone, + user_resource_credentials=user_resource_credentials, + mcp_servers=mcp_servers, + secret_variables=secret_variables, + input_variables=input_variables, + callback_url=callback_url, + callback_secret=callback_secret, + callback_headers=callback_headers, + on_input_required=on_input_required, + critic_context=critic_context, + timeout=timeout, + ) + else: + if dispatch_agent is not AgentKind.CORE_AGENT: + raise ValueError( + "`reasoning` is only supported with `kind=AgentKind.CORE_AGENT` " + f"(got kind={dispatch_agent!r})" + ) + # The CORE_AGENT-specific overloads of `_dispatch_request` split on + # a narrower `output_schema` discriminator (None vs `type[T]`), + # which the impl's `type[BaseModel] | None` union doesn't cleanly + # narrow into without further branching. The public `run()` + # overloads above already give callers correct return-type + # narrowing, so the internal forward call bypasses overload + # disambiguation on this single dimension. 
+ remote_dispatch_response = await self.environment._dispatch_request( # pyright: ignore[reportCallIssue] + prompt=prompt, + agent=dispatch_agent, + reasoning=reasoning, + clear_chat=clear_chat, + generate_gif=generate_gif, + output_schema=output_schema, # pyright: ignore[reportArgumentType] + previous_request_id=previous_request_id, + chat_history=chat_history, + additional_context=additional_context, + attachment=attachment, + time_zone=time_zone, + user_resource_credentials=user_resource_credentials, + mcp_servers=mcp_servers, + secret_variables=secret_variables, + input_variables=input_variables, + callback_url=callback_url, + callback_secret=callback_secret, + callback_headers=callback_headers, + on_input_required=on_input_required, + critic_context=critic_context, + timeout=timeout, + ) + return remote_dispatch_response + + def _browser_environment(self) -> BaseBrowserEnvironment: + if not isinstance(self.environment, BaseBrowserEnvironment): + raise ValueError( + f"{type(self.environment).__name__} does not support browser actions" + ) + return self.environment + + async def agentic_selector( + self, + *, + action: AgenticSelectorAction, + selectors: AgenticSelectors, + fallback_operator_query: str, + # Larger default timeout because Operator can take a bit to run. + timeout: int | None = 300, + ) -> AgenticSelectorResponse: + """Performs an action on an element specified by the given selectors, falling back to using + the Operator agent if the selectors fail to match a unique element. + + Returns AgenticSelectorResponse with the value for 'get_text' and 'get_property' actions, + otherwise returns None. 
+ """ + response_model = ( + AgenticSelectorResponse + if action["type"] in {"get_text", "get_property"} + else None + ) + + result = await self._browser_environment()._run_extension_action( + AgenticSelectorRequest( + action=action, + selectors=selectors, + fallback_operator_query=fallback_operator_query, + ), + response_model, + timeout=timeout, + ) + + if result is None: + return AgenticSelectorResponse(value=None) + + return result + + async def agentic_matching_selectors_finder( + self, + *, + prompt: str, + timeout: int | None = 300, + ) -> list[AgenticSelectors]: + """Finds all visible targets matching a prompt and returns selectors.""" + result = await self._browser_environment()._run_extension_action( + AgenticMatchingSelectorsFinderRequest(prompt=prompt), + AgenticMatchingSelectorsFinderResponse, + timeout=timeout, + ) + return result.selectors + + async def agentic_mouse_action( + self, + *, + action: AgenticMouseAction, + recorded_click: RecordedClick, + fallback_operator_query: str, + resize_window: bool = True, + timeout: int | None = 60, + ) -> None: + """Performs a mouse action at the specified click coordinates, falling back to using + the Operator agent if the click fails. 
+ """ + return await self._browser_environment()._run_extension_action( + AgenticMouseActionRequest( + action=action, + recorded_click=recorded_click, + resize_window=resize_window, + fallback_operator_query=fallback_operator_query, + ), + timeout=timeout, + ) + + async def go_to_url( + self, *, url: str, new_tab: bool = False, timeout: int | None = None + ) -> None: + """Navigates the active page in this window to the given URL.""" + return await self._browser_environment()._run_extension_action( + GoToUrlRequest(url=url, new_tab=new_tab), timeout=timeout + ) + + async def wait_for_element( + self, + *, + selectors: AgenticSelectors, + state: Literal["visible", "hidden"], + timeout: int, + ) -> bool: + """Waits for an element matching the given selectors to reach the specified state. + + Returns True if the element was found, False if no selector matched before timeout. + """ + result = await self._browser_environment()._run_extension_action( + WaitForElementRequest(selectors=selectors, state=state, timeout=timeout), + WaitForElementResponse, + timeout=timeout // 1000 + 30, + ) + if result is None: + return False + return result.found + + async def get_url(self, *, timeout: int | None = None) -> GetUrlResponse: + """Gets the URL of the current active page.""" + result = await self._browser_environment()._run_extension_action( + GetUrlRequest(), + GetUrlResponse, + timeout=timeout, + ) + return result + + async def print_message(self, *, message: str, timeout: int | None = None) -> None: + """Prints a message in the Narada extension side panel chat.""" + return await self._browser_environment()._run_extension_action( + PrintMessageRequest(message=message), timeout=timeout + ) + + async def prompt_for_user_input( + self, + *, + step_id: str, + variables: list[PromptForUserInputVariable], + prompt_message: str | None = None, + timeout: int | None = DEFAULT_HITL_TIMEOUT_SECONDS, + ) -> dict[str, Any]: + """Prompts the user for one or more input values in the extension 
UI.""" + result = await self._browser_environment()._run_extension_action( + PromptForUserInputRequest( + step_id=step_id, prompt_message=prompt_message, variables=variables + ), + PromptForUserInputResponse, + timeout=timeout, + ) + return result.values_by_name + + async def user_approval( + self, + *, + step_id: str, + prompt_message: str, + approve_label: str, + reject_label: str, + timeout: int | None = DEFAULT_HITL_TIMEOUT_SECONDS, + ) -> bool: + """Prompts the user to approve or reject in the extension UI.""" + result = await self._browser_environment()._run_extension_action( + UserApprovalRequest( + step_id=step_id, + prompt_message=prompt_message, + approve_label=approve_label, + reject_label=reject_label, + ), + UserApprovalResponse, + timeout=timeout, + ) + return result.approved + + async def read_google_sheet( + self, + *, + spreadsheet_id: str, + range: str, + timeout: int | None = None, + ) -> ReadGoogleSheetResponse: + """Reads a range of cells from a Google Sheet.""" + return await self._browser_environment()._run_extension_action( + ReadGoogleSheetRequest(spreadsheet_id=spreadsheet_id, range=range), + ReadGoogleSheetResponse, + timeout=timeout, + ) + + async def read_excel_sheet( + self, + *, + workbook_url: str, + range: str, + microsoft_account_email: str, + timeout: int | None = None, + ) -> ReadExcelSheetResponse: + """Reads a range of cells from a Microsoft Excel workbook.""" + return await self._browser_environment()._run_extension_action( + ReadExcelSheetRequest( + workbook_url=workbook_url, + range=range, + microsoft_account_email=microsoft_account_email, + ), + ReadExcelSheetResponse, + timeout=timeout, + ) + + async def write_google_sheet( + self, + *, + spreadsheet_id: str, + range: str, + values: list[list[str]], + timeout: int | None = None, + ) -> None: + """Writes a range of cells to a Google Sheet.""" + return await self._browser_environment()._run_extension_action( + WriteGoogleSheetRequest( + spreadsheet_id=spreadsheet_id, 
range=range, values=values + ), + timeout=timeout, + ) + + async def write_excel_sheet( + self, + *, + workbook_url: str, + range: str, + microsoft_account_email: str, + values: list[list[str]], + timeout: int | None = None, + ) -> None: + """Writes a range of cells to a Microsoft Excel workbook.""" + return await self._browser_environment()._run_extension_action( + WriteExcelSheetRequest( + workbook_url=workbook_url, + range=range, + microsoft_account_email=microsoft_account_email, + values=values, + ), + timeout=timeout, + ) + + async def get_full_html(self, *, timeout: int | None = None) -> GetFullHtmlResponse: + """Gets the full HTML content of the current page.""" + return await self._browser_environment()._run_extension_action( + GetFullHtmlRequest(), + GetFullHtmlResponse, + timeout=timeout, + ) + + async def get_simplified_html( + self, *, timeout: int | None = None + ) -> GetSimplifiedHtmlResponse: + """Gets the simplified HTML content of the current page.""" + return await self._browser_environment()._run_extension_action( + GetSimplifiedHtmlRequest(), + GetSimplifiedHtmlResponse, + timeout=timeout, + ) + + async def get_screenshot( + self, *, timeout: int | None = None + ) -> GetScreenshotResponse: + """Takes a screenshot of the current browser window.""" + return await self._browser_environment()._run_extension_action( + GetScreenshotRequest(), + GetScreenshotResponse, + timeout=timeout, + ) diff --git a/packages/narada-pyodide/src/narada/client.py b/packages/narada-pyodide/src/narada/client.py deleted file mode 100644 index 9d6886d..0000000 --- a/packages/narada-pyodide/src/narada/client.py +++ /dev/null @@ -1,137 +0,0 @@ -from __future__ import annotations - -import asyncio -import json -import logging -import os -from typing import Any - -from narada_core.models import _SdkConfig -from packaging.version import Version -from pyodide.http import pyfetch - -from narada.version import __version__ -from narada.window import CloudBrowserWindow, 
_build_auth_headers, _normalize_narada_env - - -class Narada: - def __init__(self, *, api_key: str | None = None) -> None: - self._api_key = api_key or os.environ.get("NARADA_API_KEY") - self._user_id = os.environ.get("NARADA_USER_ID") - self._env = _normalize_narada_env(os.environ.get("NARADA_ENV")) - - if self._api_key is None and (self._user_id is None or self._env is None): - raise ValueError( - "Either `api_key` or all of `NARADA_USER_ID` and `NARADA_ENV` must be provided" - ) - - async def __aenter__(self) -> Narada: - await self._validate_sdk_config() - return self - - async def __aexit__(self, *args: Any) -> None: - pass - - async def _fetch_sdk_config(self) -> _SdkConfig | None: - base_url = os.getenv("NARADA_API_BASE_URL", "https://api.narada.ai/fast/v2") - url = f"{base_url}/sdk/config" - headers = await _build_auth_headers( - api_key=self._api_key, - user_id=self._user_id, - env=self._env, - ) - - try: - resp = await pyfetch(url, headers=headers) - if not resp.ok: - logging.warning( - "Failed to fetch SDK config: %s %s", resp.status, await resp.text() - ) - return None - - return _SdkConfig.model_validate(await resp.json()) - except Exception as e: - logging.warning("Failed to fetch SDK config: %s", e) - return None - - async def _validate_sdk_config(self) -> None: - config = await self._fetch_sdk_config() - if config is None: - return - - package_config = config.packages["narada-pyodide"] - current_version = Version(__version__) - min_required_version = Version(package_config.min_required_version) - if current_version < min_required_version: - raise RuntimeError( - f"narada-pyodide<={__version__} is not supported. Please reload the page to " - f"upgrade to version {package_config.min_required_version} or higher." 
- ) - - async def open_and_initialize_cloud_browser_window( - self, - *, - session_name: str | None = None, - session_timeout: int | None = None, - require_extension: bool = True, - ) -> CloudBrowserWindow: - base_url = os.getenv("NARADA_API_BASE_URL", "https://api.narada.ai/fast/v2") - endpoint_url = ( - f"{base_url}/cloud-browser/create-and-initialize-cloud-browser-session" - ) - headers = await _build_auth_headers( - api_key=self._api_key, - user_id=self._user_id, - env=self._env, - ) - request_body: dict[str, Any] = { - "session_name": session_name, - "session_timeout": session_timeout, - "require_extension": require_extension, - } - initiator_remote_dispatch_request_id = os.environ.get( - "NARADA_INITIATOR_REMOTE_DISPATCH_REQUEST_ID", "" - ).strip() - if not initiator_remote_dispatch_request_id: - raise ValueError("NARADA_INITIATOR_REMOTE_DISPATCH_REQUEST_ID is required") - request_body["initiator_remote_dispatch_request_id"] = ( - initiator_remote_dispatch_request_id - ) - - response = None - max_attempts = 3 - retry_backoff_seconds = (2.0, 4.0, 0.0) # no wait after last attempt - for attempt in range(max_attempts): - # Due to unknown network issues, sometimes create-and-initialize-cloud-browser-session API call fails. 
- try: - response = await pyfetch( - endpoint_url, - method="POST", - headers=headers, - body=json.dumps(request_body), - ) - if response.ok: - break - except Exception: - await asyncio.sleep(retry_backoff_seconds[attempt]) - continue - - if response is None or not response.ok: - resp_status = response.status if response is not None else "unknown status" - resp_text = ( - await response.text() if response is not None else "unknown error" - ) - raise RuntimeError( - "Failed to create and initialize cloud browser session after 3 attempts with backoff: " - f"{resp_status}: {resp_text}\n" - f"Endpoint URL: {endpoint_url}" - ) - - response_data = await response.json() - return CloudBrowserWindow( - browser_window_id=response_data["browser_window_id"], - session_id=response_data["session_id"], - api_key=self._api_key, - user_id=self._user_id, - env=self._env, - ) diff --git a/packages/narada-pyodide/src/narada/window.py b/packages/narada-pyodide/src/narada/environment.py similarity index 59% rename from packages/narada-pyodide/src/narada/window.py rename to packages/narada-pyodide/src/narada/environment.py index 2b820da..23b521b 100644 --- a/packages/narada-pyodide/src/narada/window.py +++ b/packages/narada-pyodide/src/narada/environment.py @@ -3,11 +3,14 @@ import inspect import json import logging +import mimetypes import os import time from abc import ABC from dataclasses import dataclass from http import HTTPStatus +from io import IOBase +from pathlib import Path from typing import ( IO, TYPE_CHECKING, @@ -15,7 +18,9 @@ Awaitable, Callable, Literal, - Optional, + Mapping, + TypedDict, + TypeGuard, TypeVar, cast, overload, @@ -24,48 +29,11 @@ from urllib.parse import urlencode from js import AbortController, setTimeout # type: ignore -from narada_core.actions.critic import run_critic from narada_core.actions.models import ( - DEFAULT_HITL_TIMEOUT_SECONDS, ActiveInputRequest, - AgenticMatchingSelectorsFinderRequest, - AgenticMatchingSelectorsFinderResponse, - 
AgenticMouseAction, - AgenticMouseActionRequest, - AgenticSelectorAction, - AgenticSelectorRequest, - AgenticSelectorResponse, - AgenticSelectors, - AgentResponse, - AgentUsage, CloseWindowRequest, - CriticResult, ExtensionActionRequest, ExtensionActionResponse, - GetFullHtmlRequest, - GetFullHtmlResponse, - GetScreenshotRequest, - GetScreenshotResponse, - GetSimplifiedHtmlRequest, - GetSimplifiedHtmlResponse, - GetUrlRequest, - GetUrlResponse, - GoToUrlRequest, - PrintMessageRequest, - PromptForUserInputRequest, - PromptForUserInputResponse, - PromptForUserInputVariable, - ReadExcelSheetRequest, - ReadExcelSheetResponse, - ReadGoogleSheetRequest, - ReadGoogleSheetResponse, - RecordedClick, - UserApprovalRequest, - UserApprovalResponse, - WaitForElementRequest, - WaitForElementResponse, - WriteExcelSheetRequest, - WriteGoogleSheetRequest, ) from narada_core.errors import ( NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE, @@ -74,8 +42,7 @@ UserAbortedError, ) from narada_core.models import ( - Agent, - CriticConfig, + AgentKind, File, McpServer, ReasoningEffort, @@ -83,14 +50,16 @@ Response, UserResourceCredentials, _RemoteDispatchPollResponse, + _SdkConfig, ) -from narada_core.tracing.model import parse_action_trace +from packaging.version import Version from pydantic import BaseModel from pyodide.ffi import JsProxy, create_once_callable from pyodide.http import pyfetch from . import _trace from .retry import pyfetch_with_retries +from .version import __version__ # Magic variable injected by the JavaScript harness that stores the IDs of the current runnables # in the stack on the frontend. @@ -137,6 +106,29 @@ async def _narada_get_id_token() -> str: ... 
type InputRequiredCallback = Callable[[ActiveInputRequest], Awaitable[None] | None] +class _InputVariableFileReference(TypedDict): + source: Literal["remoteDispatchUpload"] + id: str + filename: str + mimeType: str + + +type _JsonPrimitive = str | int | float | bool | None +type _InputVariableValue = ( + _JsonPrimitive + | IOBase + | list["_InputVariableValue"] + | dict[str, "_InputVariableValue"] +) +type _NormalizedInputVariableValue = ( + _JsonPrimitive + | _InputVariableFileReference + | list["_NormalizedInputVariableValue"] + | dict[str, "_NormalizedInputVariableValue"] +) +type _NormalizedInputVariables = dict[str, _NormalizedInputVariableValue] + + async def _notify_input_required_callback( callback: InputRequiredCallback | None, response: _RemoteDispatchPollResponse, @@ -159,13 +151,13 @@ async def _notify_input_required_callback( await callback_result -def _trace_agent_type(agent: Agent | str) -> str: +def _trace_agent_type(agent: AgentKind | str) -> str: match agent: - case Agent.PRODUCTIVITY: + case AgentKind.PRODUCTIVITY: return "generalist" - case Agent.OPERATOR: + case AgentKind.OPERATOR: return "operator" - case Agent.CORE_AGENT: + case AgentKind.CORE_AGENT: return "coreAgent" case _: return str(agent) @@ -209,48 +201,121 @@ class SessionDownloadItem: download_url: str -class BaseBrowserWindow(ABC): +class Environment(ABC): _api_key: str | None _base_url: str _user_id: str | None _env: Literal["prod", "dev", None] - _browser_window_id: str + _initialized: bool + _init_lock: asyncio.Lock | None def __init__( self, *, - api_key: str | None, - base_url: str, - user_id: str | None, - env: Literal["prod", "dev", None] = "prod", - browser_window_id: str, + api_key: str | None = None, + base_url: str | None = None, + user_id: str | None = None, + env: Literal["prod", "dev", None] = None, ) -> None: + api_key = api_key or os.environ.get("NARADA_API_KEY") + user_id = user_id or os.environ.get("NARADA_USER_ID") + env = _normalize_narada_env(env or 
os.environ.get("NARADA_ENV")) if api_key is None and (user_id is None or env is None): raise ValueError( "Either `api_key` or all of `user_id`, `user_id_token`, and `env` must be provided" ) self._api_key = api_key - self._base_url = base_url + self._base_url = base_url or os.getenv( + "NARADA_API_BASE_URL", "https://api.narada.ai/fast/v2" + ) self._user_id = user_id self._env = env - self._browser_window_id = browser_window_id - - @property - def browser_window_id(self) -> str: - return self._browser_window_id + self._initialized = False + self._init_lock = None @property def cloud_browser_session_id(self) -> str | None: - """Cloud browser session backing this window, if any. + """Cloud browser session backing this environment, if any. + + Remote dispatch includes this value so backend observability can link a client-mode run to + an existing SDK-owned cloud browser. Plain local environments are not cloud-backed and + return `None`; cloud-backed subclasses override this property with their session ID. + """ + return None + + async def start(self) -> None: + """Initializes the environment eagerly. - `dispatch_request` includes this value in remote-dispatch requests so backend - observability can link a client-mode run to an existing SDK-owned cloud browser. Plain - local windows are not cloud-backed and return `None`; cloud-backed subclasses override this - property with their session ID. + Initialization is also performed lazily by `Agent.run()` and browser actions. Reusing the + same environment instance reuses the initialized target. 
""" + await self._ensure_initialized() + + async def _ensure_initialized(self) -> None: + if self._initialized: + return + + if self._init_lock is None: + self._init_lock = asyncio.Lock() + + async with self._init_lock: + if self._initialized: + return + + if self._validates_sdk_config: + await self._validate_sdk_config() + await self._initialize() + self._initialized = True + + @property + def _validates_sdk_config(self) -> bool: + return True + + async def _initialize(self) -> None: + pass + + async def close(self, *, timeout: int | None = None) -> None: + await self._close_impl(timeout=timeout) + + async def _close_impl(self, *, timeout: int | None = None) -> None: + pass + + @property + def _dispatch_browser_window_id(self) -> str | None: return None + async def _fetch_sdk_config(self) -> _SdkConfig | None: + url = f"{self._base_url}/sdk/config" + headers = await self._get_auth_headers() + + try: + resp = await pyfetch(url, headers=headers) + if not resp.ok: + logging.warning( + "Failed to fetch SDK config: %s %s", resp.status, await resp.text() + ) + return None + + return _SdkConfig.model_validate(await resp.json()) + except Exception as e: + logging.warning("Failed to fetch SDK config: %s", e) + return None + + async def _validate_sdk_config(self) -> None: + config = await self._fetch_sdk_config() + if config is None: + return + + package_config = config.packages["narada-pyodide"] + current_version = Version(__version__) + min_required_version = Version(package_config.min_required_version) + if current_version < min_required_version: + raise RuntimeError( + f"narada-pyodide<={__version__} is not supported. Please reload the page to " + f"upgrade to version {package_config.min_required_version} or higher." + ) + def _current_parent_run_ids(self) -> list[str] | None: """Returns the runnable stack to forward with SDK requests. 
@@ -272,25 +337,79 @@ async def _get_auth_headers(self) -> dict[str, str]: env=self._env, ) - async def upload_file(self, *, file: IO) -> File: - """Uploads a file that can be used as an attachment in a subsequent `agent` request. - - The file is temporarily saved in Narada cloud and expires after 1 day. It can only be - accessed by the user who uploaded it. - """ + async def _upload_file_impl(self, *, file: IO[Any]) -> File: + # Uploading file contents is not supported in the browser: the Pyodide runtime has no + # access to the user's filesystem, so there is no reliable local file to upload. File + # input variables that already reference an uploaded object (e.g. `agentStudioAttachment` + # references) are plain dicts and pass through `_normalize_input_variables` unchanged + # without reaching this path. raise NotImplementedError( "Uploading files is not supported in the browser environment" ) + async def _normalize_input_variables( + self, *, input_variables: Mapping[str, Any] + ) -> _NormalizedInputVariables: + normalized: _NormalizedInputVariables = {} + for key, value in input_variables.items(): + normalized[key] = await self._normalize_input_variables_value_impl( + input_variable_value=value + ) + return normalized + + async def _normalize_input_variables_value_impl( + self, *, input_variable_value: Any + ) -> _NormalizedInputVariableValue: + if isinstance(input_variable_value, list): + return [ + await self._normalize_input_variables_value_impl( + input_variable_value=item + ) + for item in input_variable_value + ] + + if self._is_uploadable_file(input_variable_value): + return await self._upload_input_variable_file( + input_variable_value=input_variable_value + ) + + if isinstance(input_variable_value, dict): + normalized: dict[str, _NormalizedInputVariableValue] = {} + for key, value in input_variable_value.items(): + normalized[key] = await self._normalize_input_variables_value_impl( + input_variable_value=value + ) + return normalized + + return 
input_variable_value + + @staticmethod + def _is_uploadable_file(value: Any) -> TypeGuard[IO[Any]]: + # Keep runtime eligibility aligned with the existing file-upload transport. + return isinstance(value, IOBase) and hasattr(value, "name") + + async def _upload_input_variable_file( + self, *, input_variable_value: IO[Any] + ) -> _InputVariableFileReference: + filename = Path(input_variable_value.name).name + uploaded_file = await self._upload_file_impl(file=input_variable_value) + mime_type = mimetypes.guess_type(filename)[0] or "application/octet-stream" + return { + "source": "remoteDispatchUpload", + "id": uploaded_file["key"], + "filename": filename, + "mimeType": mime_type, + } + # `reasoning` is only valid with the Core Agent; these two overloads make # that constraint type-checkable. Generic-agent calls fall through to the # general overloads below, which do not accept a `reasoning` argument. @overload - async def dispatch_request( + async def _dispatch_request( self, *, prompt: str, - agent: Literal[Agent.CORE_AGENT], + agent: Literal[AgentKind.CORE_AGENT], reasoning: ReasoningEffort | None = None, clear_chat: bool | None = None, generate_gif: bool | None = None, @@ -298,11 +417,12 @@ async def dispatch_request( previous_request_id: str | None = None, chat_history: list[RemoteDispatchChatHistoryItem] | None = None, additional_context: dict[str, str] | None = None, + attachment: File | IO[Any] | None = None, time_zone: str = "America/Los_Angeles", user_resource_credentials: UserResourceCredentials | None = None, mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, - input_variables: dict[str, Any] | None = None, + input_variables: Mapping[str, Any] | None = None, callback_url: str | None = None, callback_secret: str | None = None, callback_headers: dict[str, Any] | None = None, @@ -311,11 +431,11 @@ async def dispatch_request( ) -> Response[None]: ... 
@overload - async def dispatch_request( + async def _dispatch_request( self, *, prompt: str, - agent: Literal[Agent.CORE_AGENT], + agent: Literal[AgentKind.CORE_AGENT], reasoning: ReasoningEffort | None = None, clear_chat: bool | None = None, generate_gif: bool | None = None, @@ -323,11 +443,12 @@ async def dispatch_request( previous_request_id: str | None = None, chat_history: list[RemoteDispatchChatHistoryItem] | None = None, additional_context: dict[str, str] | None = None, + attachment: File | IO[Any] | None = None, time_zone: str = "America/Los_Angeles", user_resource_credentials: UserResourceCredentials | None = None, mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, - input_variables: dict[str, Any] | None = None, + input_variables: Mapping[str, Any] | None = None, callback_url: str | None = None, callback_secret: str | None = None, callback_headers: dict[str, Any] | None = None, @@ -336,22 +457,23 @@ async def dispatch_request( ) -> Response[_StructuredOutput]: ... @overload - async def dispatch_request( + async def _dispatch_request( self, *, prompt: str, - agent: Agent | str = Agent.OPERATOR, + agent: AgentKind | str = AgentKind.OPERATOR, clear_chat: bool | None = None, generate_gif: bool | None = None, output_schema: None = None, previous_request_id: str | None = None, chat_history: list[RemoteDispatchChatHistoryItem] | None = None, additional_context: dict[str, str] | None = None, + attachment: File | IO[Any] | None = None, time_zone: str = "America/Los_Angeles", user_resource_credentials: UserResourceCredentials | None = None, mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, - input_variables: dict[str, Any] | None = None, + input_variables: Mapping[str, Any] | None = None, critic_context: dict[str, Any] | None = None, callback_url: str | None = None, callback_secret: str | None = None, @@ -361,22 +483,23 @@ async def dispatch_request( ) -> Response[None]: ... 
@overload - async def dispatch_request( + async def _dispatch_request( self, *, prompt: str, - agent: Agent | str = Agent.OPERATOR, + agent: AgentKind | str = AgentKind.OPERATOR, clear_chat: bool | None = None, generate_gif: bool | None = None, output_schema: type[_StructuredOutput], previous_request_id: str | None = None, chat_history: list[RemoteDispatchChatHistoryItem] | None = None, additional_context: dict[str, str] | None = None, + attachment: File | IO[Any] | None = None, time_zone: str = "America/Los_Angeles", user_resource_credentials: UserResourceCredentials | None = None, mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, - input_variables: dict[str, Any] | None = None, + input_variables: Mapping[str, Any] | None = None, critic_context: dict[str, Any] | None = None, callback_url: str | None = None, callback_secret: str | None = None, @@ -385,11 +508,11 @@ async def dispatch_request( timeout: int = 1000, ) -> Response[_StructuredOutput]: ... 
- async def dispatch_request( + async def _dispatch_request( self, *, prompt: str, - agent: Agent | str = Agent.OPERATOR, + agent: AgentKind | str = AgentKind.OPERATOR, reasoning: ReasoningEffort | None = None, clear_chat: bool | None = None, generate_gif: bool | None = None, @@ -397,11 +520,12 @@ async def dispatch_request( previous_request_id: str | None = None, chat_history: list[RemoteDispatchChatHistoryItem] | None = None, additional_context: dict[str, str] | None = None, + attachment: File | IO[Any] | None = None, time_zone: str = "America/Los_Angeles", user_resource_credentials: UserResourceCredentials | None = None, mcp_servers: list[McpServer] | None = None, secret_variables: dict[str, str] | None = None, - input_variables: dict[str, Any] | None = None, + input_variables: Mapping[str, Any] | None = None, critic_context: dict[str, Any] | None = None, callback_url: str | None = None, callback_secret: str | None = None, @@ -411,14 +535,16 @@ async def dispatch_request( ) -> Response: """Low-level API for invoking an agent in the Narada extension side panel chat. - The higher-level `agent` method should be preferred for most use cases. + The higher-level `Agent.run` method should be preferred for most use cases. """ # The overloads enforce this at type-check time when callers use - # ``Agent.CORE_AGENT``; the runtime check covers string-form agents + # ``AgentKind.CORE_AGENT``; the runtime check covers string-form agents # (``agent="..."``) and callers without a type checker. 
- if reasoning is not None and agent is not Agent.CORE_AGENT: + await self._ensure_initialized() + + if reasoning is not None and agent is not AgentKind.CORE_AGENT: raise ValueError( - "`reasoning` is only supported with `agent=Agent.CORE_AGENT` " + "`reasoning` is only supported with `agent=AgentKind.CORE_AGENT` " f"(got agent={agent!r})" ) # Trace instrumentation: the entire method body is wrapped so that any @@ -432,13 +558,15 @@ async def dispatch_request( headers = await self._get_auth_headers() agent_prefix = ( - agent.prompt_prefix() if isinstance(agent, Agent) else f"{agent} " + agent.prompt_prefix() if isinstance(agent, AgentKind) else f"{agent} " ) body: dict[str, Any] = { "prompt": agent_prefix + prompt, - "browserWindowId": self.browser_window_id, "timeZone": time_zone, } + browser_window_id = self._dispatch_browser_window_id + if browser_window_id is not None: + body["browserWindowId"] = browser_window_id parent_run_ids = self._current_parent_run_ids() if parent_run_ids: body["parentRunIds"] = parent_run_ids @@ -463,6 +591,11 @@ async def dispatch_request( body["chatHistory"] = chat_history if additional_context is not None: body["additionalContext"] = additional_context + if attachment is not None: + if hasattr(attachment, "read") and hasattr(attachment, "name"): + body["attachment"] = await self._upload_file_impl(file=attachment) + else: + body["attachment"] = attachment if user_resource_credentials is not None: body["userResourceCredentials"] = user_resource_credentials if mcp_servers is not None: @@ -472,7 +605,9 @@ async def dispatch_request( if secret_variables is not None: body["secretVariables"] = secret_variables if input_variables is not None: - body["inputVariables"] = input_variables + body["inputVariables"] = await self._normalize_input_variables( + input_variables=input_variables + ) if critic_context is not None: body["criticContext"] = critic_context if callback_url is not None: @@ -612,439 +747,10 @@ async def dispatch_request( ) raise 
- # `reasoning` is only valid with the Core Agent. See `dispatch_request` - # above for the rationale; the same overload pattern is mirrored here. - @overload - async def agent( - self, - *, - prompt: str, - agent: Literal[Agent.CORE_AGENT], - reasoning: ReasoningEffort | None = None, - clear_chat: bool | None = None, - generate_gif: bool | None = None, - output_schema: None = None, - time_zone: str = "America/Los_Angeles", - mcp_servers: list[McpServer] | None = None, - secret_variables: dict[str, str] | None = None, - input_variables: dict[str, Any] | None = None, - on_input_required: InputRequiredCallback | None = None, - timeout: int = 1000, - ) -> AgentResponse[dict[str, Any]]: ... - - @overload - async def agent( - self, - *, - prompt: str, - agent: Literal[Agent.CORE_AGENT], - reasoning: ReasoningEffort | None = None, - clear_chat: bool | None = None, - generate_gif: bool | None = None, - output_schema: type[_StructuredOutput], - time_zone: str = "America/Los_Angeles", - mcp_servers: list[McpServer] | None = None, - secret_variables: dict[str, str] | None = None, - input_variables: dict[str, Any] | None = None, - on_input_required: InputRequiredCallback | None = None, - timeout: int = 1000, - ) -> AgentResponse[_StructuredOutput]: ... - - @overload - async def agent( - self, - *, - prompt: str, - agent: Agent | str = Agent.OPERATOR, - clear_chat: bool | None = None, - generate_gif: bool | None = None, - output_schema: None = None, - time_zone: str = "America/Los_Angeles", - mcp_servers: list[McpServer] | None = None, - secret_variables: dict[str, str] | None = None, - input_variables: dict[str, Any] | None = None, - on_input_required: InputRequiredCallback | None = None, - critic: CriticConfig | None = None, - timeout: int = 1000, - ) -> AgentResponse[dict[str, Any]]: ... 
- - @overload - async def agent( - self, - *, - prompt: str, - agent: Agent | str = Agent.OPERATOR, - clear_chat: bool | None = None, - generate_gif: bool | None = None, - output_schema: type[_StructuredOutput], - time_zone: str = "America/Los_Angeles", - mcp_servers: list[McpServer] | None = None, - secret_variables: dict[str, str] | None = None, - input_variables: dict[str, Any] | None = None, - on_input_required: InputRequiredCallback | None = None, - critic: CriticConfig | None = None, - timeout: int = 1000, - ) -> AgentResponse[_StructuredOutput]: ... - - async def agent( - self, - *, - prompt: str, - agent: Agent | str = Agent.OPERATOR, - reasoning: ReasoningEffort | None = None, - clear_chat: bool | None = None, - generate_gif: bool | None = None, - output_schema: type[BaseModel] | None = None, - time_zone: str = "America/Los_Angeles", - mcp_servers: list[McpServer] | None = None, - secret_variables: dict[str, str] | None = None, - input_variables: dict[str, Any] | None = None, - on_input_required: InputRequiredCallback | None = None, - critic: CriticConfig | None = None, - timeout: int = 1000, - ) -> AgentResponse: - """Invokes an agent in the Narada extension side panel chat.""" - # Branch on `reasoning` so each call site binds a single, typed overload - # of `dispatch_request`. The validation also lives in `dispatch_request` - # itself (defense in depth + reachable when callers go straight to the - # low-level API), so the redundancy here is intentional. 
- if reasoning is None: - remote_dispatch_response = await self.dispatch_request( - prompt=prompt, - agent=agent, - clear_chat=clear_chat, - generate_gif=generate_gif, - output_schema=output_schema, - time_zone=time_zone, - mcp_servers=mcp_servers, - secret_variables=secret_variables, - input_variables=input_variables, - on_input_required=on_input_required, - timeout=timeout, - ) - else: - if agent is not Agent.CORE_AGENT: - raise ValueError( - "`reasoning` is only supported with `agent=Agent.CORE_AGENT` " - f"(got agent={agent!r})" - ) - # The CORE_AGENT-specific overloads of `dispatch_request` split on - # a narrower `output_schema` discriminator (None vs `type[T]`), - # which the impl's `type[BaseModel] | None` union doesn't cleanly - # narrow into without further branching. The public `agent()` - # overloads above already give callers correct return-type - # narrowing, so the internal forward call bypasses overload - # disambiguation on this single dimension. - remote_dispatch_response = await self.dispatch_request( # pyright: ignore[reportCallIssue] - prompt=prompt, - agent=agent, - reasoning=reasoning, - clear_chat=clear_chat, - generate_gif=generate_gif, - output_schema=output_schema, # pyright: ignore[reportArgumentType] - time_zone=time_zone, - mcp_servers=mcp_servers, - secret_variables=secret_variables, - input_variables=input_variables, - on_input_required=on_input_required, - timeout=timeout, - ) - response_content = remote_dispatch_response["response"] - assert response_content is not None - - action_trace_raw = response_content.get("actionTrace") - action_trace = ( - parse_action_trace(action_trace_raw) - if action_trace_raw is not None - else None - ) - workflow_trace = response_content.get("workflowTrace") - parent_request_id = self._current_parent_request_id() - # Preserve the response contract for direct callers, but avoid adding a second - # child node when the backend will stitch the child request into the parent row. 
- if workflow_trace is not None and parent_request_id is None: - _trace.emit_sub_workflow(workflow_trace=workflow_trace) - - critic_result: CriticResult | None = None - if critic is not None: - critic_result = await run_critic( - dispatch_request=self.dispatch_request, - original_prompt=prompt, - response_content=response_content, - action_trace_raw=action_trace_raw, - critic=critic, - time_zone=time_zone, - timeout=timeout, - ) - - return AgentResponse( - request_id=remote_dispatch_response["requestId"], - status=remote_dispatch_response["status"], - text=response_content["text"], - output=response_content.get("output"), - structured_output=response_content.get("structuredOutput"), - usage=AgentUsage.model_validate(remote_dispatch_response["usage"]), - action_trace=action_trace, - workflow_trace=workflow_trace, - critic_result=critic_result, - ) - - async def agentic_selector( - self, - *, - action: AgenticSelectorAction, - selectors: AgenticSelectors, - fallback_operator_query: str, - # Larger default timeout because Operator can take a bit to run. - timeout: int | None = 300, - ) -> AgenticSelectorResponse: - """Performs an action on an element specified by the given selectors, falling back to using - the Operator agent if the selectors fail to match a unique element. - - Returns AgenticSelectorResponse with the value for 'get_text' and 'get_property' actions, - otherwise returns None. 
- """ - response_model = ( - AgenticSelectorResponse - if action["type"] in {"get_text", "get_property"} - else None - ) - - result = await self._run_extension_action( - AgenticSelectorRequest( - action=action, - selectors=selectors, - fallback_operator_query=fallback_operator_query, - ), - response_model, - timeout=timeout, - ) - - if result is None: - return AgenticSelectorResponse(value=None) - - return result - - async def agentic_matching_selectors_finder( - self, - *, - prompt: str, - timeout: int | None = 300, - ) -> list[AgenticSelectors]: - """Finds all visible targets matching a prompt and returns selectors.""" - result = await self._run_extension_action( - AgenticMatchingSelectorsFinderRequest(prompt=prompt), - AgenticMatchingSelectorsFinderResponse, - timeout=timeout, - ) - return result.selectors - - async def agentic_mouse_action( - self, - *, - action: AgenticMouseAction, - recorded_click: RecordedClick, - resize_window: Optional[bool] = True, - fallback_operator_query: str, - timeout: int | None = 60, - ) -> None: - """Performs a mouse action at the specified click coordinates, falling back to using - the Operator agent if the click fails. 
- """ - return await self._run_extension_action( - AgenticMouseActionRequest( - action=action, - recorded_click=recorded_click, - resize_window=resize_window or True, - fallback_operator_query=fallback_operator_query, - ), - timeout=timeout, - ) - - async def close(self, *, timeout: int | None = None) -> None: + async def _close_browser_window(self, *, timeout: int | None = None) -> None: """Gracefully closes the current browser window.""" return await self._run_extension_action(CloseWindowRequest(), timeout=timeout) - async def go_to_url( - self, *, url: str, new_tab: bool = False, timeout: int | None = None - ) -> None: - """Navigates the active page in this window to the given URL.""" - return await self._run_extension_action( - GoToUrlRequest(url=url, new_tab=new_tab), timeout=timeout - ) - - async def wait_for_element( - self, - *, - selectors: AgenticSelectors, - state: Literal["visible", "hidden"], - timeout: int, - ) -> bool: - """Waits for an element matching the given selectors to reach the specified state. - - Returns True if the element was found, False if no selector matched before timeout. 
- """ - result = await self._run_extension_action( - WaitForElementRequest(selectors=selectors, state=state, timeout=timeout), - WaitForElementResponse, - timeout=timeout // 1000 + 30, - ) - if result is None: - return False - return result.found - - async def get_url(self, *, timeout: int | None = None) -> str: - """Gets the URL of the current active page.""" - result = await self._run_extension_action( - GetUrlRequest(), - GetUrlResponse, - timeout=timeout, - ) - return result.url - - async def print_message(self, *, message: str, timeout: int | None = None) -> None: - """Prints a message in the Narada extension side panel chat.""" - return await self._run_extension_action( - PrintMessageRequest(message=message), timeout=timeout - ) - - async def prompt_for_user_input( - self, - *, - step_id: str, - variables: list[PromptForUserInputVariable], - prompt_message: str | None = None, - timeout: int | None = DEFAULT_HITL_TIMEOUT_SECONDS, - ) -> dict[str, Any]: - """Prompts the user for one or more input values in the extension UI.""" - result = await self._run_extension_action( - PromptForUserInputRequest( - step_id=step_id, prompt_message=prompt_message, variables=variables - ), - PromptForUserInputResponse, - timeout=timeout, - ) - return result.values_by_name - - async def user_approval( - self, - *, - step_id: str, - prompt_message: str, - approve_label: str, - reject_label: str, - timeout: int | None = DEFAULT_HITL_TIMEOUT_SECONDS, - ) -> bool: - """Prompts the user to approve or reject in the extension UI.""" - result = await self._run_extension_action( - UserApprovalRequest( - step_id=step_id, - prompt_message=prompt_message, - approve_label=approve_label, - reject_label=reject_label, - ), - UserApprovalResponse, - timeout=timeout, - ) - return result.approved - - async def read_google_sheet( - self, - *, - spreadsheet_id: str, - range: str, - timeout: int | None = None, - ) -> ReadGoogleSheetResponse: - """Reads a range of cells from a Google Sheet.""" - 
return await self._run_extension_action( - ReadGoogleSheetRequest(spreadsheet_id=spreadsheet_id, range=range), - ReadGoogleSheetResponse, - timeout=timeout, - ) - - async def read_excel_sheet( - self, - *, - workbook_url: str, - range: str, - microsoft_account_email: str, - timeout: int | None = None, - ) -> ReadExcelSheetResponse: - """Reads a range of cells from a Microsoft Excel workbook.""" - return await self._run_extension_action( - ReadExcelSheetRequest( - workbook_url=workbook_url, - range=range, - microsoft_account_email=microsoft_account_email, - ), - ReadExcelSheetResponse, - timeout=timeout, - ) - - async def write_google_sheet( - self, - *, - spreadsheet_id: str, - range: str, - values: list[list[str]], - timeout: int | None = None, - ) -> None: - """Writes a range of cells to a Google Sheet.""" - return await self._run_extension_action( - WriteGoogleSheetRequest( - spreadsheet_id=spreadsheet_id, range=range, values=values - ), - timeout=timeout, - ) - - async def write_excel_sheet( - self, - *, - workbook_url: str, - range: str, - microsoft_account_email: str, - values: list[list[str]], - timeout: int | None = None, - ) -> None: - """Writes a range of cells to a Microsoft Excel workbook.""" - return await self._run_extension_action( - WriteExcelSheetRequest( - workbook_url=workbook_url, - range=range, - microsoft_account_email=microsoft_account_email, - values=values, - ), - timeout=timeout, - ) - - async def get_full_html(self, *, timeout: int | None = None) -> GetFullHtmlResponse: - """Gets the full HTML content of the current page.""" - return await self._run_extension_action( - GetFullHtmlRequest(), - GetFullHtmlResponse, - timeout=timeout, - ) - - async def get_simplified_html( - self, *, timeout: int | None = None - ) -> GetSimplifiedHtmlResponse: - """Gets the simplified HTML content of the current page.""" - return await self._run_extension_action( - GetSimplifiedHtmlRequest(), - GetSimplifiedHtmlResponse, - timeout=timeout, - ) - - async def 
get_screenshot( - self, *, timeout: int | None = None - ) -> GetScreenshotResponse: - """Takes a screenshot of the current browser window.""" - return await self._run_extension_action( - GetScreenshotRequest(), - GetScreenshotResponse, - timeout=timeout, - ) - @overload async def _run_extension_action( self, @@ -1070,6 +776,13 @@ async def _run_extension_action( *, timeout: int | None = None, ) -> _ResponseModel | None: + await self._ensure_initialized() + browser_window_id = self._dispatch_browser_window_id + if browser_window_id is None: + raise NaradaError( + f"{type(self).__name__} does not support browser extension actions" + ) + # Trace instrumentation: every exit path emits an ``extensionAction`` # trace event with a status matching the outcome. See `_trace.py`. trace_start_ms = _trace.now_ms() @@ -1079,7 +792,7 @@ async def _run_extension_action( body = { "action": request.model_dump(), - "browserWindowId": self.browser_window_id, + "browserWindowId": browser_window_id, } remote_dispatch_request_id = os.environ.get( _REMOTE_DISPATCH_REQUEST_ID_ENV_VAR @@ -1166,25 +879,70 @@ async def _run_extension_action( raise -class LocalBrowserWindow(BaseBrowserWindow): - def __init__(self) -> None: +class BaseBrowserEnvironment(Environment): + _browser_window_id: str | None + + def __init__( + self, + *, + api_key: str | None = None, + user_id: str | None = None, + env: Literal["prod", "dev", None] = None, + browser_window_id: str | None = None, + initialized: bool = False, + ) -> None: + super().__init__( + api_key=api_key, + user_id=user_id, + env=env, + ) + self._browser_window_id = browser_window_id + self._initialized = initialized + + @property + def browser_window_id(self) -> str: + if self._browser_window_id is None: + raise RuntimeError( + "Browser environment is not initialized yet. Call `await env.start()` " + "or run an agent action first." 
+ ) + return self._browser_window_id + + @property + def _dispatch_browser_window_id(self) -> str | None: + return self.browser_window_id + + +class BrowserEnvironment(BaseBrowserEnvironment): + def __init__( + self, + *, + api_key: str | None = None, + user_id: str | None = None, + env: Literal["prod", "dev", None] = None, + browser_window_id: str | None = None, + ) -> None: super().__init__( - api_key=os.environ.get("NARADA_API_KEY"), - base_url=os.getenv("NARADA_API_BASE_URL", "https://api.narada.ai/fast/v2"), - user_id=os.environ.get("NARADA_USER_ID"), - env=_normalize_narada_env(os.environ.get("NARADA_ENV")), - browser_window_id=os.environ["NARADA_BROWSER_WINDOW_ID"], + api_key=api_key, + user_id=user_id, + env=env, + browser_window_id=browser_window_id + or os.environ["NARADA_BROWSER_WINDOW_ID"], ) def __str__(self) -> str: - return f"LocalBrowserWindow(browser_window_id={self.browser_window_id})" + return f"BrowserEnvironment(browser_window_id={self.browser_window_id})" @override def _current_parent_run_ids(self) -> list[str] | None: return _parent_run_ids() + @override + async def _close_impl(self, *, timeout: int | None = None) -> None: + await self._close_browser_window(timeout=timeout) + -class RemoteBrowserWindow(BaseBrowserWindow): +class RemoteBrowserEnvironment(BaseBrowserEnvironment): def __init__( self, *, @@ -1195,22 +953,27 @@ def __init__( env: Literal["prod", "dev", None] = None, ) -> None: super().__init__( - api_key=api_key or os.environ.get("NARADA_API_KEY"), - base_url=os.getenv("NARADA_API_BASE_URL", "https://api.narada.ai/fast/v2"), - user_id=user_id or os.environ.get("NARADA_USER_ID"), - env=_normalize_narada_env(env or os.environ.get("NARADA_ENV")), + api_key=api_key, + user_id=user_id, + env=env, browser_window_id=browser_window_id, + initialized=True, ) self._cloud_browser_session_id = cloud_browser_session_id + @property + def _validates_sdk_config(self) -> bool: + return False + @property def cloud_browser_session_id(self) -> str | 
None: return self._cloud_browser_session_id - async def close(self, *, timeout: int | None = None) -> None: - """Closes the browser window or stops the backing cloud session.""" + @override + async def _close_impl(self, *, timeout: int | None = None) -> None: + """Closes the browser environment or stops the backing cloud session.""" if self._cloud_browser_session_id is None: - return await super().close(timeout=timeout) + return await self._close_browser_window(timeout=timeout) await _stop_cloud_browser_session( base_url=self._base_url, @@ -1233,58 +996,195 @@ async def get_downloaded_files(self) -> list[SessionDownloadItem]: ) def __str__(self) -> str: - return f"RemoteBrowserWindow(browser_window_id={self.browser_window_id})" + return f"RemoteBrowserEnvironment(browser_window_id={self.browser_window_id})" -class CloudBrowserWindow(BaseBrowserWindow): +class CloudBrowserEnvironment(BaseBrowserEnvironment): def __init__( self, *, - browser_window_id: str, - session_id: str, + session_name: str | None = None, + session_timeout: int | None = None, api_key: str | None = None, user_id: str | None = None, env: Literal["prod", "dev", None] = None, ) -> None: super().__init__( - api_key=api_key or os.environ.get("NARADA_API_KEY"), - base_url=os.getenv("NARADA_API_BASE_URL", "https://api.narada.ai/fast/v2"), - user_id=user_id or os.environ.get("NARADA_USER_ID"), - env=_normalize_narada_env(env or os.environ.get("NARADA_ENV")), - browser_window_id=browser_window_id, + api_key=api_key, + user_id=user_id, + env=env, ) - self._session_id = session_id + self._session_name = session_name + self._session_timeout = session_timeout + self._session_id: str | None = None @property def cloud_browser_session_id(self) -> str: + if self._session_id is None: + raise RuntimeError( + "Cloud browser environment is not initialized yet. Call `await env.start()` " + "or run an agent action first." 
+ ) return self._session_id - async def close(self, *, timeout: int | None = None) -> None: - """Stops the cloud browser session.""" - await _stop_cloud_browser_session( + async def _initialize(self) -> None: + response_data = await _create_and_initialize_cloud_browser_session( base_url=self._base_url, auth_headers=await self._get_auth_headers(), - session_id=self._session_id, - timeout=timeout, + session_name=self._session_name, + session_timeout=self._session_timeout, + require_extension=True, ) + self._browser_window_id = response_data["browser_window_id"] + self._session_id = response_data["session_id"] + + @override + async def _close_impl(self, *, timeout: int | None = None) -> None: + """Stops the cloud browser session.""" + if self._session_id is not None: + await _stop_cloud_browser_session( + base_url=self._base_url, + auth_headers=await self._get_auth_headers(), + session_id=self._session_id, + timeout=timeout, + ) async def get_downloaded_files(self) -> list[SessionDownloadItem]: """Return files downloaded during this cloud browser session (file name, size, presigned GET URL per file).""" return await _get_cloud_browser_downloads( base_url=self._base_url, auth_headers=await self._get_auth_headers(), - session_id=self._session_id, + session_id=self.cloud_browser_session_id, ) def __str__(self) -> str: return ( - "CloudBrowserWindow(" + "CloudBrowserEnvironment(" f"cloud_browser_session_id={self._session_id}, " f"browser_window_id={self.browser_window_id}" ")" ) +class LambdaEnvironment(Environment): + """Cloud execution environment without browser actions.""" + + def __init__( + self, + *, + session_name: str | None = None, + session_timeout: int | None = None, + api_key: str | None = None, + user_id: str | None = None, + env: Literal["prod", "dev", None] = None, + ) -> None: + super().__init__(api_key=api_key, user_id=user_id, env=env) + self._session_name = session_name + self._session_timeout = session_timeout + self._session_id: str | None = None + 
self._browser_window_id: str | None = None + + @property + def session_id(self) -> str: + if self._session_id is None: + raise RuntimeError( + "Lambda environment is not initialized yet. Call `await env.start()` " + "or run an agent first." + ) + return self._session_id + + @property + def cloud_browser_session_id(self) -> str | None: + return self._session_id + + @property + def _dispatch_browser_window_id(self) -> str | None: + return self._browser_window_id + + async def _initialize(self) -> None: + response_data = await _create_and_initialize_cloud_browser_session( + base_url=self._base_url, + auth_headers=await self._get_auth_headers(), + session_name=self._session_name, + session_timeout=self._session_timeout, + require_extension=False, + ) + self._browser_window_id = response_data["browser_window_id"] + self._session_id = response_data["session_id"] + + async def _close_impl(self, *, timeout: int | None = None) -> None: + if self._session_id is not None: + await _stop_cloud_browser_session( + base_url=self._base_url, + auth_headers=await self._get_auth_headers(), + session_id=self._session_id, + timeout=timeout, + ) + + async def get_downloaded_files(self) -> list[SessionDownloadItem]: + """Return files downloaded during this lambda session (file name, size, presigned GET URL per file).""" + return await _get_cloud_browser_downloads( + base_url=self._base_url, + auth_headers=await self._get_auth_headers(), + session_id=self.session_id, + ) + + +async def _create_and_initialize_cloud_browser_session( + *, + base_url: str, + auth_headers: dict[str, str], + session_name: str | None, + session_timeout: int | None, + require_extension: bool, +) -> dict[str, Any]: + endpoint_url = ( + f"{base_url}/cloud-browser/create-and-initialize-cloud-browser-session" + ) + request_body: dict[str, Any] = { + "session_name": session_name, + "session_timeout": session_timeout, + "require_extension": require_extension, + } + initiator_remote_dispatch_request_id = os.environ.get( 
+ "NARADA_INITIATOR_REMOTE_DISPATCH_REQUEST_ID", "" + ).strip() + if not initiator_remote_dispatch_request_id: + raise ValueError("NARADA_INITIATOR_REMOTE_DISPATCH_REQUEST_ID is required") + request_body["initiator_remote_dispatch_request_id"] = ( + initiator_remote_dispatch_request_id + ) + + response = None + max_attempts = 3 + retry_backoff_seconds = (2.0, 4.0, 0.0) # no wait after last attempt + for attempt in range(max_attempts): + # Due to unknown network issues, sometimes create-and-initialize-cloud-browser-session API call fails. + try: + response = await pyfetch( + endpoint_url, + method="POST", + headers=auth_headers, + body=json.dumps(request_body), + ) + if response.ok: + break + except Exception: + await asyncio.sleep(retry_backoff_seconds[attempt]) + continue + + if response is None or not response.ok: + resp_status = response.status if response is not None else "unknown status" + resp_text = await response.text() if response is not None else "unknown error" + raise RuntimeError( + "Failed to create and initialize cloud browser session after 3 attempts with backoff: " + f"{resp_status}: {resp_text}\n" + f"Endpoint URL: {endpoint_url}" + ) + + return await response.json() + + def _build_cloud_browser_url( base_url: str, path: str, *, params: dict[str, str] | None = None ) -> str: diff --git a/packages/narada-pyodide/tests/test_cloud_browser.py b/packages/narada-pyodide/tests/test_cloud_browser.py index 6da879c..72d9d07 100644 --- a/packages/narada-pyodide/tests/test_cloud_browser.py +++ b/packages/narada-pyodide/tests/test_cloud_browser.py @@ -8,6 +8,10 @@ from unittest.mock import AsyncMock import pytest +from narada_core.actions.models import ( + DEFAULT_HITL_TIMEOUT_SECONDS, + PromptForUserInputVariable, +) from packaging.version import InvalidVersion PROJECT_ROOT = Path(__file__).resolve().parents[3] @@ -44,6 +48,12 @@ def to_py(self) -> object: return self._value +def _sdk_config_response() -> _FakeResponse: + return _FakeResponse( + 
json_data={"packages": {"narada-pyodide": {"min_required_version": "0.0.1"}}} + ) + + def _clear_modules() -> None: for name in list(sys.modules): if name == "narada" or name.startswith("narada."): @@ -83,110 +93,116 @@ def new() -> SimpleNamespace: import importlib narada_pkg = importlib.import_module("narada") - client_module = importlib.import_module("narada.client") - window_module = importlib.import_module("narada.window") - window_module._narada_parent_run_ids = _FakeJsProxy([]) - window_module._narada_request_id = None + env_module = importlib.import_module("narada.environment") + env_module._narada_parent_run_ids = _FakeJsProxy([]) + env_module._narada_request_id = None monkeypatch.setattr(builtins, "_narada_request_id", None, raising=False) - window_module._narada_get_id_token = AsyncMock(return_value="frontend-id-token") - return narada_pkg, client_module, window_module + env_module._narada_get_id_token = AsyncMock(return_value="frontend-id-token") + return narada_pkg, env_module @pytest.mark.asyncio -async def test_open_and_initialize_cloud_browser_window_maps_response( +async def test_cloud_browser_environment_maps_backend_response( monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.setenv( "NARADA_INITIATOR_REMOTE_DISPATCH_REQUEST_ID", "request-maps-123" ) pyfetch = AsyncMock( - return_value=_FakeResponse( - json_data={ - "session_id": "session-123", - "session_name": "demo", - "browser_window_id": "browser-window-123", - } - ) + side_effect=[ + _sdk_config_response(), + _FakeResponse( + json_data={ + "session_id": "session-123", + "session_name": "demo", + "browser_window_id": "browser-window-123", + } + ), + ] ) - narada_pkg, _, _ = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) + narada_pkg, env_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) - client = narada_pkg.Narada(api_key="test-api-key") - window = await client.open_and_initialize_cloud_browser_window( + env = narada_pkg.CloudBrowserEnvironment( + 
api_key="test-api-key", session_name="demo", session_timeout=321, - require_extension=False, ) + await env.start() - assert isinstance(window, narada_pkg.CloudBrowserWindow) - assert window.browser_window_id == "browser-window-123" - assert window.cloud_browser_session_id == "session-123" + assert env.browser_window_id == "browser-window-123" + assert env.cloud_browser_session_id == "session-123" - call = pyfetch.await_args - assert call is not None - assert call.args[0].endswith( + create_call = pyfetch.await_args_list[1] + assert create_call.args[0].endswith( "/cloud-browser/create-and-initialize-cloud-browser-session" ) - assert call.kwargs["method"] == "POST" - assert call.kwargs["headers"] == { + assert create_call.kwargs["method"] == "POST" + assert create_call.kwargs["headers"] == { "Content-Type": "application/json", "x-api-key": "test-api-key", } - assert json.loads(call.kwargs["body"]) == { + assert json.loads(create_call.kwargs["body"]) == { "session_name": "demo", "session_timeout": 321, - "require_extension": False, + "require_extension": True, "initiator_remote_dispatch_request_id": "request-maps-123", } @pytest.mark.asyncio -async def test_open_and_initialize_cloud_browser_window_requires_initiator_env( +async def test_lambda_environment_uses_extensionless_initialization( monkeypatch: pytest.MonkeyPatch, ) -> None: - monkeypatch.delenv("NARADA_INITIATOR_REMOTE_DISPATCH_REQUEST_ID", raising=False) - pyfetch = AsyncMock() - narada_pkg, _, _ = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) + monkeypatch.setenv( + "NARADA_INITIATOR_REMOTE_DISPATCH_REQUEST_ID", "request-lambda-123" + ) + pyfetch = AsyncMock( + side_effect=[ + _sdk_config_response(), + _FakeResponse( + json_data={ + "session_id": "session-123", + "browser_window_id": "browser-window-123", + } + ), + ] + ) + narada_pkg, _ = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) - client = narada_pkg.Narada(api_key="test-api-key") - with pytest.raises(ValueError, 
match="NARADA_INITIATOR_REMOTE_DISPATCH_REQUEST_ID"): - await client.open_and_initialize_cloud_browser_window() + env = narada_pkg.LambdaEnvironment( + api_key="test-api-key", + session_name="lambda-session", + session_timeout=300, + ) + await env.start() - pyfetch.assert_not_awaited() + assert env.session_id == "session-123" + create_call = pyfetch.await_args_list[1] + assert json.loads(create_call.kwargs["body"]) == { + "session_name": "lambda-session", + "session_timeout": 300, + "require_extension": False, + "initiator_remote_dispatch_request_id": "request-lambda-123", + } @pytest.mark.asyncio -async def test_open_and_initialize_cloud_browser_window_includes_initiator_request( +async def test_cloud_browser_environment_requires_initiator_env( monkeypatch: pytest.MonkeyPatch, ) -> None: - monkeypatch.setenv( - "NARADA_INITIATOR_REMOTE_DISPATCH_REQUEST_ID", " request-local-123 " - ) - pyfetch = AsyncMock( - return_value=_FakeResponse( - json_data={ - "session_id": "session-123", - "session_name": "demo", - "browser_window_id": "browser-window-123", - } - ) - ) - narada_pkg, _, _ = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) + monkeypatch.delenv("NARADA_INITIATOR_REMOTE_DISPATCH_REQUEST_ID", raising=False) + pyfetch = AsyncMock(side_effect=[_sdk_config_response()]) + narada_pkg, _ = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) - client = narada_pkg.Narada(api_key="test-api-key") - await client.open_and_initialize_cloud_browser_window() + env = narada_pkg.CloudBrowserEnvironment(api_key="test-api-key") + with pytest.raises(ValueError, match="NARADA_INITIATOR_REMOTE_DISPATCH_REQUEST_ID"): + await env.start() - call = pyfetch.await_args - assert call is not None - assert json.loads(call.kwargs["body"]) == { - "session_name": None, - "session_timeout": None, - "require_extension": True, - "initiator_remote_dispatch_request_id": "request-local-123", - } + assert pyfetch.await_count == 1 @pytest.mark.asyncio -async def 
test_open_and_initialize_cloud_browser_window_supports_frontend_bearer_auth( +async def test_cloud_browser_environment_supports_frontend_bearer_auth( monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.delenv("NARADA_API_KEY", raising=False) @@ -198,11 +214,7 @@ async def test_open_and_initialize_cloud_browser_window_supports_frontend_bearer pyfetch = AsyncMock( side_effect=[ - _FakeResponse( - json_data={ - "packages": {"narada-pyodide": {"min_required_version": "0.0.1"}} - } - ), + _sdk_config_response(), _FakeResponse( json_data={ "session_id": "session-456", @@ -213,50 +225,23 @@ async def test_open_and_initialize_cloud_browser_window_supports_frontend_bearer _FakeResponse(json_data={"success": True}), ] ) - narada_pkg, _, _ = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) + narada_pkg, _ = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) - async with narada_pkg.Narada() as client: - window = await client.open_and_initialize_cloud_browser_window( - session_name="demo", - session_timeout=321, - require_extension=True, - ) - - assert isinstance(window, narada_pkg.CloudBrowserWindow) - assert window.browser_window_id == "browser-window-456" - assert window.cloud_browser_session_id == "session-456" - - sdk_config_call, create_call = pyfetch.await_args_list - assert sdk_config_call.args[0].endswith("/sdk/config") - assert sdk_config_call.kwargs["headers"] == { - "Content-Type": "application/json", - "Authorization": "Bearer frontend-id-token", - "X-Narada-User-ID": "user-123", - "X-Narada-Env": "dev", - } - assert create_call.args[0].endswith( - "/cloud-browser/create-and-initialize-cloud-browser-session" - ) - assert create_call.kwargs["headers"] == { - "Content-Type": "application/json", - "Authorization": "Bearer frontend-id-token", - "X-Narada-User-ID": "user-123", - "X-Narada-Env": "dev", - } + env = narada_pkg.CloudBrowserEnvironment(session_name="demo", session_timeout=321) + await env.start() + await env.close() - await window.close() - 
stop_call = pyfetch.await_args_list[-1] - assert stop_call.args[0].endswith("/cloud-browser/stop-cloud-browser-session") - assert stop_call.kwargs["headers"] == { - "Content-Type": "application/json", - "Authorization": "Bearer frontend-id-token", - "X-Narada-User-ID": "user-123", - "X-Narada-Env": "dev", - } + for call in pyfetch.await_args_list: + assert call.kwargs["headers"] == { + "Content-Type": "application/json", + "Authorization": "Bearer frontend-id-token", + "X-Narada-User-ID": "user-123", + "X-Narada-Env": "dev", + } @pytest.mark.asyncio -async def test_open_and_initialize_cloud_browser_window_raises_when_version_is_unknown( +async def test_cloud_browser_environment_raises_when_version_is_unknown( monkeypatch: pytest.MonkeyPatch, ) -> None: pyfetch = AsyncMock( @@ -266,44 +251,40 @@ async def test_open_and_initialize_cloud_browser_window_raises_when_version_is_u } ) ) - narada_pkg, client_module, _ = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) - monkeypatch.setattr(client_module, "__version__", "unknown") + narada_pkg, env_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) + monkeypatch.setattr(env_module, "__version__", "unknown") + env = narada_pkg.CloudBrowserEnvironment(api_key="test-api-key") with pytest.raises(InvalidVersion) as exc_info: - async with narada_pkg.Narada(api_key="test-api-key"): - pass + await env.start() assert "Invalid version: 'unknown'" in str(exc_info.value) assert pyfetch.await_count == 1 @pytest.mark.asyncio -async def test_cloud_browser_window_close_stops_cloud_session( +async def test_remote_browser_environment_close_stops_cloud_session( monkeypatch: pytest.MonkeyPatch, ) -> None: pyfetch = AsyncMock(return_value=_FakeResponse(json_data={"success": True})) - _, _, window_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) + narada_pkg, _ = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) - window = window_module.CloudBrowserWindow( + env = narada_pkg.RemoteBrowserEnvironment( 
browser_window_id="browser-window-123", - session_id="session-123", + cloud_browser_session_id="session-123", api_key="test-api-key", ) - await window.close() + await env.close() call = pyfetch.await_args assert call is not None assert call.args[0].endswith("/cloud-browser/stop-cloud-browser-session") assert call.kwargs["method"] == "POST" - assert call.kwargs["headers"] == { - "x-api-key": "test-api-key", - "Content-Type": "application/json", - } assert json.loads(call.kwargs["body"]) == {"session_id": "session-123"} @pytest.mark.asyncio -async def test_cloud_browser_window_dispatch_request_omits_parent_run_ids( +async def test_remote_browser_environment_dispatch_omits_parent_run_ids( monkeypatch: pytest.MonkeyPatch, ) -> None: pyfetch = AsyncMock( @@ -319,21 +300,18 @@ async def test_cloud_browser_window_dispatch_request_omits_parent_run_ids( ), ] ) - _, _, window_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) - window_module._narada_parent_run_ids = _FakeJsProxy(["outer-run", "inner-run"]) + narada_pkg, env_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) + env_module._narada_parent_run_ids = _FakeJsProxy(["outer-run", "inner-run"]) - window = window_module.CloudBrowserWindow( + env = narada_pkg.RemoteBrowserEnvironment( browser_window_id="browser-window-123", - session_id="session-123", + cloud_browser_session_id="session-123", api_key="test-api-key", ) - response = await window.dispatch_request(prompt="hello from cloud browser") + response = await env._dispatch_request(prompt="hello from cloud browser") assert response["status"] == "success" - post_call = pyfetch.await_args_list[0] - assert post_call.args[0].endswith("/remote-dispatch") - assert post_call.kwargs["method"] == "POST" - payload = json.loads(post_call.kwargs["body"]) + payload = json.loads(pyfetch.await_args_list[0].kwargs["body"]) assert payload["browserWindowId"] == "browser-window-123" assert payload["cloudBrowserSessionId"] == "session-123" assert payload["prompt"] 
== "/Operator hello from cloud browser" @@ -341,7 +319,7 @@ async def test_cloud_browser_window_dispatch_request_omits_parent_run_ids( @pytest.mark.asyncio -async def test_cloud_browser_window_dispatch_request_waits_through_active_input_required( +async def test_dispatch_request_waits_through_active_input_required( monkeypatch: pytest.MonkeyPatch, ) -> None: pyfetch = AsyncMock( @@ -399,17 +377,17 @@ async def test_cloud_browser_window_dispatch_request_waits_through_active_input_ ), ] ) - _, _, window_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) + narada_pkg, env_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) sleep = AsyncMock() on_input_required = AsyncMock() - monkeypatch.setattr(window_module.asyncio, "sleep", sleep) + monkeypatch.setattr(env_module.asyncio, "sleep", sleep) - window = window_module.CloudBrowserWindow( + env = narada_pkg.RemoteBrowserEnvironment( browser_window_id="browser-window-123", - session_id="session-123", + cloud_browser_session_id="session-123", api_key="test-api-key", ) - response = await window.dispatch_request( + response = await env._dispatch_request( prompt="hello from cloud browser", on_input_required=on_input_required, ) @@ -421,14 +399,10 @@ async def test_cloud_browser_window_dispatch_request_waits_through_active_input_ active_input_request = on_input_required.await_args.args[0] assert active_input_request.input_id == "input-123" assert active_input_request.action.name == "prompt_for_user_input" - first_poll_call = pyfetch.await_args_list[1] - second_poll_call = pyfetch.await_args_list[3] - assert first_poll_call.args[0].endswith("/remote-dispatch/responses/req-123") - assert second_poll_call.args[0].endswith("/remote-dispatch/responses/req-123") @pytest.mark.asyncio -async def test_cloud_browser_window_dispatch_request_keeps_parent_request_id( +async def test_agent_run_keeps_parent_request_id_from_injected_builtins( monkeypatch: pytest.MonkeyPatch, ) -> None: pyfetch = AsyncMock( @@ -437,35 
+411,39 @@ async def test_cloud_browser_window_dispatch_request_keeps_parent_request_id( _FakeResponse( json_data={ "status": "success", + "response": { + "text": "done", + "output": {"type": "text", "content": "done"}, + }, "completedAt": "2026-05-08T00:00:00+00:00", - "response": None, + "usage": {"actions": 0, "credits": 0}, "activeInputRequest": None, } ), ] ) - _, _, window_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) - window_module._narada_parent_run_ids = _FakeJsProxy(["outer-run", "inner-run"]) + narada_pkg, env_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) + env_module._narada_parent_run_ids = _FakeJsProxy(["outer-run", "inner-run"]) monkeypatch.setattr( builtins, "_narada_request_id", "parent-request-123", raising=False ) - window = window_module.CloudBrowserWindow( + env = narada_pkg.RemoteBrowserEnvironment( browser_window_id="browser-window-123", - session_id="session-123", + cloud_browser_session_id="session-123", api_key="test-api-key", ) - response = await window.dispatch_request(prompt="hello from cloud browser") + response = await narada_pkg.Agent(environment=env, kind="/$USER/gui-child").run( + "run gui child" + ) - assert response["status"] == "success" - post_call = pyfetch.await_args_list[0] - payload = json.loads(post_call.kwargs["body"]) + assert response.status == "success" + payload = json.loads(pyfetch.await_args_list[0].kwargs["body"]) assert payload["parentRequestId"] == "parent-request-123" - assert "parentRunIds" not in payload @pytest.mark.asyncio -async def test_window_agent_keeps_parent_request_id_from_injected_builtins( +async def test_agent_run_forwards_clear_chat( monkeypatch: pytest.MonkeyPatch, ) -> None: pyfetch = AsyncMock( @@ -485,27 +463,21 @@ async def test_window_agent_keeps_parent_request_id_from_injected_builtins( ), ] ) - _, _, window_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) - window_module._narada_parent_run_ids = _FakeJsProxy(["outer-run", "inner-run"]) - 
monkeypatch.setattr( - builtins, "_narada_request_id", "parent-request-123", raising=False - ) + narada_pkg, _ = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) - window = window_module.CloudBrowserWindow( + env = narada_pkg.RemoteBrowserEnvironment( browser_window_id="browser-window-123", - session_id="session-123", + cloud_browser_session_id="session-123", api_key="test-api-key", ) - response = await window.agent(prompt="run gui child", agent="/$USER/gui-child") + await narada_pkg.Agent(environment=env).run("fresh task", clear_chat=True) - assert response.status == "success" - post_call = pyfetch.await_args_list[0] - payload = json.loads(post_call.kwargs["body"]) - assert payload["parentRequestId"] == "parent-request-123" + payload = json.loads(pyfetch.await_args_list[0].kwargs["body"]) + assert payload["clearChat"] is True @pytest.mark.asyncio -async def test_window_agent_exposes_workflow_trace_alias( +async def test_agent_run_exposes_workflow_trace_alias( monkeypatch: pytest.MonkeyPatch, ) -> None: workflow_trace = {"step_type": "workflow", "children": []} @@ -527,7 +499,7 @@ async def test_window_agent_exposes_workflow_trace_alias( ), ] ) - _, _, window_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) + narada_pkg, _ = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) emitted_events: list[str] = [] monkeypatch.setattr( sys.modules["narada._trace"], @@ -536,12 +508,12 @@ async def test_window_agent_exposes_workflow_trace_alias( raising=False, ) - window = window_module.CloudBrowserWindow( + env = narada_pkg.RemoteBrowserEnvironment( browser_window_id="browser-window-123", - session_id="session-123", + cloud_browser_session_id="session-123", api_key="test-api-key", ) - response = await window.agent(prompt="return a trace") + response = await narada_pkg.Agent(environment=env).run("return a trace") assert response.workflow_trace == workflow_trace assert response.model_dump(by_alias=True)["workflowTrace"] == workflow_trace @@ -556,7 +528,7 @@ 
async def test_window_agent_exposes_workflow_trace_alias( @pytest.mark.asyncio -async def test_cloud_browser_window_dispatch_request_retries_poll_fetch_failures( +async def test_dispatch_request_retries_poll_fetch_failures( monkeypatch: pytest.MonkeyPatch, ) -> None: pyfetch = AsyncMock( @@ -574,7 +546,7 @@ async def test_cloud_browser_window_dispatch_request_retries_poll_fetch_failures ), ] ) - _, _, window_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) + narada_pkg, _ = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) sleep_delays: list[float] = [] async def fake_sleep(delay: float) -> None: @@ -583,12 +555,12 @@ async def fake_sleep(delay: float) -> None: retry_module = sys.modules["narada.retry"] monkeypatch.setattr(retry_module.asyncio, "sleep", fake_sleep) - window = window_module.CloudBrowserWindow( + env = narada_pkg.RemoteBrowserEnvironment( browser_window_id="browser-window-123", - session_id="session-123", + cloud_browser_session_id="session-123", api_key="test-api-key", ) - response = await window.dispatch_request(prompt="hello from cloud browser") + response = await env._dispatch_request(prompt="hello from cloud browser") assert response["status"] == "success" assert pyfetch.await_count == 4 @@ -636,7 +608,7 @@ async def test_dispatch_request_emits_string_trace_agent_type_for_sdk_enum( ), ] ) - narada_pkg, _, window_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) + narada_pkg, _ = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) emitted_events: list[str] = [] monkeypatch.setattr( sys.modules["narada._trace"], @@ -645,14 +617,14 @@ async def test_dispatch_request_emits_string_trace_agent_type_for_sdk_enum( raising=False, ) - window = window_module.CloudBrowserWindow( + env = narada_pkg.RemoteBrowserEnvironment( browser_window_id="browser-window-123", - session_id="session-123", + cloud_browser_session_id="session-123", api_key="test-api-key", ) - response = await window.dispatch_request( + response = await 
env._dispatch_request( prompt="hello from cloud browser", - agent=narada_pkg.Agent.OPERATOR, + agent=narada_pkg.AgentKind.OPERATOR, ) assert response["status"] == "success" @@ -664,7 +636,7 @@ async def test_dispatch_request_emits_string_trace_agent_type_for_sdk_enum( @pytest.mark.asyncio -async def test_dispatch_request_emits_success_text_in_sub_agent_trace( +async def test_dispatch_request_preserves_current_file_variable_shape( monkeypatch: pytest.MonkeyPatch, ) -> None: pyfetch = AsyncMock( @@ -674,166 +646,116 @@ async def test_dispatch_request_emits_success_text_in_sub_agent_trace( json_data={ "status": "success", "completedAt": "2026-05-08T00:00:00+00:00", - "response": { - "text": "TRACE_CORE_AGENT_DONE", - "actionTrace": [], - }, + "response": None, "activeInputRequest": None, } ), ] ) - narada_pkg, _, window_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) - emitted_events: list[str] = [] - monkeypatch.setattr( - sys.modules["narada._trace"], - "_narada_emit_trace_event", - emitted_events.append, - raising=False, - ) + narada_pkg, _ = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) - window = window_module.CloudBrowserWindow( + env = narada_pkg.RemoteBrowserEnvironment( browser_window_id="browser-window-123", - session_id="session-123", + cloud_browser_session_id="session-123", api_key="test-api-key", ) - response = await window.dispatch_request( - prompt="reply with marker", - agent=narada_pkg.Agent.CORE_AGENT, - ) + file_variable = { + "source": "agentStudioAttachment", + "id": "file-123", + "filename": "report.pdf", + "mimeType": "application/pdf", + "itemId": "workflow-123", + } - from narada_core.tracing.model import PythonSubAgentCallEvent + response = await env._dispatch_request( + prompt="summarize {{ $doc }}", + input_variables={"doc": file_variable}, + ) assert response["status"] == "success" - assert len(emitted_events) == 1 - event = json.loads(emitted_events[0]) - parsed_event = PythonSubAgentCallEvent.model_validate(event) - 
assert parsed_event.agent_type == "coreAgent" - assert parsed_event.text == "TRACE_CORE_AGENT_DONE" - assert parsed_event.action_trace == [] + payload = json.loads(pyfetch.await_args_list[0].kwargs["body"]) + assert payload["inputVariables"] == {"doc": file_variable} @pytest.mark.asyncio -async def test_dispatch_request_emits_input_required_sub_agent_trace( +async def test_dispatch_request_rejects_file_uploads_in_browser( monkeypatch: pytest.MonkeyPatch, ) -> None: - pyfetch = AsyncMock( - side_effect=[ - _FakeResponse(json_data={"requestId": "req-123"}), - _FakeResponse( - json_data={ - "status": "input-required", - "completedAt": "2026-05-08T00:00:00+00:00", - "response": { - "text": "TRACE_INPUT_REQUIRED", - "output": {"type": "text", "content": "TRACE_INPUT_REQUIRED"}, - }, - "activeInputRequest": None, - } - ), - ] - ) - _, _, window_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) - emitted_events: list[str] = [] - monkeypatch.setattr( - sys.modules["narada._trace"], - "_narada_emit_trace_event", - emitted_events.append, - raising=False, - ) + from io import BytesIO + + pyfetch = AsyncMock() + narada_pkg, _ = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) - window = window_module.CloudBrowserWindow( + env = narada_pkg.RemoteBrowserEnvironment( browser_window_id="browser-window-123", - session_id="session-123", + cloud_browser_session_id="session-123", api_key="test-api-key", ) - response = await window.dispatch_request(prompt="needs input") - - from narada_core.tracing.model import PythonSubAgentCallEvent - - assert response["status"] == "input-required" - assert len(emitted_events) == 1 - parsed_event = PythonSubAgentCallEvent.model_validate(json.loads(emitted_events[0])) - assert parsed_event.status == "input-required" - assert parsed_event.text == "TRACE_INPUT_REQUIRED" + # Reading file contents from disk is not possible in the browser, so passing a file-like + # object (rather than an already-uploaded reference) must fail fast instead 
of attempting + # an upload over the network. + file_obj = BytesIO(b"hello") + file_obj.name = "report.txt" -def test_parse_action_trace_preserves_run_custom_agent_children( - monkeypatch: pytest.MonkeyPatch, -) -> None: - monkeypatch.syspath_prepend(str(CORE_SRC)) - - from narada_core.tracing.model import parse_action_trace - - parsed_trace = parse_action_trace( - [ - { - "step_type": "runCustomAgent", - "url": "https://example.com", - "workflow_id": "workflow-parent", - "workflow_name": "Parent workflow", - "status": "success", - "children": [ - { - "step_type": "print", - "url": "https://example.com", - "message": "TRACE_GUI_CHILD_DONE", - } - ], - } - ] - ) + with pytest.raises( + NotImplementedError, match="not supported in the browser environment" + ): + await env._dispatch_request( + prompt="summarize {{ $doc }}", + input_variables={"doc": file_obj}, + ) - assert parsed_trace[0].step_type == "runCustomAgent" - assert parsed_trace[0].children is not None - assert parsed_trace[0].children[0].step_type == "print" - assert parsed_trace[0].children[0].message == "TRACE_GUI_CHILD_DONE" + pyfetch.assert_not_awaited() @pytest.mark.asyncio -async def test_cloud_browser_window_dispatch_request_preserves_current_file_variable_shape( +async def test_cloud_browser_downloads_return_presigned_urls( monkeypatch: pytest.MonkeyPatch, ) -> None: pyfetch = AsyncMock( side_effect=[ - _FakeResponse(json_data={"requestId": "req-123"}), _FakeResponse( json_data={ - "status": "success", - "completedAt": "2026-05-08T00:00:00+00:00", - "response": None, - "activeInputRequest": None, + "downloaded_files": [ + { + "file_name": "report.pdf", + "key": "downloads/session-123/report.pdf", + "size": 42, + } + ] } ), + _FakeResponse( + json_data={"presigned_url": "https://example.com/report.pdf"} + ), ] ) - _, _, window_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) + narada_pkg, env_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) - window = 
window_module.CloudBrowserWindow( + env = narada_pkg.RemoteBrowserEnvironment( browser_window_id="browser-window-123", - session_id="session-123", + cloud_browser_session_id="session-123", api_key="test-api-key", ) - file_variable = { - "source": "agentStudioAttachment", - "id": "file-123", - "filename": "report.pdf", - "mimeType": "application/pdf", - "itemId": "workflow-123", - } + files = await env.get_downloaded_files() - response = await window.dispatch_request( - prompt="summarize {{ $doc }}", - input_variables={"doc": file_variable}, + assert files == [ + env_module.SessionDownloadItem( + file_name="report.pdf", + size=42, + download_url="https://example.com/report.pdf", + ) + ] + first_call, second_call = pyfetch.await_args_list + assert first_call.args[0].endswith( + "/cloud-browser/replay/downloads?session_id=session-123" ) - - assert response["status"] == "success" - payload = json.loads(pyfetch.await_args_list[0].kwargs["body"]) - assert payload["inputVariables"] == {"doc": file_variable} + assert "key=downloads%2Fsession-123%2Freport.pdf" in second_call.args[0] @pytest.mark.asyncio -async def test_cloud_browser_window_get_downloaded_files_returns_presigned_urls( +async def test_lambda_environment_downloads_return_presigned_urls( monkeypatch: pytest.MonkeyPatch, ) -> None: pyfetch = AsyncMock( @@ -854,34 +776,28 @@ async def test_cloud_browser_window_get_downloaded_files_returns_presigned_urls( ), ] ) - _, _, window_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) + narada_pkg, env_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) - window = window_module.CloudBrowserWindow( - browser_window_id="browser-window-123", - session_id="session-123", - api_key="test-api-key", - ) - files = await window.get_downloaded_files() + env = narada_pkg.LambdaEnvironment(api_key="test-api-key") + env._session_id = "session-123" + files = await env.get_downloaded_files() assert files == [ - window_module.SessionDownloadItem( + 
env_module.SessionDownloadItem( file_name="report.pdf", size=42, download_url="https://example.com/report.pdf", ) ] - assert pyfetch.await_count == 2 first_call, second_call = pyfetch.await_args_list - assert "session_id=session-123" in first_call.args[0] assert first_call.args[0].endswith( "/cloud-browser/replay/downloads?session_id=session-123" ) - assert "session_id=session-123" in second_call.args[0] assert "key=downloads%2Fsession-123%2Freport.pdf" in second_call.args[0] @pytest.mark.asyncio -async def test_remote_browser_window_prompt_for_user_input_uses_hitl_default_timeout( +async def test_agent_prompt_for_user_input_uses_hitl_default_timeout( monkeypatch: pytest.MonkeyPatch, ) -> None: pyfetch = AsyncMock( @@ -892,30 +808,51 @@ async def test_remote_browser_window_prompt_for_user_input_uses_hitl_default_tim } ) ) - _, _, window_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) + narada_pkg, _ = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) - window = window_module.RemoteBrowserWindow( + env = narada_pkg.RemoteBrowserEnvironment( browser_window_id="browser-window-123", api_key="test-api-key", ) - values = await window.prompt_for_user_input( + agent = narada_pkg.Agent(environment=env) + values = await agent.prompt_for_user_input( step_id="input-step", variables=[ - window_module.PromptForUserInputVariable( - name="name", type="string", required=True - ), + PromptForUserInputVariable(name="name", type="string", required=True), ], ) assert values == {"name": "Narada"} - call = pyfetch.await_args - assert call is not None - payload = json.loads(call.kwargs["body"]) - assert payload["timeout"] == window_module.DEFAULT_HITL_TIMEOUT_SECONDS + payload = json.loads(pyfetch.await_args.kwargs["body"]) + assert payload["timeout"] == DEFAULT_HITL_TIMEOUT_SECONDS @pytest.mark.asyncio -async def test_remote_browser_window_user_approval_respects_explicit_timeout( +async def test_agentic_mouse_action_preserves_resize_window_false( + monkeypatch: 
pytest.MonkeyPatch, +) -> None: + pyfetch = AsyncMock( + return_value=_FakeResponse(json_data={"status": "success", "data": None}) + ) + narada_pkg, _ = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) + + env = narada_pkg.RemoteBrowserEnvironment( + browser_window_id="browser-window-123", + api_key="test-api-key", + ) + await narada_pkg.Agent(environment=env).agentic_mouse_action( + action={"type": "click"}, + recorded_click={"x": 500, "y": 300, "viewport": {"width": 1280, "height": 720}}, + fallback_operator_query="click the target", + resize_window=False, + ) + + payload = json.loads(pyfetch.await_args.kwargs["body"]) + assert payload["action"]["resize_window"] is False + + +@pytest.mark.asyncio +async def test_agent_user_approval_respects_explicit_timeout( monkeypatch: pytest.MonkeyPatch, ) -> None: pyfetch = AsyncMock( @@ -923,13 +860,13 @@ async def test_remote_browser_window_user_approval_respects_explicit_timeout( json_data={"status": "success", "data": '{"approved":true}'} ) ) - _, _, window_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) + narada_pkg, _ = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) - window = window_module.RemoteBrowserWindow( + env = narada_pkg.RemoteBrowserEnvironment( browser_window_id="browser-window-123", api_key="test-api-key", ) - approved = await window.user_approval( + approved = await narada_pkg.Agent(environment=env).user_approval( step_id="approval-step", prompt_message="Proceed?", approve_label="Approve", @@ -938,33 +875,27 @@ async def test_remote_browser_window_user_approval_respects_explicit_timeout( ) assert approved is True - call = pyfetch.await_args - assert call is not None - payload = json.loads(call.kwargs["body"]) + payload = json.loads(pyfetch.await_args.kwargs["body"]) assert payload["timeout"] == 600 @pytest.mark.asyncio -async def test_remote_browser_window_without_cloud_session_keeps_extension_action_close( +async def 
test_remote_browser_environment_without_cloud_session_uses_extension_close( monkeypatch: pytest.MonkeyPatch, ) -> None: pyfetch = AsyncMock( return_value=_FakeResponse(json_data={"status": "success", "data": None}) ) - _, _, window_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) - window_module._narada_parent_run_ids = _FakeJsProxy(["outer-run", "inner-run"]) + narada_pkg, env_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) + env_module._narada_parent_run_ids = _FakeJsProxy(["outer-run", "inner-run"]) - window = window_module.RemoteBrowserWindow( + env = narada_pkg.RemoteBrowserEnvironment( browser_window_id="browser-window-123", api_key="test-api-key", ) - await window.close() + await env.close() - call = pyfetch.await_args - assert call is not None - assert call.args[0].endswith("/extension-actions") - assert call.kwargs["method"] == "POST" - payload = json.loads(call.kwargs["body"]) + payload = json.loads(pyfetch.await_args.kwargs["body"]) assert payload["browserWindowId"] == "browser-window-123" assert payload["action"]["name"] == "close_window" assert "parentRunIds" not in payload @@ -979,88 +910,28 @@ async def test_extension_action_includes_remote_dispatch_context( pyfetch = AsyncMock( return_value=_FakeResponse(json_data={"status": "success", "data": None}) ) - _, _, window_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) + narada_pkg, _ = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) - window = window_module.RemoteBrowserWindow( + env = narada_pkg.RemoteBrowserEnvironment( browser_window_id="browser-window-123", api_key="test-api-key", ) - await window.close() + await env.close() - call = pyfetch.await_args - assert call is not None - payload = json.loads(call.kwargs["body"]) + payload = json.loads(pyfetch.await_args.kwargs["body"]) assert payload["requestId"] == "request-123" assert payload["apiKeyId"] == "api-key-123" @pytest.mark.asyncio -async def 
test_extension_action_prefers_remote_dispatch_request_id_over_parent_request_id( - monkeypatch: pytest.MonkeyPatch, -) -> None: - # In a nested remote-dispatch run, the env-injected request id (the request the - # external caller polls and the frontend status reporter targets) differs from the - # builtins parent request id (a separate observability dispatch id). The env value - # must win so input-required status is reported to the request the caller is polling. - monkeypatch.setenv( - "NARADA_REMOTE_DISPATCH_REQUEST_ID", "remote-dispatch-request-123" - ) - monkeypatch.setenv("NARADA_REMOTE_DISPATCH_API_KEY_ID", "api-key-123") - pyfetch = AsyncMock( - return_value=_FakeResponse(json_data={"status": "success", "data": None}) - ) - _, _, window_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) - monkeypatch.setattr( - builtins, "_narada_request_id", "observability-dispatch-456", raising=False - ) - - window = window_module.RemoteBrowserWindow( - browser_window_id="browser-window-123", - api_key="test-api-key", - ) - await window.close() - - call = pyfetch.await_args - assert call is not None - payload = json.loads(call.kwargs["body"]) - assert payload["requestId"] == "remote-dispatch-request-123" - assert payload["apiKeyId"] == "api-key-123" - - -@pytest.mark.asyncio -async def test_remote_browser_window_extension_action_keeps_parent_request_id( - monkeypatch: pytest.MonkeyPatch, -) -> None: - pyfetch = AsyncMock( - return_value=_FakeResponse(json_data={"status": "success", "data": None}) - ) - _, _, window_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) - window_module._narada_parent_run_ids = _FakeJsProxy(["outer-run", "inner-run"]) - monkeypatch.setattr( - builtins, "_narada_request_id", "parent-request-123", raising=False - ) - - window = window_module.RemoteBrowserWindow( - browser_window_id="browser-window-123", - api_key="test-api-key", - ) - await window.close() - - call = pyfetch.await_args - assert call is not None - payload = 
json.loads(call.kwargs["body"]) - assert payload["requestId"] == "parent-request-123" - assert "parentRunIds" not in payload - - -@pytest.mark.asyncio -async def test_local_browser_window_dispatch_request_uses_latest_parent_run_ids( +async def test_local_browser_environment_dispatch_uses_latest_parent_run_ids( monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.setenv("NARADA_API_KEY", "test-api-key") monkeypatch.setenv("NARADA_BROWSER_WINDOW_ID", "browser-window-123") pyfetch = AsyncMock( side_effect=[ + _sdk_config_response(), _FakeResponse(json_data={"requestId": "req-1"}), _FakeResponse( json_data={ @@ -1081,77 +952,46 @@ async def test_local_browser_window_dispatch_request_uses_latest_parent_run_ids( ), ] ) - _, _, window_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) + narada_pkg, env_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) - window = window_module.LocalBrowserWindow() + env = narada_pkg.BrowserEnvironment() - window_module._narada_parent_run_ids = _FakeJsProxy(["run-a"]) - first_response = await window.dispatch_request(prompt="first prompt") + env_module._narada_parent_run_ids = _FakeJsProxy(["run-a"]) + first_response = await env._dispatch_request(prompt="first prompt") - window_module._narada_parent_run_ids = _FakeJsProxy(["run-b", "run-c"]) - second_response = await window.dispatch_request(prompt="second prompt") + env_module._narada_parent_run_ids = _FakeJsProxy(["run-b", "run-c"]) + second_response = await env._dispatch_request(prompt="second prompt") assert first_response["status"] == "success" assert second_response["status"] == "success" - first_post = json.loads(pyfetch.await_args_list[0].kwargs["body"]) - second_post = json.loads(pyfetch.await_args_list[2].kwargs["body"]) + first_post = json.loads(pyfetch.await_args_list[1].kwargs["body"]) + second_post = json.loads(pyfetch.await_args_list[3].kwargs["body"]) assert first_post["parentRunIds"] == ["run-a"] assert second_post["parentRunIds"] == ["run-b", 
"run-c"] @pytest.mark.asyncio -async def test_local_browser_window_dispatch_request_includes_parent_request_id( +async def test_local_browser_environment_extension_action_includes_parent_request_id( monkeypatch: pytest.MonkeyPatch, ) -> None: monkeypatch.setenv("NARADA_API_KEY", "test-api-key") monkeypatch.setenv("NARADA_BROWSER_WINDOW_ID", "browser-window-123") pyfetch = AsyncMock( side_effect=[ - _FakeResponse(json_data={"requestId": "child-request-123"}), - _FakeResponse( - json_data={ - "status": "success", - "completedAt": "2026-05-08T00:00:00+00:00", - "response": None, - "activeInputRequest": None, - } - ), + _sdk_config_response(), + _FakeResponse(json_data={"status": "success", "data": None}), ] ) - _, _, window_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) - window_module._narada_parent_run_ids = _FakeJsProxy(["run-a"]) - monkeypatch.setattr( - builtins, "_narada_request_id", "parent-request-123", raising=False - ) - - window = window_module.LocalBrowserWindow() - response = await window.dispatch_request(prompt="child prompt") - - assert response["status"] == "success" - post_payload = json.loads(pyfetch.await_args_list[0].kwargs["body"]) - assert post_payload["parentRequestId"] == "parent-request-123" - assert post_payload["parentRunIds"] == ["run-a"] - - -@pytest.mark.asyncio -async def test_local_browser_window_extension_action_includes_parent_request_id( - monkeypatch: pytest.MonkeyPatch, -) -> None: - monkeypatch.setenv("NARADA_API_KEY", "test-api-key") - monkeypatch.setenv("NARADA_BROWSER_WINDOW_ID", "browser-window-123") - pyfetch = AsyncMock( - return_value=_FakeResponse(json_data={"status": "success", "data": None}) - ) - _, _, window_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) - window_module._narada_parent_run_ids = _FakeJsProxy(["run-a"]) + narada_pkg, env_module = _import_pyodide_narada(monkeypatch, pyfetch=pyfetch) + env_module._narada_parent_run_ids = _FakeJsProxy(["run-a"]) monkeypatch.setattr( builtins, 
"_narada_request_id", "parent-request-123", raising=False ) - window = window_module.LocalBrowserWindow() - await window.close() + env = narada_pkg.BrowserEnvironment() + await env.close() - post_payload = json.loads(pyfetch.await_args.kwargs["body"]) + post_payload = json.loads(pyfetch.await_args_list[1].kwargs["body"]) assert post_payload["requestId"] == "parent-request-123" assert post_payload["parentRunIds"] == ["run-a"] diff --git a/packages/narada/README.md b/packages/narada/README.md index b19fda4..29e5cce 100644 --- a/packages/narada/README.md +++ b/packages/narada/README.md @@ -33,28 +33,31 @@ After installation and login, create a Narada API Key (see [this link](https://d export NARADA_API_KEY= ``` -That's it. Now you can run the following code to spin up Narada to go and download a file for you from arxiv: +That's it. Now you can run the following code to create a browser environment and ask an +agent to download a file for you from arxiv: ```python import asyncio -from narada import Narada +from narada import Agent, BrowserEnvironment async def main() -> None: - # Initialize the Narada client. - async with Narada() as narada: - # Open a new browser window and initialize the Narada UI agent. - window = await narada.open_and_initialize_browser_window() + # Create the browser environment. It initializes lazily on the first action. + env = BrowserEnvironment() + agent = Agent(environment=env) - # Run a task in this browser window. - response = await window.agent( + try: + # Run a task in this browser environment. + response = await agent.run( prompt='Search for "LLM Compiler" on Google and open the first arXiv paper on the results page, then open the PDF. Then download the PDF of the paper.', # Optionally generate a GIF of the agent's actions. 
generate_gif=True, ) print("Response:", response.model_dump_json(indent=2)) + finally: + await env.close() if __name__ == "__main__": @@ -73,10 +76,15 @@ You can use the SDK to launch browsers and run automated tasks using natural lan ## Migration note -For releases `0.1.38` and later: +This version introduces a non-backward-compatible, agent-centered API: -- `variables` has been renamed to `secret_variables`. -- Use `input_variables` to pass structured values (objects/arrays) into custom agents. +- Create an execution target with an environment, such as `BrowserEnvironment`, + `CloudBrowserEnvironment`, `RemoteBrowserEnvironment`, or `LambdaEnvironment`. +- Create an `Agent(environment=env, kind=...)` and call `await agent.run(prompt=...)`. +- Browser actions such as `go_to_url`, `agentic_selector`, and sheet operations are now methods on + `Agent`. +- Environments keep lifecycle/bookkeeping APIs such as `start()`, `close()`, + `browser_window_id`, and `cloud_browser_session_id`. ## Features diff --git a/packages/narada/pyproject.toml b/packages/narada/pyproject.toml index ad1aa4b..bf7e817 100644 --- a/packages/narada/pyproject.toml +++ b/packages/narada/pyproject.toml @@ -1,13 +1,13 @@ [project] name = "narada" -version = "0.1.53a7" +version = "0.2.0" description = "Python client SDK for Narada" license = "Apache-2.0" readme = "README.md" authors = [{ name = "Narada", email = "support@narada.ai" }] requires-python = ">=3.12" dependencies = [ - "narada-core==0.0.27", + "narada-core==0.1.0", "aiohttp>=3.12.13", "playwright>=1.53.0", "rich>=14.0.0", diff --git a/packages/narada/src/narada/__init__.py b/packages/narada/src/narada/__init__.py index bf9c3a2..606236c 100644 --- a/packages/narada/src/narada/__init__.py +++ b/packages/narada/src/narada/__init__.py @@ -9,7 +9,7 @@ UserAbortedError, ) from narada_core.models import ( - Agent, + AgentKind, CriticConfig, File, ReasoningEffort, @@ -17,24 +17,35 @@ ResponseContent, ) -from narada.client import Narada +from 
narada.agent import Agent from narada.config import BrowserConfig, ProxyConfig +from narada.environment import ( + BaseBrowserEnvironment, + BrowserEnvironment, + CloudBrowserEnvironment, + Environment, + LambdaEnvironment, + RemoteBrowserEnvironment, + SessionDownloadItem, +) from narada.utils import download_file, render_html from narada.version import __version__ -from narada.window import CloudBrowserWindow, LocalBrowserWindow, RemoteBrowserWindow __all__ = [ "__version__", "ActiveInputRequest", "Agent", + "AgentKind", + "BaseBrowserEnvironment", "BrowserConfig", - "CloudBrowserWindow", + "BrowserEnvironment", + "CloudBrowserEnvironment", "CriticConfig", "CriticResult", "download_file", + "Environment", "File", - "LocalBrowserWindow", - "Narada", + "LambdaEnvironment", "NaradaError", "NaradaExtensionMissingError", "NaradaExtensionUnauthenticatedError", @@ -43,9 +54,10 @@ "NaradaUnsupportedBrowserError", "ProxyConfig", "ReasoningEffort", - "RemoteBrowserWindow", + "RemoteBrowserEnvironment", "render_html", "Response", "ResponseContent", + "SessionDownloadItem", "UserAbortedError", ] diff --git a/packages/narada/src/narada/agent.py b/packages/narada/src/narada/agent.py new file mode 100644 index 0000000..205b864 --- /dev/null +++ b/packages/narada/src/narada/agent.py @@ -0,0 +1,562 @@ +from __future__ import annotations + +from typing import IO, Any, Generic, Literal, Mapping, TypeVar, overload + +from narada_core.actions.critic import run_critic +from narada_core.actions.models import ( + DEFAULT_HITL_TIMEOUT_SECONDS, + AgenticMatchingSelectorsFinderRequest, + AgenticMatchingSelectorsFinderResponse, + AgenticMouseAction, + AgenticMouseActionRequest, + AgenticSelectorAction, + AgenticSelectorRequest, + AgenticSelectorResponse, + AgenticSelectors, + AgentResponse, + AgentUsage, + CriticResult, + GetFullHtmlRequest, + GetFullHtmlResponse, + GetScreenshotRequest, + GetScreenshotResponse, + GetSimplifiedHtmlRequest, + GetSimplifiedHtmlResponse, + GetUrlRequest, + 
GetUrlResponse, + GoToUrlRequest, + PrintMessageRequest, + PromptForUserInputRequest, + PromptForUserInputResponse, + PromptForUserInputVariable, + ReadExcelSheetRequest, + ReadExcelSheetResponse, + ReadGoogleSheetRequest, + ReadGoogleSheetResponse, + RecordedClick, + UserApprovalRequest, + UserApprovalResponse, + WaitForElementRequest, + WaitForElementResponse, + WriteExcelSheetRequest, + WriteGoogleSheetRequest, +) +from narada_core.models import ( + AgentKind, + CriticConfig, + File, + McpServer, + ReasoningEffort, + RemoteDispatchChatHistoryItem, + Response, + UserResourceCredentials, +) +from narada_core.tracing.model import parse_action_trace +from pydantic import BaseModel + +from narada.environment import ( + BaseBrowserEnvironment, + Environment, + InputRequiredCallback, +) + +_StructuredOutput = TypeVar("_StructuredOutput", bound=BaseModel) + + +class Agent(Generic[_StructuredOutput]): + def __init__( + self, + *, + environment: Environment, + kind: AgentKind | str = AgentKind.OPERATOR, + ) -> None: + self.environment = environment + self.kind = kind + + # `reasoning` is only valid with the Core Agent; these two overloads make + # that constraint type-checkable when callers construct a core-agent instance. 
+ @overload + async def run( + self, + prompt: str, + *, + reasoning: ReasoningEffort | None = None, + clear_chat: bool | None = None, + generate_gif: bool | None = None, + output_schema: None = None, + previous_request_id: str | None = None, + chat_history: list[RemoteDispatchChatHistoryItem] | None = None, + additional_context: dict[str, str] | None = None, + attachment: File | IO[Any] | None = None, + time_zone: str = "America/Los_Angeles", + user_resource_credentials: UserResourceCredentials | None = None, + mcp_servers: list[McpServer] | None = None, + secret_variables: dict[str, str] | None = None, + input_variables: Mapping[str, Any] | None = None, + callback_url: str | None = None, + callback_secret: str | None = None, + callback_headers: Mapping[str, Any] | None = None, + on_input_required: InputRequiredCallback | None = None, + critic: CriticConfig | None = None, + timeout: int = 1000, + ) -> AgentResponse[dict[str, Any]]: ... + + @overload + async def run( + self, + prompt: str, + *, + reasoning: ReasoningEffort | None = None, + clear_chat: bool | None = None, + generate_gif: bool | None = None, + output_schema: type[_StructuredOutput], + previous_request_id: str | None = None, + chat_history: list[RemoteDispatchChatHistoryItem] | None = None, + additional_context: dict[str, str] | None = None, + attachment: File | IO[Any] | None = None, + time_zone: str = "America/Los_Angeles", + user_resource_credentials: UserResourceCredentials | None = None, + mcp_servers: list[McpServer] | None = None, + secret_variables: dict[str, str] | None = None, + input_variables: Mapping[str, Any] | None = None, + callback_url: str | None = None, + callback_secret: str | None = None, + callback_headers: Mapping[str, Any] | None = None, + on_input_required: InputRequiredCallback | None = None, + critic: CriticConfig | None = None, + timeout: int = 1000, + ) -> AgentResponse[_StructuredOutput]: ... 
+
+    async def run(
+        self,
+        prompt: str,
+        *,
+        reasoning: ReasoningEffort | None = None,
+        clear_chat: bool | None = None,
+        generate_gif: bool | None = None,
+        output_schema: type[BaseModel] | None = None,
+        previous_request_id: str | None = None,
+        chat_history: list[RemoteDispatchChatHistoryItem] | None = None,
+        additional_context: dict[str, str] | None = None,
+        attachment: File | IO[Any] | None = None,
+        time_zone: str = "America/Los_Angeles",
+        user_resource_credentials: UserResourceCredentials | None = None,
+        mcp_servers: list[McpServer] | None = None,
+        secret_variables: dict[str, str] | None = None,
+        input_variables: Mapping[str, Any] | None = None,
+        callback_url: str | None = None,
+        callback_secret: str | None = None,
+        callback_headers: Mapping[str, Any] | None = None,
+        on_input_required: InputRequiredCallback | None = None,
+        critic: CriticConfig | None = None,
+        timeout: int = 1000,
+    ) -> AgentResponse:
+        """Invokes an agent in the bound Narada environment.
+
+        Dispatches ``prompt`` through ``self._dispatch_request``, parses the
+        optional ``actionTrace`` of the response, runs the critic when
+        ``critic`` is given, and packs the results into an ``AgentResponse``.
+
+        ``critic`` is consumed here and is not forwarded to
+        ``_dispatch_request``; every other keyword argument is forwarded
+        unchanged.
+        """
+        remote_dispatch_response = await self._dispatch_request(
+            prompt=prompt,
+            clear_chat=clear_chat,
+            generate_gif=generate_gif,
+            output_schema=output_schema,
+            previous_request_id=previous_request_id,
+            chat_history=chat_history,
+            additional_context=additional_context,
+            attachment=attachment,
+            time_zone=time_zone,
+            user_resource_credentials=user_resource_credentials,
+            mcp_servers=mcp_servers,
+            secret_variables=secret_variables,
+            input_variables=input_variables,
+            callback_url=callback_url,
+            callback_secret=callback_secret,
+            callback_headers=callback_headers,
+            on_input_required=on_input_required,
+            reasoning=reasoning,
+            timeout=timeout,
+        )
+        response_content = remote_dispatch_response["response"]
+        # NOTE(review): `assert` is stripped under `python -O`; a None response
+        # would then surface as an AttributeError on `.get` below.
+        assert response_content is not None
+
+        # The action trace is optional; it is only parsed when present.
+        action_trace_raw = response_content.get("actionTrace")
+        action_trace = (
+            parse_action_trace(action_trace_raw)
+            if action_trace_raw is not None
+            else None
+        )
+        workflow_trace = response_content.get("workflowTrace")
+
+        # The critic is opt-in; it receives this agent's own dispatch function
+        # together with the original prompt and the raw (unparsed) trace.
+        critic_result: CriticResult | None = None
+        if critic is not None:
+            critic_result = await run_critic(
+                dispatch_request=self._dispatch_request,
+                original_prompt=prompt,
+                response_content=response_content,
+                action_trace_raw=action_trace_raw,
+                critic=critic,
+                time_zone=time_zone,
+                timeout=timeout,
+            )
+
+        return AgentResponse(
+            request_id=remote_dispatch_response["requestId"],
+            status=remote_dispatch_response["status"],
+            text=response_content["text"],
+            output=response_content["output"],
+            structured_output=response_content.get("structuredOutput"),
+            usage=AgentUsage.model_validate(remote_dispatch_response["usage"]),
+            action_trace=action_trace,
+            workflow_trace=workflow_trace,
+            critic_result=critic_result,
+        )
+
+    async def _dispatch_request(
+        self,
+        *,
+        prompt: str,
+        agent: AgentKind | str | None = None,
+        reasoning: ReasoningEffort | None = None,
+        clear_chat: bool | None = None,
+        generate_gif: bool | None = None,
+        output_schema: type[BaseModel] | None = None,
+        previous_request_id: str | None = None,
+        chat_history: list[RemoteDispatchChatHistoryItem] | None = None,
+        additional_context: dict[str, str] | None = None,
+        attachment: File | IO[Any] | None = None,
+        time_zone: str = "America/Los_Angeles",
+        user_resource_credentials: UserResourceCredentials | None = None,
+        mcp_servers: list[McpServer] | None = None,
+        secret_variables: dict[str, str] | None = None,
+        input_variables: Mapping[str, Any] | None = None,
+        callback_url: str | None = None,
+        callback_secret: str | None = None,
+        callback_headers: Mapping[str, Any] | None = None,
+        on_input_required: InputRequiredCallback | None = None,
+        critic_context: dict[str, Any] | None = None,
+        timeout: int = 1000,
+    ) -> Response:
+        """Forwards a dispatch request to the bound environment.
+
+        Uses ``self.kind`` as the agent when ``agent`` is None. When
+        ``reasoning`` is None it is not passed to the environment at all;
+        otherwise it is forwarded and the resolved agent must be
+        ``AgentKind.CORE_AGENT``.
+
+        Raises:
+            ValueError: If ``reasoning`` is set and the resolved agent is not
+                ``AgentKind.CORE_AGENT``.
+        """
+        dispatch_agent = self.kind if agent is None else agent
+        # Branch on `reasoning` so each call site binds a single, typed overload
+        # of `_dispatch_request`. The validation also lives in `_dispatch_request`
+        # itself (defense in depth + reachable when callers go straight to the
+        # low-level API), so the redundancy here is intentional.
+        if reasoning is None:
+            remote_dispatch_response = await self.environment._dispatch_request(
+                prompt=prompt,
+                agent=dispatch_agent,
+                clear_chat=clear_chat,
+                generate_gif=generate_gif,
+                output_schema=output_schema,
+                previous_request_id=previous_request_id,
+                chat_history=chat_history,
+                additional_context=additional_context,
+                attachment=attachment,
+                time_zone=time_zone,
+                user_resource_credentials=user_resource_credentials,
+                mcp_servers=mcp_servers,
+                secret_variables=secret_variables,
+                input_variables=input_variables,
+                callback_url=callback_url,
+                callback_secret=callback_secret,
+                callback_headers=callback_headers,
+                on_input_required=on_input_required,
+                critic_context=critic_context,
+                timeout=timeout,
+            )
+        else:
+            # NOTE(review): this is an identity check. Because `agent` may also
+            # be a plain `str`, any string value (even one naming the core
+            # agent) is rejected here -- confirm that is intended.
+            if dispatch_agent is not AgentKind.CORE_AGENT:
+                raise ValueError(
+                    "`reasoning` is only supported with `kind=AgentKind.CORE_AGENT` "
+                    f"(got kind={dispatch_agent!r})"
+                )
+            # The CORE_AGENT-specific overloads of `_dispatch_request` split on
+            # a narrower `output_schema` discriminator (None vs `type[T]`),
+            # which the impl's `type[BaseModel] | None` union doesn't cleanly
+            # narrow into without further branching. The public `run()`
+            # overloads above already give callers correct return-type
+            # narrowing, so the internal forward call bypasses overload
+            # disambiguation on this single dimension.
+            remote_dispatch_response = await self.environment._dispatch_request(  # pyright: ignore[reportCallIssue]
+                prompt=prompt,
+                agent=dispatch_agent,
+                reasoning=reasoning,
+                clear_chat=clear_chat,
+                generate_gif=generate_gif,
+                output_schema=output_schema,  # pyright: ignore[reportArgumentType]
+                previous_request_id=previous_request_id,
+                chat_history=chat_history,
+                additional_context=additional_context,
+                attachment=attachment,
+                time_zone=time_zone,
+                user_resource_credentials=user_resource_credentials,
+                mcp_servers=mcp_servers,
+                secret_variables=secret_variables,
+                input_variables=input_variables,
+                callback_url=callback_url,
+                callback_secret=callback_secret,
+                callback_headers=callback_headers,
+                on_input_required=on_input_required,
+                critic_context=critic_context,
+                timeout=timeout,
+            )
+        return remote_dispatch_response
+
+    def _browser_environment(self) -> BaseBrowserEnvironment:
+        """Returns the bound environment narrowed to ``BaseBrowserEnvironment``.
+
+        Raises:
+            ValueError: If the bound environment is not a
+                ``BaseBrowserEnvironment``.
+        """
+        if not isinstance(self.environment, BaseBrowserEnvironment):
+            raise ValueError(
+                f"{type(self.environment).__name__} does not support browser actions"
+            )
+        return self.environment
+
+    async def agentic_selector(
+        self,
+        *,
+        action: AgenticSelectorAction,
+        selectors: AgenticSelectors,
+        fallback_operator_query: str,
+        # Larger default timeout because Operator can take a bit to run.
+        timeout: int | None = 300,
+    ) -> AgenticSelectorResponse:
+        """Performs an action on an element specified by the given selectors, falling back to using
+        the Operator agent if the selectors fail to match a unique element.
+        """
+        # Only the value-returning actions get a response model; for every other
+        # action type `response_model` is None.
+        response_model = (
+            AgenticSelectorResponse
+            if action["type"] in {"get_text", "get_property"}
+            else None
+        )
+        result = await self._browser_environment()._run_extension_action(
+            AgenticSelectorRequest(
+                action=action,
+                selectors=selectors,
+                fallback_operator_query=fallback_operator_query,
+            ),
+            response_model=response_model,
+            timeout=timeout,
+        )
+
+        # Normalize a missing result so callers always receive an
+        # `AgenticSelectorResponse`.
+        if result is None:
+            return AgenticSelectorResponse(value=None)
+
+        return result
+
+    async def agentic_matching_selectors_finder(
+        self,
+        *,
+        prompt: str,
+        timeout: int | None = 300,
+    ) -> list[AgenticSelectors]:
+        """Finds all visible targets matching a prompt and returns selectors."""
+        result = await self._browser_environment()._run_extension_action(
+            AgenticMatchingSelectorsFinderRequest(prompt=prompt),
+            AgenticMatchingSelectorsFinderResponse,
+            timeout=timeout,
+        )
+        return result.selectors
+
+    async def agentic_mouse_action(
+        self,
+        *,
+        action: AgenticMouseAction,
+        recorded_click: RecordedClick,
+        fallback_operator_query: str,
+        resize_window: bool = True,
+        timeout: int | None = 60,
+    ) -> None:
+        """Performs a mouse action at the specified click coordinates, falling back to using
+        the Operator agent if the click fails.
+        """
+        return await self._browser_environment()._run_extension_action(
+            AgenticMouseActionRequest(
+                action=action,
+                recorded_click=recorded_click,
+                resize_window=resize_window,
+                fallback_operator_query=fallback_operator_query,
+            ),
+            timeout=timeout,
+        )
+
+    async def go_to_url(
+        self, *, url: str, new_tab: bool = False, timeout: int | None = None
+    ) -> None:
+        """Navigates the active page in this window to the given URL."""
+        return await self._browser_environment()._run_extension_action(
+            GoToUrlRequest(url=url, new_tab=new_tab), timeout=timeout
+        )
+
+    async def wait_for_element(
+        self,
+        *,
+        selectors: AgenticSelectors,
+        state: Literal["visible", "hidden"],
+        timeout: int,
+    ) -> bool:
+        """Waits for an element matching the given selectors to reach the specified state.
+
+        Returns True if the element was found, False if no selector matched before timeout.
+        """
+        # NOTE(review): `timeout` appears to be in milliseconds while the
+        # extension-action timeout is in seconds (hence `// 1000`), with 30
+        # extra added as slack -- confirm the units.
+        result = await self._browser_environment()._run_extension_action(
+            WaitForElementRequest(selectors=selectors, state=state, timeout=timeout),
+            WaitForElementResponse,
+            timeout=timeout // 1000 + 30,
+        )
+        if result is None:
+            return False
+        return result.found
+
+    async def get_url(self, *, timeout: int | None = None) -> GetUrlResponse:
+        """Gets the URL of the current active page."""
+        return await self._browser_environment()._run_extension_action(
+            GetUrlRequest(),
+            GetUrlResponse,
+            timeout=timeout,
+        )
+
+    async def print_message(self, *, message: str, timeout: int | None = None) -> None:
+        """Prints a message in the Narada extension side panel chat."""
+        return await self._browser_environment()._run_extension_action(
+            PrintMessageRequest(message=message), timeout=timeout
+        )
+
+    async def prompt_for_user_input(
+        self,
+        *,
+        step_id: str,
+        variables: list[PromptForUserInputVariable],
+        prompt_message: str | None = None,
+        timeout: int | None = DEFAULT_HITL_TIMEOUT_SECONDS,
+    ) -> dict[str, Any]:
+        """Prompts the user for one or more input values in the extension UI."""
+        result = await self._browser_environment()._run_extension_action(
+            PromptForUserInputRequest(
+                step_id=step_id, prompt_message=prompt_message, variables=variables
+            ),
+            PromptForUserInputResponse,
+            timeout=timeout,
+        )
+        return result.values_by_name
+
+    async def user_approval(
+        self,
+        *,
+        step_id: str,
+        prompt_message: str,
+        approve_label: str,
+        reject_label: str,
+        timeout: int | None = DEFAULT_HITL_TIMEOUT_SECONDS,
+    ) -> bool:
+        """Prompts the user to approve or reject in the extension UI."""
+        result = await self._browser_environment()._run_extension_action(
+            UserApprovalRequest(
+                step_id=step_id,
+                prompt_message=prompt_message,
+                approve_label=approve_label,
+                reject_label=reject_label,
+            ),
+            UserApprovalResponse,
+            timeout=timeout,
+        )
+        return result.approved
+
+    async def read_google_sheet(
+        self,
+        *,
+        spreadsheet_id: str,
+        range: str,
+        timeout: int | None = None,
+    ) -> ReadGoogleSheetResponse:
+        """Reads a range of cells from a Google Sheet."""
+        return await self._browser_environment()._run_extension_action(
+            ReadGoogleSheetRequest(spreadsheet_id=spreadsheet_id, range=range),
+            ReadGoogleSheetResponse,
+            timeout=timeout,
+        )
+
+    async def read_excel_sheet(
+        self,
+        *,
+        workbook_url: str,
+        range: str,
+        microsoft_account_email: str,
+        timeout: int | None = None,
+    ) -> ReadExcelSheetResponse:
+        """Reads a range of cells from a Microsoft Excel workbook."""
+        return await self._browser_environment()._run_extension_action(
+            ReadExcelSheetRequest(
+                workbook_url=workbook_url,
+                range=range,
+                microsoft_account_email=microsoft_account_email,
+            ),
+            ReadExcelSheetResponse,
+            timeout=timeout,
+        )
+
+    async def write_google_sheet(
+        self,
+        *,
+        spreadsheet_id: str,
+        range: str,
+        values: list[list[str]],
+        timeout: int | None = None,
+    ) -> None:
+        """Writes a range of cells to a Google Sheet."""
+        return await self._browser_environment()._run_extension_action(
+            WriteGoogleSheetRequest(
+                spreadsheet_id=spreadsheet_id, range=range, values=values
+            ),
+            timeout=timeout,
+        )
+
+    async def write_excel_sheet(
+        self,
+        *,
+        workbook_url: str,
+        range: str,
+        microsoft_account_email: str,
+        values: list[list[str]],
+        timeout: int | None = None,
+    ) -> None:
+        """Writes a range of cells to a Microsoft Excel workbook."""
+        return await self._browser_environment()._run_extension_action(
+            WriteExcelSheetRequest(
+                workbook_url=workbook_url,
+                range=range,
+                microsoft_account_email=microsoft_account_email,
+                values=values,
+            ),
+            timeout=timeout,
+        )
+
+    async def get_full_html(self, *, timeout: int | None = None) -> GetFullHtmlResponse:
+        """Gets the full HTML content of the current page."""
+        return await self._browser_environment()._run_extension_action(
+            GetFullHtmlRequest(),
+            GetFullHtmlResponse,
+            timeout=timeout,
+        )
+
+    async def get_simplified_html(
+        self, *, timeout: int | None = None
+    ) -> GetSimplifiedHtmlResponse:
+        """Gets the simplified HTML content of the current page."""
+        return await self._browser_environment()._run_extension_action(
+            GetSimplifiedHtmlRequest(),
+            GetSimplifiedHtmlResponse,
+            timeout=timeout,
+        )
+
+    async def get_screenshot(
+        self, *, timeout: int | None = None
+    ) -> GetScreenshotResponse:
+        """Takes a screenshot of the current browser window."""
+        return await self._browser_environment()._run_extension_action(
+            GetScreenshotRequest(),
+            GetScreenshotResponse,
+            timeout=timeout,
+        )
+
+    async def reset_agent_state(self) -> None:
+        """Calls ``reset_agent_state`` on the bound browser environment.
+
+        Raises:
+            ValueError: If the bound environment is not a browser environment,
+                or if it has no ``reset_agent_state`` attribute.
+        """
+        env = self._browser_environment()
+        # Not every browser environment is required to expose this method, so
+        # its presence is checked at runtime.
+        if not hasattr(env, "reset_agent_state"):
+            raise ValueError(
+                f"{type(self.environment).__name__} does not support agent state reset"
+            )
+        await env.reset_agent_state()  # type: ignore[attr-defined]
diff --git a/packages/narada/src/narada/client.py b/packages/narada/src/narada/client.py
deleted file mode 100644
index 1068201..0000000
--- a/packages/narada/src/narada/client.py
+++ /dev/null
@@ -1,773 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-import json
-import logging
-import os -import subprocess -import sys -from dataclasses import dataclass -from http import HTTPStatus -from typing import Any -from uuid import uuid4 - -import aiohttp -from narada_core.errors import ( - NaradaExtensionMissingError, - NaradaExtensionUnauthenticatedError, - NaradaInitializationError, - NaradaTimeoutError, - NaradaUnsupportedBrowserError, -) -from narada_core.models import _SdkConfig -from packaging.version import Version -from playwright._impl._errors import Error as PlaywrightError -from playwright.async_api import ( - Browser, - CDPSession, - ElementHandle, - Page, - Playwright, - async_playwright, -) -from playwright.async_api import TimeoutError as PlaywrightTimeoutError -from playwright.async_api._context_manager import PlaywrightContextManager -from pydantic import BaseModel, ValidationError -from rich.console import Console - -from narada.config import BrowserConfig, ProxyConfig -from narada.utils import assert_never, assert_not_none -from narada.version import __version__ -from narada.window import ( - CloudBrowserWindow, - LocalBrowserWindow, - create_side_panel_url, -) - - -@dataclass -class _LaunchBrowserResult: - browser_process_id: int - browser_window_id: str - side_panel_page: Page - - -class ApiErrorPayload(BaseModel): - detail: Any | None = None - - @classmethod - def from_error_text(cls, error_text: str | None) -> ApiErrorPayload: - if not error_text: - return cls() - - try: - return cls.model_validate_json(error_text) - except ValidationError: - try: - body = json.loads(error_text) - except (ValueError, TypeError): - return cls() - - if isinstance(body, dict): - return cls(detail=body.get("detail", body)) - - return cls() - - -class Narada: - _BROWSER_WINDOW_ID_SELECTOR = "#narada-browser-window-id" - _UNSUPPORTED_BROWSER_INDICATOR_SELECTOR = "#narada-unsupported-browser" - _EXTENSION_MISSING_INDICATOR_SELECTOR = "#narada-extension-missing" - _EXTENSION_UNAUTHENTICATED_INDICATOR_SELECTOR = "#narada-extension-unauthenticated" - 
_INITIALIZATION_ERROR_INDICATOR_SELECTOR = "#narada-initialization-error" - - _auth_headers: dict[str, str] - _console: Console - _playwright_context_manager: PlaywrightContextManager | None = None - _playwright: Playwright | None = None - - def __init__( - self, - *, - api_key: str | None = None, - auth_headers: dict[str, str] | None = None, - ) -> None: - if auth_headers is not None: - self._auth_headers = auth_headers - else: - api_key = api_key or os.environ["NARADA_API_KEY"] - self._auth_headers = {"x-api-key": api_key} - self._console = Console() - - async def __aenter__(self) -> Narada: - await self._validate_sdk_config() - - self._playwright_context_manager = async_playwright() - self._playwright = await self._playwright_context_manager.__aenter__() - return self - - async def __aexit__(self, *args: Any) -> None: - if self._playwright_context_manager is None: - return - - await self._playwright_context_manager.__aexit__(*args) - self._playwright_context_manager = None - self._playwright = None - - async def _fetch_sdk_config(self) -> _SdkConfig | None: - base_url = os.getenv("NARADA_API_BASE_URL", "https://api.narada.ai/fast/v2") - url = f"{base_url}/sdk/config" - - try: - async with aiohttp.ClientSession() as session: - async with session.get(url, headers=self._auth_headers) as resp: - if not resp.ok: - logging.warning( - "Failed to fetch SDK config: %s %s", - resp.status, - await resp.text(), - ) - return None - - return _SdkConfig.model_validate(await resp.json()) - except Exception as e: - logging.warning("Failed to fetch SDK config: %s", e) - return None - - async def _validate_sdk_config(self) -> None: - config = await self._fetch_sdk_config() - if config is None: - return - - package_config = config.packages["narada"] - current_version = Version(__version__) - min_required_version = Version(package_config.min_required_version) - if current_version < min_required_version: - raise RuntimeError( - f"narada<={__version__} is not supported. 
Please upgrade to version " - f"{package_config.min_required_version} or higher." - ) - - async def open_and_initialize_browser_window( - self, config: BrowserConfig | None = None - ) -> LocalBrowserWindow: - assert self._playwright is not None - playwright = self._playwright - - config = config or BrowserConfig() - - launch_browser_result = await self._launch_browser(playwright, config) - side_panel_page = launch_browser_result.side_panel_page - browser_window_id = launch_browser_result.browser_window_id - - await self._fix_download_behavior(side_panel_page) - - return LocalBrowserWindow( - auth_headers=self._auth_headers, - browser_process_id=launch_browser_result.browser_process_id, - browser_window_id=browser_window_id, - config=config, - context=side_panel_page.context, - ) - - async def open_and_initialize_cloud_browser_window( - self, - config: BrowserConfig | None = None, - session_name: str | None = None, - session_timeout: int | None = None, - require_extension: bool = True, - ) -> CloudBrowserWindow: - """Create a cloud browser session and return a ``CloudBrowserWindow``. - - With ``require_extension=True`` (default), calls - ``POST /cloud-browser/create-cloud-browser-session``, then connects local Playwright - over CDP, opens ``login_url``, and waits for ``#narada-browser-window-id`` (extension - install retries apply). ``config`` controls interactive prompts and related behavior. - - With ``require_extension=False``, calls - ``POST /cloud-browser/create-and-initialize-cloud-browser-session`` instead: the API - provisions the browser and runs the same CDP initialization on the server, returning - ``session_id`` and ``browser_window_id`` in the JSON body. Local Playwright is not used - for that path, and ``config`` is ignored. 
- """ - config = config or BrowserConfig() - base_url = os.getenv("NARADA_API_BASE_URL", "https://api.narada.ai/fast/v2") - request_body = { - "require_extension": require_extension, - "session_name": session_name, - "session_timeout": session_timeout, - } - - if not require_extension: - endpoint_url = ( - f"{base_url}/cloud-browser/create-and-initialize-cloud-browser-session" - ) - async with aiohttp.ClientSession() as session: - async with session.post( - endpoint_url, - headers=self._auth_headers, - json=request_body, - timeout=aiohttp.ClientTimeout(total=180), - ) as resp: - if not resp.ok: - error_text = await resp.text() - if resp.status == HTTPStatus.FORBIDDEN: - error = ApiErrorPayload.from_error_text(error_text) - err = RuntimeError( - f"Failed to create cloud browser session: {resp.status} {error_text}\n" - f"Endpoint URL: {endpoint_url}" - ) - err.status_code = resp.status # type: ignore[attr-defined] - err.detail = error.detail # type: ignore[attr-defined] - raise err - raise RuntimeError( - f"Failed to create cloud browser session: {resp.status} {error_text}\n" - f"Endpoint URL: {endpoint_url}" - ) - response_data = await resp.json() - - return CloudBrowserWindow( - browser_window_id=response_data["browser_window_id"], - session_id=response_data["session_id"], - auth_headers=self._auth_headers, - ) - - endpoint_url = f"{base_url}/cloud-browser/create-cloud-browser-session" - - async with aiohttp.ClientSession() as session: - async with session.post( - endpoint_url, - headers=self._auth_headers, - json=request_body, - timeout=aiohttp.ClientTimeout( - total=180 - ), # 3 minutes for session startup - ) as resp: - if not resp.ok: - error_text = await resp.text() - if resp.status == HTTPStatus.FORBIDDEN: - error = ApiErrorPayload.from_error_text(error_text) - err = RuntimeError( - f"Failed to create cloud browser session: {resp.status} {error_text}\n" - f"Endpoint URL: {endpoint_url}" - ) - err.status_code = resp.status # type: ignore[attr-defined] - 
err.detail = error.detail # type: ignore[attr-defined] - raise err - raise RuntimeError( - f"Failed to create cloud browser session: {resp.status} {error_text}\n" - f"Endpoint URL: {endpoint_url}" - ) - response_data = await resp.json() - - cdp_websocket_url = response_data["cdp_websocket_url"] - session_id = response_data["session_id"] - login_url = response_data["login_url"] - cdp_auth_headers = response_data["cdp_auth_headers"] - - # Connect to browser via CDP with authentication headers and log the user in. - try: - return await self._initialize_cloud_browser_window( - config=config, - cdp_websocket_url=cdp_websocket_url, - session_id=session_id, - login_url=login_url, - cdp_auth_headers=cdp_auth_headers, - ) - except Exception: - # Clean up the session if CDP connection fails - try: - async with aiohttp.ClientSession() as cleanup_session: - async with cleanup_session.post( - f"{base_url}/cloud-browser/stop-cloud-browser-session", - headers=self._auth_headers, - json={"session_id": session_id, "status": "failed"}, - timeout=aiohttp.ClientTimeout(total=10), - ) as resp: - if resp.ok: - logging.info( - "Cleaned up session %s after CDP connection failure", - session_id, - ) - else: - logging.warning( - "Failed to cleanup session %s: %s", - session_id, - resp.status, - ) - except Exception as cleanup_error: - logging.warning( - "Error cleaning up session %s: %s", session_id, cleanup_error - ) - # Re-raise the original connection error - raise - - async def _initialize_cloud_browser_window( - self, - *, - config: BrowserConfig, - cdp_websocket_url: str, - session_id: str, - login_url: str, - cdp_auth_headers: dict[str, str], - ) -> CloudBrowserWindow: - assert self._playwright is not None - - # Connect to browser via CDP with authentication headers - browser = await self._playwright.chromium.connect_over_cdp( - cdp_websocket_url, headers=cdp_auth_headers - ) - - # Navigate to login URL (provided by backend with custom token) - context = browser.contexts[0] - 
initialization_page = context.pages[0] - await initialization_page.goto( - login_url, timeout=15_000, wait_until="domcontentloaded" - ) - - # Wait for browser window ID. The extension can take a bit to be installed, so we retry a - # few times. - max_attempts = 10 - for attempt in range(max_attempts): - try: - browser_window_id = await self._wait_for_browser_window_id( - initialization_page, - config, - timeout=30_000, - ) - break - except NaradaExtensionMissingError: - if attempt == max_attempts - 1: - raise - logging.info("Waiting for Narada extension to be installed...") - await asyncio.sleep(1) - except NaradaTimeoutError: - if attempt == max_attempts - 1: - raise - # If browser window ID is not found, reload the page and try again - # try to go to the login URL again (with customToken query param) - await initialization_page.goto( - login_url, timeout=15_000, wait_until="domcontentloaded" - ) - - cloud_window = CloudBrowserWindow( - browser_window_id=browser_window_id, - session_id=session_id, - auth_headers=self._auth_headers, - ) - - if config.interactive: - self._print_success_message(browser_window_id) - - return cloud_window - - async def initialize_in_existing_browser_window( - self, config: BrowserConfig | None = None - ) -> LocalBrowserWindow: - """Initializes the Narada extension in an existing browser window. - - This method connects to an existing browser process via CDP and performs the same - initialization logic as `open_and_initialize_browser_window`, but without launching a new - browser process. - """ - assert self._playwright is not None - playwright = self._playwright - - config = config or BrowserConfig() - - if config.proxy is not None: - raise ValueError( - "Proxy configuration is not supported for `initialize_in_existing_browser_window`. " - "Proxy settings must be specified when launching Chrome. " - "Use `open_and_initialize_browser_window` instead." 
- ) - - browser = await playwright.chromium.connect_over_cdp(config.cdp_url) - - # Generate a unique tag for the initialization URL - window_tag = uuid4().hex - tagged_initialization_url = f"{config.initialization_url}?t={window_tag}" - - # Open the initialization page in a new tab in the default context. - context = browser.contexts[0] - initialization_page = await context.new_page() - await initialization_page.goto(tagged_initialization_url) - - browser_window_id = await self._wait_for_browser_window_id( - initialization_page, config - ) - - # Playwright seems unable to pick up the side panel page that is automatically opened by the - # initialization page. We need to establish a new CDP connection to the browser *after* the - # side panel page is opened for Playwright to see it. - await browser.close() - browser = await playwright.chromium.connect_over_cdp(config.cdp_url) - context = browser.contexts[0] - - side_panel_url = create_side_panel_url(config, browser_window_id) - side_panel_page = next(p for p in context.pages if p.url == side_panel_url) - - await self._fix_download_behavior(side_panel_page) - - if config.interactive: - self._print_success_message(browser_window_id) - - return LocalBrowserWindow( - auth_headers=self._auth_headers, - browser_process_id=None, - browser_window_id=browser_window_id, - config=config, - context=context, - ) - - async def _launch_browser( - self, playwright: Playwright, config: BrowserConfig - ) -> _LaunchBrowserResult: - # A unique tag is appended to the initialization URL so that we can find the new page that - # was opened, since otherwise when more than one initialization page is opened in the same - # browser instance, we wouldn't be able to tell them apart. - window_tag = uuid4().hex - tagged_initialization_url = f"{config.initialization_url}?t={window_tag}" - - # When proxy auth is needed, launch with about:blank to avoid Chrome's startup auth prompt. 
- # We'll set up the CDP auth handler and then navigate to the init URL. - proxy_requires_auth = ( - config.proxy is not None and config.proxy.requires_authentication - ) - launch_url = "about:blank" if proxy_requires_auth else tagged_initialization_url - - browser_args = [ - f"--user-data-dir={config.user_data_dir}", - f"--profile-directory={config.profile_directory}", - f"--remote-debugging-port={config.cdp_port}", - "--no-default-browser-check", - "--no-first-run", - "--new-window", - launch_url, - ] - - # Add proxy arguments if configured. - if config.proxy is not None: - config.proxy.validate() - browser_args.append(f"--proxy-server={config.proxy.server}") - - if config.proxy.bypass: - browser_args.append(f"--proxy-bypass-list={config.proxy.bypass}") - - if config.proxy.ignore_cert_errors: - browser_args.append("--ignore-certificate-errors") - - # Launch an independent browser process which will not be killed when the current program - # exits. - if sys.platform == "win32": - browser_process = subprocess.Popen( - [config.executable_path, *browser_args], - stdin=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - creationflags=subprocess.CREATE_NEW_PROCESS_GROUP - | subprocess.DETACHED_PROCESS, - ) - else: - browser_process = await asyncio.create_subprocess_exec( - config.executable_path, - *browser_args, - stdin=asyncio.subprocess.DEVNULL, - stdout=asyncio.subprocess.DEVNULL, - stderr=asyncio.subprocess.DEVNULL, - start_new_session=True, - ) - - logging.debug("Browser process started with PID: %s", browser_process.pid) - - # We need to wait a bit for the initial page to open before connecting to the browser over - # CDP, otherwise Playwright can see an empty context with no pages. - await asyncio.sleep(2) - - browser_window_id = None - side_panel_page = None - max_cdp_connect_attempts = 10 - - # Track whether we've already navigated from about:blank to the initialization URL. 
- # This is only relevant when proxy auth is enabled, where we launch with about:blank - # to set up CDP auth handlers before any network traffic. We must only navigate once, - # because on retry iterations context.pages[0] could be any page (side panel, devtools, - # etc.) and navigating it would break the initialization flow. - did_initial_navigation = False - - for attempt in range(max_cdp_connect_attempts): - try: - browser = await playwright.chromium.connect_over_cdp(config.cdp_url) - except Exception: - # The browser process might not be immediately ready to accept CDP connections. - # Retry a few times before giving up. - if attempt == max_cdp_connect_attempts - 1: - raise - await asyncio.sleep(2) - continue - - context = browser.contexts[0] - - # If proxy auth is needed, set up the handler at browser level then navigate to the - # initialization page. After navigation succeeds, Chrome has cached the proxy - # credentials, so we can detach the CDP session. - if proxy_requires_auth and not did_initial_navigation: - proxy_cdp_session = ( - await self._setup_proxy_authentication_browser_level( - browser, - # Not None because `proxy_requires_auth` is True. - assert_not_none(config.proxy), - ) - ) - blank_page = context.pages[0] - await blank_page.goto(tagged_initialization_url) - await proxy_cdp_session.detach() - did_initial_navigation = True - - # Grab the browser window ID from the page we just opened. 
- initialization_page = next( - (p for p in context.pages if p.url == tagged_initialization_url), None - ) - if initialization_page is not None: - browser_window_id = await self._wait_for_browser_window_id( - initialization_page, config - ) - - side_panel_url = create_side_panel_url(config, browser_window_id) - side_panel_page = next( - (p for p in context.pages if p.url == side_panel_url), None - ) - if side_panel_page is not None: - break - - if attempt == max_cdp_connect_attempts - 1: - raise NaradaTimeoutError("Timed out waiting for initialization page") - - # Close the current CDP connection and try again. - await browser.close() - await asyncio.sleep(3) - - # These are impossible as we would've raised an exception above otherwise. - assert browser_window_id is not None - assert side_panel_page is not None - - if config.interactive: - self._print_success_message(browser_window_id) - - return _LaunchBrowserResult( - browser_process_id=browser_process.pid, - browser_window_id=browser_window_id, - side_panel_page=side_panel_page, - ) - - @staticmethod - async def _wait_for_selector_attached( - page: Page, selector: str, *, timeout: int - ) -> ElementHandle | None: - try: - return await page.wait_for_selector( - selector, state="attached", timeout=timeout - ) - except PlaywrightTimeoutError: - return None - - @staticmethod - async def _wait_for_browser_window_id_silently(page: Page, *, timeout: int) -> str: - selectors = [ - Narada._BROWSER_WINDOW_ID_SELECTOR, - Narada._UNSUPPORTED_BROWSER_INDICATOR_SELECTOR, - Narada._EXTENSION_MISSING_INDICATOR_SELECTOR, - Narada._EXTENSION_UNAUTHENTICATED_INDICATOR_SELECTOR, - Narada._INITIALIZATION_ERROR_INDICATOR_SELECTOR, - ] - tasks: list[asyncio.Task[ElementHandle | None]] = [ - asyncio.create_task( - Narada._wait_for_selector_attached(page, selector, timeout=timeout) - ) - for selector in selectors - ] - ( - browser_window_id_task, - unsupported_browser_indicator_task, - extension_missing_indicator_task, - 
extension_unauthenticated_indicator_task, - initialization_error_indicator_task, - ) = tasks - - done, pending = await asyncio.wait( - tasks, timeout=timeout, return_when=asyncio.FIRST_COMPLETED - ) - - for task in pending: - task.cancel() - - if len(done) == 0: - raise NaradaTimeoutError("Timed out waiting for browser window ID") - - for task in done: - if task == browser_window_id_task: - browser_window_id_elem = task.result() - if browser_window_id_elem is None: - raise NaradaTimeoutError("Timed out waiting for browser window ID") - - browser_window_id = await browser_window_id_elem.text_content() - if browser_window_id is None: - raise NaradaInitializationError("Browser window ID is empty") - - return browser_window_id - - # TODO: Create custom exception types for these cases. - if task == unsupported_browser_indicator_task and task.result() is not None: - raise NaradaUnsupportedBrowserError("Unsupported browser") - - if task == extension_missing_indicator_task and task.result() is not None: - raise NaradaExtensionMissingError("Narada extension missing") - - if ( - task == extension_unauthenticated_indicator_task - and task.result() is not None - ): - raise NaradaExtensionUnauthenticatedError( - "Sign in to the Narada extension first" - ) - - if ( - task == initialization_error_indicator_task - and task.result() is not None - ): - raise NaradaInitializationError("Initialization error") - - assert_never() - - async def _wait_for_browser_window_id_interactively( - self, page: Page, *, per_attempt_timeout: int - ) -> str: - try: - while True: - try: - return await Narada._wait_for_browser_window_id_silently( - page, timeout=per_attempt_timeout - ) - except NaradaExtensionMissingError: - self._console.input( - "\n[bold]>[/bold] [bold blue]The Narada Enterprise extension is not " - "installed. 
Please follow the instructions in the browser window to " - "install it first, then press Enter to continue.[/bold blue]\n", - ) - except NaradaExtensionUnauthenticatedError: - self._console.input( - "\n[bold]>[/bold] [bold blue]Please sign in to the Narada extension first, " - "then press Enter to continue.[/bold blue]", - ) - - # Bring the page to the front and wait a little bit before refreshing it, as this - # page needs to be the active tab in order to automatically open the side panel. - await page.bring_to_front() - await asyncio.sleep(0.1) - await page.reload() - - except PlaywrightError: - self._console.print( - "\n[bold]>[/bold] [bold red]It seems the Narada automation page was closed. Please " - "retry the action and keep the Narada web page open.[/bold red]", - ) - sys.exit(1) - - async def _wait_for_browser_window_id( - self, - initialization_page: Page, - config: BrowserConfig, - timeout: int = 30_000, - ) -> str: - """Waits for the browser window ID to be available, potentially letting the user respond to - recoverable errors interactively. - """ - if config.interactive: - return await self._wait_for_browser_window_id_interactively( - initialization_page, per_attempt_timeout=timeout - ) - else: - return await Narada._wait_for_browser_window_id_silently( - initialization_page, timeout=timeout - ) - - async def _setup_proxy_authentication_browser_level( - self, browser: Browser, proxy_config: ProxyConfig - ) -> CDPSession: - """Sets up proxy authentication handling via CDP at the browser level. - - This uses a browser-level CDP session which can intercept auth challenges before they reach - individual pages, preventing Chrome from showing the proxy authentication dialog. - - Chrome caches proxy credentials for the session after the first successful authentication. - The caller should detach the returned CDP session after the first navigation succeeds. 
- """ - cdp_session = await browser.new_browser_cdp_session() - - # Enable Fetch domain with a catch-all pattern to intercept auth challenges. - await cdp_session.send( - "Fetch.enable", - { - "handleAuthRequests": True, - "patterns": [{"urlPattern": "*"}], - }, - ) - - async def handle_auth(params: dict[str, Any]) -> None: - request_id = params.get("requestId") - auth_challenge = params.get("authChallenge", {}) - - # Only handle proxy auth challenges - if auth_challenge.get("source") != "Proxy": - return - - try: - await cdp_session.send( - "Fetch.continueWithAuth", - { - "requestId": request_id, - "authChallengeResponse": { - "response": "ProvideCredentials", - "username": proxy_config.username, - "password": proxy_config.password, - }, - }, - ) - logging.debug("Browser-level proxy authentication credentials provided") - except Exception as e: - logging.error("Failed to respond to proxy auth challenge: %s", e) - - async def handle_request_paused(params: dict[str, Any]) -> None: - # Continue all paused requests immediately - request_id = params.get("requestId") - try: - await cdp_session.send( - "Fetch.continueRequest", {"requestId": request_id} - ) - except Exception: - pass - - cdp_session.on( - "Fetch.authRequired", - lambda params: asyncio.create_task(handle_auth(params)), - ) - cdp_session.on( - "Fetch.requestPaused", - lambda params: asyncio.create_task(handle_request_paused(params)), - ) - - return cdp_session - - async def _fix_download_behavior(self, side_panel_page: Page) -> None: - """Reverts the download behavior to the default behavior for the extension, otherwise our - extension cannot download files. - """ - cdp_session = await side_panel_page.context.new_cdp_session(side_panel_page) - await cdp_session.send("Page.setDownloadBehavior", {"behavior": "default"}) - await cdp_session.detach() - - def _print_success_message(self, browser_window_id: str) -> None: - self._console.print( - "\n[bold]>[/bold] [bold green]Initialization successful. 
Browser window ID: " - f"{browser_window_id}[/bold green]\n", - ) diff --git a/packages/narada/src/narada/environment.py b/packages/narada/src/narada/environment.py new file mode 100644 index 0000000..f8b26b4 --- /dev/null +++ b/packages/narada/src/narada/environment.py @@ -0,0 +1,1782 @@ +from __future__ import annotations + +import asyncio +import inspect +import json +import logging +import mimetypes +import os +import subprocess +import sys +import time +from abc import ABC +from dataclasses import dataclass +from http import HTTPStatus +from io import IOBase +from pathlib import Path +from typing import ( + IO, + Any, + Awaitable, + Callable, + Literal, + Mapping, + TypedDict, + TypeGuard, + TypeVar, + cast, + overload, + override, +) +from uuid import uuid4 + +import aiohttp +from narada_core.actions.models import ( + ActiveInputRequest, + CloseWindowRequest, + ExtensionActionRequest, + ExtensionActionResponse, +) +from narada_core.errors import ( + NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE, + NaradaError, + NaradaExtensionMissingError, + NaradaExtensionUnauthenticatedError, + NaradaInitializationError, + NaradaTimeoutError, + NaradaUnsupportedBrowserError, + UserAbortedError, +) +from narada_core.models import ( + AgentKind, + File, + McpServer, + ReasoningEffort, + RemoteDispatchChatHistoryItem, + Response, + UserResourceCredentials, + _RemoteDispatchPollResponse, + _SdkConfig, +) +from packaging.version import Version +from playwright._impl._errors import Error as PlaywrightError +from playwright.async_api import ( + Browser, + BrowserContext, + CDPSession, + ElementHandle, + Page, + Playwright, + async_playwright, +) +from playwright.async_api import TimeoutError as PlaywrightTimeoutError +from playwright.async_api._context_manager import PlaywrightContextManager +from pydantic import BaseModel, ValidationError +from rich.console import Console + +from narada.config import BrowserConfig, ProxyConfig +from narada.utils import assert_never, 
async def _notify_input_required_callback(
    callback: InputRequiredCallback | None,
    response: _RemoteDispatchPollResponse,
    seen_input_ids: set[str],
) -> None:
    """Invoke ``callback`` once for each new input request found in a poll response.

    Nothing happens when no callback is given, when the response status is not
    ``"input-required"``, when the response has no ``activeInputRequest``, or when the
    request's ``input_id`` is already in ``seen_input_ids``.

    Otherwise the ID is added to ``seen_input_ids`` and the callback is called with the
    validated ``ActiveInputRequest``. If the callback returns an awaitable, it is awaited.
    """
    if callback is None:
        return
    if response.get("status") != "input-required":
        return

    raw_request = response.get("activeInputRequest")
    if raw_request is None:
        return

    request = ActiveInputRequest.model_validate(raw_request)
    input_id = request.input_id
    if input_id in seen_input_ids:
        return

    # The ID is recorded before the callback runs, so a callback that raises is not
    # invoked again for the same input request on the next poll.
    seen_input_ids.add(input_id)
    outcome = callback(request)
    if inspect.isawaitable(outcome):
        await outcome
class _BrowserInitializationHelper:
    """Helpers for reading the browser window ID from the Narada initialization page.

    The initialization page signals its state by attaching one of several marker elements:
    the browser window ID element, or one of the error indicators (unsupported browser,
    extension missing, extension unauthenticated, initialization error). The methods here
    wait for whichever marker is attached first.
    """

    def __init__(self, *, console: Console) -> None:
        # Console used for interactive prompts and status messages.
        self._console = console

    @staticmethod
    async def wait_for_selector_attached(
        page: Page, selector: str, *, timeout: int
    ) -> ElementHandle | None:
        """Waits for `selector` to be attached to `page`.

        `timeout` is passed to Playwright as-is (Playwright timeouts are in milliseconds).
        Returns the element handle, or `None` if Playwright timed out.
        """
        try:
            return await page.wait_for_selector(
                selector, state="attached", timeout=timeout
            )
        except PlaywrightTimeoutError:
            return None

    @staticmethod
    async def wait_for_browser_window_id_silently(page: Page, *, timeout: int) -> str:
        """Waits for the browser window ID without any user interaction.

        Args:
            page: The initialization page.
            timeout: Timeout in milliseconds, as used by Playwright.

        Returns:
            The text content of the browser window ID element.

        Raises:
            NaradaTimeoutError: No marker was attached before the timeout.
            NaradaUnsupportedBrowserError: The unsupported-browser marker was attached.
            NaradaExtensionMissingError: The extension-missing marker was attached.
            NaradaExtensionUnauthenticatedError: The extension-unauthenticated marker was
                attached.
            NaradaInitializationError: The initialization-error marker was attached, or the
                browser window ID element has no text content.
        """
        selectors = [
            _BROWSER_WINDOW_ID_SELECTOR,
            _UNSUPPORTED_BROWSER_INDICATOR_SELECTOR,
            _EXTENSION_MISSING_INDICATOR_SELECTOR,
            _EXTENSION_UNAUTHENTICATED_INDICATOR_SELECTOR,
            _INITIALIZATION_ERROR_INDICATOR_SELECTOR,
        ]
        tasks: list[asyncio.Task[ElementHandle | None]] = [
            asyncio.create_task(
                _BrowserInitializationHelper.wait_for_selector_attached(
                    page, selector, timeout=timeout
                )
            )
            for selector in selectors
        ]
        (
            browser_window_id_task,
            unsupported_browser_indicator_task,
            extension_missing_indicator_task,
            extension_unauthenticated_indicator_task,
            initialization_error_indicator_task,
        ) = tasks

        # `timeout` is in milliseconds (Playwright convention) while `asyncio.wait` expects
        # seconds. Playwright treats 0 as "no timeout", which maps to `None` here.
        wait_timeout = timeout / 1000 if timeout > 0 else None
        done, pending = await asyncio.wait(
            tasks, timeout=wait_timeout, return_when=asyncio.FIRST_COMPLETED
        )

        for task in pending:
            task.cancel()

        if not done:
            raise NaradaTimeoutError("Timed out waiting for browser window ID")

        for task in done:
            if task == browser_window_id_task:
                browser_window_id_elem = task.result()
                if browser_window_id_elem is None:
                    raise NaradaTimeoutError("Timed out waiting for browser window ID")

                browser_window_id = await browser_window_id_elem.text_content()
                if browser_window_id is None:
                    raise NaradaInitializationError("Browser window ID is empty")

                return browser_window_id

            # TODO: Create custom exception types for these cases.
            if task == unsupported_browser_indicator_task and task.result() is not None:
                raise NaradaUnsupportedBrowserError("Unsupported browser")

            if task == extension_missing_indicator_task and task.result() is not None:
                raise NaradaExtensionMissingError("Narada extension missing")

            if (
                task == extension_unauthenticated_indicator_task
                and task.result() is not None
            ):
                raise NaradaExtensionUnauthenticatedError(
                    "Sign in to the Narada extension first"
                )

            if (
                task == initialization_error_indicator_task
                and task.result() is not None
            ):
                raise NaradaInitializationError("Initialization error")

        # Every finished task was an indicator wait that itself timed out (result `None`),
        # so no marker appeared in time. Report that as a timeout.
        raise NaradaTimeoutError("Timed out waiting for browser window ID")

    async def wait_for_browser_window_id_interactively(
        self, page: Page, *, per_attempt_timeout: int
    ) -> str:
        """Waits for the browser window ID, prompting the user on recoverable errors.

        When the extension is missing or unauthenticated, the user is asked to fix the
        problem and press Enter; the page is then brought to the front, reloaded, and the
        wait is retried. `per_attempt_timeout` (milliseconds) applies to each attempt.

        If a Playwright error occurs (the message shown assumes the page was closed), an
        error is printed and the process exits with status 1.
        """
        try:
            while True:
                try:
                    return await _BrowserInitializationHelper.wait_for_browser_window_id_silently(
                        page, timeout=per_attempt_timeout
                    )
                except NaradaExtensionMissingError:
                    self._console.input(
                        "\n[bold]>[/bold] [bold blue]The Narada Enterprise extension is not "
                        "installed. Please follow the instructions in the browser window to "
                        "install it first, then press Enter to continue.[/bold blue]\n",
                    )
                except NaradaExtensionUnauthenticatedError:
                    self._console.input(
                        "\n[bold]>[/bold] [bold blue]Please sign in to the Narada extension first, "
                        "then press Enter to continue.[/bold blue]",
                    )

                # Bring the page to the front and wait a little bit before refreshing it, as this
                # page needs to be the active tab in order to automatically open the side panel.
                await page.bring_to_front()
                await asyncio.sleep(0.1)
                await page.reload()

        except PlaywrightError:
            self._console.print(
                "\n[bold]>[/bold] [bold red]It seems the Narada automation page was closed. Please "
                "retry the action and keep the Narada web page open.[/bold red]",
            )
            sys.exit(1)

    async def wait_for_browser_window_id(
        self,
        initialization_page: Page,
        config: BrowserConfig,
        timeout: int = 30_000,
    ) -> str:
        """Waits for the browser window ID to be available, potentially letting the user respond to
        recoverable errors interactively.

        `timeout` is in milliseconds. In interactive mode it applies to each attempt.
        """
        if config.interactive:
            return await self.wait_for_browser_window_id_interactively(
                initialization_page, per_attempt_timeout=timeout
            )

        return await _BrowserInitializationHelper.wait_for_browser_window_id_silently(
            initialization_page, timeout=timeout
        )

    def print_success_message(self, browser_window_id: str) -> None:
        """Prints the initialization success message with the browser window ID."""
        self._console.print(
            "\n[bold]>[/bold] [bold green]Initialization successful. Browser window ID: "
            f"{browser_window_id}[/bold green]\n",
        )
async def _fetch_sdk_config(self) -> _SdkConfig | None:
    """Fetches the SDK configuration from `{base_url}/sdk/config`.

    This is best-effort: a non-OK HTTP status, or any exception raised while fetching or
    validating the payload, is logged as a warning and `None` is returned instead of
    propagating the failure.
    """
    url = f"{self._base_url}/sdk/config"

    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=self._auth_headers) as resp:
                if not resp.ok:
                    # Log through the module-level logger so records carry this module's
                    # name and respect its logging configuration.
                    logger.warning(
                        "Failed to fetch SDK config: %s %s",
                        resp.status,
                        await resp.text(),
                    )
                    return None

                return _SdkConfig.model_validate(await resp.json())
    except Exception as e:
        # Deliberately broad: a failed config fetch must not block initialization.
        logger.warning("Failed to fetch SDK config: %s", e)
        return None
async def _normalize_input_variables_value_impl(
    self, *, input_variable_value: Any
) -> _NormalizedInputVariableValue:
    """Recursively converts one input-variable value into its normalized form.

    Lists and dicts are rebuilt with each element or value normalized in turn. A value
    accepted by `_is_uploadable_file` is uploaded and replaced by the file reference
    returned from `_upload_input_variable_file`. Anything else is returned unchanged.
    """
    value = input_variable_value
    recurse = self._normalize_input_variables_value_impl

    if isinstance(value, list):
        normalized_items = []
        for element in value:
            normalized_items.append(await recurse(input_variable_value=element))
        return normalized_items

    if self._is_uploadable_file(value):
        return await self._upload_input_variable_file(input_variable_value=value)

    if not isinstance(value, dict):
        return value

    return {
        name: await recurse(input_variable_value=member)
        for name, member in value.items()
    }
+ @overload + async def _dispatch_request( + self, + *, + prompt: str, + agent: Literal[AgentKind.CORE_AGENT], + reasoning: ReasoningEffort | None = None, + clear_chat: bool | None = None, + generate_gif: bool | None = None, + output_schema: None = None, + previous_request_id: str | None = None, + chat_history: list[RemoteDispatchChatHistoryItem] | None = None, + additional_context: dict[str, str] | None = None, + attachment: File | IO[Any] | None = None, + time_zone: str = "America/Los_Angeles", + user_resource_credentials: UserResourceCredentials | None = None, + mcp_servers: list[McpServer] | None = None, + secret_variables: dict[str, str] | None = None, + input_variables: Mapping[str, Any] | None = None, + callback_url: str | None = None, + callback_secret: str | None = None, + callback_headers: Mapping[str, Any] | None = None, + on_input_required: InputRequiredCallback | None = None, + timeout: int = 1000, + ) -> Response[None]: ... + + @overload + async def _dispatch_request( + self, + *, + prompt: str, + agent: Literal[AgentKind.CORE_AGENT], + reasoning: ReasoningEffort | None = None, + clear_chat: bool | None = None, + generate_gif: bool | None = None, + output_schema: type[_StructuredOutput], + previous_request_id: str | None = None, + chat_history: list[RemoteDispatchChatHistoryItem] | None = None, + additional_context: dict[str, str] | None = None, + attachment: File | IO[Any] | None = None, + time_zone: str = "America/Los_Angeles", + user_resource_credentials: UserResourceCredentials | None = None, + mcp_servers: list[McpServer] | None = None, + secret_variables: dict[str, str] | None = None, + input_variables: Mapping[str, Any] | None = None, + callback_url: str | None = None, + callback_secret: str | None = None, + callback_headers: Mapping[str, Any] | None = None, + on_input_required: InputRequiredCallback | None = None, + timeout: int = 1000, + ) -> Response[_StructuredOutput]: ... 
+ + @overload + async def _dispatch_request( + self, + *, + prompt: str, + agent: AgentKind | str = AgentKind.OPERATOR, + clear_chat: bool | None = None, + generate_gif: bool | None = None, + output_schema: None = None, + previous_request_id: str | None = None, + chat_history: list[RemoteDispatchChatHistoryItem] | None = None, + additional_context: dict[str, str] | None = None, + attachment: File | IO[Any] | None = None, + time_zone: str = "America/Los_Angeles", + user_resource_credentials: UserResourceCredentials | None = None, + mcp_servers: list[McpServer] | None = None, + secret_variables: dict[str, str] | None = None, + input_variables: Mapping[str, Any] | None = None, + critic_context: dict[str, Any] | None = None, + callback_url: str | None = None, + callback_secret: str | None = None, + callback_headers: Mapping[str, Any] | None = None, + on_input_required: InputRequiredCallback | None = None, + timeout: int = 1000, + ) -> Response[None]: ... + + @overload + async def _dispatch_request( + self, + *, + prompt: str, + agent: AgentKind | str = AgentKind.OPERATOR, + clear_chat: bool | None = None, + generate_gif: bool | None = None, + output_schema: type[_StructuredOutput], + previous_request_id: str | None = None, + chat_history: list[RemoteDispatchChatHistoryItem] | None = None, + additional_context: dict[str, str] | None = None, + attachment: File | IO[Any] | None = None, + time_zone: str = "America/Los_Angeles", + user_resource_credentials: UserResourceCredentials | None = None, + mcp_servers: list[McpServer] | None = None, + secret_variables: dict[str, str] | None = None, + input_variables: Mapping[str, Any] | None = None, + critic_context: dict[str, Any] | None = None, + callback_url: str | None = None, + callback_secret: str | None = None, + callback_headers: Mapping[str, Any] | None = None, + on_input_required: InputRequiredCallback | None = None, + timeout: int = 1000, + ) -> Response[_StructuredOutput]: ... 
+ + async def _dispatch_request( + self, + *, + prompt: str, + agent: AgentKind | str = AgentKind.OPERATOR, + reasoning: ReasoningEffort | None = None, + clear_chat: bool | None = None, + generate_gif: bool | None = None, + output_schema: type[BaseModel] | None = None, + previous_request_id: str | None = None, + chat_history: list[RemoteDispatchChatHistoryItem] | None = None, + additional_context: dict[str, str] | None = None, + attachment: File | IO[Any] | None = None, + time_zone: str = "America/Los_Angeles", + user_resource_credentials: UserResourceCredentials | None = None, + mcp_servers: list[McpServer] | None = None, + secret_variables: dict[str, str] | None = None, + input_variables: Mapping[str, Any] | None = None, + critic_context: dict[str, Any] | None = None, + callback_url: str | None = None, + callback_secret: str | None = None, + callback_headers: Mapping[str, Any] | None = None, + on_input_required: InputRequiredCallback | None = None, + timeout: int = 1000, + ) -> Response: + """Low-level API for invoking an agent in the Narada extension side panel chat. + + The higher-level `Agent.run` method should be preferred for most use cases. + """ + await self._ensure_initialized() + + # The overloads enforce this at type-check time when callers use + # ``AgentKind.CORE_AGENT``; the runtime check covers string-form agents + # (``agent="..."``) and callers without a type checker. 
+ if reasoning is not None and agent is not AgentKind.CORE_AGENT: + raise ValueError( + "`reasoning` is only supported with `agent=AgentKind.CORE_AGENT` " + f"(got agent={agent!r})" + ) + deadline = time.monotonic() + timeout + + agent_prefix = ( + agent.prompt_prefix() if isinstance(agent, AgentKind) else f"{agent} " + ) + body: dict[str, Any] = { + "prompt": agent_prefix + prompt, + "timeZone": time_zone, + } + browser_window_id = self._dispatch_browser_window_id + if browser_window_id is not None: + body["browserWindowId"] = browser_window_id + cloud_browser_session_id = self.cloud_browser_session_id + if cloud_browser_session_id is not None: + body["cloudBrowserSessionId"] = cloud_browser_session_id + if clear_chat is not None: + body["clearChat"] = clear_chat + if generate_gif is not None: + body["saveScreenshots"] = generate_gif + if output_schema is not None: + body["responseFormat"] = { + "type": "jsonSchema", + "jsonSchema": output_schema.model_json_schema(), + } + if previous_request_id is not None: + body["previousRequestId"] = previous_request_id + if chat_history is not None: + body["chatHistory"] = chat_history + if additional_context is not None: + body["additionalContext"] = additional_context + if attachment is not None: + if self._is_uploadable_file(attachment): + body["attachment"] = await self._upload_file_impl(file=attachment) + else: + body["attachment"] = attachment + if user_resource_credentials is not None: + body["userResourceCredentials"] = user_resource_credentials + if mcp_servers is not None: + body["mcpServers"] = [ + server.model_dump(mode="json") for server in mcp_servers + ] + if secret_variables is not None: + body["secretVariables"] = secret_variables + if input_variables is not None: + body["inputVariables"] = await self._normalize_input_variables( + input_variables=input_variables + ) + if critic_context is not None: + body["criticContext"] = critic_context + if callback_url is not None: + body["callbackUrl"] = callback_url + 
if callback_secret is not None: + body["callbackSecret"] = callback_secret + if callback_headers is not None: + body["callbackHeaders"] = callback_headers + if reasoning is not None: + body["reasoningMode"] = reasoning.value + + try: + seen_input_ids: set[str] = set() + async with aiohttp.ClientSession() as session: + async with session.post( + f"{self._base_url}/remote-dispatch", + headers=self._auth_headers, + json=body, + timeout=aiohttp.ClientTimeout(total=timeout), + ) as resp: + resp.raise_for_status() + request_id = (await resp.json())["requestId"] + + while (now := time.monotonic()) < deadline: + async with session.get( + f"{self._base_url}/remote-dispatch/responses/{request_id}", + headers=self._auth_headers, + timeout=aiohttp.ClientTimeout(total=deadline - now), + ) as resp: + resp.raise_for_status() + response: _RemoteDispatchPollResponse = await resp.json() + + response["requestId"] = request_id + + if response["completedAt"] is None: + await _notify_input_required_callback( + on_input_required, + response, + seen_input_ids, + ) + # Poll every 3 seconds. + await asyncio.sleep(3) + continue + + response_content = response["response"] + if response_content is not None: + # Populate the `structuredOutput` field. This is a client-side field + # that's not directly returned by the API. 
@overload
async def _run_extension_action(
    self,
    request: ExtensionActionRequest,
    response_model: None = None,
    *,
    timeout: int | None = None,
) -> None: ...


@overload
async def _run_extension_action(
    self,
    request: ExtensionActionRequest,
    response_model: type[_ResponseModel],
    *,
    timeout: int | None = None,
) -> _ResponseModel: ...


async def _run_extension_action(
    self,
    request: ExtensionActionRequest,
    response_model: type[_ResponseModel] | None = None,
    *,
    timeout: int | None = None,
) -> _ResponseModel | None:
    """Sends an extension action to `{base_url}/extension-actions` and parses the result.

    Args:
        request: The action to run; serialized with `model_dump()`.
        response_model: Model used to parse the response's `data` field. When `None`,
            the response data is ignored and `None` is returned.
        timeout: Optional timeout forwarded to the server in the request body. No
            client-side HTTP timeout is set here.

    Raises:
        NaradaError: The environment has no browser window ID, the server reported an
            error, or a response model was requested but the response has no data.
        NaradaTimeoutError: The server answered with HTTP 504 (Gateway Timeout).
        UserAbortedError: The server reported the action as aborted.
    """
    await self._ensure_initialized()
    browser_window_id = self._dispatch_browser_window_id
    if browser_window_id is None:
        raise NaradaError(
            f"{type(self).__name__} does not support browser extension actions"
        )

    # Annotated explicitly because later entries (e.g. `timeout`) are not strings.
    body: dict[str, Any] = {
        "action": request.model_dump(),
        "browserWindowId": browser_window_id,
    }
    # Optional remote-dispatch context, forwarded only when present in the environment.
    remote_dispatch_request_id = os.environ.get(_REMOTE_DISPATCH_REQUEST_ID_ENV_VAR)
    if remote_dispatch_request_id is not None:
        body["requestId"] = remote_dispatch_request_id
    remote_dispatch_api_key_id = os.environ.get(_REMOTE_DISPATCH_API_KEY_ID_ENV_VAR)
    if remote_dispatch_api_key_id is not None:
        body["apiKeyId"] = remote_dispatch_api_key_id
    if timeout is not None:
        body["timeout"] = timeout

    async with aiohttp.ClientSession() as session:
        async with session.post(
            f"{self._base_url}/extension-actions",
            headers=self._auth_headers,
            json=body,
            # Don't specify `timeout` here as the (soft) timeout is handled by the server.
        ) as resp:
            if resp.status == HTTPStatus.GATEWAY_TIMEOUT:
                raise NaradaTimeoutError
            resp.raise_for_status()
            resp_json = await resp.json()

    response = ExtensionActionResponse.model_validate(resp_json)
    if response.status == "error":
        raise NaradaError(response.error)
    if response.status == "aborted":
        raise UserAbortedError

    if response_model is None:
        return None

    # Validate explicitly instead of using `assert`, which is stripped under `python -O`
    # and would let `None` reach `model_validate_json`.
    if response.data is None:
        raise NaradaError("Extension action response did not include any data")
    return response_model.model_validate_json(response.data)
async def _initialize(self) -> None:
    """Starts Playwright and initializes the Narada extension in a browser window.

    When the environment was created with `attach_to_existing=True`, the extension is
    initialized in an existing browser window; otherwise a new browser window is opened.

    If window initialization fails, Playwright is stopped again before the exception
    propagates. Without this, a retry would overwrite the stored Playwright handles and
    the first Playwright instance could no longer be stopped.
    """
    self._playwright_context_manager = async_playwright()
    self._playwright = await self._playwright_context_manager.__aenter__()
    try:
        if self._attach_to_existing:
            await self._initialize_in_existing_browser_window()
        else:
            await self._open_and_initialize_browser_window()
    except BaseException:
        # Cleanup-and-reraise; `BaseException` so cancellation also releases Playwright.
        await self._stop_playwright()
        raise
+ await side_panel_page.reload() + + @override + async def _close_impl(self, *, timeout: int | None = None) -> None: + try: + if self._initialized and self._browser_window_id is not None: + await self._run_extension_action(CloseWindowRequest(), timeout=timeout) + finally: + await self._stop_playwright() + + async def _stop_playwright(self) -> None: + if self._playwright_context_manager is None: + return + + await self._playwright_context_manager.__aexit__(None, None, None) + self._playwright_context_manager = None + self._playwright = None + + async def _open_and_initialize_browser_window(self) -> None: + assert self._playwright is not None + launch_browser_result = await self._launch_browser( + self._playwright, self._config + ) + side_panel_page = launch_browser_result.side_panel_page + + await self._fix_download_behavior(side_panel_page) + + self._browser_process_id = launch_browser_result.browser_process_id + self._browser_window_id = launch_browser_result.browser_window_id + self._context = side_panel_page.context + + async def _initialize_in_existing_browser_window(self) -> None: + """Initializes the Narada extension in an existing browser window. + + This method connects to an existing browser process via CDP and performs the same + initialization logic as a launched browser, but without launching a new browser process. + """ + assert self._playwright is not None + + if self._config.proxy is not None: + raise ValueError( + "Proxy configuration is not supported for `BrowserEnvironment(..., " + "attach_to_existing=True)`. Proxy settings must be specified when launching " + "Chrome. Use `BrowserEnvironment` without `attach_to_existing` instead." 
async def _launch_browser(
    self, playwright: Playwright, config: BrowserConfig
) -> _LaunchBrowserResult:
    """Launch a detached browser process and wait for the Narada side panel to appear.

    The browser is started with a uniquely tagged initialization URL (or ``about:blank``
    when the proxy needs credentials), then this method repeatedly connects over CDP until
    it can read the browser window ID from the initialization page and find the matching
    side panel page.

    Args:
        playwright: Started Playwright instance used for the CDP connections.
        config: Browser settings (executable, profile, CDP port/URL, proxy, ...).

    Returns:
        The PID of the launched process, the browser window ID, and the side panel page.

    Raises:
        NaradaTimeoutError: If the side panel page has not shown up by the last attempt.
        Exception: Whatever ``connect_over_cdp`` raised, if the last attempt cannot connect.
    """
    # A unique tag is appended to the initialization URL so that we can find the new page that
    # was opened, since otherwise when more than one initialization page is opened in the same
    # browser instance, we wouldn't be able to tell them apart.
    window_tag = uuid4().hex
    tagged_initialization_url = f"{config.initialization_url}?t={window_tag}"

    # When proxy auth is needed, launch with about:blank to avoid Chrome's startup auth prompt.
    # We'll set up the CDP auth handler and then navigate to the init URL.
    proxy_requires_auth = (
        config.proxy is not None and config.proxy.requires_authentication
    )
    launch_url = "about:blank" if proxy_requires_auth else tagged_initialization_url

    browser_args = [
        f"--user-data-dir={config.user_data_dir}",
        f"--profile-directory={config.profile_directory}",
        f"--remote-debugging-port={config.cdp_port}",
        "--no-default-browser-check",
        "--no-first-run",
        "--new-window",
        launch_url,
    ]

    # Add proxy arguments if configured.
    if config.proxy is not None:
        config.proxy.validate()
        browser_args.append(f"--proxy-server={config.proxy.server}")

        if config.proxy.bypass:
            browser_args.append(f"--proxy-bypass-list={config.proxy.bypass}")

        if config.proxy.ignore_cert_errors:
            browser_args.append("--ignore-certificate-errors")

    # Launch an independent browser process which will not be killed when the current program
    # exits.
    if sys.platform == "win32":
        # NOTE: this branch uses the synchronous `subprocess.Popen`, with the Windows-only
        # creation flags, rather than the asyncio subprocess API used below.
        browser_process = subprocess.Popen(
            [config.executable_path, *browser_args],
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            creationflags=subprocess.CREATE_NEW_PROCESS_GROUP
            | subprocess.DETACHED_PROCESS,
        )
    else:
        browser_process = await asyncio.create_subprocess_exec(
            config.executable_path,
            *browser_args,
            stdin=asyncio.subprocess.DEVNULL,
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.DEVNULL,
            start_new_session=True,
        )

    logging.debug("Browser process started with PID: %s", browser_process.pid)

    # We need to wait a bit for the initial page to open before connecting to the browser over
    # CDP, otherwise Playwright can see an empty context with no pages.
    await asyncio.sleep(2)

    browser_window_id = None
    side_panel_page = None
    max_cdp_connect_attempts = 10

    # Track whether we've already navigated from about:blank to the initialization URL.
    # This is only relevant when proxy auth is enabled, where we launch with about:blank
    # to set up CDP auth handlers before any network traffic. We must only navigate once,
    # because on retry iterations context.pages[0] could be any page (side panel, devtools,
    # etc.) and navigating it would break the initialization flow.
    did_initial_navigation = False

    for attempt in range(max_cdp_connect_attempts):
        try:
            browser = await playwright.chromium.connect_over_cdp(config.cdp_url)
        except Exception:
            # The browser process might not be immediately ready to accept CDP connections.
            # Retry a few times before giving up.
            if attempt == max_cdp_connect_attempts - 1:
                raise
            await asyncio.sleep(2)
            continue

        context = browser.contexts[0]

        # If proxy auth is needed, set up the handler at browser level then navigate to the
        # initialization page. After navigation succeeds, Chrome has cached the proxy
        # credentials, so we can detach the CDP session.
        if proxy_requires_auth and not did_initial_navigation:
            proxy_cdp_session = (
                await self._setup_proxy_authentication_browser_level(
                    browser,
                    # Not None because `proxy_requires_auth` is True.
                    assert_not_none(config.proxy),
                )
            )
            blank_page = context.pages[0]
            await blank_page.goto(tagged_initialization_url)
            await proxy_cdp_session.detach()
            did_initial_navigation = True

        # Grab the browser window ID from the page we just opened.
        initialization_page = next(
            (p for p in context.pages if p.url == tagged_initialization_url), None
        )
        if initialization_page is not None:
            browser_window_id = await self._wait_for_browser_window_id(
                initialization_page, config
            )

            # The side panel may not be visible through this CDP connection yet; when it
            # is missing we fall through and reconnect on the next attempt.
            side_panel_url = create_side_panel_url(config, browser_window_id)
            side_panel_page = next(
                (p for p in context.pages if p.url == side_panel_url), None
            )
            if side_panel_page is not None:
                break

        if attempt == max_cdp_connect_attempts - 1:
            raise NaradaTimeoutError("Timed out waiting for initialization page")

        # Close the current CDP connection and try again.
        await browser.close()
        await asyncio.sleep(3)

    # These are impossible as we would've raised an exception above otherwise.
    assert browser_window_id is not None
    assert side_panel_page is not None

    if config.interactive:
        self._print_success_message(browser_window_id)

    return _LaunchBrowserResult(
        browser_process_id=browser_process.pid,
        browser_window_id=browser_window_id,
        side_panel_page=side_panel_page,
    )
initialization_page, config, timeout=timeout + ) + + async def _setup_proxy_authentication_browser_level( + self, browser: Browser, proxy_config: ProxyConfig + ) -> CDPSession: + """Sets up proxy authentication handling via CDP at the browser level. + + This uses a browser-level CDP session which can intercept auth challenges before they reach + individual pages, preventing Chrome from showing the proxy authentication dialog. + + Chrome caches proxy credentials for the session after the first successful authentication. + The caller should detach the returned CDP session after the first navigation succeeds. + """ + cdp_session = await browser.new_browser_cdp_session() + + # Enable Fetch domain with a catch-all pattern to intercept auth challenges. + await cdp_session.send( + "Fetch.enable", + { + "handleAuthRequests": True, + "patterns": [{"urlPattern": "*"}], + }, + ) + + async def handle_auth(params: dict[str, Any]) -> None: + request_id = params.get("requestId") + auth_challenge = params.get("authChallenge", {}) + + # Only handle proxy auth challenges + if auth_challenge.get("source") != "Proxy": + return + + try: + await cdp_session.send( + "Fetch.continueWithAuth", + { + "requestId": request_id, + "authChallengeResponse": { + "response": "ProvideCredentials", + "username": proxy_config.username, + "password": proxy_config.password, + }, + }, + ) + logging.debug("Browser-level proxy authentication credentials provided") + except Exception as e: + logging.error("Failed to respond to proxy auth challenge: %s", e) + + async def handle_request_paused(params: dict[str, Any]) -> None: + # Continue all paused requests immediately + request_id = params.get("requestId") + try: + await cdp_session.send( + "Fetch.continueRequest", {"requestId": request_id} + ) + except Exception: + pass + + cdp_session.on( + "Fetch.authRequired", + lambda params: asyncio.create_task(handle_auth(params)), + ) + cdp_session.on( + "Fetch.requestPaused", + lambda params: 
asyncio.create_task(handle_request_paused(params)), + ) + + return cdp_session + + async def _fix_download_behavior(self, side_panel_page: Page) -> None: + """Reverts the download behavior to the default behavior for the extension, otherwise our + extension cannot download files. + """ + cdp_session = await side_panel_page.context.new_cdp_session(side_panel_page) + await cdp_session.send("Page.setDownloadBehavior", {"behavior": "default"}) + await cdp_session.detach() + + def _print_success_message(self, browser_window_id: str) -> None: + self._browser_initialization.print_success_message(browser_window_id) + + +class RemoteBrowserEnvironment(BaseBrowserEnvironment): + def __init__( + self, + *, + browser_window_id: str, + cloud_browser_session_id: str | None = None, + api_key: str | None = None, + auth_headers: dict[str, str] | None = None, + ) -> None: + super().__init__( + api_key=api_key, + auth_headers=auth_headers, + browser_window_id=browser_window_id, + ) + self._cloud_browser_session_id = cloud_browser_session_id + + @property + def _validates_sdk_config(self) -> bool: + return False + + @property + def cloud_browser_session_id(self) -> str | None: + return self._cloud_browser_session_id + + @override + async def _close_impl(self, *, timeout: int | None = None) -> None: + """Closes the remote browser environment. + + If this window is backed by a cloud browser session, this also stops the cloud + session. 
async def get_downloaded_files(self) -> list[SessionDownloadItem]:
    """List the files downloaded during the backing cloud browser session.

    Each item carries the file name, its size and a presigned GET URL.

    Raises:
        ValueError: If this window is not associated with a cloud browser session.
    """
    session_id = self._cloud_browser_session_id
    if session_id is None:
        raise ValueError(
            "Cloud browser session ID is required to get downloaded files"
        )

    downloads = await _get_cloud_browser_downloads(
        base_url=self._base_url,
        auth_headers=self._auth_headers,
        session_id=session_id,
    )
    return downloads
async def _initialize(self) -> None:
    """Provision a cloud browser session and attach this environment to it.

    Starts the local Playwright driver, asks the backend
    (``POST /cloud-browser/create-cloud-browser-session``) for a new session with the
    extension required, then passes the returned CDP endpoint, login URL and CDP auth
    headers to ``_initialize_cloud_browser_window``.

    If initializing the window fails, a best-effort request to stop the freshly created
    session is sent before the original exception is re-raised.

    Raises:
        RuntimeError: If the backend rejects the session request. For a 403 response the
            error also carries ``status_code`` and ``detail`` attributes.
    """
    playwright_manager = async_playwright()
    self._playwright_context_manager = playwright_manager
    self._playwright = await playwright_manager.__aenter__()

    payload = {
        "require_extension": True,
        "session_name": self._session_name,
        "session_timeout": self._session_timeout,
    }
    create_url = f"{self._base_url}/cloud-browser/create-cloud-browser-session"

    async with aiohttp.ClientSession() as http:
        async with http.post(
            create_url,
            headers=self._auth_headers,
            json=payload,
            # Session startup can be slow; allow up to 3 minutes.
            timeout=aiohttp.ClientTimeout(total=180),
        ) as resp:
            if not resp.ok:
                error_text = await resp.text()
                failure = RuntimeError(
                    f"Failed to create cloud browser session: {resp.status} {error_text}\n"
                    f"Endpoint URL: {create_url}"
                )
                if resp.status == HTTPStatus.FORBIDDEN:
                    # Expose the structured error detail for 403 responses.
                    api_error = ApiErrorPayload.from_error_text(error_text)
                    failure.status_code = resp.status  # type: ignore[attr-defined]
                    failure.detail = api_error.detail  # type: ignore[attr-defined]
                raise failure
            session_info = await resp.json()

    cdp_websocket_url = session_info["cdp_websocket_url"]
    session_id = session_info["session_id"]
    login_url = session_info["login_url"]
    cdp_auth_headers = session_info["cdp_auth_headers"]

    # Connect over CDP (with the auth headers) and log the user in.
    try:
        await self._initialize_cloud_browser_window(
            cdp_websocket_url=cdp_websocket_url,
            session_id=session_id,
            login_url=login_url,
            cdp_auth_headers=cdp_auth_headers,
        )
    except Exception:
        # The session already exists on the backend; try to stop it so it does not linger.
        stop_url = f"{self._base_url}/cloud-browser/stop-cloud-browser-session"
        try:
            async with aiohttp.ClientSession() as cleanup_http:
                async with cleanup_http.post(
                    stop_url,
                    headers=self._auth_headers,
                    json={"session_id": session_id, "status": "failed"},
                    timeout=aiohttp.ClientTimeout(total=10),
                ) as stop_resp:
                    if not stop_resp.ok:
                        logging.warning(
                            "Failed to cleanup session %s: %s",
                            session_id,
                            stop_resp.status,
                        )
                    else:
                        logging.info(
                            "Cleaned up session %s after CDP connection failure",
                            session_id,
                        )
        except Exception as cleanup_error:
            logging.warning(
                "Error cleaning up session %s: %s", session_id, cleanup_error
            )
        # Surface the original failure, not the outcome of the cleanup.
        raise
+ await side_panel_page.reload() + + async def _wait_for_browser_window_id( + self, + initialization_page: Page, + config: BrowserConfig, + timeout: int = 30_000, + ) -> str: + return await self._browser_initialization.wait_for_browser_window_id( + initialization_page, config, timeout=timeout + ) + + def _print_success_message(self, browser_window_id: str) -> None: + self._browser_initialization.print_success_message(browser_window_id) + + async def _initialize_cloud_browser_window( + self, + *, + cdp_websocket_url: str, + session_id: str, + login_url: str, + cdp_auth_headers: dict[str, str], + ) -> None: + assert self._playwright is not None + + # Connect to browser via CDP with authentication headers + browser = await self._playwright.chromium.connect_over_cdp( + cdp_websocket_url, headers=cdp_auth_headers + ) + + # Navigate to login URL (provided by backend with custom token) + context = browser.contexts[0] + initialization_page = context.pages[0] + await initialization_page.goto( + login_url, timeout=15_000, wait_until="domcontentloaded" + ) + + # Wait for browser window ID. The extension can take a bit to be installed, so we retry a + # few times. 
+ max_attempts = 10 + for attempt in range(max_attempts): + try: + browser_window_id = await self._wait_for_browser_window_id( + initialization_page, + self._config, + timeout=30_000, + ) + break + except NaradaExtensionMissingError: + if attempt == max_attempts - 1: + raise + logging.info("Waiting for Narada extension to be installed...") + await asyncio.sleep(1) + except NaradaTimeoutError: + if attempt == max_attempts - 1: + raise + # If browser window ID is not found, reload the page and try again + # try to go to the login URL again (with customToken query param) + await initialization_page.goto( + login_url, timeout=15_000, wait_until="domcontentloaded" + ) + + self._browser_window_id = browser_window_id + self._session_id = session_id + self._context = context + + if self._config.interactive: + self._print_success_message(browser_window_id) + + @override + async def _close_impl(self, *, timeout: int | None = None) -> None: + """Stops the cloud browser session. + + Unlike local browser windows where close() closes a single window, this stops the + entire cloud session since the serverless container manages the browser lifecycle. 
+ """ + try: + if self._session_id is not None: + await _stop_cloud_browser_session( + base_url=self._base_url, + auth_headers=self._auth_headers, + session_id=self._session_id, + timeout=timeout, + ) + finally: + await self._stop_playwright() + + async def get_downloaded_files(self) -> list[SessionDownloadItem]: + """Return files downloaded during this cloud browser session (file name, size, presigned GET URL per file).""" + return await _get_cloud_browser_downloads( + base_url=self._base_url, + auth_headers=self._auth_headers, + session_id=self.cloud_browser_session_id, + ) + + def __str__(self) -> str: + return ( + "CloudBrowserEnvironment(" + f"cloud_browser_session_id={self._session_id}, " + f"browser_window_id={self.browser_window_id}" + ")" + ) + + +class LambdaEnvironment(Environment): + """Cloud execution environment without browser actions. + + This uses the same backend endpoint as the old extensionless cloud-browser path: + ``POST /cloud-browser/create-and-initialize-cloud-browser-session``. The backend provisions + and initializes the execution target server-side, so local Playwright is not used. + """ + + def __init__( + self, + *, + session_name: str | None = None, + session_timeout: int | None = None, + api_key: str | None = None, + auth_headers: dict[str, str] | None = None, + ) -> None: + super().__init__(api_key=api_key, auth_headers=auth_headers) + self._session_name = session_name + self._session_timeout = session_timeout + self._session_id: str | None = None + self._browser_window_id: str | None = None + + @property + def session_id(self) -> str: + if self._session_id is None: + raise RuntimeError( + "Lambda environment is not initialized yet. Call `await env.start()` " + "or run an agent first." 
async def _initialize(self) -> None:
    """Create and initialize the backend session for this environment.

    Sends ``POST /cloud-browser/create-and-initialize-cloud-browser-session`` with
    ``require_extension`` set to ``False`` and stores the ``browser_window_id`` and
    ``session_id`` from the response.

    Raises:
        RuntimeError: If the backend rejects the request. For a 403 response the error
            also carries ``status_code`` and ``detail`` attributes.
    """
    create_url = f"{self._base_url}/cloud-browser/create-and-initialize-cloud-browser-session"
    payload = {
        "require_extension": False,
        "session_name": self._session_name,
        "session_timeout": self._session_timeout,
    }

    async with aiohttp.ClientSession() as http:
        async with http.post(
            create_url,
            headers=self._auth_headers,
            json=payload,
            timeout=aiohttp.ClientTimeout(total=180),
        ) as resp:
            if not resp.ok:
                error_text = await resp.text()
                failure = RuntimeError(
                    f"Failed to create lambda environment: {resp.status} {error_text}\n"
                    f"Endpoint URL: {create_url}"
                )
                if resp.status == HTTPStatus.FORBIDDEN:
                    # Expose the structured error detail for 403 responses.
                    api_error = ApiErrorPayload.from_error_text(error_text)
                    failure.status_code = resp.status  # type: ignore[attr-defined]
                    failure.detail = api_error.detail  # type: ignore[attr-defined]
                raise failure
            session_info = await resp.json()

    self._browser_window_id = session_info["browser_window_id"]
    self._session_id = session_info["session_id"]
async def _get_cloud_browser_downloads(
    *,
    base_url: str,
    auth_headers: dict[str, str],
    session_id: str,
) -> list[SessionDownloadItem]:
    """Fetch a session's downloaded files and resolve a presigned URL for each one.

    Lists the downloads via ``GET /cloud-browser/replay/downloads`` and then requests the
    presigned URLs concurrently, one per file key, over the same HTTP session. Returns an
    empty list when the session has no downloaded files.
    """
    request_timeout = aiohttp.ClientTimeout(total=60)
    async with aiohttp.ClientSession() as http_session:
        async with http_session.get(
            f"{base_url}/cloud-browser/replay/downloads",
            params={"session_id": session_id},
            headers=auth_headers,
            timeout=request_timeout,
        ) as resp:
            resp.raise_for_status()
            listing = await resp.json()

        # A missing or null "downloaded_files" entry is treated like an empty list.
        entries = listing.get("downloaded_files") or []
        if not entries:
            return []

        url_requests = [
            _fetch_presigned_download_url(
                http_session,
                base_url=base_url,
                auth_headers=auth_headers,
                session_id=session_id,
                key=entry["key"],
                timeout=request_timeout,
            )
            for entry in entries
        ]
        # `gather` returns results in request order, so they line up with `entries`.
        presigned_urls = await asyncio.gather(*url_requests)

    return [
        SessionDownloadItem(
            file_name=entry["file_name"],
            size=entry["size"],
            download_url=url,
        )
        for entry, url in zip(entries, presigned_urls)
    ]
def create_side_panel_url(config: BrowserConfig, browser_window_id: str) -> str:
    """Build the URL of the extension's side panel page for the given browser window.

    The URL points at ``sidepanel.html`` of the extension identified by
    ``config.extension_id`` and carries the window ID in the ``browserWindowId`` query
    parameter.
    """
    extension_root = f"chrome-extension://{config.extension_id}"
    return f"{extension_root}/sidepanel.html?browserWindowId={browser_window_id}"
- RecordedClick, - UserApprovalRequest, - UserApprovalResponse, - WaitForElementRequest, - WaitForElementResponse, - WriteExcelSheetRequest, - WriteGoogleSheetRequest, -) -from narada_core.errors import ( - NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE, - NaradaError, - NaradaTimeoutError, - UserAbortedError, -) -from narada_core.models import ( - Agent, - CriticConfig, - File, - McpServer, - ReasoningEffort, - RemoteDispatchChatHistoryItem, - Response, - UserResourceCredentials, - _RemoteDispatchPollResponse, -) -from narada_core.tracing.model import parse_action_trace -from playwright.async_api import ( - BrowserContext, -) -from pydantic import BaseModel - -from narada.config import BrowserConfig - -logger = logging.getLogger(__name__) - -_StructuredOutput = TypeVar("_StructuredOutput", bound=BaseModel) - - -_ResponseModel = TypeVar("_ResponseModel", bound=BaseModel) - -# Optional remote-dispatch context. In frontend Pyodide runs, these are generated -# by prepare-code.ts; extension-action calls forward them so the parent request -# can report active input-required status. 
-_REMOTE_DISPATCH_REQUEST_ID_ENV_VAR = "NARADA_REMOTE_DISPATCH_REQUEST_ID" -_REMOTE_DISPATCH_API_KEY_ID_ENV_VAR = "NARADA_REMOTE_DISPATCH_API_KEY_ID" - -type InputRequiredCallback = Callable[[ActiveInputRequest], Awaitable[None] | None] - - -async def _notify_input_required_callback( - callback: InputRequiredCallback | None, - response: _RemoteDispatchPollResponse, - seen_input_ids: set[str], -) -> None: - if callback is None or response.get("status") != "input-required": - return - - active_input_request_data = response.get("activeInputRequest") - if active_input_request_data is None: - return - - active_input_request = ActiveInputRequest.model_validate(active_input_request_data) - if active_input_request.input_id in seen_input_ids: - return - - seen_input_ids.add(active_input_request.input_id) - callback_result = callback(active_input_request) - if inspect.isawaitable(callback_result): - await callback_result - - -class _InputVariableFileReference(TypedDict): - source: Literal["remoteDispatchUpload"] - id: str - filename: str - mimeType: str - - -type _JsonPrimitive = str | int | float | bool | None -type _InputVariableValue = ( - _JsonPrimitive - | IOBase - | list["_InputVariableValue"] - | dict[str, "_InputVariableValue"] -) -type _InputVariables = dict[str, _InputVariableValue] -type _NormalizedInputVariableValue = ( - _JsonPrimitive - | _InputVariableFileReference - | list["_NormalizedInputVariableValue"] - | dict[str, "_NormalizedInputVariableValue"] -) -type _NormalizedInputVariables = dict[str, _NormalizedInputVariableValue] - - -class _PresignedPost(BaseModel): - url: str - fields: dict[str, Any] - - -@dataclass -class SessionDownloadItem: - """A file downloaded during a cloud browser session (file name, size, presigned GET URL).""" - - file_name: str - size: int - download_url: str - - -class BaseBrowserWindow(ABC): - _auth_headers: dict[str, str] - _base_url: str - _browser_window_id: str - - def __init__( - self, - *, - auth_headers: dict[str, str], - 
base_url: str, - browser_window_id: str, - ) -> None: - self._auth_headers = auth_headers - self._base_url = base_url - self._browser_window_id = browser_window_id - - @property - def browser_window_id(self) -> str: - return self._browser_window_id - - @property - def cloud_browser_session_id(self) -> str | None: - """Cloud browser session backing this window, if any. - - `dispatch_request` includes this value in remote-dispatch requests so backend - observability can link a client-mode run to an existing SDK-owned cloud browser. Plain - local windows are not cloud-backed and return `None`; cloud-backed subclasses override this - property with their session ID. - """ - return None - - async def upload_file(self, *, file: IO) -> File: - """Uploads a file that can be used as an attachment in a subsequent `agent` request. - - The file is temporarily saved in Narada cloud and expires after 1 day. It can only be - accessed by the user who uploaded it. - """ - # TODO: We will deprecate this public method in favor of automatic upload via - # input_variables file objects. - return await self._upload_file_impl(file=file) - - async def _upload_file_impl(self, *, file: IO[Any]) -> File: - # Get the base filename without directories. - filename = Path(file.name).name - - async with aiohttp.ClientSession() as session: - # First generate a presigned POST for uploading the file. - async with session.post( - f"{self._base_url}/remote-dispatch/generate-file-upload-presigned-post", - headers=self._auth_headers, - json={"filename": filename}, - ) as resp: - resp.raise_for_status() - resp_json = await resp.json() - - presigned_post = _PresignedPost.model_validate(resp_json) - object_key: str = presigned_post.fields["key"] - - # Upload the file with a POST request where: - # - The URL is the presigned POST URL. - # - The form fields are the presigned POST fields. - # - The form data has an addition 'file' field that contains the file contents. 
- form_data = aiohttp.FormData(presigned_post.fields) - form_data.add_field("file", file) - async with session.post(presigned_post.url, data=form_data) as resp: - resp.raise_for_status() - - return File(key=object_key) - - async def _normalize_input_variables( - self, *, input_variables: Mapping[str, Any] - ) -> _NormalizedInputVariables: - normalized: _NormalizedInputVariables = {} - for key, value in input_variables.items(): - normalized[key] = await self._normalize_input_variables_value_impl( - input_variable_value=value - ) - return normalized - - async def _normalize_input_variables_value_impl( - self, *, input_variable_value: Any - ) -> _NormalizedInputVariableValue: - if isinstance(input_variable_value, list): - return [ - await self._normalize_input_variables_value_impl( - input_variable_value=item - ) - for item in input_variable_value - ] - - if self._is_uploadable_file(input_variable_value): - return await self._upload_input_variable_file( - input_variable_value=input_variable_value - ) - - if isinstance(input_variable_value, dict): - normalized: dict[str, _NormalizedInputVariableValue] = {} - for key, value in input_variable_value.items(): - normalized[key] = await self._normalize_input_variables_value_impl( - input_variable_value=value - ) - return normalized - - return input_variable_value - - @staticmethod - def _is_uploadable_file(value: Any) -> TypeGuard[IO[Any]]: - # Keep runtime eligibility aligned with current upload_file expectations. 
- return isinstance(value, IOBase) and hasattr(value, "name") - - async def _upload_input_variable_file( - self, *, input_variable_value: IO[Any] - ) -> _InputVariableFileReference: - filename = Path(input_variable_value.name).name - uploaded_file = await self._upload_file_impl(file=input_variable_value) - mime_type = mimetypes.guess_type(filename)[0] or "application/octet-stream" - return { - "source": "remoteDispatchUpload", - "id": uploaded_file["key"], - "filename": filename, - "mimeType": mime_type, - } - - # `reasoning` is only valid with the Core Agent; these two overloads make - # that constraint type-checkable. Generic-agent calls fall through to the - # general overloads below, which do not accept a `reasoning` argument. - @overload - async def dispatch_request( - self, - *, - prompt: str, - agent: Literal[Agent.CORE_AGENT], - reasoning: ReasoningEffort | None = None, - clear_chat: bool | None = None, - generate_gif: bool | None = None, - output_schema: None = None, - previous_request_id: str | None = None, - chat_history: list[RemoteDispatchChatHistoryItem] | None = None, - additional_context: dict[str, str] | None = None, - attachment: File | None = None, - time_zone: str = "America/Los_Angeles", - user_resource_credentials: UserResourceCredentials | None = None, - mcp_servers: list[McpServer] | None = None, - secret_variables: dict[str, str] | None = None, - input_variables: Mapping[str, Any] | None = None, - callback_url: str | None = None, - callback_secret: str | None = None, - callback_headers: Mapping[str, Any] | None = None, - on_input_required: InputRequiredCallback | None = None, - timeout: int = 1000, - ) -> Response[None]: ... 
- - @overload - async def dispatch_request( - self, - *, - prompt: str, - agent: Literal[Agent.CORE_AGENT], - reasoning: ReasoningEffort | None = None, - clear_chat: bool | None = None, - generate_gif: bool | None = None, - output_schema: type[_StructuredOutput], - previous_request_id: str | None = None, - chat_history: list[RemoteDispatchChatHistoryItem] | None = None, - additional_context: dict[str, str] | None = None, - attachment: File | None = None, - time_zone: str = "America/Los_Angeles", - user_resource_credentials: UserResourceCredentials | None = None, - mcp_servers: list[McpServer] | None = None, - secret_variables: dict[str, str] | None = None, - input_variables: Mapping[str, Any] | None = None, - callback_url: str | None = None, - callback_secret: str | None = None, - callback_headers: Mapping[str, Any] | None = None, - on_input_required: InputRequiredCallback | None = None, - timeout: int = 1000, - ) -> Response[_StructuredOutput]: ... - - @overload - async def dispatch_request( - self, - *, - prompt: str, - agent: Agent | str = Agent.OPERATOR, - clear_chat: bool | None = None, - generate_gif: bool | None = None, - output_schema: None = None, - previous_request_id: str | None = None, - chat_history: list[RemoteDispatchChatHistoryItem] | None = None, - additional_context: dict[str, str] | None = None, - attachment: File | None = None, - time_zone: str = "America/Los_Angeles", - user_resource_credentials: UserResourceCredentials | None = None, - mcp_servers: list[McpServer] | None = None, - secret_variables: dict[str, str] | None = None, - input_variables: Mapping[str, Any] | None = None, - critic_context: dict[str, Any] | None = None, - callback_url: str | None = None, - callback_secret: str | None = None, - callback_headers: Mapping[str, Any] | None = None, - on_input_required: InputRequiredCallback | None = None, - timeout: int = 1000, - ) -> Response[None]: ... 
- - @overload - async def dispatch_request( - self, - *, - prompt: str, - agent: Agent | str = Agent.OPERATOR, - clear_chat: bool | None = None, - generate_gif: bool | None = None, - output_schema: type[_StructuredOutput], - previous_request_id: str | None = None, - chat_history: list[RemoteDispatchChatHistoryItem] | None = None, - additional_context: dict[str, str] | None = None, - attachment: File | None = None, - time_zone: str = "America/Los_Angeles", - user_resource_credentials: UserResourceCredentials | None = None, - mcp_servers: list[McpServer] | None = None, - secret_variables: dict[str, str] | None = None, - input_variables: Mapping[str, Any] | None = None, - critic_context: dict[str, Any] | None = None, - callback_url: str | None = None, - callback_secret: str | None = None, - callback_headers: Mapping[str, Any] | None = None, - on_input_required: InputRequiredCallback | None = None, - timeout: int = 1000, - ) -> Response[_StructuredOutput]: ... - - async def dispatch_request( - self, - *, - prompt: str, - agent: Agent | str = Agent.OPERATOR, - reasoning: ReasoningEffort | None = None, - clear_chat: bool | None = None, - generate_gif: bool | None = None, - output_schema: type[BaseModel] | None = None, - previous_request_id: str | None = None, - chat_history: list[RemoteDispatchChatHistoryItem] | None = None, - additional_context: dict[str, str] | None = None, - attachment: File | None = None, - time_zone: str = "America/Los_Angeles", - user_resource_credentials: UserResourceCredentials | None = None, - mcp_servers: list[McpServer] | None = None, - secret_variables: dict[str, str] | None = None, - input_variables: Mapping[str, Any] | None = None, - critic_context: dict[str, Any] | None = None, - callback_url: str | None = None, - callback_secret: str | None = None, - callback_headers: Mapping[str, Any] | None = None, - on_input_required: InputRequiredCallback | None = None, - timeout: int = 1000, - ) -> Response: - """Low-level API for invoking an agent 
in the Narada extension side panel chat. - - The higher-level `agent` method should be preferred for most use cases. - """ - # The overloads enforce this at type-check time when callers use - # ``Agent.CORE_AGENT``; the runtime check covers string-form agents - # (``agent="..."``) and callers without a type checker. - if reasoning is not None and agent is not Agent.CORE_AGENT: - raise ValueError( - "`reasoning` is only supported with `agent=Agent.CORE_AGENT` " - f"(got agent={agent!r})" - ) - deadline = time.monotonic() + timeout - - agent_prefix = ( - agent.prompt_prefix() if isinstance(agent, Agent) else f"{agent} " - ) - body: dict[str, Any] = { - "prompt": agent_prefix + prompt, - "browserWindowId": self.browser_window_id, - "timeZone": time_zone, - } - cloud_browser_session_id = self.cloud_browser_session_id - if cloud_browser_session_id is not None: - body["cloudBrowserSessionId"] = cloud_browser_session_id - if clear_chat is not None: - body["clearChat"] = clear_chat - if generate_gif is not None: - body["saveScreenshots"] = generate_gif - if output_schema is not None: - body["responseFormat"] = { - "type": "jsonSchema", - "jsonSchema": output_schema.model_json_schema(), - } - if previous_request_id is not None: - body["previousRequestId"] = previous_request_id - if chat_history is not None: - body["chatHistory"] = chat_history - if additional_context is not None: - body["additionalContext"] = additional_context - if attachment is not None: - body["attachment"] = attachment - if user_resource_credentials is not None: - body["userResourceCredentials"] = user_resource_credentials - if mcp_servers is not None: - body["mcpServers"] = [ - server.model_dump(mode="json") for server in mcp_servers - ] - if secret_variables is not None: - body["secretVariables"] = secret_variables - if input_variables is not None: - body["inputVariables"] = await self._normalize_input_variables( - input_variables=input_variables - ) - if critic_context is not None: - 
body["criticContext"] = critic_context - if callback_url is not None: - body["callbackUrl"] = callback_url - if callback_secret is not None: - body["callbackSecret"] = callback_secret - if callback_headers is not None: - body["callbackHeaders"] = callback_headers - if reasoning is not None: - body["reasoningMode"] = reasoning.value - - try: - seen_input_ids: set[str] = set() - async with aiohttp.ClientSession() as session: - async with session.post( - f"{self._base_url}/remote-dispatch", - headers=self._auth_headers, - json=body, - timeout=aiohttp.ClientTimeout(total=timeout), - ) as resp: - resp.raise_for_status() - request_id = (await resp.json())["requestId"] - - while (now := time.monotonic()) < deadline: - async with session.get( - f"{self._base_url}/remote-dispatch/responses/{request_id}", - headers=self._auth_headers, - timeout=aiohttp.ClientTimeout(total=deadline - now), - ) as resp: - resp.raise_for_status() - response: _RemoteDispatchPollResponse = await resp.json() - - response["requestId"] = request_id - - if response["completedAt"] is None: - await _notify_input_required_callback( - on_input_required, - response, - seen_input_ids, - ) - # Poll every 3 seconds. - await asyncio.sleep(3) - continue - - response_content = response["response"] - if response_content is not None: - # Populate the `structuredOutput` field. This is a client-side field - # that's not directly returned by the API. - output_data = response_content.get("output") - if ( - output_schema is not None - and output_data is not None - and output_data.get("type") == "structured" - ): - response_content["structuredOutput"] = ( - output_schema.model_validate(output_data["content"]) - ) - else: - response_content["structuredOutput"] = None - - return cast(Response, response) - else: - raise NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE(timeout) - - except asyncio.TimeoutError: - raise NaradaAgentTimeoutError_INTERNAL_DO_NOT_USE(timeout) - - # `reasoning` is only valid with the Core Agent. 
See `dispatch_request` - # above for the rationale; the same overload pattern is mirrored here. - @overload - async def agent( - self, - *, - prompt: str, - agent: Literal[Agent.CORE_AGENT], - reasoning: ReasoningEffort | None = None, - clear_chat: bool | None = None, - generate_gif: bool | None = None, - output_schema: None = None, - attachment: File | None = None, - time_zone: str = "America/Los_Angeles", - mcp_servers: list[McpServer] | None = None, - secret_variables: dict[str, str] | None = None, - input_variables: Mapping[str, Any] | None = None, - on_input_required: InputRequiredCallback | None = None, - timeout: int = 1000, - ) -> AgentResponse[dict[str, Any]]: ... - - @overload - async def agent( - self, - *, - prompt: str, - agent: Literal[Agent.CORE_AGENT], - reasoning: ReasoningEffort | None = None, - clear_chat: bool | None = None, - generate_gif: bool | None = None, - output_schema: type[_StructuredOutput], - attachment: File | None = None, - time_zone: str = "America/Los_Angeles", - mcp_servers: list[McpServer] | None = None, - secret_variables: dict[str, str] | None = None, - input_variables: Mapping[str, Any] | None = None, - on_input_required: InputRequiredCallback | None = None, - timeout: int = 1000, - ) -> AgentResponse[_StructuredOutput]: ... - - @overload - async def agent( - self, - *, - prompt: str, - agent: Agent | str = Agent.OPERATOR, - clear_chat: bool | None = None, - generate_gif: bool | None = None, - output_schema: None = None, - attachment: File | None = None, - time_zone: str = "America/Los_Angeles", - mcp_servers: list[McpServer] | None = None, - secret_variables: dict[str, str] | None = None, - input_variables: Mapping[str, Any] | None = None, - on_input_required: InputRequiredCallback | None = None, - critic: CriticConfig | None = None, - timeout: int = 1000, - ) -> AgentResponse[dict[str, Any]]: ... 
- - @overload - async def agent( - self, - *, - prompt: str, - agent: Agent | str = Agent.OPERATOR, - clear_chat: bool | None = None, - generate_gif: bool | None = None, - output_schema: type[_StructuredOutput], - attachment: File | None = None, - time_zone: str = "America/Los_Angeles", - mcp_servers: list[McpServer] | None = None, - secret_variables: dict[str, str] | None = None, - input_variables: Mapping[str, Any] | None = None, - on_input_required: InputRequiredCallback | None = None, - critic: CriticConfig | None = None, - timeout: int = 1000, - ) -> AgentResponse[_StructuredOutput]: ... - - async def agent( - self, - *, - prompt: str, - agent: Agent | str = Agent.OPERATOR, - reasoning: ReasoningEffort | None = None, - clear_chat: bool | None = None, - generate_gif: bool | None = None, - output_schema: type[BaseModel] | None = None, - attachment: File | None = None, - time_zone: str = "America/Los_Angeles", - mcp_servers: list[McpServer] | None = None, - secret_variables: dict[str, str] | None = None, - input_variables: Mapping[str, Any] | None = None, - on_input_required: InputRequiredCallback | None = None, - critic: CriticConfig | None = None, - timeout: int = 1000, - ) -> AgentResponse: - """Invokes an agent in the Narada extension side panel chat.""" - # Branch on `reasoning` so each call site binds a single, typed overload - # of `dispatch_request`. The validation also lives in `dispatch_request` - # itself (defense in depth + reachable when callers go straight to the - # low-level API), so the redundancy here is intentional. 
- if reasoning is None: - remote_dispatch_response = await self.dispatch_request( - prompt=prompt, - agent=agent, - clear_chat=clear_chat, - generate_gif=generate_gif, - output_schema=output_schema, - attachment=attachment, - time_zone=time_zone, - mcp_servers=mcp_servers, - secret_variables=secret_variables, - input_variables=input_variables, - on_input_required=on_input_required, - timeout=timeout, - ) - else: - if agent is not Agent.CORE_AGENT: - raise ValueError( - "`reasoning` is only supported with `agent=Agent.CORE_AGENT` " - f"(got agent={agent!r})" - ) - # The CORE_AGENT-specific overloads of `dispatch_request` split on - # a narrower `output_schema` discriminator (None vs `type[T]`), - # which the impl's `type[BaseModel] | None` union doesn't cleanly - # narrow into without further branching. The public `agent()` - # overloads above already give callers correct return-type - # narrowing, so the internal forward call bypasses overload - # disambiguation on this single dimension. 
- remote_dispatch_response = await self.dispatch_request( # pyright: ignore[reportCallIssue] - prompt=prompt, - agent=agent, - reasoning=reasoning, - clear_chat=clear_chat, - generate_gif=generate_gif, - output_schema=output_schema, # pyright: ignore[reportArgumentType] - attachment=attachment, - time_zone=time_zone, - mcp_servers=mcp_servers, - secret_variables=secret_variables, - input_variables=input_variables, - on_input_required=on_input_required, - timeout=timeout, - ) - response_content = remote_dispatch_response["response"] - assert response_content is not None - - action_trace_raw = response_content.get("actionTrace") - action_trace = ( - parse_action_trace(action_trace_raw) - if action_trace_raw is not None - else None - ) - workflow_trace = response_content.get("workflowTrace") - - critic_result: CriticResult | None = None - if critic is not None: - critic_result = await run_critic( - dispatch_request=self.dispatch_request, - original_prompt=prompt, - response_content=response_content, - action_trace_raw=action_trace_raw, - critic=critic, - time_zone=time_zone, - timeout=timeout, - ) - - return AgentResponse( - request_id=remote_dispatch_response["requestId"], - status=remote_dispatch_response["status"], - text=response_content["text"], - output=response_content["output"], - structured_output=response_content.get("structuredOutput"), - usage=AgentUsage.model_validate(remote_dispatch_response["usage"]), - action_trace=action_trace, - workflow_trace=workflow_trace, - critic_result=critic_result, - ) - - async def agentic_selector( - self, - *, - action: AgenticSelectorAction, - selectors: AgenticSelectors, - fallback_operator_query: str, - # Larger default timeout because Operator can take a bit to run. - timeout: int | None = 300, - ) -> AgenticSelectorResponse: - """Performs an action on an element specified by the given selectors, falling back to using - the Operator agent if the selectors fail to match a unique element. 
- """ - response_model = ( - AgenticSelectorResponse - if action["type"] in {"get_text", "get_property"} - else None - ) - result = await self._run_extension_action( - AgenticSelectorRequest( - action=action, - selectors=selectors, - fallback_operator_query=fallback_operator_query, - ), - response_model=response_model, - timeout=timeout, - ) - - if result is None: - return AgenticSelectorResponse(value=None) - - return result - - async def agentic_matching_selectors_finder( - self, - *, - prompt: str, - timeout: int | None = 300, - ) -> list[AgenticSelectors]: - """Finds all visible targets matching a prompt and returns selectors.""" - result = await self._run_extension_action( - AgenticMatchingSelectorsFinderRequest(prompt=prompt), - AgenticMatchingSelectorsFinderResponse, - timeout=timeout, - ) - return result.selectors - - async def agentic_mouse_action( - self, - *, - action: AgenticMouseAction, - recorded_click: RecordedClick, - fallback_operator_query: str, - resize_window: bool = True, - timeout: int | None = 60, - ) -> None: - """Performs a mouse action at the specified click coordinates, falling back to using - the Operator agent if the click fails. 
- """ - return await self._run_extension_action( - AgenticMouseActionRequest( - action=action, - recorded_click=recorded_click, - resize_window=resize_window, - fallback_operator_query=fallback_operator_query, - ), - timeout=timeout, - ) - - async def close(self, *, timeout: int | None = None) -> None: - """Gracefully closes the current browser window.""" - return await self._run_extension_action(CloseWindowRequest(), timeout=timeout) - - async def go_to_url( - self, *, url: str, new_tab: bool = False, timeout: int | None = None - ) -> None: - """Navigates the active page in this window to the given URL.""" - return await self._run_extension_action( - GoToUrlRequest(url=url, new_tab=new_tab), timeout=timeout - ) - - async def wait_for_element( - self, - *, - selectors: AgenticSelectors, - state: Literal["visible", "hidden"], - timeout: int, - ) -> bool: - """Waits for an element matching the given selectors to reach the specified state. - - Returns True if the element was found, False if no selector matched before timeout. 
- """ - result = await self._run_extension_action( - WaitForElementRequest(selectors=selectors, state=state, timeout=timeout), - WaitForElementResponse, - timeout=timeout // 1000 + 30, - ) - if result is None: - return False - return result.found - - async def get_url(self, *, timeout: int | None = None) -> GetUrlResponse: - """Gets the URL of the current active page.""" - return await self._run_extension_action( - GetUrlRequest(), - GetUrlResponse, - timeout=timeout, - ) - - async def print_message(self, *, message: str, timeout: int | None = None) -> None: - """Prints a message in the Narada extension side panel chat.""" - return await self._run_extension_action( - PrintMessageRequest(message=message), timeout=timeout - ) - - async def prompt_for_user_input( - self, - *, - step_id: str, - variables: list[PromptForUserInputVariable], - prompt_message: str | None = None, - timeout: int | None = DEFAULT_HITL_TIMEOUT_SECONDS, - ) -> dict[str, Any]: - """Prompts the user for one or more input values in the extension UI.""" - result = await self._run_extension_action( - PromptForUserInputRequest( - step_id=step_id, prompt_message=prompt_message, variables=variables - ), - PromptForUserInputResponse, - timeout=timeout, - ) - return result.values_by_name - - async def user_approval( - self, - *, - step_id: str, - prompt_message: str, - approve_label: str, - reject_label: str, - timeout: int | None = DEFAULT_HITL_TIMEOUT_SECONDS, - ) -> bool: - """Prompts the user to approve or reject in the extension UI.""" - result = await self._run_extension_action( - UserApprovalRequest( - step_id=step_id, - prompt_message=prompt_message, - approve_label=approve_label, - reject_label=reject_label, - ), - UserApprovalResponse, - timeout=timeout, - ) - return result.approved - - async def read_google_sheet( - self, - *, - spreadsheet_id: str, - range: str, - timeout: int | None = None, - ) -> ReadGoogleSheetResponse: - """Reads a range of cells from a Google Sheet.""" - return await 
self._run_extension_action( - ReadGoogleSheetRequest(spreadsheet_id=spreadsheet_id, range=range), - ReadGoogleSheetResponse, - timeout=timeout, - ) - - async def read_excel_sheet( - self, - *, - workbook_url: str, - range: str, - microsoft_account_email: str, - timeout: int | None = None, - ) -> ReadExcelSheetResponse: - """Reads a range of cells from a Microsoft Excel workbook.""" - return await self._run_extension_action( - ReadExcelSheetRequest( - workbook_url=workbook_url, - range=range, - microsoft_account_email=microsoft_account_email, - ), - ReadExcelSheetResponse, - timeout=timeout, - ) - - async def write_google_sheet( - self, - *, - spreadsheet_id: str, - range: str, - values: list[list[str]], - timeout: int | None = None, - ) -> None: - """Writes a range of cells to a Google Sheet.""" - return await self._run_extension_action( - WriteGoogleSheetRequest( - spreadsheet_id=spreadsheet_id, range=range, values=values - ), - timeout=timeout, - ) - - async def write_excel_sheet( - self, - *, - workbook_url: str, - range: str, - microsoft_account_email: str, - values: list[list[str]], - timeout: int | None = None, - ) -> None: - """Writes a range of cells to a Microsoft Excel workbook.""" - return await self._run_extension_action( - WriteExcelSheetRequest( - workbook_url=workbook_url, - range=range, - microsoft_account_email=microsoft_account_email, - values=values, - ), - timeout=timeout, - ) - - async def get_full_html(self, *, timeout: int | None = None) -> GetFullHtmlResponse: - """Gets the full HTML content of the current page.""" - return await self._run_extension_action( - GetFullHtmlRequest(), - GetFullHtmlResponse, - timeout=timeout, - ) - - async def get_simplified_html( - self, *, timeout: int | None = None - ) -> GetSimplifiedHtmlResponse: - """Gets the simplified HTML content of the current page.""" - return await self._run_extension_action( - GetSimplifiedHtmlRequest(), - GetSimplifiedHtmlResponse, - timeout=timeout, - ) - - async def 
get_screenshot( - self, *, timeout: int | None = None - ) -> GetScreenshotResponse: - """Takes a screenshot of the current browser window.""" - return await self._run_extension_action( - GetScreenshotRequest(), - GetScreenshotResponse, - timeout=timeout, - ) - - @overload - async def _run_extension_action( - self, - request: ExtensionActionRequest, - response_model: None = None, - *, - timeout: int | None = None, - ) -> None: ... - - @overload - async def _run_extension_action( - self, - request: ExtensionActionRequest, - response_model: type[_ResponseModel], - *, - timeout: int | None = None, - ) -> _ResponseModel: ... - - async def _run_extension_action( - self, - request: ExtensionActionRequest, - response_model: type[_ResponseModel] | None = None, - *, - timeout: int | None = None, - ) -> _ResponseModel | None: - body = { - "action": request.model_dump(), - "browserWindowId": self.browser_window_id, - } - remote_dispatch_request_id = os.environ.get(_REMOTE_DISPATCH_REQUEST_ID_ENV_VAR) - if remote_dispatch_request_id is not None: - body["requestId"] = remote_dispatch_request_id - remote_dispatch_api_key_id = os.environ.get(_REMOTE_DISPATCH_API_KEY_ID_ENV_VAR) - if remote_dispatch_api_key_id is not None: - body["apiKeyId"] = remote_dispatch_api_key_id - if timeout is not None: - body["timeout"] = timeout - - async with aiohttp.ClientSession() as session: - async with session.post( - f"{self._base_url}/extension-actions", - headers=self._auth_headers, - json=body, - # Don't specify `timeout` here as the (soft) timeout is handled by the server. 
- ) as resp: - if resp.status == HTTPStatus.GATEWAY_TIMEOUT: - raise NaradaTimeoutError - resp.raise_for_status() - resp_json = await resp.json() - - response = ExtensionActionResponse.model_validate(resp_json) - if response.status == "error": - raise NaradaError(response.error) - if response.status == "aborted": - raise UserAbortedError - - if response_model is None: - return None - - assert response.data is not None - return response_model.model_validate_json(response.data) - - -class LocalBrowserWindow(BaseBrowserWindow): - _browser_process_id: int | None - _config: BrowserConfig - _context: BrowserContext - - def __init__( - self, - *, - auth_headers: dict[str, str], - browser_process_id: int | None, - browser_window_id: str, - config: BrowserConfig, - context: BrowserContext, - ) -> None: - base_url = os.getenv("NARADA_API_BASE_URL", "https://api.narada.ai/fast/v2") - super().__init__( - auth_headers=auth_headers, - base_url=base_url, - browser_window_id=browser_window_id, - ) - self._browser_process_id = browser_process_id - self._config = config - self._context = context - - @property - def browser_process_id(self) -> int | None: - return self._browser_process_id - - def __str__(self) -> str: - return ( - "LocalBrowserWindow(" - f"browser_process_id={self._browser_process_id}, " - f"browser_window_id={self.browser_window_id}" - ")" - ) - - async def reinitialize(self) -> None: - side_panel_url = create_side_panel_url(self._config, self._browser_window_id) - side_panel_page = next( - p for p in self._context.pages if p.url == side_panel_url - ) - - # Refresh the extension side panel, which ensures any inflight Narada operations are - # canceled. 
- await side_panel_page.reload() - - -class RemoteBrowserWindow(BaseBrowserWindow): - def __init__( - self, - *, - browser_window_id: str, - cloud_browser_session_id: str | None = None, - api_key: str | None = None, - auth_headers: dict[str, str] | None = None, - ) -> None: - base_url = os.getenv("NARADA_API_BASE_URL", "https://api.narada.ai/fast/v2") - if auth_headers is None: - api_key = api_key or os.environ["NARADA_API_KEY"] - auth_headers = {"x-api-key": api_key} - super().__init__( - auth_headers=auth_headers, - base_url=base_url, - browser_window_id=browser_window_id, - ) - self._cloud_browser_session_id = cloud_browser_session_id - - @property - def cloud_browser_session_id(self) -> str | None: - return self._cloud_browser_session_id - - @override - async def close(self, *, timeout: int | None = None) -> None: - """Closes the browser window. - - If this window is backed by a cloud browser session, this also stops the cloud - session. - """ - if self._cloud_browser_session_id is None: - return await super().close(timeout=timeout) - - await _stop_cloud_browser_session( - base_url=self._base_url, - auth_headers=self._auth_headers, - session_id=self._cloud_browser_session_id, - timeout=timeout, - ) - - async def get_downloaded_files(self) -> list[SessionDownloadItem]: - """Return files downloaded during this cloud browser session (file name, size, presigned GET URL per file).""" - if self._cloud_browser_session_id is None: - raise ValueError( - "Cloud browser session ID is required to get downloaded files" - ) - return await _get_cloud_browser_downloads( - base_url=self._base_url, - auth_headers=self._auth_headers, - session_id=self._cloud_browser_session_id, - ) - - def __str__(self) -> str: - return f"RemoteBrowserWindow(browser_window_id={self.browser_window_id})" - - -class CloudBrowserWindow(BaseBrowserWindow): - """A browser window that connects to a backend-cloud browser session via CDP. 
- - This class connects to a cloud browser session created by the backend API and provides - the same interface as other browser window classes for agent operations. - """ - - def __init__( - self, - *, - browser_window_id: str, - session_id: str, - api_key: str | None = None, - auth_headers: dict[str, str] | None = None, - ) -> None: - base_url = os.getenv("NARADA_API_BASE_URL", "https://api.narada.ai/fast/v2") - if auth_headers is None: - api_key = api_key or os.environ["NARADA_API_KEY"] - auth_headers = {"x-api-key": api_key} - super().__init__( - auth_headers=auth_headers, - base_url=base_url, - browser_window_id=browser_window_id, - ) - self._session_id = session_id - - @property - def cloud_browser_session_id(self) -> str: - return self._session_id - - @override - async def close(self, *, timeout: int | None = None) -> None: - """Stops the cloud browser session. - - Unlike local browser windows where close() closes a single window, this stops the - entire cloud session since the serverless container manages the browser lifecycle. 
- """ - await _stop_cloud_browser_session( - base_url=self._base_url, - auth_headers=self._auth_headers, - session_id=self._session_id, - timeout=timeout, - ) - - async def get_downloaded_files(self) -> list[SessionDownloadItem]: - """Return files downloaded during this cloud browser session (file name, size, presigned GET URL per file).""" - return await _get_cloud_browser_downloads( - base_url=self._base_url, - auth_headers=self._auth_headers, - session_id=self._session_id, - ) - - def __str__(self) -> str: - return ( - "CloudBrowserWindow(" - f"cloud_browser_session_id={self._session_id}, " - f"browser_window_id={self.browser_window_id}" - ")" - ) - - -async def _fetch_presigned_download_url( - http_session: aiohttp.ClientSession, - *, - base_url: str, - auth_headers: dict[str, str], - session_id: str, - key: str, - timeout: aiohttp.ClientTimeout, -) -> str: - async with http_session.get( - f"{base_url}/cloud-browser/replay/download-url", - params={"session_id": session_id, "key": key}, - headers=auth_headers, - timeout=timeout, - ) as resp: - resp.raise_for_status() - data = await resp.json() - return data["presigned_url"] - - -async def _get_cloud_browser_downloads( - *, - base_url: str, - auth_headers: dict[str, str], - session_id: str, -) -> list[SessionDownloadItem]: - """GET cloud-browser session downloads and return list of SessionDownloadItem with presigned URLs.""" - timeout = aiohttp.ClientTimeout(total=60) - async with aiohttp.ClientSession() as http_session: - async with http_session.get( - f"{base_url}/cloud-browser/replay/downloads", - params={"session_id": session_id}, - headers=auth_headers, - timeout=timeout, - ) as resp: - resp.raise_for_status() - data = await resp.json() - files = data.get("downloaded_files") or [] - if not files: - return [] - - presigned_urls = await asyncio.gather( - *[ - _fetch_presigned_download_url( - http_session, - base_url=base_url, - auth_headers=auth_headers, - session_id=session_id, - key=f["key"], - 
timeout=timeout, - ) - for f in files - ] - ) - return [ - SessionDownloadItem( - file_name=item["file_name"], - size=item["size"], - download_url=presigned_urls[i], - ) - for i, item in enumerate(files) - ] - - -async def _stop_cloud_browser_session( - *, - base_url: str, - auth_headers: dict[str, str], - session_id: str, - timeout: int | None = None, -) -> None: - try: - async with aiohttp.ClientSession() as session: - async with session.post( - f"{base_url}/cloud-browser/stop-cloud-browser-session", - headers=auth_headers, - json={"session_id": session_id}, - timeout=aiohttp.ClientTimeout(total=timeout or 40), - ) as resp: - if resp.ok: - response_data = await resp.json() - if not response_data.get("success"): - logger.warning( - "Failed to stop session: %s", - response_data.get("message"), - ) - else: - logger.warning("Failed to stop session: %s", resp.status) - except Exception as e: - logger.warning("Error calling stop session endpoint: %s", e) - - -def create_side_panel_url(config: BrowserConfig, browser_window_id: str) -> str: - return f"chrome-extension://{config.extension_id}/sidepanel.html?browserWindowId={browser_window_id}" diff --git a/packages/narada/tests/test_agent.py b/packages/narada/tests/test_agent.py new file mode 100644 index 0000000..d21131c --- /dev/null +++ b/packages/narada/tests/test_agent.py @@ -0,0 +1,121 @@ +from __future__ import annotations + +from typing import Any + +import pytest +from narada import Agent, Environment + + +class _FakeResponse: + ok = True + status = 200 + + def __init__(self, payload: dict[str, Any]) -> None: + self._payload = payload + + async def __aenter__(self) -> "_FakeResponse": + return self + + async def __aexit__(self, *args: Any) -> None: + return None + + def raise_for_status(self) -> None: + return None + + async def json(self) -> dict[str, Any]: + return self._payload + + +class _RemoteDispatchFakeClientSession: + def __init__(self) -> None: + self.dispatched_bodies: list[dict[str, Any]] = [] + 
self._poll_count = 0 + + async def __aenter__(self) -> "_RemoteDispatchFakeClientSession": + return self + + async def __aexit__(self, *args: Any) -> None: + return None + + def post(self, url: str, **kwargs: Any) -> _FakeResponse: + if url.endswith("/remote-dispatch"): + self.dispatched_bodies.append(kwargs["json"]) + return _FakeResponse({"requestId": f"req-{len(self.dispatched_bodies)}"}) + raise AssertionError(f"Unexpected POST URL: {url}") + + def get(self, url: str, **kwargs: Any) -> _FakeResponse: + if "/remote-dispatch/responses/" not in url: + raise AssertionError(f"Unexpected GET URL: {url}") + + self._poll_count += 1 + return _FakeResponse( + { + "status": "success", + "response": { + "text": f"ok-{self._poll_count}", + "output": {"type": "text", "content": f"ok-{self._poll_count}"}, + }, + "usage": {"actions": 1, "credits": 1}, + "createdAt": "2026-01-01T00:00:00Z", + "completedAt": "2026-01-01T00:00:01Z", + "activeInputRequest": None, + } + ) + + +class _CountingEnvironment(Environment): + def __init__(self) -> None: + super().__init__(auth_headers={}) + self.initialize_count = 0 + + @property + def _validates_sdk_config(self) -> bool: + return False + + async def _initialize(self) -> None: + self.initialize_count += 1 + + +@pytest.mark.asyncio +async def test_agent_run_reruns_but_environment_initialization_is_cached( + monkeypatch: pytest.MonkeyPatch, +) -> None: + import narada.environment as environment_module + + fake_session = _RemoteDispatchFakeClientSession() + monkeypatch.setattr( + environment_module.aiohttp, "ClientSession", lambda: fake_session + ) + + env = _CountingEnvironment() + agent = Agent(environment=env) + + first = await agent.run("first") + second = await agent.run("second") + + assert env.initialize_count == 1 + assert first.request_id == "req-1" + assert second.request_id == "req-2" + assert [body["prompt"] for body in fake_session.dispatched_bodies] == [ + "/Operator first", + "/Operator second", + ] + + +@pytest.mark.asyncio 
+async def test_agent_run_forwards_clear_chat( + monkeypatch: pytest.MonkeyPatch, +) -> None: + import narada.environment as environment_module + + fake_session = _RemoteDispatchFakeClientSession() + monkeypatch.setattr( + environment_module.aiohttp, "ClientSession", lambda: fake_session + ) + + env = _CountingEnvironment() + agent = Agent(environment=env) + + await agent.run("fresh task", clear_chat=True) + + assert fake_session.dispatched_bodies[0]["clearChat"] is True diff --git a/packages/narada/tests/test_cloud_browser.py b/packages/narada/tests/test_cloud_browser.py index 754b01e..85139eb 100644 --- a/packages/narada/tests/test_cloud_browser.py +++ b/packages/narada/tests/test_cloud_browser.py @@ -2,9 +2,13 @@ from unittest.mock import AsyncMock, call import pytest -from narada.client import Narada +from narada import ( + Agent, + CloudBrowserEnvironment, + LambdaEnvironment, + RemoteBrowserEnvironment, +) from narada.config import BrowserConfig -from narada.window import CloudBrowserWindow, RemoteBrowserWindow from narada_core.errors import NaradaTimeoutError @@ -70,20 +74,23 @@ def get(self, url: str, **kwargs): raise AssertionError(f"Unexpected GET URL: {url}") -def _build_client_with_cloud_page(page: AsyncMock) -> Narada: - client = Narada(auth_headers={"x-api-key": "test-key"}) +def _build_cloud_environment_with_page(page: AsyncMock) -> CloudBrowserEnvironment: + env = CloudBrowserEnvironment( + auth_headers={"x-api-key": "test-key"}, + config=BrowserConfig(interactive=False), + ) browser = SimpleNamespace(contexts=[SimpleNamespace(pages=[page])]) - client._playwright = SimpleNamespace( + env._playwright = SimpleNamespace( chromium=SimpleNamespace(connect_over_cdp=AsyncMock(return_value=browser)) ) - return client + return env @pytest.mark.asyncio async def test_dispatch_request_calls_input_required_callback_once_per_input_id( monkeypatch: pytest.MonkeyPatch, ) -> None: - import narada.window as window_module + import narada.environment as 
environment_module fake_session = _RemoteDispatchFakeClientSession( [ @@ -148,18 +155,20 @@ async def test_dispatch_request_calls_input_required_callback_once_per_input_id( }, ] ) - monkeypatch.setattr(window_module.aiohttp, "ClientSession", lambda: fake_session) + monkeypatch.setattr( + environment_module.aiohttp, "ClientSession", lambda: fake_session + ) sleep = AsyncMock() - monkeypatch.setattr(window_module.asyncio, "sleep", sleep) + monkeypatch.setattr(environment_module.asyncio, "sleep", sleep) observed_input_ids: list[str] = [] async def on_input_required(active_input_request) -> None: observed_input_ids.append(active_input_request.input_id) - window = RemoteBrowserWindow(browser_window_id="bw-1", api_key="test-key") + env = RemoteBrowserEnvironment(browser_window_id="bw-1", api_key="test-key") - response = await window.dispatch_request( + response = await env._dispatch_request( prompt="Summarize", timeout=5, on_input_required=on_input_required, @@ -171,10 +180,10 @@ async def on_input_required(active_input_request) -> None: @pytest.mark.asyncio -async def test_extensionless_cloud_browser_uses_backend_initialization( +async def test_lambda_environment_uses_backend_initialization( monkeypatch: pytest.MonkeyPatch, ) -> None: - import narada.client as client_module + import narada.environment as environment_module fake_session = _FakeClientSession( { @@ -183,26 +192,19 @@ async def test_extensionless_cloud_browser_uses_backend_initialization( "browser_window_id": "browser-window-123", } ) - monkeypatch.setattr(client_module.aiohttp, "ClientSession", lambda: fake_session) - - async def fail_if_client_initializes(*args, **kwargs): - raise AssertionError( - "extensionless cloud sessions should initialize server-side" - ) - - narada = Narada(auth_headers={"x-api-key": "test-key"}) monkeypatch.setattr( - narada, "_initialize_cloud_browser_window", fail_if_client_initializes + environment_module.aiohttp, "ClientSession", lambda: fake_session ) - window = await 
narada.open_and_initialize_cloud_browser_window( + env = LambdaEnvironment( + auth_headers={"x-api-key": "test-key"}, session_name="fast-session", session_timeout=300, - require_extension=False, ) + await env.start() - assert window.browser_window_id == "browser-window-123" - assert window.cloud_browser_session_id == "session-123" + assert env.session_id == "session-123" + assert env.cloud_browser_session_id == "session-123" assert len(fake_session.posts) == 1 post = fake_session.posts[0] assert post["url"].endswith( @@ -217,19 +219,47 @@ async def fail_if_client_initializes(*args, **kwargs): @pytest.mark.asyncio -async def test_initialize_cloud_browser_window_uses_domcontentloaded_for_login_navigation( +async def test_lambda_environment_exposes_downloaded_files( monkeypatch: pytest.MonkeyPatch, ) -> None: - page = AsyncMock() - client = _build_client_with_cloud_page(page) + import narada.environment as environment_module - wait_for_browser_window_id = AsyncMock(return_value="browser-window-123") + downloaded_files = [ + environment_module.SessionDownloadItem( + file_name="report.pdf", + size=42, + download_url="https://example.com/report.pdf", + ) + ] + get_downloads = AsyncMock(return_value=downloaded_files) monkeypatch.setattr( - client, "_wait_for_browser_window_id", wait_for_browser_window_id + environment_module, + "_get_cloud_browser_downloads", + get_downloads, ) - window = await client._initialize_cloud_browser_window( - config=BrowserConfig(interactive=False), + env = LambdaEnvironment(auth_headers={"x-api-key": "test-key"}) + env._session_id = "session-123" + + assert await env.get_downloaded_files() == downloaded_files + get_downloads.assert_awaited_once_with( + base_url=env._base_url, + auth_headers={"x-api-key": "test-key"}, + session_id="session-123", + ) + + +@pytest.mark.asyncio +async def test_cloud_browser_environment_uses_domcontentloaded_for_login_navigation( + monkeypatch: pytest.MonkeyPatch, +) -> None: + page = AsyncMock() + env = 
_build_cloud_environment_with_page(page) + + wait_for_browser_window_id = AsyncMock(return_value="browser-window-123") + monkeypatch.setattr(env, "_wait_for_browser_window_id", wait_for_browser_window_id) + + await env._initialize_cloud_browser_window( cdp_websocket_url="wss://agentcore.example.test/session-123", session_id="session-123", login_url="https://app.narada.ai/chat?customToken=test-token", @@ -246,16 +276,16 @@ async def test_initialize_cloud_browser_window_uses_domcontentloaded_for_login_n BrowserConfig(interactive=False), timeout=30_000, ) - assert window.browser_window_id == "browser-window-123" - assert window.cloud_browser_session_id == "session-123" + assert env.browser_window_id == "browser-window-123" + assert env.cloud_browser_session_id == "session-123" @pytest.mark.asyncio -async def test_initialize_cloud_browser_window_uses_domcontentloaded_for_retry_navigation( +async def test_cloud_browser_environment_uses_domcontentloaded_for_retry_navigation( monkeypatch: pytest.MonkeyPatch, ) -> None: page = AsyncMock() - client = _build_client_with_cloud_page(page) + env = _build_cloud_environment_with_page(page) wait_for_browser_window_id = AsyncMock( side_effect=[ @@ -263,12 +293,9 @@ async def test_initialize_cloud_browser_window_uses_domcontentloaded_for_retry_n "browser-window-123", ] ) - monkeypatch.setattr( - client, "_wait_for_browser_window_id", wait_for_browser_window_id - ) + monkeypatch.setattr(env, "_wait_for_browser_window_id", wait_for_browser_window_id) - window = await client._initialize_cloud_browser_window( - config=BrowserConfig(interactive=False), + await env._initialize_cloud_browser_window( cdp_websocket_url="wss://agentcore.example.test/session-123", session_id="session-123", login_url="https://app.narada.ai/chat?customToken=test-token", @@ -288,22 +315,22 @@ async def test_initialize_cloud_browser_window_uses_domcontentloaded_for_retry_n ), ] assert wait_for_browser_window_id.await_count == 2 - assert window.browser_window_id == 
"browser-window-123" + assert env.browser_window_id == "browser-window-123" @pytest.mark.asyncio -async def test_window_agent_exposes_workflow_trace_alias( +async def test_agent_run_exposes_workflow_trace_alias( monkeypatch: pytest.MonkeyPatch, ) -> None: workflow_trace = {"step_type": "workflow", "children": []} - window = CloudBrowserWindow( + env = RemoteBrowserEnvironment( browser_window_id="browser-window-123", - session_id="session-123", + cloud_browser_session_id="session-123", auth_headers={"x-api-key": "test-key"}, ) monkeypatch.setattr( - window, - "dispatch_request", + env, + "_dispatch_request", AsyncMock( return_value={ "requestId": "request-123", @@ -320,7 +347,7 @@ async def test_window_agent_exposes_workflow_trace_alias( ), ) - response = await window.agent(prompt="return a trace") + response = await Agent(environment=env).run("return a trace") assert response.workflow_trace == workflow_trace assert response.model_dump(by_alias=True)["workflowTrace"] == workflow_trace diff --git a/packages/narada/tests/test_input_variables.py b/packages/narada/tests/test_input_variables.py index 409a2f4..b5514c3 100644 --- a/packages/narada/tests/test_input_variables.py +++ b/packages/narada/tests/test_input_variables.py @@ -1,16 +1,15 @@ from io import BytesIO import pytest -from narada.window import BaseBrowserWindow, CloudBrowserWindow, RemoteBrowserWindow +from narada import RemoteBrowserEnvironment @pytest.mark.asyncio async def test_input_variable_files_normalize_to_current_file_variable_shape( monkeypatch: pytest.MonkeyPatch, ) -> None: - window = BaseBrowserWindow( + env = RemoteBrowserEnvironment( auth_headers={}, - base_url="https://api.example.test", browser_window_id="browser-window-123", ) upload_calls = [] @@ -19,14 +18,12 @@ async def fake_upload_file_impl(*, file): upload_calls.append(file) return {"key": "user-user-123/20260426000000000000-report.txt"} - monkeypatch.setattr(window, "_upload_file_impl", fake_upload_file_impl) + 
monkeypatch.setattr(env, "_upload_file_impl", fake_upload_file_impl) file_obj = BytesIO(b"hello") file_obj.name = "/tmp/report.txt" - normalized = await window._normalize_input_variables( - input_variables={"doc": file_obj} - ) + normalized = await env._normalize_input_variables(input_variables={"doc": file_obj}) assert upload_calls == [file_obj] assert normalized == { @@ -39,17 +36,13 @@ async def fake_upload_file_impl(*, file): } -def test_cloud_browser_windows_expose_session_id_for_remote_dispatch() -> None: - cloud_window = CloudBrowserWindow( - auth_headers={}, - browser_window_id="browser-window-123", - session_id="session-123", - ) - remote_window = RemoteBrowserWindow( +def test_cloud_backed_remote_environment_exposes_session_id_for_remote_dispatch() -> ( + None +): + remote_env = RemoteBrowserEnvironment( auth_headers={}, browser_window_id="browser-window-456", cloud_browser_session_id="session-456", ) - assert cloud_window.cloud_browser_session_id == "session-123" - assert remote_window.cloud_browser_session_id == "session-456" + assert remote_env.cloud_browser_session_id == "session-456" diff --git a/packages/narada/tests/test_window_human_interaction.py b/packages/narada/tests/test_window_human_interaction.py index 093b8c9..47ef453 100644 --- a/packages/narada/tests/test_window_human_interaction.py +++ b/packages/narada/tests/test_window_human_interaction.py @@ -4,7 +4,7 @@ from typing import Any import pytest -from narada.window import RemoteBrowserWindow +from narada import Agent, RemoteBrowserEnvironment from narada_core.actions.models import ( DEFAULT_HITL_TIMEOUT_SECONDS, PromptForUserInputVariable, @@ -57,10 +57,16 @@ async def test_prompt_for_user_input_uses_hitl_default_timeout( } ] ) - monkeypatch.setattr("narada.window.aiohttp.ClientSession", lambda: fake_session) - window = RemoteBrowserWindow(browser_window_id="bw-1", api_key="test-key") + monkeypatch.setattr( + "narada.environment.aiohttp.ClientSession", lambda: fake_session + ) + agent = 
Agent( + environment=RemoteBrowserEnvironment( + browser_window_id="bw-1", api_key="test-key" + ) + ) - values = await window.prompt_for_user_input( + values = await agent.prompt_for_user_input( step_id="input-step", variables=[ PromptForUserInputVariable(name="name", type="string", required=True), @@ -83,10 +89,16 @@ async def test_user_approval_respects_explicit_timeout( } ] ) - monkeypatch.setattr("narada.window.aiohttp.ClientSession", lambda: fake_session) - window = RemoteBrowserWindow(browser_window_id="bw-1", api_key="test-key") + monkeypatch.setattr( + "narada.environment.aiohttp.ClientSession", lambda: fake_session + ) + agent = Agent( + environment=RemoteBrowserEnvironment( + browser_window_id="bw-1", api_key="test-key" + ) + ) - approved = await window.user_approval( + approved = await agent.user_approval( step_id="approval-step", prompt_message="Proceed?", approve_label="Approve", diff --git a/uv.lock b/uv.lock index a595386..5ab4830 100644 --- a/uv.lock +++ b/uv.lock @@ -312,7 +312,7 @@ wheels = [ [[package]] name = "narada" -version = "0.1.53a7" +version = "0.2.0" source = { editable = "packages/narada" } dependencies = [ { name = "aiohttp" }, @@ -345,7 +345,7 @@ dev = [ [[package]] name = "narada-core" -version = "0.0.27" +version = "0.1.0" source = { editable = "packages/narada-core" } dependencies = [ { name = "pydantic" }, @@ -356,7 +356,7 @@ requires-dist = [{ name = "pydantic", specifier = "==2.12.5" }] [[package]] name = "narada-pyodide" -version = "0.0.59" +version = "0.1.0" source = { editable = "packages/narada-pyodide" } dependencies = [ { name = "narada-core" },