diff --git a/src/askui/chat/api/assistants/seeds.py b/src/askui/chat/api/assistants/seeds.py index 2c14ea5a..f710cc4d 100644 --- a/src/askui/chat/api/assistants/seeds.py +++ b/src/askui/chat/api/assistants/seeds.py @@ -19,6 +19,7 @@ "list_displays", "set_active_display", "retrieve_active_display", + "computer_connect", ], ) diff --git a/src/askui/chat/api/mcp_servers/computer.py b/src/askui/chat/api/mcp_servers/computer.py index 251dfed0..4ff2d8cb 100644 --- a/src/askui/chat/api/mcp_servers/computer.py +++ b/src/askui/chat/api/mcp_servers/computer.py @@ -19,7 +19,8 @@ RESOLUTION = RESOLUTIONS_RECOMMENDED_BY_ANTHROPIC["WXGA"] -active_display = 1 + +AGENT_OS = AskUiControllerClient() @mcp.tool( @@ -45,20 +46,19 @@ def computer( duration: Annotated[float, Field(ge=0.0, le=100.0)] | None = None, key: str | None = None, ) -> Image | None | str: - with AskUiControllerClient(display=active_display) as agent_os: - result = Computer20250124Tool(agent_os=agent_os, resolution=RESOLUTION)( - action=action, - text=text, - coordinate=coordinate, - scroll_direction=scroll_direction, - scroll_amount=scroll_amount, - duration=duration, - key=key, - ) - if isinstance(result, PILImage.Image): - src = ImageSource(result) - return Image(data=src.to_bytes(), format="png") - return result + result = Computer20250124Tool(agent_os=AGENT_OS, resolution=RESOLUTION)( + action=action, + text=text, + coordinate=coordinate, + scroll_direction=scroll_direction, + scroll_amount=scroll_amount, + duration=duration, + key=key, + ) + if isinstance(result, PILImage.Image): + src = ImageSource(result) + return Image(data=src.to_bytes(), format="png") + return result class Display(BaseModel): @@ -71,10 +71,9 @@ class DisplayListResponse(BaseModel): @mcp.tool(description="List all available displays", tags={"computer"}) def list_displays() -> DisplayListResponse: - with AskUiControllerClient(display=active_display) as agent_os: - return DisplayListResponse( - data=[Display(id=display.id) for display in agent_os.list_displays().data], - ) + return DisplayListResponse( + data=[Display(id=display.id) for display in AGENT_OS.list_displays().data], + ) @mcp.tool( @@ -84,8 +83,7 @@ def list_displays() -> DisplayListResponse: def set_active_display( display_id: Annotated[int, Field(ge=1)], ) -> None: - global active_display - active_display = display_id + AGENT_OS.set_display(display_id) @mcp.tool( @@ -93,4 +91,13 @@ def set_active_display( tags={"computer"}, ) def retrieve_active_display() -> Display: - return Display(id=active_display) + return Display(id=AGENT_OS.retrieve_active_display().id) + + +@mcp.tool( + description="Connect to the computer", + tags={"computer"}, +) +def computer_connect() -> str: + AGENT_OS.connect() + return "Agent OS connected" diff --git a/src/askui/chat/api/runs/runner/runner.py b/src/askui/chat/api/runs/runner/runner.py index 5f5b6a61..1790d493 100644 --- a/src/askui/chat/api/runs/runner/runner.py +++ b/src/askui/chat/api/runs/runner/runner.py @@ -7,7 +7,6 @@ from asyncer import asyncify, syncify from askui.chat.api.assistants.models import Assistant -from askui.chat.api.assistants.seeds import ANDROID_AGENT from askui.chat.api.mcp_clients.manager import McpClientManagerManager from askui.chat.api.messages.chat_history_manager import ChatHistoryManager from askui.chat.api.models import RunId, ThreadId, WorkspaceId @@ -26,7 +25,7 @@ from askui.models.shared.agent_message_param import MessageParam from askui.models.shared.agent_on_message_cb import OnMessageCbParam from askui.models.shared.settings import ActSettings, MessageSettings -from askui.models.shared.tools import Tool, ToolCollection +from askui.models.shared.tools import ToolCollection logger = logging.getLogger(__name__) diff --git a/src/askui/tools/askui/askui_controller.py b/src/askui/tools/askui/askui_controller.py index 28a2d1a5..f2d01b7e 100644 --- a/src/askui/tools/askui/askui_controller.py +++ b/src/askui/tools/askui/askui_controller.py @@ -174,6 +174,9 @@ def connect(self) -> None: This method starts the controller server, establishes a gRPC channel, creates a session, and sets up the initial display. """ + if self._session_info is not None: + msg = "AskUI Controller already connected" + raise RuntimeError(msg) self._controller_server.start() self._channel = grpc.insecure_channel( "localhost:23000",