From 098f96c8ce1a9470af6bdaee667a6c40d04eb852 Mon Sep 17 00:00:00 2001 From: Adrian Stritzinger Date: Fri, 12 Sep 2025 09:04:04 +0200 Subject: [PATCH] fix(tools): computer tool - the tool was used differently by chat (through MCP) and by the agent (through act): once as a regular tool and once as the special computer tool -> use it as the special computer tool in both places --- src/askui/chat/api/mcp_servers/computer.py | 20 +++++++++++-- src/askui/chat/api/runs/runner/runner.py | 2 ++ src/askui/models/shared/tools.py | 34 +++++++++++++++------- src/askui/tools/computer.py | 4 ++- 4 files changed, 46 insertions(+), 14 deletions(-) diff --git a/src/askui/chat/api/mcp_servers/computer.py b/src/askui/chat/api/mcp_servers/computer.py index 220920d4..251dfed0 100644 --- a/src/askui/chat/api/mcp_servers/computer.py +++ b/src/askui/chat/api/mcp_servers/computer.py @@ -5,12 +5,19 @@ from PIL import Image as PILImage from pydantic import BaseModel, Field +from askui.models.shared.settings import COMPUTER_USE_20250124_BETA_FLAG from askui.tools.askui.askui_controller import AskUiControllerClient -from askui.tools.computer import Action20250124, Computer20250124Tool, ScrollDirection +from askui.tools.computer import ( + RESOLUTIONS_RECOMMENDED_BY_ANTHROPIC, + Action20250124, + Computer20250124Tool, + ScrollDirection, +) from askui.utils.image_utils import ImageSource mcp = FastMCP(name="AskUI Computer MCP") +RESOLUTION = RESOLUTIONS_RECOMMENDED_BY_ANTHROPIC["WXGA"] active_display = 1 @@ -18,6 +25,15 @@ @mcp.tool( description="Interact with your computer", tags={"computer"}, + meta={ + "betas": [COMPUTER_USE_20250124_BETA_FLAG], + "params": { + "name": "computer", + "display_width_px": RESOLUTION.width, + "display_height_px": RESOLUTION.height, + "type": "computer_20250124", + }, + }, ) def computer( action: Action20250124, @@ -30,7 +46,7 @@ def computer( key: str | None = None, ) -> Image | None | str: with AskUiControllerClient(display=active_display) as agent_os: - result = 
Computer20250124Tool(agent_os=agent_os)( + result = Computer20250124Tool(agent_os=agent_os, resolution=RESOLUTION)( action=action, text=text, coordinate=coordinate, diff --git a/src/askui/chat/api/runs/runner/runner.py b/src/askui/chat/api/runs/runner/runner.py index 7407b666..8fd001d3 100644 --- a/src/askui/chat/api/runs/runner/runner.py +++ b/src/askui/chat/api/runs/runner/runner.py @@ -162,6 +162,7 @@ def _run_agent_inner() -> None: # Remove this after having extracted tools into Android MCP if self._run.assistant_id == ANDROID_AGENT.id: tools.append_tool(*_get_android_tools()) + betas = tools.retrieve_tool_beta_flags() custom_agent = CustomAgent() custom_agent.act( messages, @@ -170,6 +171,7 @@ def _run_agent_inner() -> None: tools=tools, settings=ActSettings( messages=MessageSettings( + betas=betas, model=ModelName.CLAUDE__SONNET__4__20250514, system=self._build_system(), thinking={"type": "enabled", "budget_tokens": 2048}, diff --git a/src/askui/models/shared/tools.py b/src/askui/models/shared/tools.py index 634f1488..4dc4bf37 100644 --- a/src/askui/models/shared/tools.py +++ b/src/askui/models/shared/tools.py @@ -2,7 +2,7 @@ import types from abc import ABC, abstractmethod from datetime import timedelta -from typing import Any, Literal, Protocol, Type, cast +from typing import Any, Literal, Protocol, Type import jsonref import mcp @@ -202,6 +202,18 @@ def __init__( self._mcp_client = mcp_client self._include = include + def retrieve_tool_beta_flags(self) -> list[str]: + result: set[str] = set() + for tool in self._get_mcp_tools().values(): + beta_flags = (tool.meta or {}).get("betas", []) + if not isinstance(beta_flags, list): + continue + for beta_flag in beta_flags: + if not isinstance(beta_flag, str): + continue + result.add(beta_flag) + return list(result) + def to_params(self) -> list[BetaToolUnionParam]: tool_map = { **self._get_mcp_tool_params(), @@ -221,17 +233,17 @@ def _get_mcp_tool_params(self) -> dict[str, BetaToolUnionParam]: if not 
self._mcp_client: return {} mcp_tools = self._get_mcp_tools() - return { - tool_name: cast( - "BetaToolUnionParam", - BetaToolParam( - name=tool_name, - description=tool.description or "", - input_schema=_replace_refs(tool_name, tool.inputSchema), - ), + result: dict[str, BetaToolUnionParam] = {} + for tool_name, tool in mcp_tools.items(): + if params := (tool.meta or {}).get("params"): + # validation missing + result[tool_name] = params + result[tool_name] = BetaToolParam( + name=tool_name, + description=tool.description or "", + input_schema=_replace_refs(tool_name, tool.inputSchema), ) - for tool_name, tool in mcp_tools.items() - } + return result def append_tool(self, *tools: Tool) -> "Self": """Append a tool to the collection.""" diff --git a/src/askui/tools/computer.py b/src/askui/tools/computer.py index ede19fa4..3815f727 100644 --- a/src/askui/tools/computer.py +++ b/src/askui/tools/computer.py @@ -407,6 +407,7 @@ class Computer20250124Tool(ComputerToolBase): def __init__( self, agent_os: AgentOs, + resolution: Resolution | None = None, ) -> None: super().__init__( agent_os=agent_os, @@ -437,6 +438,7 @@ def __init__( }, "required": ["action"], }, + resolution=resolution, ) @override @@ -465,7 +467,7 @@ def __call__( # noqa: C901 case "hold_key": self._hold_key(keystroke=text, duration=duration) # type: ignore[arg-type] case "key": - return super().__call__(action, key, coordinate) + return super().__call__(action, text, coordinate) case "left_mouse_down": self._agent_os.mouse_down("left") case "left_mouse_up":