From 7ec3bfba9c9938dd1cd4f524128b29ea2063200d Mon Sep 17 00:00:00 2001 From: danyalxahid-askui Date: Thu, 24 Jul 2025 13:20:34 +0200 Subject: [PATCH 1/7] feat: add ScreenSwitchTool and display information retrieval - Introduced `ScreenSwitchTool` for cycling through multiple displays. - Added methods in `AgentOs` for retrieving display information and active display. - Updated `VisionAgent` to utilize the new `ScreenSwitchTool`. - Enhanced `AskUiControllerClient` with display information retrieval capabilities. --- src/askui/agent.py | 13 +++--- src/askui/tools/agent_os.py | 34 +++++++++++++- src/askui/tools/askui/askui_controller.py | 54 ++++++++++++++++++----- src/askui/tools/screen_switch_tool.py | 42 ++++++++++++++++++ 4 files changed, 125 insertions(+), 18 deletions(-) create mode 100644 src/askui/tools/screen_switch_tool.py diff --git a/src/askui/agent.py b/src/askui/agent.py index 30dbce32..eeb17a4f 100644 --- a/src/askui/agent.py +++ b/src/askui/agent.py @@ -10,15 +10,13 @@ from askui.agent_base import AgentBase from askui.container import telemetry from askui.locators.locators import Locator -from askui.models.shared.settings import ( - COMPUTER_USE_20241022_BETA_FLAG, - COMPUTER_USE_20250124_BETA_FLAG, - ActSettings, - MessageSettings, -) +from askui.models.shared.settings import (COMPUTER_USE_20241022_BETA_FLAG, + COMPUTER_USE_20250124_BETA_FLAG, + ActSettings, MessageSettings) from askui.models.shared.tools import Tool from askui.tools.computer import Computer20241022Tool, Computer20250124Tool from askui.tools.exception_tool import ExceptionTool +from askui.tools.screen_switch_tool import ScreenSwitchTool from .logger import logger from .models import ModelComposition @@ -401,6 +399,9 @@ def _get_default_settings_for_act(self, model_choice: str) -> ActSettings: @override def _get_default_tools_for_act(self, model_choice: str) -> list[Tool]: + + self._tools.append(ScreenSwitchTool(agent_os=self.tools.os)) + match model_choice: case ModelName.ANTHROPIC__CLAUDE__3_5__SONNET__20241022: return self._tools + [Computer20241022Tool(agent_os=self.tools.os)] diff --git a/src/askui/tools/agent_os.py b/src/askui/tools/agent_os.py index bd845130..42e70670 100644 --- a/src/askui/tools/agent_os.py +++ b/src/askui/tools/agent_os.py @@ -2,7 +2,7 @@ from typing import Literal from PIL import Image -from pydantic import BaseModel +from pydantic import BaseModel, Field ModifierKey = Literal[ "command", @@ -148,6 +148,26 @@ class ClickEvent(BaseModel): timestamp: float +class SizeInPixels(BaseModel): + """Represents the size of a display in pixels.""" + + width: int + height: int + +class DisplayInformation(BaseModel): + """Contains information about a single display.""" + + display_id: int = Field(alias="displayID") + size_in_pixels: SizeInPixels = Field(alias="sizeInPixels") + + +class GetDisplayInformationResponse(BaseModel): + """Response model for display information requests.""" + + displays: list[DisplayInformation] + + + InputEvent = ClickEvent @@ -322,6 +342,18 @@ def set_display(self, display: int = 1) -> None: """ raise NotImplementedError + def get_display_information(self) -> GetDisplayInformationResponse: + """ + Get information about all available displays and virtual screen. + """ + raise NotImplementedError + + def get_active_display(self) -> int: + """ + Get the active display. + """ + raise NotImplementedError + def run_command(self, command: str, timeout_ms: int = 30000) -> None: """ Executes a shell command. diff --git a/src/askui/tools/askui/askui_controller.py b/src/askui/tools/askui/askui_controller.py index 7c385bbf..5a9d465f 100644 --- a/src/askui/tools/askui/askui_controller.py +++ b/src/askui/tools/askui/askui_controller.py @@ -7,6 +7,7 @@ from typing import Literal, Type import grpc +from google.protobuf.json_format import MessageToDict # type: ignore from PIL import Image from pydantic import BaseModel, Field, model_validator from pydantic_settings import BaseSettings, SettingsConfigDict @@ -15,20 +16,17 @@ from askui.container import telemetry from askui.logger import logger from askui.reporting import Reporter -from askui.tools.agent_os import AgentOs, ModifierKey, PcKey -from askui.tools.askui.askui_ui_controller_grpc import ( - Controller_V1_pb2 as controller_v1_pbs, -) -from askui.tools.askui.askui_ui_controller_grpc import ( - Controller_V1_pb2_grpc as controller_v1, -) +from askui.tools.agent_os import (AgentOs, GetDisplayInformationResponse, + ModifierKey, PcKey) +from askui.tools.askui.askui_ui_controller_grpc import \ + Controller_V1_pb2 as controller_v1_pbs +from askui.tools.askui.askui_ui_controller_grpc import \ + Controller_V1_pb2_grpc as controller_v1 from askui.utils.image_utils import draw_point_on_image from ..utils import process_exists, wait_for_port -from .exceptions import ( - AskUiControllerOperationFailedError, - AskUiControllerOperationTimeoutError, -) +from .exceptions import (AskUiControllerOperationFailedError, + AskUiControllerOperationTimeoutError) class RemoteDeviceController(BaseModel): @@ -704,6 +702,14 @@ def set_display(self, display: int = 1) -> None: ) self._display = display + @telemetry.record_call() + @override + def get_active_display(self) -> int: + """ + Get the active display. + """ + return self._display + @telemetry.record_call(exclude={"command"}) @override def run_command(self, command: str, timeout_ms: int = 30000) -> None: @@ -724,3 +730,29 @@ def run_command(self, command: str, timeout_ms: int = 30000) -> None: ) ), ) + + @telemetry.record_call() + def get_display_information( + self, + ) -> GetDisplayInformationResponse: + """ + Get information about all available displays and virtual screen. + + Returns: + GetDisplayInformationResponse: A Pydantic model containing information + about all available displays and the virtual screen. + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + + self._reporter.add_message("AgentOS", "get_display_information()") + + response: controller_v1_pbs.Response_GetDisplayInformation = ( + self._stub.GetDisplayInformation(controller_v1_pbs.Request_Void()) + ) + response_dict = MessageToDict( + response, + preserving_proto_field_name=True, + ) + return GetDisplayInformationResponse.model_validate(response_dict) diff --git a/src/askui/tools/screen_switch_tool.py b/src/askui/tools/screen_switch_tool.py new file mode 100644 index 00000000..8833a5f0 --- /dev/null +++ b/src/askui/tools/screen_switch_tool.py @@ -0,0 +1,42 @@ +from askui.models.shared.tools import Tool +from askui.tools.agent_os import AgentOs, DisplayInformation + + +class ScreenSwitchTool(Tool): + """ + Tool to change the screen. + """ + + def __init__(self, agent_os: AgentOs) -> None: + # We need to determine the number of displays available to provide context to the agent + # indicating that screen switching can only be done this number of times. + displays: list[DisplayInformation] = agent_os.get_display_information().displays + + super().__init__( + name="screen_switch", + description=f""" + This tool is useful for switching between multiple displays to find information not present on the current active screen. + If more than one display is available, this tool cycles through them. + Number of displays available: {len(displays)}. + """, + ) + self._agent_os: AgentOs = agent_os + self._displays: list[DisplayInformation] = displays + + def __call__(self) -> None: + """ + Cycles to the next display if there are multiple displays. + This tool is useful to switch between multiple displays if some information is not found on the current display. + """ + if len(self._displays) <= 1: + return + + active_display_id: int = self._agent_os.get_active_display() + + current_display_index: int = next( + i for i, d in enumerate(self._displays) if d.display_id == active_display_id + ) + # if current_index is the last index, wrap around to the first index + next_index: int = (current_display_index + 1) % len(self._displays) + + self._agent_os.set_display(self._displays[next_index].display_id) From 046dc892784ace7c098f6071d8872ec03bff4eba Mon Sep 17 00:00:00 2001 From: danyalxahid-askui Date: Thu, 24 Jul 2025 18:14:09 +0200 Subject: [PATCH 2/7] feat: revert removed code --- src/askui/tools/askui/askui_controller.py | 503 ++++++++++++++++++++++ 1 file changed, 503 insertions(+) diff --git a/src/askui/tools/askui/askui_controller.py b/src/askui/tools/askui/askui_controller.py index 80ff2805..c559dcd6 100644 --- a/src/askui/tools/askui/askui_controller.py +++ b/src/askui/tools/askui/askui_controller.py @@ -768,3 +768,506 @@ def get_display_information( preserving_proto_field_name=True, ) return GetDisplayInformationResponse.model_validate(response_dict) + +@telemetry.record_call() + def get_process_list( + self, get_extended_info: bool = False + ) -> controller_v1_pbs.Response_GetProcessList: + """ + Get a list of running processes. + Args: + get_extended_info (bool, optional): Whether to include + extended process information. + Defaults to `False`. + Returns: + controller_v1_pbs.Response_GetProcessList: Process list response containing: + - processes: List of ProcessInfo objects + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + + self._reporter.add_message("AgentOS", f"get_process_list({get_extended_info})") + + response: controller_v1_pbs.Response_GetProcessList = self._stub.GetProcessList( + controller_v1_pbs.Request_GetProcessList(getExtendedInfo=get_extended_info) + ) + + return response + + @telemetry.record_call() + def get_window_list( + self, process_id: int + ) -> controller_v1_pbs.Response_GetWindowList: + """ + Get a list of windows for a specific process. + Args: + process_id (int): The ID of the process to get windows for. + Returns: + controller_v1_pbs.Response_GetWindowList: Window list response containing: + - windows: List of WindowInfo objects with ID and name + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + + self._reporter.add_message("AgentOS", f"get_window_list({process_id})") + + response: controller_v1_pbs.Response_GetWindowList = self._stub.GetWindowList( + controller_v1_pbs.Request_GetWindowList(processID=process_id) + ) + + return response + + @telemetry.record_call() + def get_automation_target_list( + self, + ) -> controller_v1_pbs.Response_GetAutomationTargetList: + """ + Get a list of available automation targets. + Returns: + controller_v1_pbs.Response_GetAutomationTargetList: + Automation target list response: + - targets: List of AutomationTarget objects + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + + self._reporter.add_message("AgentOS", "get_automation_target_list()") + + response: controller_v1_pbs.Response_GetAutomationTargetList = ( + self._stub.GetAutomationTargetList(controller_v1_pbs.Request_Void()) + ) + + return response + + @telemetry.record_call() + def set_mouse_delay(self, delay_ms: int) -> None: + """ + Configure mouse action delay. + Args: + delay_ms (int): The delay in milliseconds to set for mouse actions. + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + + self._reporter.add_message("AgentOS", f"set_mouse_delay({delay_ms})") + + self._stub.SetMouseDelay( + controller_v1_pbs.Request_SetMouseDelay( + sessionInfo=self._session_info, delayInMilliseconds=delay_ms + ) + ) + + @telemetry.record_call() + def set_keyboard_delay(self, delay_ms: int) -> None: + """ + Configure keyboard action delay. + Args: + delay_ms (int): The delay in milliseconds to set for keyboard actions. + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + + self._reporter.add_message("AgentOS", f"set_keyboard_delay({delay_ms})") + + self._stub.SetKeyboardDelay( + controller_v1_pbs.Request_SetKeyboardDelay( + sessionInfo=self._session_info, delayInMilliseconds=delay_ms + ) + ) + + @telemetry.record_call() + def set_active_window(self, process_id: int, window_id: int) -> None: + """ + Set the active window for automation. + Args: + process_id (int): The ID of the process that owns the window. + window_id (int): The ID of the window to set as active. + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + + self._reporter.add_message( + "AgentOS", f"set_active_window({process_id}, {window_id})" + ) + + self._stub.SetActiveWindow( + controller_v1_pbs.Request_SetActiveWindow( + processID=process_id, windowID=window_id + ) + ) + + @telemetry.record_call() + def set_active_automation_target(self, target_id: int) -> None: + """ + Set the active automation target. + Args: + target_id (int): The ID of the automation target to set as active. + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + + self._reporter.add_message( + "AgentOS", f"set_active_automation_target({target_id})" + ) + + self._stub.SetActiveAutomationTarget( + controller_v1_pbs.Request_SetActiveAutomationTarget(ID=target_id) + ) + + @telemetry.record_call() + def schedule_batched_action( + self, + action_class_id: controller_v1_pbs.ActionClassID, + action_parameters: controller_v1_pbs.ActionParameters, + ) -> controller_v1_pbs.Response_ScheduleBatchedAction: + """ + Schedule an action for batch execution. + Args: + action_class_id (controller_v1_pbs.ActionClassID): The class ID + of the action to schedule. + action_parameters (controller_v1_pbs.ActionParameters): + Parameters for the action. + Returns: + controller_v1_pbs.Response_ScheduleBatchedAction: Response containing + the scheduled action ID. + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + + self._reporter.add_message( + "AgentOS", + f"schedule_batched_action({action_class_id}, {action_parameters})", + ) + + response: controller_v1_pbs.Response_ScheduleBatchedAction = ( + self._stub.ScheduleBatchedAction( + controller_v1_pbs.Request_ScheduleBatchedAction( + sessionInfo=self._session_info, + actionClassID=action_class_id, + actionParameters=action_parameters, + ) + ) + ) + + return response + + @telemetry.record_call() + def start_batch_run(self) -> None: + """ + Start executing batched actions. + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + + self._reporter.add_message("AgentOS", "start_batch_run()") + + self._stub.StartBatchRun( + controller_v1_pbs.Request_StartBatchRun(sessionInfo=self._session_info) + ) + + @telemetry.record_call() + def stop_batch_run(self) -> None: + """ + Stop executing batched actions. + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + + self._reporter.add_message("AgentOS", "stop_batch_run()") + + self._stub.StopBatchRun( + controller_v1_pbs.Request_StopBatchRun(sessionInfo=self._session_info) + ) + + @telemetry.record_call() + def get_action_count(self) -> controller_v1_pbs.Response_GetActionCount: + """ + Get the count of recorded or batched actions. + Returns: + controller_v1_pbs.Response_GetActionCount: Response + containing the action count. + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + + self._reporter.add_message("AgentOS", "get_action_count()") + + response: controller_v1_pbs.Response_GetActionCount = self._stub.GetActionCount( + controller_v1_pbs.Request_GetActionCount(sessionInfo=self._session_info) + ) + + return response + + @telemetry.record_call() + def get_action(self, action_index: int) -> controller_v1_pbs.Response_GetAction: + """ + Get a specific action by its index. + Args: + action_index (int): The index of the action to retrieve. + Returns: + controller_v1_pbs.Response_GetAction: Action information containing: + - actionID: The action ID + - actionClassID: The action class ID + - actionParameters: The action parameters + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + + self._reporter.add_message("AgentOS", f"get_action({action_index})") + + response: controller_v1_pbs.Response_GetAction = self._stub.GetAction( + controller_v1_pbs.Request_GetAction( + sessionInfo=self._session_info, actionIndex=action_index + ) + ) + + return response + + @telemetry.record_call() + def remove_action(self, action_id: int) -> None: + """ + Remove a specific action by its ID. + Args: + action_id (int): The ID of the action to remove. + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + + self._reporter.add_message("AgentOS", f"remove_action({action_id})") + + self._stub.RemoveAction( + controller_v1_pbs.Request_RemoveAction( + sessionInfo=self._session_info, actionID=action_id + ) + ) + + @telemetry.record_call() + def remove_all_actions(self) -> None: + """ + Clear all recorded or batched actions. + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + + self._reporter.add_message("AgentOS", "remove_all_actions()") + + self._stub.RemoveAllActions( + controller_v1_pbs.Request_RemoveAllActions(sessionInfo=self._session_info) + ) + + def _send_message(self, message: str) -> controller_v1_pbs.Response_Send: + """ + Send a general message to the controller. + Args: + message (str): The message to send to the controller. + Returns: + controller_v1_pbs.Response_Send: Response containing + the message from the controller. + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + + self._reporter.add_message("AgentOS", f'send_message("{message}")') + + response: controller_v1_pbs.Response_Send = self._stub.Send( + controller_v1_pbs.Request_Send(message=message) + ) + + return response + + @telemetry.record_call() + def get_mouse_position(self) -> Coordinate: + """ + Get the mouse cursor position + Returns: + Coordinate: Response containing the result of the mouse position change. + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + req_json = create_get_mouse_position_command( + self._session_guid + ).model_dump_json(exclude_unset=True) + self._reporter.add_message("AgentOS", "get_mouse_position()") + res = self._send_message(req_json) + parsed_res = AskuiAgentosSendResponseSchema.model_validate_json(res.message) + return Coordinate( + x=parsed_res.message.command.response.position.x.root, # type: ignore[union-attr] + y=parsed_res.message.command.response.position.y.root, # type: ignore[union-attr] + ) + + @telemetry.record_call() + def set_mouse_position(self, x: int, y: int) -> None: + """ + Set the mouse cursor position to specific coordinates. + Args: + x (int): The horizontal coordinate (in pixels) to set the cursor to. + y (int): The vertical coordinate (in pixels) to set the cursor to. + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + req_json = create_set_mouse_position_command( + x, y, self._session_guid + ).model_dump_json(exclude_unset=True) + self._reporter.add_message("AgentOS", f"set_mouse_position({x},{y})") + self._send_message(req_json) + + @telemetry.record_call() + def render_quad(self, style: RenderObjectStyle) -> int: + """ + Render a quad object to the display. + Args: + style (RenderObjectStyle): The style properties for the quad. + Returns: + int: Object ID. + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + self._reporter.add_message("AgentOS", f"render_quad({style})") + req_json = create_quad_command(style, self._session_guid).model_dump_json( + exclude_unset=True, by_alias=True + ) + res = self._send_message(req_json) + parsed_response = AskuiAgentosSendResponseSchema.model_validate_json( + res.message + ) + return int(parsed_response.message.command.response.id.root) # type: ignore[union-attr] + + @telemetry.record_call() + def render_line(self, style: RenderObjectStyle, points: list[Coordinate]) -> int: + """ + Render a line object to the display. + Args: + style (RenderObjectStyle): The style properties for the line. + points (list[Coordinates]): The points defining the line. + Returns: + int: Object ID. + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + self._reporter.add_message("AgentOS", f"render_line({style}, {points})") + req = create_line_command(style, points, self._session_guid).model_dump_json( + exclude_unset=True, by_alias=True + ) + res = self._send_message(req) + parsed_response = AskuiAgentosSendResponseSchema.model_validate_json( + res.message + ) + return int(parsed_response.message.command.response.id.root) # type: ignore[union-attr] + + @telemetry.record_call(exclude={"image_data"}) + def render_image(self, style: RenderObjectStyle, image_data: str) -> int: + """ + Render an image object to the display. + Args: + style (RenderObjectStyle): The style properties for the image. + image_data (str): The base64-encoded image data. + Returns: + int: Object ID. + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + self._reporter.add_message("AgentOS", f"render_image({style}, [image_data])") + req = create_image_command( + style, image_data, self._session_guid + ).model_dump_json(exclude_unset=True, by_alias=True) + res = self._send_message(req) + + parsed_response = AskuiAgentosSendResponseSchema.model_validate_json( + res.message + ) + return int(parsed_response.message.command.response.id.root) # type: ignore[union-attr] + + @telemetry.record_call() + def render_text(self, style: RenderObjectStyle, content: str) -> int: + """ + Render a text object to the display. + Args: + style (RenderObjectStyle): The style properties for the text. + content (str): The text content to display. + Returns: + int: Object ID. + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + self._reporter.add_message("AgentOS", f"render_text({style}, {content})") + + req = create_text_command(style, content, self._session_guid).model_dump_json( + exclude_unset=True, by_alias=True + ) + res = self._send_message(req) + parsed_response = AskuiAgentosSendResponseSchema.model_validate_json( + res.message + ) + return int(parsed_response.message.command.response.id.root) # type: ignore[union-attr] + + @telemetry.record_call() + def update_render_object(self, object_id: int, style: RenderObjectStyle) -> None: + """ + Update styling properties of an existing render object. + Args: + object_id (float): The ID of the render object to update. + style (RenderObjectStyle): The new style properties. + Returns: + int: Object ID. + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + self._reporter.add_message( + "AgentOS", f"update_render_object({object_id}, {style})" + ) + req = create_update_render_object_command( + object_id, style, self._session_guid + ).model_dump_json(exclude_unset=True, by_alias=True) + self._send_message(req) + + @telemetry.record_call() + def delete_render_object(self, object_id: int) -> None: + """ + Delete an existing render object from the display. + Args: + object_id (RenderObjectId): The ID of the render object to delete. + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + self._reporter.add_message("AgentOS", f"delete_render_object({object_id})") + req = create_delete_render_object_command( + object_id, self._session_guid + ).model_dump_json(exclude_unset=True, by_alias=True) + self._send_message(req) + + @telemetry.record_call() + def clear_render_objects(self) -> None: + """ + Clear all render objects from the display. + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + self._reporter.add_message("AgentOS", "clear_render_objects()") + req = create_clear_render_objects_command(self._session_guid).model_dump_json( + exclude_unset=True, by_alias=True + ) + self._send_message(req) From 9ae1b0263bec6429b21b806445ed8479d1f08eec Mon Sep 17 00:00:00 2001 From: danyalxahid-askui Date: Thu, 24 Jul 2025 18:15:25 +0200 Subject: [PATCH 3/7] fix: add missing import for Coordinate and adjust telemetry decorator indentation - Added import for `Coordinate` in `askui_controller.py`. - Adjusted the indentation of the `@telemetry.record_call()` decorator for the `get_process_list` method. --- src/askui/tools/askui/askui_controller.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/askui/tools/askui/askui_controller.py b/src/askui/tools/askui/askui_controller.py index c559dcd6..c7d95961 100644 --- a/src/askui/tools/askui/askui_controller.py +++ b/src/askui/tools/askui/askui_controller.py @@ -16,8 +16,9 @@ from askui.container import telemetry from askui.logger import logger from askui.reporting import Reporter -from askui.tools.agent_os import (AgentOs, GetDisplayInformationResponse, - ModifierKey, PcKey) +from askui.tools.agent_os import (AgentOs, Coordinate, + GetDisplayInformationResponse, ModifierKey, + PcKey) from askui.tools.askui.askui_ui_controller_grpc.generated import \ Controller_V1_pb2 as controller_v1_pbs from askui.tools.askui.askui_ui_controller_grpc.generated import \ @@ -769,7 +770,7 @@ def get_display_information( ) return GetDisplayInformationResponse.model_validate(response_dict) -@telemetry.record_call() + @telemetry.record_call() def get_process_list( self, get_extended_info: bool = False ) -> controller_v1_pbs.Response_GetProcessList: From daa65ef932ca045b8fe003b88a0c391962c3445d Mon Sep 17 00:00:00 2001 From: danyalxahid-askui Date: Thu, 24 Jul 2025 19:05:03 +0200 Subject: [PATCH 4/7] refactor: clean up import statements and formatting - Reformatted import statements in `agent.py`, `agent_os.py`, and `askui_controller.py` for improved readability. - Removed unnecessary blank lines to maintain consistent formatting across files. --- src/askui/agent.py | 10 +++-- src/askui/tools/agent_os.py | 7 +++- src/askui/tools/askui/askui_controller.py | 51 +++++++++++++++-------- 3 files changed, 44 insertions(+), 24 deletions(-) diff --git a/src/askui/agent.py b/src/askui/agent.py index eeb17a4f..3600257d 100644 --- a/src/askui/agent.py +++ b/src/askui/agent.py @@ -10,9 +10,12 @@ from askui.agent_base import AgentBase from askui.container import telemetry from askui.locators.locators import Locator -from askui.models.shared.settings import (COMPUTER_USE_20241022_BETA_FLAG, - COMPUTER_USE_20250124_BETA_FLAG, - ActSettings, MessageSettings) +from askui.models.shared.settings import ( + COMPUTER_USE_20241022_BETA_FLAG, + COMPUTER_USE_20250124_BETA_FLAG, + ActSettings, + MessageSettings, +) from askui.models.shared.tools import Tool from askui.tools.computer import Computer20241022Tool, Computer20250124Tool from askui.tools.exception_tool import ExceptionTool @@ -399,7 +402,6 @@ def _get_default_settings_for_act(self, model_choice: str) -> ActSettings: @override def _get_default_tools_for_act(self, model_choice: str) -> list[Tool]: - self._tools.append(ScreenSwitchTool(agent_os=self.tools.os)) match model_choice: diff --git a/src/askui/tools/agent_os.py b/src/askui/tools/agent_os.py index 45166c9f..84a54c88 100644 --- a/src/askui/tools/agent_os.py +++ b/src/askui/tools/agent_os.py @@ -5,8 +5,9 @@ from pydantic import BaseModel, Field if TYPE_CHECKING: - from askui.tools.askui.askui_ui_controller_grpc.generated.AgentOS_Send_Request_2501 import \ - RenderObjectStyle # noqa: E501 + from askui.tools.askui.askui_ui_controller_grpc.generated.AgentOS_Send_Request_2501 import ( + RenderObjectStyle, + ) # noqa: E501 ModifierKey = Literal[ @@ -159,6 +160,7 @@ class SizeInPixels(BaseModel): width: int height: int + class DisplayInformation(BaseModel): """Contains information about a single display.""" @@ -171,6 +173,7 @@ class GetDisplayInformationResponse(BaseModel): displays: list[DisplayInformation] + class Coordinate(BaseModel): x: int y: int diff --git a/src/askui/tools/askui/askui_controller.py b/src/askui/tools/askui/askui_controller.py index c7d95961..9c353bcd 100644 --- a/src/askui/tools/askui/askui_controller.py +++ b/src/askui/tools/askui/askui_controller.py @@ -16,28 +16,43 @@ from askui.container import telemetry from askui.logger import logger from askui.reporting import Reporter -from askui.tools.agent_os import (AgentOs, Coordinate, - GetDisplayInformationResponse, ModifierKey, - PcKey) -from askui.tools.askui.askui_ui_controller_grpc.generated import \ - Controller_V1_pb2 as controller_v1_pbs -from askui.tools.askui.askui_ui_controller_grpc.generated import \ - Controller_V1_pb2_grpc as controller_v1 -from askui.tools.askui.askui_ui_controller_grpc.generated.AgentOS_Send_Request_2501 import \ - RenderObjectStyle # noqa: E501 -from askui.tools.askui.askui_ui_controller_grpc.generated.AgentOS_Send_Response_2501 import \ - AskuiAgentosSendResponseSchema # noqa: E501 +from askui.tools.agent_os import ( + AgentOs, + Coordinate, + GetDisplayInformationResponse, + ModifierKey, + PcKey, +) +from askui.tools.askui.askui_ui_controller_grpc.generated import ( + Controller_V1_pb2 as controller_v1_pbs, +) +from askui.tools.askui.askui_ui_controller_grpc.generated import ( + Controller_V1_pb2_grpc as controller_v1, +) +from askui.tools.askui.askui_ui_controller_grpc.generated.AgentOS_Send_Request_2501 import ( + RenderObjectStyle, +) # noqa: E501 +from askui.tools.askui.askui_ui_controller_grpc.generated.AgentOS_Send_Response_2501 import ( + AskuiAgentosSendResponseSchema, +) # noqa: E501 from askui.tools.askui.command_helpers import ( - create_clear_render_objects_command, create_delete_render_object_command, - create_get_mouse_position_command, create_image_command, - create_line_command, create_quad_command, - create_set_mouse_position_command, create_text_command, - create_update_render_object_command) + create_clear_render_objects_command, + create_delete_render_object_command, + create_get_mouse_position_command, + create_image_command, + create_line_command, + create_quad_command, + create_set_mouse_position_command, + create_text_command, + create_update_render_object_command, +) from askui.utils.image_utils import draw_point_on_image from ..utils import process_exists, wait_for_port -from .exceptions import (AskUiControllerOperationFailedError, - AskUiControllerOperationTimeoutError) +from .exceptions import ( + AskUiControllerOperationFailedError, + AskUiControllerOperationTimeoutError, +) class RemoteDeviceController(BaseModel): From 84221ce2f409251fb6534f9fbd6a263703b26fd2 Mon Sep 17 00:00:00 2001 From: danyalxahid-askui Date: Thu, 24 Jul 2025 19:17:17 +0200 Subject: [PATCH 5/7] refactor: improve code formatting and readability - Reformatted comments and docstrings in `screen_switch_tool.py` for better clarity. - Cleaned up import statements in `askui_controller.py` for improved organization and readability. --- src/askui/tools/askui/askui_controller.py | 2 +- src/askui/tools/screen_switch_tool.py | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/askui/tools/askui/askui_controller.py b/src/askui/tools/askui/askui_controller.py index 9c353bcd..af83c793 100644 --- a/src/askui/tools/askui/askui_controller.py +++ b/src/askui/tools/askui/askui_controller.py @@ -7,7 +7,7 @@ from typing import Literal, Type import grpc -from google.protobuf.json_format import MessageToDict # type: ignore +from google.protobuf.json_format import MessageToDict from PIL import Image from pydantic import BaseModel, Field, model_validator from pydantic_settings import BaseSettings, SettingsConfigDict diff --git a/src/askui/tools/screen_switch_tool.py b/src/askui/tools/screen_switch_tool.py index 8833a5f0..23957a64 100644 --- a/src/askui/tools/screen_switch_tool.py +++ b/src/askui/tools/screen_switch_tool.py @@ -8,14 +8,16 @@ class ScreenSwitchTool(Tool): """ def __init__(self, agent_os: AgentOs) -> None: - # We need to determine the number of displays available to provide context to the agent - # indicating that screen switching can only be done this number of times. + # We need to determine the number of displays available to provide context + # to the agent indicating that screen switching can only be done this number + # of times. displays: list[DisplayInformation] = agent_os.get_display_information().displays super().__init__( name="screen_switch", description=f""" - This tool is useful for switching between multiple displays to find information not present on the current active screen. + This tool is useful for switching between multiple displays to find + information not present on the current active screen. If more than one display is available, this tool cycles through them. Number of displays available: {len(displays)}. """, @@ -26,7 +28,8 @@ def __init__(self, agent_os: AgentOs) -> None: def __call__(self) -> None: """ Cycles to the next display if there are multiple displays. - This tool is useful to switch between multiple displays if some information is not found on the current display. + This tool is useful to switch between multiple displays if some information is + not found on the current display. """ if len(self._displays) <= 1: return From e54f95f87a109dee84b0cbaa195d0da9397c6010 Mon Sep 17 00:00:00 2001 From: danyalxahid-askui Date: Thu, 24 Jul 2025 19:32:29 +0200 Subject: [PATCH 6/7] refactor: clean up import statements and formatting - Improved formatting of import statements in `agent_os.py` and `askui_controller.py` for better readability. - Removed unnecessary comments to enhance code clarity. --- src/askui/tools/agent_os.py | 4 ++-- src/askui/tools/askui/askui_controller.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/askui/tools/agent_os.py b/src/askui/tools/agent_os.py index 84a54c88..a566bcd5 100644 --- a/src/askui/tools/agent_os.py +++ b/src/askui/tools/agent_os.py @@ -5,9 +5,9 @@ from pydantic import BaseModel, Field if TYPE_CHECKING: - from askui.tools.askui.askui_ui_controller_grpc.generated.AgentOS_Send_Request_2501 import ( + from askui.tools.askui.askui_ui_controller_grpc.generated.AgentOS_Send_Request_2501 import ( # noqa: E501 RenderObjectStyle, - ) # noqa: E501 + ) ModifierKey = Literal[ diff --git a/src/askui/tools/askui/askui_controller.py b/src/askui/tools/askui/askui_controller.py index af83c793..1010a232 100644 --- a/src/askui/tools/askui/askui_controller.py +++ b/src/askui/tools/askui/askui_controller.py @@ -29,12 +29,12 @@ from askui.tools.askui.askui_ui_controller_grpc.generated import ( Controller_V1_pb2_grpc as controller_v1, ) -from askui.tools.askui.askui_ui_controller_grpc.generated.AgentOS_Send_Request_2501 import ( +from askui.tools.askui.askui_ui_controller_grpc.generated.AgentOS_Send_Request_2501 import ( # noqa: E501 RenderObjectStyle, -) # noqa: E501 -from askui.tools.askui.askui_ui_controller_grpc.generated.AgentOS_Send_Response_2501 import ( +) +from askui.tools.askui.askui_ui_controller_grpc.generated.AgentOS_Send_Response_2501 import ( # noqa: E501 AskuiAgentosSendResponseSchema, -) # noqa: E501 +) from askui.tools.askui.command_helpers import ( create_clear_render_objects_command, create_delete_render_object_command, From d1a44dabf17ad330bf0e7d76a200746c727b67e1 Mon Sep 17 00:00:00 2001 From: danyalxahid-askui <150019098+danyalxahid-askui@users.noreply.github.com> Date: Fri, 25 Jul 2025 10:48:07 +0200 Subject: [PATCH 7/7] Update src/askui/tools/agent_os.py Co-authored-by: adi-wan-askui <105295410+adi-wan-askui@users.noreply.github.com> --- src/askui/tools/agent_os.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/askui/tools/agent_os.py b/src/askui/tools/agent_os.py index a566bcd5..e60353f6 100644 --- a/src/askui/tools/agent_os.py +++ b/src/askui/tools/agent_os.py @@ -164,8 +164,8 @@ class SizeInPixels(BaseModel): class DisplayInformation(BaseModel): """Contains information about a single display.""" - display_id: int = Field(alias="displayID") - size_in_pixels: SizeInPixels = Field(alias="sizeInPixels") + display_id: int = Field(validation_alias="displayID") + size_in_pixels: SizeInPixels = Field(validation_alias="sizeInPixels") class GetDisplayInformationResponse(BaseModel):