diff --git a/src/askui/tools/computer.py b/src/askui/tools/computer.py
index 8289b591..a97bb3cb 100644
--- a/src/askui/tools/computer.py
+++ b/src/askui/tools/computer.py
@@ -12,8 +12,12 @@
 from pydantic import Field, validate_call
 from typing_extensions import Self, override
 
-from askui.tools.agent_os import AgentOs, ModifierKey, PcKey
-from askui.utils.image_utils import scale_coordinates_back, scale_image_with_padding
+from askui.tools.agent_os import AgentOs, Coordinate, ModifierKey, PcKey
+from askui.utils.image_utils import (
+    scale_coordinates_back,
+    scale_coordinates_with_padding,
+    scale_image_with_padding,
+)
 
 from ..models.shared.tools import InputSchema, Tool
 
@@ -223,10 +227,10 @@ def __call__(  # noqa: C901
         text: str | None = None,
         coordinate: tuple[Annotated[int, Field(ge=0)], Annotated[int, Field(ge=0)]]
         | None = None,
-    ) -> Image.Image | None:
+    ) -> Image.Image | None | str:
         match action:
             case "cursor_position":
-                raise ActionNotImplementedError(action, self.name)
+                return self._get_mouse_position_scaled()
             case "double_click":
                 return self._agent_os.click("left", 2)
             case "key":
@@ -325,6 +329,32 @@ def _screenshot(self) -> Image.Image:
         self._real_screen_height = screenshot.height
         return scale_image_with_padding(screenshot, self._width, self._height)
 
+    def _get_mouse_position_scaled(self) -> str:
+        """Return the cursor position in the scaled (model-facing) coordinate space.
+
+        The position reported by the agent OS is in real-screen pixels; it is
+        mapped into the `self._width` x `self._height` padded space used for
+        screenshots and rounded to whole pixels, matching the integer
+        coordinates the tool accepts as input.
+
+        Returns:
+            str: The position formatted as `"X=<x>,Y=<y>"`.
+        """
+        mouse_position: Coordinate = self._agent_os.get_mouse_position()
+        real_screen_width, real_screen_height = self._get_real_screen_resolution()
+        x, y = scale_coordinates_with_padding(
+            mouse_position.x,
+            mouse_position.y,
+            real_screen_width,
+            real_screen_height,
+            self._width,
+            self._height,
+        )
+
+        # Scaling yields floats; round so the model sees e.g. "X=512,Y=384"
+        # instead of "X=512.0,Y=384.0".
+        return f"X={round(x)},Y={round(y)}"
+
 
 class Computer20241022Tool(ComputerToolBase):
     type: Literal["computer_20241022"] = "computer_20241022"
@@ -426,7 +456,7 @@ def __call__(  # noqa: C901
         scroll_amount: Annotated[int, Field(ge=0)] | None = None,
         duration: Annotated[float, Field(ge=0.0, le=100.0)] | None = None,
         key: str | None = None,  # maybe not all keys supported
-    ) -> Image.Image | None:
+    ) -> Image.Image | None | str:
         match action:
             case "hold_key":
                 self._hold_key(keystroke=text, duration=duration)  # type: ignore[arg-type]
diff --git a/src/askui/utils/image_utils.py b/src/askui/utils/image_utils.py
index c2647f76..f3a60a5b 100644
--- a/src/askui/utils/image_utils.py
+++ b/src/askui/utils/image_utils.py
@@ -190,6 +190,93 @@ def scale_image_with_padding(
     )
 
 
+def scale_coordinates_with_padding(
+    x: float,
+    y: float,
+    original_width: int,
+    original_height: int,
+    max_width: int,
+    max_height: int,
+) -> Tuple[float, float]:
+    """Scale coordinates from the original image into the scaled, padded image.
+
+    This is the forward counterpart of `scale_coordinates_back`: it maps a point
+    in the original coordinate system to the corresponding point in the
+    aspect-fit, centre-padded coordinate system of size `max_width` x `max_height`.
+
+    Args:
+        x (float): The x-coordinate in the original coordinate system.
+        y (float): The y-coordinate in the original coordinate system.
+        original_width (int): The width of the original coordinate system.
+        original_height (int): The height of the original coordinate system.
+        max_width (int): The width of the scaled and padded coordinate system.
+        max_height (int): The height of the scaled and padded coordinate system.
+
+    Returns:
+        Tuple[float, float]: A tuple of (scaled_x, scaled_y).
+
+    Raises:
+        ValueError: If the scaled coordinates are outside the padded image area.
+    """
+    scale_factor, scaled_width, scaled_height = _calculate_aspect_fit_scaling(
+        original_width, original_height, max_width, max_height
+    )
+
+    # The scaled content is centred, so padding is split evenly on both sides.
+    pad_left = (max_width - scaled_width) // 2
+    pad_top = (max_height - scaled_height) // 2
+
+    scaled_x = x * scale_factor + pad_left
+    scaled_y = y * scale_factor + pad_top
+
+    if scaled_x < 0 or scaled_y < 0 or scaled_x > max_width or scaled_y > max_height:
+        error_msg = "Coordinates are outside the padded image area"
+        raise ValueError(error_msg)
+    return scaled_x, scaled_y
+
+
+def _calculate_aspect_fit_scaling(
+    original_width: int,
+    original_height: int,
+    max_width: int,
+    max_height: int,
+) -> Tuple[float, int, int]:
+    """Calculate the scaling needed to fit an image within given dimensions.
+
+    The aspect ratio is preserved, so the scaled size matches the target in one
+    dimension and is less than or equal to it in the other.
+
+    Args:
+        original_width (int): The width of the original coordinate system.
+        original_height (int): The height of the original coordinate system.
+        max_width (int): The maximum width of the output scaled coordinate system.
+        max_height (int): The maximum height of the output scaled coordinate system.
+
+    Returns:
+        Tuple[float, int, int]: A tuple of (scale_factor, scaled_width, scaled_height).
+
+    Raises:
+        ValueError: If any of the dimensions is not positive.
+    """
+    if min(original_width, original_height, max_width, max_height) <= 0:
+        error_msg = "Dimensions must be positive"
+        raise ValueError(error_msg)
+
+    aspect_ratio = original_width / original_height
+    if (max_width / max_height) > aspect_ratio:
+        # Target is relatively wider than the source: height is the limiting side.
+        scale_factor = max_height / original_height
+        scaled_width = int(original_width * scale_factor)
+        scaled_height = max_height
+    else:
+        # Target is relatively taller (or equal): width is the limiting side.
+        scale_factor = max_width / original_width
+        scaled_width = max_width
+        scaled_height = int(original_height * scale_factor)
+
+    return scale_factor, scaled_width, scaled_height
+
+
 def scale_coordinates_back(
     x: float,
     y: float,
@@ -214,15 +301,10 @@ def scale_coordinates_back(
     Raises:
         ValueError: If the coordinates are outside the padded image area.
     """
-    aspect_ratio = original_width / original_height
-    if (max_width / max_height) > aspect_ratio:
-        scale_factor = max_height / original_height
-        scaled_width = int(original_width * scale_factor)
-        scaled_height = max_height
-    else:
-        scale_factor = max_width / original_width
-        scaled_width = max_width
-        scaled_height = int(original_height * scale_factor)
+    scale_factor, scaled_width, scaled_height = _calculate_aspect_fit_scaling(
+        original_width, original_height, max_width, max_height
+    )
+
     pad_left = (max_width - scaled_width) // 2
     pad_top = (max_height - scaled_height) // 2
     adjusted_x = x - pad_left