Skip to content
28 changes: 23 additions & 5 deletions src/askui/tools/computer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,12 @@
from pydantic import Field, validate_call
from typing_extensions import Self, override

from askui.tools.agent_os import AgentOs, ModifierKey, PcKey
from askui.utils.image_utils import scale_coordinates_back, scale_image_with_padding
from askui.tools.agent_os import AgentOs, Coordinate, ModifierKey, PcKey
from askui.utils.image_utils import (
scale_coordinates_back,
scale_coordinates_with_padding,
scale_image_with_padding,
)

from ..models.shared.tools import InputSchema, Tool

Expand Down Expand Up @@ -223,10 +227,10 @@ def __call__( # noqa: C901
text: str | None = None,
coordinate: tuple[Annotated[int, Field(ge=0)], Annotated[int, Field(ge=0)]]
| None = None,
) -> Image.Image | None:
) -> Image.Image | None | str:
match action:
case "cursor_position":
raise ActionNotImplementedError(action, self.name)
return self._get_mouse_position_scaled()
case "double_click":
return self._agent_os.click("left", 2)
case "key":
Expand Down Expand Up @@ -325,6 +329,20 @@ def _screenshot(self) -> Image.Image:
self._real_screen_height = screenshot.height
return scale_image_with_padding(screenshot, self._width, self._height)

def _get_mouse_position_scaled(self) -> str:
    """Report the mouse position in the scaled, padded coordinate system.

    The position reported by the agent OS is mapped from the real screen
    resolution onto the `self._width` x `self._height` area using
    `scale_coordinates_with_padding`.

    Returns:
        str: The mapped position formatted as `X=<x>,Y=<y>`.

    Raises:
        ValueError: Propagated from `scale_coordinates_with_padding` when the
            mapped position lies outside the padded area.
    """
    position: Coordinate = self._agent_os.get_mouse_position()
    screen_width, screen_height = self._get_real_screen_resolution()

    # Keyword arguments make the source -> target mapping explicit.
    x, y = scale_coordinates_with_padding(
        x=position.x,
        y=position.y,
        original_width=screen_width,
        original_height=screen_height,
        max_width=self._width,
        max_height=self._height,
    )
    return f"X={x},Y={y}"


class Computer20241022Tool(ComputerToolBase):
type: Literal["computer_20241022"] = "computer_20241022"
Expand Down Expand Up @@ -426,7 +444,7 @@ def __call__( # noqa: C901
scroll_amount: Annotated[int, Field(ge=0)] | None = None,
duration: Annotated[float, Field(ge=0.0, le=100.0)] | None = None,
key: str | None = None, # maybe not all keys supported
) -> Image.Image | None:
) -> Image.Image | None | str:
match action:
case "hold_key":
self._hold_key(keystroke=text, duration=duration) # type: ignore[arg-type]
Expand Down
73 changes: 64 additions & 9 deletions src/askui/utils/image_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,66 @@ def scale_image_with_padding(
)


def scale_coordinates_with_padding(
    x: float,
    y: float,
    original_width: int,
    original_height: int,
    max_width: int,
    max_height: int,
) -> Tuple[float, float]:
    """Map a point from the original coordinate system into the padded one.

    The original area is fitted into `max_width` x `max_height` while keeping
    its aspect ratio, and the fitted area is centred inside the target. The
    point is scaled by the same factor and shifted by the resulting padding.

    Args:
        x (float): The x-coordinate in the original coordinate system.
        y (float): The y-coordinate in the original coordinate system.
        original_width (int): The width of the original coordinate system.
        original_height (int): The height of the original coordinate system.
        max_width (int): The width of the scaled and padded coordinate system.
        max_height (int): The height of the scaled and padded coordinate system.

    Returns:
        Tuple[float, float]: A tuple of (scaled_x, scaled_y) in the scaled and
            padded coordinate system.

    Raises:
        ValueError: If the scaled coordinates are outside the padded image area.
    """
    factor, fitted_width, fitted_height = _calculate_aspect_fit_scaling(
        original_width, original_height, max_width, max_height
    )

    # Leftover space is split evenly (floor division) before the fitted area.
    offset_x = (max_width - fitted_width) // 2
    offset_y = (max_height - fitted_height) // 2

    scaled_x = x * factor + offset_x
    scaled_y = y * factor + offset_y

    # Both edges of the target area are treated as valid positions.
    x_outside = scaled_x < 0 or scaled_x > max_width
    y_outside = scaled_y < 0 or scaled_y > max_height
    if x_outside or y_outside:
        error_msg = "Coordinates are outside the padded image area"
        raise ValueError(error_msg)

    return scaled_x, scaled_y


def _calculate_aspect_fit_scaling(
original_width: int,
original_height: int,
max_width: int,
max_height: int,
) -> Tuple[float, float, float]:
"""Calculate the scale factors for an image to fit within specified dimensions while maintaining aspect ratio.

Args:
original_width (int): The width of the original coordinate system.
original_height (int): The height of the original coordinate system.
max_width (int): The maximum width of the output scaled coordinate system.
max_height (int): The maximum height of the output scaled coordinate system.

Returns:
Tuple[float, float, float]: A tuple of (scale_factor, scaled_width, scaled_height).

"""

aspect_ratio = original_width / original_height
if (max_width / max_height) > aspect_ratio:
scale_factor = max_height / original_height
scaled_width = int(original_width * scale_factor)
scaled_height = max_height
else:
scale_factor = max_width / original_width
scaled_width = max_width
scaled_height = int(original_height * scale_factor)

return scale_factor, scaled_width, scaled_height


def scale_coordinates_back(
x: float,
y: float,
Expand All @@ -214,15 +274,10 @@ def scale_coordinates_back(
Raises:
ValueError: If the coordinates are outside the padded image area.
"""
aspect_ratio = original_width / original_height
if (max_width / max_height) > aspect_ratio:
scale_factor = max_height / original_height
scaled_width = int(original_width * scale_factor)
scaled_height = max_height
else:
scale_factor = max_width / original_width
scaled_width = max_width
scaled_height = int(original_height * scale_factor)
scale_factor, scaled_width, scaled_height = _calculate_aspect_fit_scaling(
original_width, original_height, max_width, max_height
)

pad_left = (max_width - scaled_width) // 2
pad_top = (max_height - scaled_height) // 2
adjusted_x = x - pad_left
Expand Down