Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 22 additions & 5 deletions src/askui/tools/computer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,12 @@
from pydantic import Field, validate_call
from typing_extensions import Self, override

from askui.tools.agent_os import AgentOs, ModifierKey, PcKey
from askui.utils.image_utils import scale_coordinates_back, scale_image_with_padding
from askui.tools.agent_os import AgentOs, Coordinate, ModifierKey, PcKey
from askui.utils.image_utils import (
scale_coordinates_back,
scale_coordinates_with_padding,
scale_image_with_padding,
)

from ..models.shared.tools import InputSchema, Tool

Expand Down Expand Up @@ -223,10 +227,10 @@ def __call__( # noqa: C901
text: str | None = None,
coordinate: tuple[Annotated[int, Field(ge=0)], Annotated[int, Field(ge=0)]]
| None = None,
) -> Image.Image | None:
) -> Image.Image | None | Coordinate:
match action:
case "cursor_position":
raise ActionNotImplementedError(action, self.name)
return self._get_mouse_position_scaled()
case "double_click":
return self._agent_os.click("left", 2)
case "key":
Expand Down Expand Up @@ -325,6 +329,19 @@ def _screenshot(self) -> Image.Image:
self._real_screen_height = screenshot.height
return scale_image_with_padding(screenshot, self._width, self._height)

def _get_mouse_position_scaled(self) -> Coordinate:
    """Return the mouse position translated into the tool's scaled coordinates.

    The position reported by the agent OS is passed through
    `scale_coordinates_with_padding`, using the real screen resolution as the
    source size and `self._width` x `self._height` as the target size. The
    resulting values are truncated to integers.

    Returns:
        Coordinate: The mouse position in the scaled, padded coordinate space.
    """
    # Query the OS first, then the resolution (same order as before).
    position: Coordinate = self._agent_os.get_mouse_position()
    screen_width, screen_height = self._get_real_screen_resolution()
    scaled_x, scaled_y = scale_coordinates_with_padding(
        position.x,
        position.y,
        screen_width,
        screen_height,
        self._width,
        self._height,
    )
    # int() truncates towards zero; the helper returns floats.
    return Coordinate(x=int(scaled_x), y=int(scaled_y))


class Computer20241022Tool(ComputerToolBase):
type: Literal["computer_20241022"] = "computer_20241022"
Expand Down Expand Up @@ -426,7 +443,7 @@ def __call__( # noqa: C901
scroll_amount: Annotated[int, Field(ge=0)] | None = None,
duration: Annotated[float, Field(ge=0.0, le=100.0)] | None = None,
key: str | None = None, # maybe not all keys supported
) -> Image.Image | None:
) -> Image.Image | None | Coordinate:
match action:
case "hold_key":
self._hold_key(keystroke=text, duration=duration) # type: ignore[arg-type]
Expand Down
49 changes: 49 additions & 0 deletions src/askui/utils/image_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,55 @@ def scale_image_with_padding(
)


def scale_coordinates_with_padding(
    x: float,
    y: float,
    original_width: int,
    original_height: int,
    max_width: int,
    max_height: int,
) -> Tuple[float, float]:
    """Map a point from an original coordinate space into a scaled, padded one.

    The original area (`original_width` x `original_height`) is scaled
    uniformly, preserving its aspect ratio, so that it fits inside the target
    area (`max_width` x `max_height`). The leftover space is split into
    padding on both sides of the shorter axis. The given point is scaled by
    the same factor and shifted by the left/top padding.

    Args:
        x (float): The x-coordinate in the original coordinate space.
        y (float): The y-coordinate in the original coordinate space.
        original_width (int): The width of the original coordinate space.
        original_height (int): The height of the original coordinate space.
        max_width (int): The width of the target (scaled and padded) space.
        max_height (int): The height of the target (scaled and padded) space.

    Returns:
        Tuple[float, float]: The `(scaled_x, scaled_y)` coordinates in the
            target space.

    Raises:
        ValueError: If any dimension is not positive, or if the resulting
            point lies outside the target area.
    """
    # Fail early with a clear message instead of an opaque ZeroDivisionError
    # (or silently wrong results for negative sizes) further down.
    if min(original_width, original_height, max_width, max_height) <= 0:
        error_msg = "Original and target dimensions must be positive"
        raise ValueError(error_msg)

    aspect_ratio = original_width / original_height
    if (max_width / max_height) > aspect_ratio:
        # Target is relatively wider than the original: height is the
        # limiting dimension, padding goes left and right.
        scale_factor = max_height / original_height
        scaled_width = int(original_width * scale_factor)
        scaled_height = max_height
    else:
        # Target is relatively taller (or equal): width is the limiting
        # dimension, padding goes top and bottom.
        scale_factor = max_width / original_width
        scaled_width = max_width
        scaled_height = int(original_height * scale_factor)

    # Floor division: an odd leftover pixel ends up on the right/bottom side.
    pad_left = (max_width - scaled_width) // 2
    pad_top = (max_height - scaled_height) // 2

    scaled_x = x * scale_factor + pad_left
    scaled_y = y * scale_factor + pad_top

    # This can trigger when the input point itself lies outside the original
    # area (e.g. negative values or values beyond the original size), since
    # nothing above restricts `x` and `y`.
    if scaled_x < 0 or scaled_y < 0 or scaled_x > max_width or scaled_y > max_height:
        error_msg = "Coordinates are outside the padded image area"
        raise ValueError(error_msg)

    return scaled_x, scaled_y


def scale_coordinates_back(
x: float,
y: float,
Expand Down