From a2223701c29080caca2b48fb800a10372707bb81 Mon Sep 17 00:00:00 2001
From: danyalxahid-askui <danyal.zahid@askui.com>
Date: Thu, 24 Jul 2025 21:19:53 +0200
Subject: [PATCH 1/2] refactor: enhance ComputerToolBase functionality and add
 coordinate scaling

- Added `_get_mouse_position_scaled` method to `ComputerToolBase` for retrieving and scaling mouse position.
- Updated `__call__` so that the `cursor_position` action returns the scaled mouse position instead of raising `ActionNotImplementedError`.
- Introduced `scale_coordinates_with_padding` function in `image_utils.py` for scaling coordinates with padding.
- Cleaned up import statements for better organization and readability.
---
 src/askui/tools/computer.py    | 23 +++++++++-----
 src/askui/utils/image_utils.py | 58 +++++++++++++++++++++++++++++++++-
 2 files changed, 72 insertions(+), 9 deletions(-)

diff --git a/src/askui/tools/computer.py b/src/askui/tools/computer.py
index 8289b591..70ed4260 100644
--- a/src/askui/tools/computer.py
+++ b/src/askui/tools/computer.py
@@ -4,16 +4,16 @@
 from dataclasses import dataclass
 from typing import Annotated, Literal, TypedDict, cast, get_args
 
-from anthropic.types.beta import (
-    BetaToolComputerUse20241022Param,
-    BetaToolComputerUse20250124Param,
-)
+from anthropic.types.beta import (BetaToolComputerUse20241022Param,
+                                  BetaToolComputerUse20250124Param)
 from PIL import Image
 from pydantic import Field, validate_call
 from typing_extensions import Self, override
 
-from askui.tools.agent_os import AgentOs, ModifierKey, PcKey
-from askui.utils.image_utils import scale_coordinates_back, scale_image_with_padding
+from askui.tools.agent_os import AgentOs, Coordinate, ModifierKey, PcKey
+from askui.utils.image_utils import (scale_coordinates_back,
+                                     scale_coordinates_with_padding,
+                                     scale_image_with_padding)
 
 from ..models.shared.tools import InputSchema, Tool
 
@@ -223,10 +223,10 @@ def __call__(  # noqa: C901
         text: str | None = None,
         coordinate: tuple[Annotated[int, Field(ge=0)], Annotated[int, Field(ge=0)]]
         | None = None,
-    ) -> Image.Image | None:
+    ) -> Image.Image | None | Coordinate:
         match action:
             case "cursor_position":
-                raise ActionNotImplementedError(action, self.name)
+                return self._get_mouse_position_scaled()
             case "double_click":
                 return self._agent_os.click("left", 2)
             case "key":
@@ -326,6 +326,13 @@ def _screenshot(self) -> Image.Image:
         return scale_image_with_padding(screenshot, self._width, self._height)
 
 
+    def _get_mouse_position_scaled(self) -> Coordinate:
+        mouse_position: Coordinate = self._agent_os.get_mouse_position()
+        real_screen_width, real_screen_height = self._get_real_screen_resolution()
+        x, y = scale_coordinates_with_padding(mouse_position.x, mouse_position.y, real_screen_width, real_screen_height, self._width, self._height)
+        return Coordinate(x=int(x), y=int(y))
+
+
 class Computer20241022Tool(ComputerToolBase):
     type: Literal["computer_20241022"] = "computer_20241022"
 
diff --git a/src/askui/utils/image_utils.py b/src/askui/utils/image_utils.py
index c2647f76..6caa7881 100644
--- a/src/askui/utils/image_utils.py
+++ b/src/askui/utils/image_utils.py
@@ -7,8 +7,9 @@
 from pathlib import Path
 from typing import Any, Literal, Tuple, Union
 
-from PIL import Image, ImageDraw, ImageOps, UnidentifiedImageError
+from PIL import Image
 from PIL import Image as PILImage
+from PIL import ImageDraw, ImageOps, UnidentifiedImageError
 from pydantic import ConfigDict, RootModel, field_validator
 
 # Regex to capture any kind of valid base64 data url (with optional media type and ;base64)
@@ -190,6 +191,61 @@ def scale_image_with_padding(
     )
 
 
+def scale_coordinates_with_padding(
+    x: float,
+    y: float,
+    original_width: int,
+    original_height: int,
+    max_width: int,
+    max_height: int,
+) -> Tuple[float, float]:
+    """Convert coordinates from an original image to a scaled and padded image.
+
+    This function takes coordinates from the original image and calculates
+    their corresponding position in an image that has been scaled and
+    padded to fit within `max_width` and `max_height`.
+
+    Args:
+        x (float): The x-coordinate in the original image.
+        y (float): The y-coordinate in the original image.
+        original_width (int): The width of the original image.
+        original_height (int): The height of the original image.
+        max_width (int): The maximum width of the output scaled and padded image.
+        max_height (int): The maximum height of the output scaled and padded image.
+
+    Returns:
+        Tuple[float, float]: A tuple of (scaled_x, scaled_y) coordinates
+        in the padded image.
+    """
+    aspect_ratio = original_width / original_height
+    if (max_width / max_height) > aspect_ratio:
+        scale_factor = max_height / original_height
+        scaled_width = int(original_width * scale_factor)
+        scaled_height = max_height
+    else:
+        scale_factor = max_width / original_width
+        scaled_width = max_width
+        scaled_height = int(original_height * scale_factor)
+
+    pad_left = (max_width - scaled_width) // 2
+    pad_top = (max_height - scaled_height) // 2
+
+    scaled_x = x * scale_factor + pad_left
+    scaled_y = y * scale_factor + pad_top
+
+    if (
+        scaled_x < 0
+        or scaled_y < 0
+        or scaled_x > max_width
+        or scaled_y > max_height
+    ):
+        error_msg = "Coordinates are outside the padded image area"
+        raise ValueError(error_msg)
+
+
+    return scaled_x, scaled_y
+
+
 def scale_coordinates_back(
     x: float,
     y: float,

From cd4d4e82647b6def867296f9f14ddb67bc18a0ed Mon Sep 17 00:00:00 2001
From: danyalxahid-askui <danyal.zahid@askui.com>
Date: Thu, 24 Jul 2025 21:33:13 +0200
Subject: [PATCH 2/2] refactor: update action method return type to include
 Coordinate

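- Modified the return type of the `__call__` method (which dispatches on the `action` argument) in `Computer20250124Tool` to include `Coordinate` in addition to `Image.Image | None`, matching the `__call__` signature updated in patch 1/2.
- Reformatted the imports, blank lines, and the bounds-check condition touched by patch 1/2 (formatting only, no behavior change).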
---
 src/askui/tools/computer.py    | 26 ++++++++++++++++++--------
 src/askui/utils/image_utils.py | 11 ++---------
 2 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/src/askui/tools/computer.py b/src/askui/tools/computer.py
index 70ed4260..08723ad6 100644
--- a/src/askui/tools/computer.py
+++ b/src/askui/tools/computer.py
@@ -4,16 +4,20 @@
 from dataclasses import dataclass
 from typing import Annotated, Literal, TypedDict, cast, get_args
 
-from anthropic.types.beta import (BetaToolComputerUse20241022Param,
-                                  BetaToolComputerUse20250124Param)
+from anthropic.types.beta import (
+    BetaToolComputerUse20241022Param,
+    BetaToolComputerUse20250124Param,
+)
 from PIL import Image
 from pydantic import Field, validate_call
 from typing_extensions import Self, override
 
 from askui.tools.agent_os import AgentOs, Coordinate, ModifierKey, PcKey
-from askui.utils.image_utils import (scale_coordinates_back,
-                                     scale_coordinates_with_padding,
-                                     scale_image_with_padding)
+from askui.utils.image_utils import (
+    scale_coordinates_back,
+    scale_coordinates_with_padding,
+    scale_image_with_padding,
+)
 
 from ..models.shared.tools import InputSchema, Tool
 
@@ -325,11 +329,17 @@ def _screenshot(self) -> Image.Image:
         self._real_screen_height = screenshot.height
         return scale_image_with_padding(screenshot, self._width, self._height)
 
-
     def _get_mouse_position_scaled(self) -> Coordinate:
         mouse_position: Coordinate = self._agent_os.get_mouse_position()
         real_screen_width, real_screen_height = self._get_real_screen_resolution()
-        x, y = scale_coordinates_with_padding(mouse_position.x, mouse_position.y, real_screen_width, real_screen_height, self._width, self._height)
+        x, y = scale_coordinates_with_padding(
+            mouse_position.x,
+            mouse_position.y,
+            real_screen_width,
+            real_screen_height,
+            self._width,
+            self._height,
+        )
         return Coordinate(x=int(x), y=int(y))
 
 
@@ -433,7 +443,7 @@ def __call__(  # noqa: C901
         scroll_amount: Annotated[int, Field(ge=0)] | None = None,
         duration: Annotated[float, Field(ge=0.0, le=100.0)] | None = None,
         key: str | None = None,  # maybe not all keys supported
-    ) -> Image.Image | None:
+    ) -> Image.Image | None | Coordinate:
         match action:
             case "hold_key":
                 self._hold_key(keystroke=text, duration=duration)  # type: ignore[arg-type]
diff --git a/src/askui/utils/image_utils.py b/src/askui/utils/image_utils.py
index 6caa7881..1cd40a89 100644
--- a/src/askui/utils/image_utils.py
+++ b/src/askui/utils/image_utils.py
@@ -7,9 +7,8 @@
 from pathlib import Path
 from typing import Any, Literal, Tuple, Union
 
-from PIL import Image
+from PIL import Image, ImageDraw, ImageOps, UnidentifiedImageError
 from PIL import Image as PILImage
-from PIL import ImageDraw, ImageOps, UnidentifiedImageError
 from pydantic import ConfigDict, RootModel, field_validator
 
 # Regex to capture any kind of valid base64 data url (with optional media type and ;base64)
@@ -233,16 +232,10 @@ def scale_coordinates_with_padding(
     scaled_x = x * scale_factor + pad_left
     scaled_y = y * scale_factor + pad_top
 
-    if (
-        scaled_x < 0
-        or scaled_y < 0
-        or scaled_x > max_width
-        or scaled_y > max_height
-    ):
+    if scaled_x < 0 or scaled_y < 0 or scaled_x > max_width or scaled_y > max_height:
         error_msg = "Coordinates are outside the padded image area"
         raise ValueError(error_msg)
 
-
     return scaled_x, scaled_y