From 454e72ac7d4dd12e41a22545f93c693423c346d6 Mon Sep 17 00:00:00 2001
From: Samir mlika <105347215+mlikasam-askui@users.noreply.github.com>
Date: Thu, 14 Aug 2025 11:48:34 +0200
Subject: [PATCH 1/3] Feat: Add locate all function

---
 README.md                                  |  4 +-
 src/askui/agent.py                         |  2 +-
 src/askui/agent_base.py                    | 60 ++++++++++++++++++----
 src/askui/android_agent.py                 |  2 +-
 src/askui/models/anthropic/messages_api.py | 16 +++---
 src/askui/models/askui/inference_api.py    | 21 ++++----
 src/askui/models/askui/model_router.py     |  4 +-
 src/askui/models/huggingface/spaces_api.py |  8 +--
 src/askui/models/model_router.py           |  2 +-
 src/askui/models/models.py                 |  6 +--
 src/askui/models/shared/facade.py          |  2 +-
 src/askui/models/ui_tars_ep/ui_tars_api.py |  4 +-
 tests/integration/agent/test_retry.py      |  4 +-
 tests/integration/test_custom_models.py    |  4 +-
 14 files changed, 93 insertions(+), 46 deletions(-)

diff --git a/README.md b/README.md
index 862ab362..74636394 100644
--- a/README.md
+++ b/README.md
@@ -367,12 +367,12 @@ class MyGetAndLocateModel(GetModel, LocateModel):
         locator: str | Locator,
         image: ImageSource,
         model_choice: ModelComposition | str,
-    ) -> Point:
+    ) -> list[Point]:
         # Implement custom locate logic, e.g.:
         # - Use a different object detection model
         # - Implement custom element finding
         # - Call external vision services
-        return (100, 100)  # Example coordinates
+        return [(100, 100)]  # Example coordinates
 
 
 # Create model registry
diff --git a/src/askui/agent.py b/src/askui/agent.py
index 98fb82af..a0fa569a 100644
--- a/src/askui/agent.py
+++ b/src/askui/agent.py
@@ -182,7 +182,7 @@ def _click(
     def _mouse_move(
         self, locator: str | Locator, model: ModelComposition | str | None = None
     ) -> None:
-        point = self._locate(locator=locator, model=model)
+        point = self._locate(locator=locator, model=model)[0]
         self.tools.os.mouse_move(point[0], point[1])
 
     @telemetry.record_call(exclude={"locator"})
diff --git a/src/askui/agent_base.py b/src/askui/agent_base.py
index a98a4679..41ab429c 100644
--- a/src/askui/agent_base.py
+++ b/src/askui/agent_base.py
@@ -326,8 +326,8 @@ def _locate(
         locator: str | Locator,
         screenshot: Optional[Img] = None,
         model: ModelComposition | str | None = None,
-    ) -> Point:
-        def locate_with_screenshot() -> Point:
+    ) -> list[Point]:
+        def locate_with_screenshot() -> list[Point]:
             _screenshot = ImageSource(
                 self._agent_os.screenshot() if screenshot is None else screenshot
             )
@@ -337,10 +337,10 @@ def locate_with_screenshot() -> Point:
                 model_choice=model or self._model_choice["locate"],
             )
 
-        point = self._retry.attempt(locate_with_screenshot)
-        self._reporter.add_message("ModelRouter", f"locate: ({point[0]}, {point[1]})")
-        logger.debug("ModelRouter locate: (%d, %d)", point[0], point[1])
-        return point
+        points = self._retry.attempt(locate_with_screenshot)
+        self._reporter.add_message("ModelRouter", f"locate {len(points)} elements")
+        logger.debug("ModelRouter locate: %d elements", len(points))
+        return points
 
     @telemetry.record_call(exclude={"locator", "screenshot"})
     @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
@@ -351,7 +351,7 @@ def locate(
         model: ModelComposition | str | None = None,
     ) -> Point:
         """
-        Locates the UI element identified by the provided locator.
+        Locates the first matching UI element identified by the provided locator.
 
         Args:
             locator (str | Locator): The identifier or description of the element to
@@ -374,8 +374,50 @@ def locate(
                 print(f"Element found at coordinates: {point}")
             ```
         """
-        self._reporter.add_message("User", f"locate {locator}")
-        logger.debug("VisionAgent received instruction to locate %s", locator)
+        self._reporter.add_message("User", f"locate first matching element {locator}")
+        logger.debug(
+            "VisionAgent received instruction to locate first matching element %s",
+            locator,
+        )
+        return self._locate(locator, screenshot, model)[0]
+
+    @telemetry.record_call(exclude={"locator", "screenshot"})
+    @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
+    def locate_all(
+        self,
+        locator: str | Locator,
+        screenshot: Optional[Img] = None,
+        model: ModelComposition | str | None = None,
+    ) -> list[Point]:
+        """
+        Locates all matching UI elements identified by the provided locator.
+
+        Args:
+            locator (str | Locator): The identifier or description of the element to
+                locate.
+            screenshot (Img | None, optional): The screenshot to use for locating the
+                element. Can be a path to an image file, a PIL Image object or a data
+                URL. If `None`, takes a screenshot of the currently selected display.
+            model (ModelComposition | str | None, optional): The composition or name
+                of the model(s) to be used for locating the element using the `locator`.
+
+        Returns:
+            list[Point]: The coordinates of the elements as a list of tuples (x, y).
+
+        Example:
+            ```python
+            from askui import VisionAgent
+
+            with VisionAgent() as agent:
+                points = agent.locate_all("Submit button")
+                print(f"Found {len(points)} elements at coordinates: {points}")
+            ```
+        """
+        self._reporter.add_message("User", f"locate all matching UI elements {locator}")
+        logger.debug(
+            "VisionAgent received instruction to locate all matching UI elements %s",
+            locator,
+        )
         return self._locate(locator, screenshot, model)
 
     @telemetry.record_call()
diff --git a/src/askui/android_agent.py b/src/askui/android_agent.py
index 6fcb6d35..6834f88b 100644
--- a/src/askui/android_agent.py
+++ b/src/askui/android_agent.py
@@ -198,7 +198,7 @@ def tap(
             msg += f" on {target}"
             self._reporter.add_message("User", msg)
             logger.debug("VisionAgent received instruction to click on %s", target)
-            point = self._locate(locator=target, model=model)
+            point = self._locate(locator=target, model=model)[0]
             self.os.tap(point[0], point[1])
 
     @telemetry.record_call(exclude={"text"})
diff --git a/src/askui/models/anthropic/messages_api.py b/src/askui/models/anthropic/messages_api.py
index bfb8704a..c5f40b99 100644
--- a/src/askui/models/anthropic/messages_api.py
+++ b/src/askui/models/anthropic/messages_api.py
@@ -200,7 +200,7 @@ def locate(
         locator: str | Locator,
         image: ImageSource,
         model_choice: ModelComposition | str,
-    ) -> Point:
+    ) -> list[Point]:
         if not isinstance(model_choice, str):
             error_msg = "Model composition is not supported for Claude"
             raise NotImplementedError(error_msg)
@@ -221,12 +221,14 @@ def locate(
                 ),
                 model_choice=model_choice,
             )
-            return scale_coordinates(
-                extract_click_coordinates(content),
-                image.root.size,
-                self._settings.resolution,
-                inverse=True,
-            )
+            return [
+                scale_coordinates(
+                    extract_click_coordinates(content),
+                    image.root.size,
+                    self._settings.resolution,
+                    inverse=True,
+                )
+            ]
         except (
             _UnexpectedResponseError,
             ValueError,
diff --git a/src/askui/models/askui/inference_api.py b/src/askui/models/askui/inference_api.py
index d40c5150..e706172c 100644
--- a/src/askui/models/askui/inference_api.py
+++ b/src/askui/models/askui/inference_api.py
@@ -160,7 +160,7 @@ def locate(
         locator: str | Locator,
         image: ImageSource,
         model_choice: ModelComposition | str,
-    ) -> Point:
+    ) -> list[Point]:
         serialized_locator = (
             self._locator_serializer.serialize(locator=locator)
             if isinstance(locator, Locator)
@@ -169,7 +169,7 @@ def locate(
         logger.debug(f"serialized_locator:\n{json_lib.dumps(serialized_locator)}")
         json: dict[str, Any] = {
             "image": image.to_data_url(),
-            "instruction": f"Click on {serialized_locator['instruction']}",
+            "instruction": f"get element {serialized_locator['instruction']}",
         }
         if "customElements" in serialized_locator:
             json["customElements"] = serialized_locator["customElements"]
@@ -180,17 +180,20 @@ def locate(
             )
         response = self._post(path="/inference", json=json)
         content = response.json()
-        assert content["type"] == "COMMANDS", (
+        assert content["type"] == "DETECTED_ELEMENTS", (
             f"Received unknown content type {content['type']}"
         )
-        actions = [
-            el for el in content["data"]["actions"] if el["inputEvent"] == "MOUSE_MOVE"
-        ]
-        if len(actions) == 0:
+        detected_elements = content["data"]["detected_elements"]
+        if len(detected_elements) == 0:
             raise ElementNotFoundError(locator, serialized_locator)
 
-        position = actions[0]["position"]
-        return int(position["x"]), int(position["y"])
+        return [
+            (
+                int((element["bndbox"]["xmax"] + element["bndbox"]["xmin"]) / 2),
+                int((element["bndbox"]["ymax"] + element["bndbox"]["ymin"]) / 2),
+            )
+            for element in detected_elements
+        ]
 
     @override
     def get(
diff --git a/src/askui/models/askui/model_router.py b/src/askui/models/askui/model_router.py
index d2bf857f..6998003a 100644
--- a/src/askui/models/askui/model_router.py
+++ b/src/askui/models/askui/model_router.py
@@ -18,7 +18,7 @@ def __init__(self, inference_api: AskUiInferenceApi):
 
     def _locate_with_askui_ocr(
         self, screenshot: ImageSource, locator: str | Text
-    ) -> Point:
+    ) -> list[Point]:
         locator = Text(locator) if isinstance(locator, str) else locator
         return self._inference_api.locate(
             locator, screenshot, model_choice=ModelName.ASKUI__OCR
@@ -30,7 +30,7 @@ def locate(
         locator: str | Locator,
         image: ImageSource,
         model_choice: ModelComposition | str,
-    ) -> Point:
+    ) -> list[Point]:
         if (
             isinstance(model_choice, ModelComposition)
             or model_choice == ModelName.ASKUI
diff --git a/src/askui/models/huggingface/spaces_api.py b/src/askui/models/huggingface/spaces_api.py
index a12d37bd..81ac7775 100644
--- a/src/askui/models/huggingface/spaces_api.py
+++ b/src/askui/models/huggingface/spaces_api.py
@@ -65,7 +65,7 @@ def locate(
         locator: str | Locator,
         image: ImageSource,
         model_choice: ModelComposition | str,
-    ) -> Point:
+    ) -> list[Point]:
         """Predict element location using Hugging Face Spaces."""
         if not isinstance(model_choice, str):
             error_msg = "Model composition is not supported for Hugging Face Spaces"
@@ -76,9 +76,9 @@ def locate(
                 if isinstance(locator, Locator)
                 else locator
             )
-            return self._spaces[model_choice](
-                image.root, serialized_locator, model_choice
-            )
+            return [
+                self._spaces[model_choice](image.root, serialized_locator, model_choice)
+            ]
         except (ValueError, json.JSONDecodeError, httpx.HTTPError) as e:
             error_msg = f"Hugging Face Spaces Exception: {e}"
             raise AutomationError(error_msg) from e
diff --git a/src/askui/models/model_router.py b/src/askui/models/model_router.py
index 457ec10d..9ca6877a 100644
--- a/src/askui/models/model_router.py
+++ b/src/askui/models/model_router.py
@@ -212,7 +212,7 @@ def locate(
         screenshot: ImageSource,
         locator: str | Locator,
         model_choice: ModelComposition | str,
-    ) -> Point:
+    ) -> list[Point]:
         _model_choice = (
             ModelName.ASKUI
             if isinstance(model_choice, ModelComposition)
diff --git a/src/askui/models/models.py b/src/askui/models/models.py
index 5bd65ddc..a89eb6d3 100644
--- a/src/askui/models/models.py
+++ b/src/askui/models/models.py
@@ -303,9 +303,9 @@ def locate(
                 locator: str | Locator,
                 image: ImageSource,
                 model_choice: ModelComposition | str,
-            ) -> Point:
+            ) -> list[Point]:
                 # Implement custom locate logic
-                return (100, 100)
+                return [(100, 100)]
 
         with VisionAgent(models={"my-locate": MyLocateModel()}) as agent:
             agent.click("button", model="my-locate")
@@ -318,7 +318,7 @@ def locate(
         locator: str | Locator,
         image: ImageSource,
         model_choice: ModelComposition | str,
-    ) -> Point:
+    ) -> list[Point]:
         """Find the coordinates of a UI element in an image.
 
         Args:
diff --git a/src/askui/models/shared/facade.py b/src/askui/models/shared/facade.py
index 9789f3b1..cb8b4ec2 100644
--- a/src/askui/models/shared/facade.py
+++ b/src/askui/models/shared/facade.py
@@ -56,5 +56,5 @@ def locate(
         locator: str | Locator,
         image: ImageSource,
         model_choice: ModelComposition | str,
-    ) -> Point:
+    ) -> list[Point]:
         return self._locate_model.locate(locator, image, model_choice)
diff --git a/src/askui/models/ui_tars_ep/ui_tars_api.py b/src/askui/models/ui_tars_ep/ui_tars_api.py
index 4c2c84ee..78e38714 100644
--- a/src/askui/models/ui_tars_ep/ui_tars_api.py
+++ b/src/askui/models/ui_tars_ep/ui_tars_api.py
@@ -146,7 +146,7 @@ def locate(
         locator: str | Locator,
         image: ImageSource,
         model_choice: ModelComposition | str,
-    ) -> Point:
+    ) -> list[Point]:
         if not isinstance(model_choice, str):
             error_msg = "Model composition is not supported for UI-TARS"
             raise NotImplementedError(error_msg)
@@ -169,7 +169,7 @@ def locate(
             width, height = image.root.size
             new_height, new_width = smart_resize(height, width)
             x, y = (int(x / new_width * width), int(y / new_height * height))
-            return x, y
+            return [(x, y)]
         raise ElementNotFoundError(locator, locator_serialized)
 
     @override
diff --git a/tests/integration/agent/test_retry.py b/tests/integration/agent/test_retry.py
index 76f1dd67..44aaffb6 100644
--- a/tests/integration/agent/test_retry.py
+++ b/tests/integration/agent/test_retry.py
@@ -27,11 +27,11 @@ def locate(
         locator: Union[str, Locator],
         image: ImageSource,  # noqa: ARG002
         model_choice: Union[ModelComposition, str],  # noqa: ARG002
-    ) -> Tuple[int, int]:
+    ) -> list[Tuple[int, int]]:
         self.calls += 1
         if self.calls <= self.fail_times:
             raise ElementNotFoundError(locator, locator)
-        return self.succeed_point
+        return [self.succeed_point]
 
 
 @pytest.fixture
diff --git a/tests/integration/test_custom_models.py b/tests/integration/test_custom_models.py
index c1991b49..7347f551 100644
--- a/tests/integration/test_custom_models.py
+++ b/tests/integration/test_custom_models.py
@@ -93,11 +93,11 @@ def locate(
         locator: str | Locator,
         image: ImageSource,
         model_choice: ModelComposition | str,
-    ) -> Point:
+    ) -> list[Point]:
         self.locators.append(locator)
         self.images.append(image)
         self.model_choices.append(model_choice)
-        return self._point
+        return [self._point]
 
 
 class SimpleResponseSchema(ResponseSchemaBase):

From 748b9870145f27f1ed1b74f0abe37285714f131d Mon Sep 17 00:00:00 2001
From: Samir Mlika <samir.mlika@askui.com>
Date: Fri, 15 Aug 2025 10:37:23 +0200
Subject: [PATCH 2/3] implement review remarks

---
 README.md                                  |  2 +-
 src/askui/__init__.py                      |  2 ++
 src/askui/agent_base.py                    | 13 +++++++++----
 src/askui/models/__init__.py               |  2 ++
 src/askui/models/anthropic/messages_api.py |  4 ++--
 src/askui/models/askui/inference_api.py    |  4 ++--
 src/askui/models/askui/model_router.py     |  6 +++---
 src/askui/models/huggingface/spaces_api.py |  4 ++--
 src/askui/models/model_router.py           |  4 ++--
 src/askui/models/models.py                 | 13 +++++++++----
 src/askui/models/shared/facade.py          | 10 ++++++++--
 src/askui/models/ui_tars_ep/ui_tars_api.py | 10 ++++++++--
 tests/integration/test_custom_models.py    |  3 ++-
 13 files changed, 52 insertions(+), 25 deletions(-)

diff --git a/README.md b/README.md
index 74636394..e1bd7b4d 100644
--- a/README.md
+++ b/README.md
@@ -367,7 +367,7 @@ class MyGetAndLocateModel(GetModel, LocateModel):
         locator: str | Locator,
         image: ImageSource,
         model_choice: ModelComposition | str,
-    ) -> list[Point]:
+    ) -> PointList:
         # Implement custom locate logic, e.g.:
         # - Use a different object detection model
         # - Implement custom element finding
diff --git a/src/askui/__init__.py b/src/askui/__init__.py
index 393c72f5..d9eb4be5 100644
--- a/src/askui/__init__.py
+++ b/src/askui/__init__.py
@@ -25,6 +25,7 @@
     OnMessageCb,
     OnMessageCbParam,
     Point,
+    PointList,
     TextBlockParam,
     TextCitationParam,
     ToolResultBlockParam,
@@ -82,6 +83,7 @@
     "OnMessageCbParam",
     "PcKey",
     "Point",
+    "PointList",
     "ResponseSchema",
     "ResponseSchemaBase",
     "Retry",
diff --git a/src/askui/agent_base.py b/src/askui/agent_base.py
index 41ab429c..af8f3379 100644
--- a/src/askui/agent_base.py
+++ b/src/askui/agent_base.py
@@ -26,6 +26,7 @@
     ModelName,
     ModelRegistry,
     Point,
+    PointList,
     TotalModelChoice,
 )
 from .models.types.response_schemas import ResponseSchema
@@ -321,13 +322,14 @@ class LinkedListNode(ResponseSchemaBase):
         self._reporter.add_message("Agent", message_content)
         return response
 
+    @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
     def _locate(
         self,
         locator: str | Locator,
         screenshot: Optional[Img] = None,
         model: ModelComposition | str | None = None,
-    ) -> list[Point]:
-        def locate_with_screenshot() -> list[Point]:
+    ) -> PointList:
+        def locate_with_screenshot() -> PointList:
             _screenshot = ImageSource(
                 self._agent_os.screenshot() if screenshot is None else screenshot
             )
@@ -388,10 +390,13 @@ def locate_all(
         locator: str | Locator,
         screenshot: Optional[Img] = None,
         model: ModelComposition | str | None = None,
-    ) -> list[Point]:
+    ) -> PointList:
         """
         Locates all matching UI elements identified by the provided locator.
 
+        Note: Some LocateModels can only locate a single element. In this case, the
+        returned list will have a length of 1.
+
         Args:
             locator (str | Locator): The identifier or description of the element to
                 locate.
@@ -402,7 +407,7 @@ def locate_all(
                 of the model(s) to be used for locating the element using the `locator`.
 
         Returns:
-            list[Point]: The coordinates of the elements as a list of tuples (x, y).
+            PointList: The coordinates of the elements as a list of tuples (x, y).
 
         Example:
             ```python
diff --git a/src/askui/models/__init__.py b/src/askui/models/__init__.py
index 3de6de65..f496d769 100644
--- a/src/askui/models/__init__.py
+++ b/src/askui/models/__init__.py
@@ -9,6 +9,7 @@
     ModelName,
     ModelRegistry,
     Point,
+    PointList,
 )
 from .openrouter.model import OpenRouterModel
 from .openrouter.settings import ChatCompletionsCreateSettings, OpenRouterSettings
@@ -53,6 +54,7 @@
     "OpenRouterModel",
     "OpenRouterSettings",
     "Point",
+    "PointList",
     "TextBlockParam",
     "TextCitationParam",
     "ToolResultBlockParam",
diff --git a/src/askui/models/anthropic/messages_api.py b/src/askui/models/anthropic/messages_api.py
index c5f40b99..fcb93dad 100644
--- a/src/askui/models/anthropic/messages_api.py
+++ b/src/askui/models/anthropic/messages_api.py
@@ -27,7 +27,7 @@
     LocateModel,
     ModelComposition,
     ModelName,
-    Point,
+    PointList,
 )
 from askui.models.shared.agent_message_param import (
     Base64ImageSourceParam,
@@ -200,7 +200,7 @@ def locate(
         locator: str | Locator,
         image: ImageSource,
         model_choice: ModelComposition | str,
-    ) -> list[Point]:
+    ) -> PointList:
         if not isinstance(model_choice, str):
             error_msg = "Model composition is not supported for Claude"
             raise NotImplementedError(error_msg)
diff --git a/src/askui/models/askui/inference_api.py b/src/askui/models/askui/inference_api.py
index e706172c..a967172e 100644
--- a/src/askui/models/askui/inference_api.py
+++ b/src/askui/models/askui/inference_api.py
@@ -20,7 +20,7 @@
 from askui.locators.serializers import AskUiLocatorSerializer, AskUiSerializedLocator
 from askui.logger import logger
 from askui.models.exceptions import ElementNotFoundError
-from askui.models.models import GetModel, LocateModel, ModelComposition, Point
+from askui.models.models import GetModel, LocateModel, ModelComposition, PointList
 from askui.models.shared.agent_message_param import MessageParam
 from askui.models.shared.messages_api import MessagesApi
 from askui.models.shared.settings import MessageSettings
@@ -160,7 +160,7 @@ def locate(
         locator: str | Locator,
         image: ImageSource,
         model_choice: ModelComposition | str,
-    ) -> list[Point]:
+    ) -> PointList:
         serialized_locator = (
             self._locator_serializer.serialize(locator=locator)
             if isinstance(locator, Locator)
diff --git a/src/askui/models/askui/model_router.py b/src/askui/models/askui/model_router.py
index 6998003a..0dc7076c 100644
--- a/src/askui/models/askui/model_router.py
+++ b/src/askui/models/askui/model_router.py
@@ -8,7 +8,7 @@
     ElementNotFoundError,
     ModelNotFoundError,
 )
-from askui.models.models import LocateModel, ModelComposition, ModelName, Point
+from askui.models.models import LocateModel, ModelComposition, ModelName, PointList
 from askui.utils.image_utils import ImageSource
 
 
@@ -18,7 +18,7 @@ def __init__(self, inference_api: AskUiInferenceApi):
 
     def _locate_with_askui_ocr(
         self, screenshot: ImageSource, locator: str | Text
-    ) -> list[Point]:
+    ) -> PointList:
         locator = Text(locator) if isinstance(locator, str) else locator
         return self._inference_api.locate(
             locator, screenshot, model_choice=ModelName.ASKUI__OCR
@@ -30,7 +30,7 @@ def locate(
         locator: str | Locator,
         image: ImageSource,
         model_choice: ModelComposition | str,
-    ) -> list[Point]:
+    ) -> PointList:
         if (
             isinstance(model_choice, ModelComposition)
             or model_choice == ModelName.ASKUI
diff --git a/src/askui/models/huggingface/spaces_api.py b/src/askui/models/huggingface/spaces_api.py
index 81ac7775..eedef8c1 100644
--- a/src/askui/models/huggingface/spaces_api.py
+++ b/src/askui/models/huggingface/spaces_api.py
@@ -10,7 +10,7 @@
 from askui.exceptions import AutomationError
 from askui.locators.locators import Locator
 from askui.locators.serializers import VlmLocatorSerializer
-from askui.models.models import LocateModel, ModelComposition, ModelName, Point
+from askui.models.models import LocateModel, ModelComposition, ModelName, PointList
 from askui.utils.image_utils import ImageSource
 
 
@@ -65,7 +65,7 @@ def locate(
         locator: str | Locator,
         image: ImageSource,
         model_choice: ModelComposition | str,
-    ) -> list[Point]:
+    ) -> PointList:
         """Predict element location using Hugging Face Spaces."""
         if not isinstance(model_choice, str):
             error_msg = "Model composition is not supported for Hugging Face Spaces"
diff --git a/src/askui/models/model_router.py b/src/askui/models/model_router.py
index 9ca6877a..b91be94d 100644
--- a/src/askui/models/model_router.py
+++ b/src/askui/models/model_router.py
@@ -21,7 +21,7 @@
     ModelComposition,
     ModelName,
     ModelRegistry,
-    Point,
+    PointList,
 )
 from askui.models.shared.agent import Agent
 from askui.models.shared.agent_message_param import MessageParam
@@ -212,7 +212,7 @@ def locate(
         screenshot: ImageSource,
         locator: str | Locator,
         model_choice: ModelComposition | str,
-    ) -> list[Point]:
+    ) -> PointList:
         _model_choice = (
             ModelName.ASKUI
             if isinstance(model_choice, ModelComposition)
diff --git a/src/askui/models/models.py b/src/askui/models/models.py
index a89eb6d3..22f7256b 100644
--- a/src/askui/models/models.py
+++ b/src/askui/models/models.py
@@ -147,6 +147,11 @@ def __getitem__(self, index: int) -> ModelDefinition:
 A tuple of two integers representing the coordinates of a point on the screen.
 """
 
+PointList = Annotated[list[Point], Field(min_length=1)]
+"""
+A non-empty list of points representing the coordinates of elements on the screen.
+"""
+
 
 class ActModel(abc.ABC):
     """Abstract base class for models that can execute autonomous actions.
@@ -294,7 +299,7 @@ class LocateModel(abc.ABC):
 
     Example:
         ```python
-        from askui import LocateModel, VisionAgent, Locator, ImageSource, Point
+        from askui import LocateModel, VisionAgent, Locator, ImageSource, PointList
         from askui.models import ModelComposition
 
         class MyLocateModel(LocateModel):
@@ -303,7 +308,7 @@ def locate(
                 locator: str | Locator,
                 image: ImageSource,
                 model_choice: ModelComposition | str,
-            ) -> list[Point]:
+            ) -> PointList:
                 # Implement custom locate logic
                 return [(100, 100)]
 
@@ -318,7 +323,7 @@ def locate(
         locator: str | Locator,
         image: ImageSource,
         model_choice: ModelComposition | str,
-    ) -> list[Point]:
+    ) -> PointList:
         """Find the coordinates of a UI element in an image.
 
         Args:
@@ -329,7 +334,7 @@ def locate(
                 `ModelComposition` for models that support composition
 
         Returns:
-            A tuple of (x, y) coordinates where the element was found
+            A non-empty list of (x, y) coordinates, one per matching element found
         """
         raise NotImplementedError
 
diff --git a/src/askui/models/shared/facade.py b/src/askui/models/shared/facade.py
index cb8b4ec2..655f0c43 100644
--- a/src/askui/models/shared/facade.py
+++ b/src/askui/models/shared/facade.py
@@ -3,7 +3,13 @@
 from typing_extensions import override
 
 from askui.locators.locators import Locator
-from askui.models.models import ActModel, GetModel, LocateModel, ModelComposition, Point
+from askui.models.models import (
+    ActModel,
+    GetModel,
+    LocateModel,
+    ModelComposition,
+    PointList,
+)
 from askui.models.shared.agent_message_param import MessageParam
 from askui.models.shared.agent_on_message_cb import OnMessageCb
 from askui.models.shared.settings import ActSettings
@@ -56,5 +62,5 @@ def locate(
         locator: str | Locator,
         image: ImageSource,
         model_choice: ModelComposition | str,
-    ) -> list[Point]:
+    ) -> PointList:
         return self._locate_model.locate(locator, image, model_choice)
diff --git a/src/askui/models/ui_tars_ep/ui_tars_api.py b/src/askui/models/ui_tars_ep/ui_tars_api.py
index 78e38714..122a82cb 100644
--- a/src/askui/models/ui_tars_ep/ui_tars_api.py
+++ b/src/askui/models/ui_tars_ep/ui_tars_api.py
@@ -11,7 +11,13 @@
 from askui.locators.locators import Locator
 from askui.locators.serializers import VlmLocatorSerializer
 from askui.models.exceptions import ElementNotFoundError, QueryNoResponseError
-from askui.models.models import ActModel, GetModel, LocateModel, ModelComposition, Point
+from askui.models.models import (
+    ActModel,
+    GetModel,
+    LocateModel,
+    ModelComposition,
+    PointList,
+)
 from askui.models.shared.agent_message_param import MessageParam
 from askui.models.shared.agent_on_message_cb import OnMessageCb
 from askui.models.shared.settings import ActSettings
@@ -146,7 +152,7 @@ def locate(
         locator: str | Locator,
         image: ImageSource,
         model_choice: ModelComposition | str,
-    ) -> list[Point]:
+    ) -> PointList:
         if not isinstance(model_choice, str):
             error_msg = "Model composition is not supported for UI-TARS"
             raise NotImplementedError(error_msg)
diff --git a/tests/integration/test_custom_models.py b/tests/integration/test_custom_models.py
index 7347f551..8f49d09b 100644
--- a/tests/integration/test_custom_models.py
+++ b/tests/integration/test_custom_models.py
@@ -11,6 +11,7 @@
     LocateModel,
     ModelRegistry,
     Point,
+    PointList,
     ResponseSchema,
     ResponseSchemaBase,
     VisionAgent,
@@ -93,7 +94,7 @@ def locate(
         locator: str | Locator,
         image: ImageSource,
         model_choice: ModelComposition | str,
-    ) -> list[Point]:
+    ) -> PointList:
         self.locators.append(locator)
         self.images.append(image)
         self.model_choices.append(model_choice)

From 1d1c2ee59ca85574aa82182ef75b88894d9ac920 Mon Sep 17 00:00:00 2001
From: Samir mlika <105347215+mlikasam-askui@users.noreply.github.com>
Date: Fri, 15 Aug 2025 15:33:11 +0200
Subject: [PATCH 3/3] fix return type annotation of locate_with_screenshot

---
 src/askui/agent_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/askui/agent_base.py b/src/askui/agent_base.py
index 656c502f..a270b6f7 100644
--- a/src/askui/agent_base.py
+++ b/src/askui/agent_base.py
@@ -360,7 +360,7 @@ def _locate(
         screenshot: Optional[Img] = None,
         model: ModelComposition | str | None = None,
     ) -> PointList:
-        def locate_with_screenshot() -> Point:
+        def locate_with_screenshot() -> PointList:
             _screenshot = load_image_source(
                 self._agent_os.screenshot() if screenshot is None else screenshot
             )