From bb597b27bc3adf17ba8a440e6474899bf3ce8197 Mon Sep 17 00:00:00 2001 From: Adrian Stritzinger Date: Thu, 26 Jun 2025 12:27:05 +0200 Subject: [PATCH 1/2] feat(agent): add support for locator in `type()` --- src/askui/agent.py | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/src/askui/agent.py b/src/askui/agent.py index e1edac58..14372699 100644 --- a/src/askui/agent.py +++ b/src/askui/agent.py @@ -167,9 +167,18 @@ def click( msg += f" {repeat}x times" if locator is not None: msg += f" on {locator}" + logger.debug("VisionAgent received instruction to %s", msg) self._reporter.add_message("User", msg) + self._click(locator, button, repeat, model) + + def _click( + self, + locator: Optional[str | Locator], + button: Literal["left", "middle", "right"], + repeat: int, + model: ModelComposition | str | None, + ) -> None: if locator is not None: - logger.debug("VisionAgent received instruction to click on %s", locator) self._mouse_move(locator, model) self.tools.os.click(button, repeat) @@ -294,30 +303,38 @@ def mouse_scroll( self._reporter.add_message("User", f'mouse_scroll: "{x}", "{y}"') self.tools.os.mouse_scroll(x, y) - @telemetry.record_call(exclude={"text"}) - @validate_call + @telemetry.record_call(exclude={"text", "locator"}) + @validate_call(config=ConfigDict(arbitrary_types_allowed=True)) def type( self, text: Annotated[str, Field(min_length=1)], + locator: str | Locator | None = None, + model: ModelComposition | str | None = None, ) -> None: """ - Types the specified text as if it were entered on a keyboard. + Types the specified text as if it were entered on a keyboard, optionally triple clicking on the UI element identified to give it focus and select the current text (in multi-line inputs like textareas the current line or paragraph) by the provided locator before typing. Args: text (str): The text to be typed. Must be at least `1` character long. + locator (str | Locator | None, optional): The identifier or description of the element (e.g., input field) to type into. If `None`, types at the current focus. + model (ModelComposition | str | None, optional): The composition or name of the model(s) to be used for locating the element to type into using the `locator`. Example: ```python from askui import VisionAgent with VisionAgent() as agent: - agent.type("Hello, world!") # Types "Hello, world!" - agent.type("user@example.com") # Types an email address - agent.type("password123") # Types a password + agent.type("Hello, world!") # Types "Hello, world!" at current focus + agent.type("user@example.com", locator="Email") # Clicks on "Email" input, then types + agent.type("password123", locator="Password field", model="custom_model") # Uses specific model ``` """ - self._reporter.add_message("User", f'type: "{text}"') - logger.debug("VisionAgent received instruction to type '%s'", text) + msg = f'type "{text}"' + if locator is not None: + msg += f" into {locator}" + self._click(locator=locator, button="left", repeat=3, model=model) + logger.debug("VisionAgent received instruction to %s", msg) + self._reporter.add_message("User", msg) self.tools.os.type(text) @overload From b73cb44957830b92c9cf663c421b25ed866ea7af Mon Sep 17 00:00:00 2001 From: Adrian Stritzinger Date: Thu, 26 Jun 2025 13:34:44 +0200 Subject: [PATCH 2/2] feat(agent): add `clear` parameter to `type()` --- src/askui/agent.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/askui/agent.py b/src/askui/agent.py index 14372699..6aa7f4dd 100644 --- a/src/askui/agent.py +++ b/src/askui/agent.py @@ -310,14 +310,21 @@ def type( text: Annotated[str, Field(min_length=1)], locator: str | Locator | None = None, model: ModelComposition | str | None = None, + clear: bool = True, ) -> None: """ - Types the specified text as if it were entered on a keyboard, optionally triple clicking on the UI element identified to give it focus and select the current text (in multi-line inputs like textareas the current line or paragraph) by the provided locator before typing. + Types the specified text as if it were entered on a keyboard. + + If `locator` is provided, it will first click on the element to give it focus before typing. + If `clear` is `True` (default), it will triple click on the element to select the current text (in multi-line inputs like textareas the current line or paragraph) before typing. + + **IMPORTANT:** `clear` only works if a `locator` is provided. Args: text (str): The text to be typed. Must be at least `1` character long. locator (str | Locator | None, optional): The identifier or description of the element (e.g., input field) to type into. If `None`, types at the current focus. - model (ModelComposition | str | None, optional): The composition or name of the model(s) to be used for locating the element to type into using the `locator`. + model (ModelComposition | str | None, optional): The composition or name of the model(s) to be used for locating the element, i.e., input field, to type into using the `locator`. + clear (bool, optional): Whether to triple click on the element to give it focus and select the current text before typing. Defaults to `True`. Example: ```python @@ -327,12 +334,18 @@ def type( agent.type("Hello, world!") # Types "Hello, world!" at current focus agent.type("user@example.com", locator="Email") # Clicks on "Email" input, then types agent.type("password123", locator="Password field", model="custom_model") # Uses specific model + agent.type("Hello, world!", locator="Textarea", clear=False) # Types "Hello, world!" into textarea without clearing ``` """ msg = f'type "{text}"' if locator is not None: msg += f" into {locator}" - self._click(locator=locator, button="left", repeat=3, model=model) + if clear: + repeat = 3 + msg += " clearing the current content (line/paragraph) of input field" + else: + repeat = 1 + self._click(locator=locator, button="left", repeat=repeat, model=model) logger.debug("VisionAgent received instruction to %s", msg) self._reporter.add_message("User", msg) self.tools.os.type(text)