Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/askui/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""AskUI Vision Agent"""

__version__ = "0.22.3"
__version__ = "0.22.4"

import logging
import os
Expand Down
24 changes: 24 additions & 0 deletions src/askui/android_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,30 @@


class AndroidVisionAgent(AgentBase):
"""
A vision-based agent that can interact with Android devices through computer vision and AI.

This agent can perform various UI interactions on Android devices, such as tapping, typing, swiping, and more.
It uses computer vision models to locate UI elements and execute actions on them.

Args:
reporters (list[Reporter] | None, optional): List of reporter instances for logging and reporting. If `None`, an empty list is used.
model (ModelChoice | ModelComposition | str | None, optional): The default choice or name of the model(s) to be used for vision tasks. Can be overridden by the `model` parameter in the `tap()`, `get()`, `act()` etc. methods.
retry (Retry, optional): The retry instance to use for retrying failed actions. Defaults to `ConfigurableRetry` with exponential backoff. Currently only supported for the `locate()` method.
models (ModelRegistry | None, optional): A registry of models to make available to the `AndroidVisionAgent` so that they can be selected using the `model` parameter of `AndroidVisionAgent` or the `model` parameter of its `tap()`, `get()`, `act()` etc. methods. Entries in the registry override entries in the default model registry.
model_provider (str | None, optional): The model provider to use for vision tasks.

Example:
```python
from askui import AndroidVisionAgent

with AndroidVisionAgent() as agent:
agent.tap("Submit button")
agent.type("Hello World")
agent.act("Open settings menu")
```
"""

@telemetry.record_call(exclude={"model_router", "reporters", "tools"})
@validate_call(config=ConfigDict(arbitrary_types_allowed=True))
def __init__(
Expand Down