Skip to content

Commit b3613cb

Browse files
fix(android): use correct android keys & set device by sn instead of name (#80)
- set device by serial number (sn) instead of name
- optimize system prompt so that the agent executes the plan instead of only proposing it
- refactor changing tools after initialization of agent
- rename Android keys so they can be used directly
- rename `PpadbAgentOSHandler` to `AndroidAgentOsFacade`, as it is generic for all Android handling (scaling of coordinates and reporting)
- add `AndroidVisionAgent` to chat
1 parent c457c06 commit b3613cb

File tree

10 files changed

+317
-265
lines changed

10 files changed

+317
-265
lines changed

src/askui/android_agent.py

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
from askui.models.shared.computer_agent_message_param import MessageParam
1313
from askui.models.shared.tools import ToolCollection
1414
from askui.tools.android.agent_os import ANDROID_KEY
15+
from askui.tools.android.agent_os_facade import AndroidAgentOsFacade
1516
from askui.tools.android.ppadb_agent_os import PpadbAgentOs
16-
from askui.tools.android.ppadb_agent_os_handler import PpadbAgentOSHandler
1717
from askui.tools.android.tools import (
1818
AndroidDragAndDropTool,
1919
AndroidKeyCombinationTool,
@@ -60,21 +60,22 @@ def __init__(
6060
configure_logging(level=log_level)
6161
self.os = PpadbAgentOs()
6262
self._reporter = CompositeReporter(reporters=reporters)
63-
android_os_handler = PpadbAgentOSHandler(self.os, self._reporter)
63+
self._act_agent_os_facade = AndroidAgentOsFacade(self.os, self._reporter)
64+
self.act_tool_collection = ToolCollection(
65+
tools=[
66+
AndroidScreenshotTool(self._act_agent_os_facade),
67+
AndroidTapTool(self._act_agent_os_facade),
68+
AndroidTypeTool(self._act_agent_os_facade),
69+
AndroidDragAndDropTool(self._act_agent_os_facade),
70+
AndroidKeyTapEventTool(self._act_agent_os_facade),
71+
AndroidSwipeTool(self._act_agent_os_facade),
72+
AndroidKeyCombinationTool(self._act_agent_os_facade),
73+
AndroidShellTool(self._act_agent_os_facade),
74+
ExceptionTool(),
75+
]
76+
)
6477
_models = initialize_default_android_model_registry(
65-
tool_collection=ToolCollection(
66-
tools=[
67-
AndroidScreenshotTool(android_os_handler),
68-
AndroidTapTool(android_os_handler),
69-
AndroidTypeTool(android_os_handler),
70-
AndroidDragAndDropTool(android_os_handler),
71-
AndroidKeyTapEventTool(android_os_handler),
72-
AndroidSwipeTool(android_os_handler),
73-
AndroidKeyCombinationTool(android_os_handler),
74-
AndroidShellTool(android_os_handler),
75-
ExceptionTool(),
76-
]
77-
),
78+
tool_collection=self.act_tool_collection,
7879
reporter=self._reporter,
7980
)
8081
_models.update(models or {})
@@ -421,8 +422,8 @@ def key_tap(
421422
from askui import AndroidVisionAgent
422423
423424
with AndroidVisionAgent() as agent:
424-
agent.key_tap("KEYCODE_HOME") # Taps the home key
425-
agent.key_tap("KEYCODE_BACK") # Taps the back key
425+
agent.key_tap("HOME") # Taps the home key
426+
agent.key_tap("BACK") # Taps the back key
426427
```
427428
"""
428429
self.os.key_tap(key)
@@ -431,7 +432,7 @@ def key_tap(
431432
@validate_call
432433
def key_combination(
433434
self,
434-
keys: Annotated[list[ANDROID_KEY], Field(min_length=1)],
435+
keys: Annotated[list[ANDROID_KEY], Field(min_length=2)],
435436
duration_in_ms: int = 100,
436437
) -> None:
437438
"""
@@ -446,8 +447,8 @@ def key_combination(
446447
from askui import AndroidVisionAgent
447448
448449
with AndroidVisionAgent() as agent:
449-
agent.key_combination(["KEYCODE_HOME", "KEYCODE_BACK"]) # Taps the home key and then the back key
450-
agent.key_combination(["KEYCODE_HOME", "KEYCODE_BACK"], duration_in_ms=200) # Taps the home key and then the back key with a 200ms delay
450+
agent.key_combination(["HOME", "BACK"]) # Taps the home key and then the back key
451+
agent.key_combination(["HOME", "BACK"], duration_in_ms=200) # Taps the home key and then the back key for 200ms.
451452
```
452453
"""
453454
self.os.key_combination(keys, duration_in_ms)
@@ -536,27 +537,27 @@ def swipe(
536537
self.os.swipe(x1, y1, x2, y2, duration_in_ms)
537538

538539
@telemetry.record_call(
539-
exclude={"device_name"},
540+
exclude={"device_sn"},
540541
)
541542
@validate_call
542-
def set_device_by_name(
543+
def set_device_by_serial_number(
543544
self,
544-
device_name: str,
545+
device_sn: str,
545546
) -> None:
546547
"""
547548
Sets the active device for screen interactions by serial number.
548549
549550
Args:
550-
device_name (str): The name of the device to set as active.
551+
device_sn (str): The serial number of the device to set as active.
551552
552553
Example:
553554
```python
554555
from askui import AndroidVisionAgent
555556
556557
with AndroidVisionAgent() as agent:
557-
agent.set_device_by_name("Pixel 6") # Sets the active device to the Pixel 6
558+
agent.set_device_by_serial_number("emulator-5554") # Sets the active device to the one with serial number "emulator-5554" (as listed by `adb devices`)
558559
"""
559-
self.os.set_device_by_name(device_name)
560+
self.os.set_device_by_serial_number(device_sn)
560561

561562
@telemetry.record_call(exclude={"goal", "on_message"})
562563
@validate_call

src/askui/models/shared/android_agent.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
99
<CORE PRINCIPLES>
1010
* Autonomy: Operate independently and make informed decisions without requiring user input.
11+
* Never ask for other tasks to be done, only do the task you are given.
1112
* Reliability: Ensure actions are repeatable and maintain system stability.
1213
* Efficiency: Optimize operations to minimize latency and resource usage.
1314
* Safety: Always verify actions before execution, even with full system access.

src/askui/models/shared/tools.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,16 @@ def to_params(
110110
) -> list[BetaToolUnionParam]:
111111
return [tool.to_params() for tool in self._tools]
112112

113+
def append_tool(self, tool: Tool) -> None:
114+
"""Append a tool to the collection."""
115+
self._tools.append(tool)
116+
self._tool_map[tool.to_params()["name"]] = tool
117+
118+
def reset_tools(self, tools: list[Tool]) -> None:
119+
"""Reset the tools in the collection with new tools."""
120+
self._tools = tools
121+
self._tool_map = {tool.to_params()["name"]: tool for tool in tools}
122+
113123
def run(
114124
self, tool_use_block_params: list[ToolUseBlockParam]
115125
) -> list[ContentBlockParam]:

0 commit comments

Comments
 (0)