Skip to content

Commit 282cd7e

Browse files
committed
feat/web-support: add web agent
1 parent fe9e831 commit 282cd7e

File tree

14 files changed

+1089
-266
lines changed

14 files changed

+1089
-266
lines changed

pdm.lock

Lines changed: 89 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@ dependencies = [
2424
"httpx>=0.28.1",
2525
"fastmcp>=2.3.4",
2626
"pure-python-adb>=0.3.0.dev0",
27+
"playwright>=1.0.0",
2728
]
2829
requires-python = ">=3.10"
2930
readme = "README.md"
@@ -85,6 +86,7 @@ test = [
8586
"types-pyperclip>=1.8.2.20240311",
8687
"pytest-timeout>=2.4.0",
8788
"types-pynput>=1.8.1.20250318",
89+
"playwright>=1.41.0",
8890
]
8991

9092

src/askui/agent.py

Lines changed: 5 additions & 127 deletions
Original file line number | Diff line number | Diff line change
@@ -28,136 +28,12 @@
2828
from .tools import AgentToolbox, ModifierKey, PcKey
2929
from .tools.askui import AskUiControllerClient
3030

31-
_PC_KEY = [
32-
"backspace",
33-
"delete",
34-
"enter",
35-
"tab",
36-
"escape",
37-
"up",
38-
"down",
39-
"right",
40-
"left",
41-
"home",
42-
"end",
43-
"pageup",
44-
"pagedown",
45-
"f1",
46-
"f2",
47-
"f3",
48-
"f4",
49-
"f5",
50-
"f6",
51-
"f7",
52-
"f8",
53-
"f9",
54-
"f10",
55-
"f11",
56-
"f12",
57-
"space",
58-
"0",
59-
"1",
60-
"2",
61-
"3",
62-
"4",
63-
"5",
64-
"6",
65-
"7",
66-
"8",
67-
"9",
68-
"a",
69-
"b",
70-
"c",
71-
"d",
72-
"e",
73-
"f",
74-
"g",
75-
"h",
76-
"i",
77-
"j",
78-
"k",
79-
"l",
80-
"m",
81-
"n",
82-
"o",
83-
"p",
84-
"q",
85-
"r",
86-
"s",
87-
"t",
88-
"u",
89-
"v",
90-
"w",
91-
"x",
92-
"y",
93-
"z",
94-
"A",
95-
"B",
96-
"C",
97-
"D",
98-
"E",
99-
"F",
100-
"G",
101-
"H",
102-
"I",
103-
"J",
104-
"K",
105-
"L",
106-
"M",
107-
"N",
108-
"O",
109-
"P",
110-
"Q",
111-
"R",
112-
"S",
113-
"T",
114-
"U",
115-
"V",
116-
"W",
117-
"X",
118-
"Y",
119-
"Z",
120-
"!",
121-
'"',
122-
"#",
123-
"$",
124-
"%",
125-
"&",
126-
"'",
127-
"(",
128-
")",
129-
"*",
130-
"+",
131-
",",
132-
"-",
133-
".",
134-
"/",
135-
":",
136-
";",
137-
"<",
138-
"=",
139-
">",
140-
"?",
141-
"@",
142-
"[",
143-
"\\",
144-
"]",
145-
"^",
146-
"_",
147-
"`",
148-
"{",
149-
"|",
150-
"}",
151-
"~",
152-
]
153-
15431
_SYSTEM_PROMPT = f"""<SYSTEM_CAPABILITY>
15532
* You are utilising a {sys.platform} machine using {platform.machine()} architecture with internet access.
15633
* When asked to perform web tasks try to open the browser (firefox, chrome, safari, ...) if not already open. Often you can find the browser icons in the toolbars of the operating systems.
15734
* When viewing a page it can be helpful to zoom out so that you can see everything on the page. Either that, or make sure you scroll down to see everything before deciding something isn't available.
15835
* When using your computer function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request.
159-
* Valid keyboard keys available are {", ".join(_PC_KEY)}
160-
* The current date is {datetime.now(timezone.utc).strftime("%A, %B %d, %Y").replace(" 0", " ")}.
36+
* The current date and time is {datetime.now(timezone.utc).strftime("%A, %B %d, %Y %H:%M:%S %z")}.
16137
</SYSTEM_CAPABILITY>
16238
16339
<IMPORTANT>
@@ -201,7 +77,7 @@ class VisionAgent(AgentBase):
20177
```
20278
"""
20379

204-
@telemetry.record_call(exclude={"model_router", "reporters", "tools"})
80+
@telemetry.record_call(exclude={"model_router", "reporters", "tools", "act_tools"})
20581
@validate_call(config=ConfigDict(arbitrary_types_allowed=True))
20682
def __init__(
20783
self,
@@ -212,6 +88,7 @@ def __init__(
21288
model: ModelChoice | ModelComposition | str | None = None,
21389
retry: Retry | None = None,
21490
models: ModelRegistry | None = None,
91+
act_tools: list[Tool] | None = None,
21592
) -> None:
21693
reporter = CompositeReporter(reporters=reporters)
21794
self.tools = tools or AgentToolbox(
@@ -228,7 +105,8 @@ def __init__(
228105
models=models,
229106
tools=[
230107
ExceptionTool(),
231-
],
108+
]
109+
+ (act_tools or []),
232110
settings=_SETTINGS,
233111
agent_os=self.tools.os,
234112
)

src/askui/models/askui/inference_api.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -206,7 +206,7 @@ def create_message(
206206
}
207207
response = self._post(
208208
"/act/inference",
209-
json={k: v for k, v in json.items() if v is not NOT_GIVEN},
209+
json={k: v for k, v in json.items() if not isinstance(v, NotGiven)},
210210
timeout=300.0,
211211
)
212212
return MessageParam.model_validate_json(response.text)

src/askui/tools/agent_os.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -4,11 +4,15 @@
44
from PIL import Image
55
from pydantic import BaseModel
66

7-
ModifierKey = Literal["command", "alt", "control", "shift", "right_shift"]
7+
ModifierKey = Literal[
8+
"command",
9+
"alt",
10+
"control",
11+
"shift",
12+
"right_shift",
13+
]
814
"""Modifier keys for keyboard actions."""
915

10-
ModifierKeys: list[ModifierKey] = ["command", "alt", "control", "shift", "right_shift"]
11-
1216
PcKey = Literal[
1317
"backspace",
1418
"delete",
@@ -308,7 +312,6 @@ def keyboard_tap(
308312
"""
309313
raise NotImplementedError
310314

311-
@abstractmethod
312315
def set_display(self, display: int = 1) -> None:
313316
"""
314317
Sets the active display for screen interactions.
@@ -319,7 +322,6 @@ def set_display(self, display: int = 1) -> None:
319322
"""
320323
raise NotImplementedError
321324

322-
@abstractmethod
323325
def run_command(self, command: str, timeout_ms: int = 30000) -> None:
324326
"""
325327
Executes a shell command.

0 commit comments

Comments
 (0)