From 90ffd0d3594f472c655321af6aa3311dd8906bde Mon Sep 17 00:00:00 2001 From: Adrian Stritzinger Date: Mon, 7 Jul 2025 11:42:33 +0200 Subject: [PATCH 1/5] feat/web-support: add web agent --- pdm.lock | 90 ++++- pyproject.toml | 3 + src/askui/agent.py | 134 +------ src/askui/tools/agent_os.py | 12 +- src/askui/tools/computer.py | 352 +++++++++++++------ src/askui/tools/playwright/__init__.py | 0 src/askui/tools/playwright/agent_os.py | 462 +++++++++++++++++++++++++ src/askui/tools/playwright/tools.py | 140 ++++++++ src/askui/web_agent.py | 98 ++++++ src/chat/api/assistants/seeds.py | 13 + src/chat/api/assistants/service.py | 11 +- src/chat/api/runs/runner/runner.py | 38 +- 12 files changed, 1090 insertions(+), 263 deletions(-) create mode 100644 src/askui/tools/playwright/__init__.py create mode 100644 src/askui/tools/playwright/agent_os.py create mode 100644 src/askui/tools/playwright/tools.py create mode 100644 src/askui/web_agent.py diff --git a/pdm.lock b/pdm.lock index 86256aa0..56b77875 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "chat", "pynput", "test"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:9d38242005523af1ed152c8b64eaf12264152a0ce121ca05efb3a21014c5798d" +content_hash = "sha256:3090c84eb2439f19cf4e2d303fcc89cf6852fa9739e84bd0e7d0cfe9e15a314b" [[metadata.targets]] requires_python = ">=3.10" @@ -424,6 +424,59 @@ files = [ {file = "gradio_client-1.8.0.tar.gz", hash = "sha256:a58c520c73fa7ff8bef54e41b19df2cd9071fd9d0cc00475eb397842baed19c8"}, ] +[[package]] +name = "greenlet" +version = "3.2.3" +requires_python = ">=3.9" +summary = "Lightweight in-process concurrent programming" +groups = ["default", "test"] +files = [ + {file = "greenlet-3.2.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:1afd685acd5597349ee6d7a88a8bec83ce13c106ac78c196ee9dde7c04fe87be"}, + {file = "greenlet-3.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:761917cac215c61e9dc7324b2606107b3b292a8349bdebb31503ab4de3f559ac"}, + {file = "greenlet-3.2.3-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:a433dbc54e4a37e4fff90ef34f25a8c00aed99b06856f0119dcf09fbafa16392"}, + {file = "greenlet-3.2.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:72e77ed69312bab0434d7292316d5afd6896192ac4327d44f3d613ecb85b037c"}, + {file = "greenlet-3.2.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:68671180e3849b963649254a882cd544a3c75bfcd2c527346ad8bb53494444db"}, + {file = "greenlet-3.2.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:49c8cfb18fb419b3d08e011228ef8a25882397f3a859b9fe1436946140b6756b"}, + {file = "greenlet-3.2.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:efc6dc8a792243c31f2f5674b670b3a95d46fa1c6a912b8e310d6f542e7b0712"}, + {file = "greenlet-3.2.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:731e154aba8e757aedd0781d4b240f1225b075b4409f1bb83b05ff410582cf00"}, + {file = "greenlet-3.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:96c20252c2f792defe9a115d3287e14811036d51e78b3aaddbee23b69b216302"}, + {file = "greenlet-3.2.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:784ae58bba89fa1fa5733d170d42486580cab9decda3484779f4759345b29822"}, + {file = "greenlet-3.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0921ac4ea42a5315d3446120ad48f90c3a6b9bb93dd9b3cf4e4d84a66e42de83"}, + {file = "greenlet-3.2.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:d2971d93bb99e05f8c2c0c2f4aa9484a18d98c4c3bd3c62b65b7e6ae33dfcfaf"}, + {file = "greenlet-3.2.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c667c0bf9d406b77a15c924ef3285e1e05250948001220368e039b6aa5b5034b"}, + {file = "greenlet-3.2.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:592c12fb1165be74592f5de0d70f82bc5ba552ac44800d632214b76089945147"}, + {file = "greenlet-3.2.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:29e184536ba333003540790ba29829ac14bb645514fbd7e32af331e8202a62a5"}, + {file = "greenlet-3.2.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:93c0bb79844a367782ec4f429d07589417052e621aa39a5ac1fb99c5aa308edc"}, + {file = "greenlet-3.2.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:751261fc5ad7b6705f5f76726567375bb2104a059454e0226e1eef6c756748ba"}, + {file = "greenlet-3.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:83a8761c75312361aa2b5b903b79da97f13f556164a7dd2d5448655425bd4c34"}, + {file = "greenlet-3.2.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:25ad29caed5783d4bd7a85c9251c651696164622494c00802a139c00d639242d"}, + {file = "greenlet-3.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:88cd97bf37fe24a6710ec6a3a7799f3f81d9cd33317dcf565ff9950c83f55e0b"}, + {file = "greenlet-3.2.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:baeedccca94880d2f5666b4fa16fc20ef50ba1ee353ee2d7092b383a243b0b0d"}, + {file = "greenlet-3.2.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:be52af4b6292baecfa0f397f3edb3c6092ce071b499dd6fe292c9ac9f2c8f264"}, + {file = "greenlet-3.2.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0cc73378150b8b78b0c9fe2ce56e166695e67478550769536a6742dca3651688"}, + {file = "greenlet-3.2.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:706d016a03e78df129f68c4c9b4c4f963f7d73534e48a24f5f5a7101ed13dbbb"}, + {file = "greenlet-3.2.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:419e60f80709510c343c57b4bb5a339d8767bf9aef9b8ce43f4f143240f88b7c"}, + {file = "greenlet-3.2.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:93d48533fade144203816783373f27a97e4193177ebaaf0fc396db19e5d61163"}, + {file = 
"greenlet-3.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:7454d37c740bb27bdeddfc3f358f26956a07d5220818ceb467a483197d84f849"}, + {file = "greenlet-3.2.3-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:500b8689aa9dd1ab26872a34084503aeddefcb438e2e7317b89b11eaea1901ad"}, + {file = "greenlet-3.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a07d3472c2a93117af3b0136f246b2833fdc0b542d4a9799ae5f41c28323faef"}, + {file = "greenlet-3.2.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:8704b3768d2f51150626962f4b9a9e4a17d2e37c8a8d9867bbd9fa4eb938d3b3"}, + {file = "greenlet-3.2.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:5035d77a27b7c62db6cf41cf786cfe2242644a7a337a0e155c80960598baab95"}, + {file = "greenlet-3.2.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2d8aa5423cd4a396792f6d4580f88bdc6efcb9205891c9d40d20f6e670992efb"}, + {file = "greenlet-3.2.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2c724620a101f8170065d7dded3f962a2aea7a7dae133a009cada42847e04a7b"}, + {file = "greenlet-3.2.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:873abe55f134c48e1f2a6f53f7d1419192a3d1a4e873bace00499a4e45ea6af0"}, + {file = "greenlet-3.2.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:024571bbce5f2c1cfff08bf3fbaa43bbc7444f580ae13b0099e95d0e6e67ed36"}, + {file = "greenlet-3.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:5195fb1e75e592dd04ce79881c8a22becdfa3e6f500e7feb059b1e6fdd54d3e3"}, + {file = "greenlet-3.2.3-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:3d04332dddb10b4a211b68111dabaee2e1a073663d117dc10247b5b1642bac86"}, + {file = "greenlet-3.2.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8186162dffde068a465deab08fc72c767196895c39db26ab1c17c0b77a6d8b97"}, + {file = "greenlet-3.2.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = 
"sha256:f4bfbaa6096b1b7a200024784217defedf46a07c2eee1a498e94a1b5f8ec5728"}, + {file = "greenlet-3.2.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:ed6cfa9200484d234d8394c70f5492f144b20d4533f69262d530a1a082f6ee9a"}, + {file = "greenlet-3.2.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:02b0df6f63cd15012bed5401b47829cfd2e97052dc89da3cfaf2c779124eb892"}, + {file = "greenlet-3.2.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:86c2d68e87107c1792e2e8d5399acec2487a4e993ab76c792408e59394d52141"}, + {file = "greenlet-3.2.3-cp314-cp314-win_amd64.whl", hash = "sha256:8c47aae8fbbfcf82cc13327ae802ba13c9c36753b67e760023fd116bc124a62a"}, + {file = "greenlet-3.2.3.tar.gz", hash = "sha256:8b0dd8ae4c0d6f5e54ee55ba935eeb3d735a9b58a8a1e5b5cbab64e01a39f365"}, +] + [[package]] name = "grpc-stubs" version = "1.53.0.6" @@ -1001,6 +1054,27 @@ files = [ {file = "pillow-11.1.0.tar.gz", hash = "sha256:368da70808b36d73b4b390a8ffac11069f8a5c85f29eff1f1b01bcf3ef5b2a20"}, ] +[[package]] +name = "playwright" +version = "1.52.0" +requires_python = ">=3.9" +summary = "A high-level API to automate web browsers" +groups = ["default", "test"] +dependencies = [ + "greenlet<4.0.0,>=3.1.1", + "pyee<14,>=13", +] +files = [ + {file = "playwright-1.52.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:19b2cb9d4794062008a635a99bd135b03ebb782d460f96534a91cb583f549512"}, + {file = "playwright-1.52.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:0797c0479cbdc99607412a3c486a3a2ec9ddc77ac461259fd2878c975bcbb94a"}, + {file = "playwright-1.52.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:7223960b7dd7ddeec1ba378c302d1d09733b8dac438f492e9854c85d3ca7144f"}, + {file = "playwright-1.52.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:d010124d24a321e0489a8c0d38a3971a7ca7656becea7656c9376bfea7f916d4"}, + {file = "playwright-1.52.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:4173e453c43180acc60fd77ffe1ebee8d0efbfd9986c03267007b9c3845415af"}, + {file = "playwright-1.52.0-py3-none-win32.whl", hash = "sha256:cd0bdf92df99db6237a99f828e80a6a50db6180ef8d5352fc9495df2c92f9971"}, + {file = "playwright-1.52.0-py3-none-win_amd64.whl", hash = "sha256:dcbf75101eba3066b7521c6519de58721ea44379eb17a0dafa94f9f1b17f59e4"}, + {file = "playwright-1.52.0-py3-none-win_arm64.whl", hash = "sha256:9d0085b8de513de5fb50669f8e6677f0252ef95a9a1d2d23ccee9638e71e65cb"}, +] + [[package]] name = "pluggy" version = "1.5.0" @@ -1172,6 +1246,20 @@ files = [ {file = "pydantic_settings-2.9.1.tar.gz", hash = "sha256:c509bf79d27563add44e8446233359004ed85066cd096d8b510f715e6ef5d268"}, ] +[[package]] +name = "pyee" +version = "13.0.0" +requires_python = ">=3.8" +summary = "A rough port of Node.js's EventEmitter to Python with a few tricks of its own" +groups = ["default", "test"] +dependencies = [ + "typing-extensions", +] +files = [ + {file = "pyee-13.0.0-py3-none-any.whl", hash = "sha256:48195a3cddb3b1515ce0695ed76036b5ccc2ef3a9f963ff9f77aec0139845498"}, + {file = "pyee-13.0.0.tar.gz", hash = "sha256:b391e3c5a434d1f5118a25615001dbc8f669cf410ab67d04c4d4e07c55481c37"}, +] + [[package]] name = "pygments" version = "2.19.1" diff --git a/pyproject.toml b/pyproject.toml index 9fe2e7f8..bf9a71d6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ dependencies = [ "httpx>=0.28.1", "fastmcp>=2.3.4", "pure-python-adb>=0.3.0.dev0", + "playwright>=1.0.0", ] requires-python = ">=3.10" readme = "README.md" @@ -85,6 +86,7 @@ test = [ "types-pyperclip>=1.8.2.20240311", "pytest-timeout>=2.4.0", "types-pynput>=1.8.1.20250318", + "playwright>=1.41.0", ] @@ -195,6 +197,7 @@ dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" [tool.ruff.lint.per-file-ignores] "src/askui/agent.py" = ["E501"] "src/askui/android_agent.py" = ["E501"] +"src/askui/web_agent.py" = ["E501"] "src/askui/models/shared/android_agent.py" = ["E501"] "src/askui/chat/*" = ["E501", "F401", 
"F403"] "src/askui/tools/askui/askui_workspaces/*" = ["ALL"] diff --git a/src/askui/agent.py b/src/askui/agent.py index 582b2c3d..30dbce32 100644 --- a/src/askui/agent.py +++ b/src/askui/agent.py @@ -28,136 +28,12 @@ from .tools import AgentToolbox, ModifierKey, PcKey from .tools.askui import AskUiControllerClient -_PC_KEY = [ - "backspace", - "delete", - "enter", - "tab", - "escape", - "up", - "down", - "right", - "left", - "home", - "end", - "pageup", - "pagedown", - "f1", - "f2", - "f3", - "f4", - "f5", - "f6", - "f7", - "f8", - "f9", - "f10", - "f11", - "f12", - "space", - "0", - "1", - "2", - "3", - "4", - "5", - "6", - "7", - "8", - "9", - "a", - "b", - "c", - "d", - "e", - "f", - "g", - "h", - "i", - "j", - "k", - "l", - "m", - "n", - "o", - "p", - "q", - "r", - "s", - "t", - "u", - "v", - "w", - "x", - "y", - "z", - "A", - "B", - "C", - "D", - "E", - "F", - "G", - "H", - "I", - "J", - "K", - "L", - "M", - "N", - "O", - "P", - "Q", - "R", - "S", - "T", - "U", - "V", - "W", - "X", - "Y", - "Z", - "!", - '"', - "#", - "$", - "%", - "&", - "'", - "(", - ")", - "*", - "+", - ",", - "-", - ".", - "/", - ":", - ";", - "<", - "=", - ">", - "?", - "@", - "[", - "\\", - "]", - "^", - "_", - "`", - "{", - "|", - "}", - "~", -] - _SYSTEM_PROMPT = f""" * You are utilising a {sys.platform} machine using {platform.machine()} architecture with internet access. * When asked to perform web tasks try to open the browser (firefox, chrome, safari, ...) if not already open. Often you can find the browser icons in the toolbars of the operating systems. * When viewing a page it can be helpful to zoom out so that you can see everything on the page. Either that, or make sure you scroll down to see everything before deciding something isn't available. -* When using your computer function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request. 
-* Valid keyboard keys available are {", ".join(_PC_KEY)} -* The current date is {datetime.now(timezone.utc).strftime("%A, %B %d, %Y").replace(" 0", " ")}. +* When using your function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request. +* The current date and time is {datetime.now(timezone.utc).strftime("%A, %B %d, %Y %H:%M:%S %z")}. @@ -211,7 +87,7 @@ class VisionAgent(AgentBase): ``` """ - @telemetry.record_call(exclude={"model_router", "reporters", "tools"}) + @telemetry.record_call(exclude={"model_router", "reporters", "tools", "act_tools"}) @validate_call(config=ConfigDict(arbitrary_types_allowed=True)) def __init__( self, @@ -222,6 +98,7 @@ def __init__( model: ModelChoice | ModelComposition | str | None = None, retry: Retry | None = None, models: ModelRegistry | None = None, + act_tools: list[Tool] | None = None, ) -> None: reporter = CompositeReporter(reporters=reporters) self.tools = tools or AgentToolbox( @@ -238,7 +115,8 @@ def __init__( models=models, tools=[ ExceptionTool(), - ], + ] + + (act_tools or []), agent_os=self.tools.os, ) diff --git a/src/askui/tools/agent_os.py b/src/askui/tools/agent_os.py index d0e36af9..bd845130 100644 --- a/src/askui/tools/agent_os.py +++ b/src/askui/tools/agent_os.py @@ -4,11 +4,15 @@ from PIL import Image from pydantic import BaseModel -ModifierKey = Literal["command", "alt", "control", "shift", "right_shift"] +ModifierKey = Literal[ + "command", + "alt", + "control", + "shift", + "right_shift", +] """Modifier keys for keyboard actions.""" -ModifierKeys: list[ModifierKey] = ["command", "alt", "control", "shift", "right_shift"] - PcKey = Literal[ "backspace", "delete", @@ -308,7 +312,6 @@ def keyboard_tap( """ raise NotImplementedError - @abstractmethod def set_display(self, display: int = 1) -> None: """ Sets the active display for screen interactions. 
@@ -319,7 +322,6 @@ def set_display(self, display: int = 1) -> None: """ raise NotImplementedError - @abstractmethod def run_command(self, command: str, timeout_ms: int = 30000) -> None: """ Executes a shell command. diff --git a/src/askui/tools/computer.py b/src/askui/tools/computer.py index 780eca1b..8289b591 100644 --- a/src/askui/tools/computer.py +++ b/src/askui/tools/computer.py @@ -1,5 +1,8 @@ +import sys +import time from abc import ABC -from typing import Annotated, Literal, TypedDict, get_args +from dataclasses import dataclass +from typing import Annotated, Literal, TypedDict, cast, get_args from anthropic.types.beta import ( BetaToolComputerUse20241022Param, @@ -7,102 +10,167 @@ ) from PIL import Image from pydantic import Field, validate_call -from typing_extensions import override +from typing_extensions import Self, override -from askui.tools.agent_os import AgentOs, PcKey -from askui.utils.dict_utils import IdentityDefaultDict +from askui.tools.agent_os import AgentOs, ModifierKey, PcKey from askui.utils.image_utils import scale_coordinates_back, scale_image_with_padding from ..models.shared.tools import InputSchema, Tool Action20241022 = Literal[ + "cursor_position", + "double_click", "key", - "type", - "mouse_move", "left_click", "left_click_drag", - "right_click", "middle_click", - "double_click", + "mouse_move", + "right_click", "screenshot", - "cursor_position", + "type", ] Action20250124 = ( Action20241022 | Literal[ + "hold_key", "left_mouse_down", "left_mouse_up", "scroll", - "hold_key", - "wait", "triple_click", + "wait", ] ) ScrollDirection = Literal["up", "down", "left", "right"] -KeysToMap = Literal[ - "BackSpace", - "Delete", - "Return", - "Enter", - "Tab", - "Escape", - "Up", - "Down", - "Right", - "Left", - "Home", - "End", - "Page_Up", - "Page_Down", - "F1", - "F2", - "F3", - "F4", - "F5", - "F6", - "F7", - "F8", - "F9", - "F10", - "F11", - "F12", -] +XDOTOOL_TO_MODIFIER_KEY_MAP: dict[str, ModifierKey] = { + # Aliases + "alt": "alt", 
+ "ctrl": "command" if sys.platform == "darwin" else "control", + "cmd": "command", + "shift": "shift", + "super": "command", + "meta": "command", + # Real keys + "Control_L": "control", + "Control_R": "control", + "Shift_L": "shift", + "Shift_R": "right_shift", + "Alt_L": "alt", + "Alt_R": "alt", + "Super_L": "command", + "Super_R": "command", + "Meta_L": "command", + "Meta_R": "command", +} + +XDOTOOL_TO_PC_KEY_MAP: dict[str, PcKey] = { + "space": "space", + # Navigation and control + "BackSpace": "backspace", + "Delete": "delete", + "Return": "enter", + "Tab": "tab", + "Escape": "escape", + "Up": "up", + "Down": "down", + "Right": "right", + "Left": "left", + "Home": "home", + "End": "end", + "Page_Up": "pageup", + "Page_Down": "pagedown", + # Function keys + **{f"F{i}": cast("PcKey", f"f{i}") for i in range(1, 13)}, + # Symbols + "exclam": "!", + "quotedbl": '"', + "numbersign": "#", + "dollar": "$", + "percent": "%", + "ampersand": "&", + "apostrophe": "'", + "parenleft": "(", + "parenright": ")", + "asterisk": "*", + "plus": "+", + "comma": ",", + "minus": "-", + "period": ".", + "slash": "/", + "colon": ":", + "semicolon": ";", + "less": "<", + "equal": "=", + "greater": ">", + "question": "?", + "at": "@", + "bracketleft": "[", + "backslash": "\\", + "bracketright": "]", + "asciicircum": "^", + "underscore": "_", + "grave": "`", + "braceleft": "{", + "bar": "|", + "braceright": "}", + "asciitilde": "~", + # Digits and letters + **{ + ch: cast("PcKey", ch) + for ch in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + }, +} + +XDOTOOL_TO_KEY_MAP = XDOTOOL_TO_MODIFIER_KEY_MAP | XDOTOOL_TO_PC_KEY_MAP + +RELATIVE_SCROLL_FACTOR = 0.1 +""" +The factor by which the scroll amount is multiplied together with the real screen +resolution to get the actual scroll amount. Represents the relative height/width +of the screen (e.g., 0.1 means that 1 scroll amount equals 10% of the screen height +or width) that equals 1 scroll amount. 
+ +Example of how the scroll amount is calculated: +- real screen resolution: 1920x1080 +- scroll amount: 1 +- relative scroll factor: 0.1 +- actual scroll amount: 1 * 0.1 * 1920 = 192 or 1 * 0.1 * 1080 = 108 +""" + + +@dataclass +class KeyboardParam: + key: PcKey | ModifierKey + modifier_keys: list[ModifierKey] | None = None + + @classmethod + def from_xdotool(cls, keystroke: str) -> Self: + """ + Convert an xdotool keystroke (see + [xdotool documentation](https://www.mankier.com/1/xdotool#Keyboard_Commands)) + to a `KeyboardParam`. -Key = PcKey | KeysToMap - -KEYS_MAPPING: IdentityDefaultDict[Key, PcKey] = IdentityDefaultDict( - { - "BackSpace": "backspace", - "Delete": "delete", - "Return": "enter", - "Enter": "enter", - "Tab": "tab", - "Escape": "escape", - "Up": "up", - "Down": "down", - "Right": "right", - "Left": "left", - "Home": "home", - "End": "end", - "Page_Up": "pageup", - "Page_Down": "pagedown", - "F1": "f1", - "F2": "f2", - "F3": "f3", - "F4": "f4", - "F5": "f5", - "F6": "f6", - "F7": "f7", - "F8": "f8", - "F9": "f9", - "F10": "f10", - "F11": "f11", - "F12": "f12", - } -) + Args: + keystroke (str): The xdotool keystroke to convert. 
+ + Example: + `"ctrl+shift+a"` -> `KeyboardParam(key="a", modifier_keys=["control", "shift"])` + """ + keys = keystroke.split("+") + key = keys.pop() + if key not in XDOTOOL_TO_KEY_MAP: + err_msg = ( + f"Unknown key: {key} " + f"(expected one of {list(XDOTOOL_TO_KEY_MAP.keys())})" + ) + raise ValueError(err_msg) + + return cls( + key=XDOTOOL_TO_KEY_MAP[key], + modifier_keys=[XDOTOOL_TO_MODIFIER_KEY_MAP[k] for k in keys], + ) class ActionNotImplementedError(NotImplementedError): @@ -149,72 +217,80 @@ def params_base( @override @validate_call - def __call__( + def __call__( # noqa: C901 self, - action: Action20250124, + action: Action20241022, text: str | None = None, coordinate: tuple[Annotated[int, Field(ge=0)], Annotated[int, Field(ge=0)]] | None = None, ) -> Image.Image | None: match action: - case "mouse_move": - self._mouse_move(coordinate) # type: ignore[arg-type] + case "cursor_position": + raise ActionNotImplementedError(action, self.name) + case "double_click": + return self._agent_os.click("left", 2) + case "key": + return self._key(keystroke=text) # type: ignore[arg-type] + case "left_click": + return self._agent_os.click("left") case "left_click_drag": - # does not seem to work - self._left_click_drag(coordinate) # type: ignore[arg-type] + return self._left_click_drag(coordinate) # type: ignore[arg-type] + case "middle_click": + return self._agent_os.click("middle") + case "mouse_move": + return self._mouse_move(coordinate) # type: ignore[arg-type] + case "right_click": + return self._agent_os.click("right") case "screenshot": return self._screenshot() - case "left_click": - self._agent_os.click("left") - case "right_click": - self._agent_os.click("right") - case "middle_click": - self._agent_os.click("middle") - case "double_click": - self._agent_os.click("left", 2) case "type": - self._type(text) # type: ignore[arg-type] - case "key": - # we do not seem to support all kinds of key nor modifier keys - # + key combinations - self._key(text) # type: 
ignore[arg-type] - case _: - raise ActionNotImplementedError(action, self.name) - return None + return self._type(text) # type: ignore[arg-type] @validate_call def _type(self, text: str) -> None: self._agent_os.type(text) @validate_call - def _key(self, key: Key) -> None: - _key = KEYS_MAPPING[key] - self._agent_os.keyboard_pressed(_key) - self._agent_os.keyboard_release(_key) + def _key(self, keystroke: str) -> None: + keyboard_param = KeyboardParam.from_xdotool(keystroke) + self._agent_os.keyboard_pressed( + key=keyboard_param.key, modifier_keys=keyboard_param.modifier_keys + ) + self._agent_os.keyboard_release( + key=keyboard_param.key, modifier_keys=keyboard_param.modifier_keys + ) @validate_call - def _keyboard_pressed(self, key: Key) -> None: - _key = KEYS_MAPPING[key] - self._agent_os.keyboard_pressed(_key) + def _keyboard_pressed(self, keystroke: str) -> None: + keyboard_param = KeyboardParam.from_xdotool(keystroke) + self._agent_os.keyboard_pressed( + key=keyboard_param.key, modifier_keys=keyboard_param.modifier_keys + ) @validate_call - def _keyboard_released(self, key: Key) -> None: - _key = KEYS_MAPPING[key] - self._agent_os.keyboard_release(_key) + def _keyboard_released(self, keystroke: str) -> None: + keyboard_param = KeyboardParam.from_xdotool(keystroke) + self._agent_os.keyboard_release( + key=keyboard_param.key, modifier_keys=keyboard_param.modifier_keys + ) - def _scale_coordinates_back( - self, - coordinate: tuple[Annotated[int, Field(ge=0)], Annotated[int, Field(ge=0)]], - ) -> tuple[int, int]: + def _get_real_screen_resolution(self) -> tuple[int, int]: if self._real_screen_width is None or self._real_screen_height is None: screenshot = self._agent_os.screenshot() self._real_screen_width = screenshot.width self._real_screen_height = screenshot.height + return self._real_screen_width, self._real_screen_height + + def _scale_coordinates_back( + self, + coordinate: tuple[Annotated[int, Field(ge=0)], Annotated[int, Field(ge=0)]], + ) -> tuple[int, 
int]: + real_screen_width, real_screen_height = self._get_real_screen_resolution() x, y = scale_coordinates_back( coordinate[0], coordinate[1], - self._real_screen_width, # - self._real_screen_height, + real_screen_width, + real_screen_height, self._width, self._height, ) @@ -340,7 +416,7 @@ def to_params( @override @validate_call - def __call__( + def __call__( # noqa: C901 self, action: Action20250124, text: str | None = None, @@ -352,6 +428,8 @@ def __call__( key: str | None = None, # maybe not all keys supported ) -> Image.Image | None: match action: + case "hold_key": + self._hold_key(keystroke=text, duration=duration) # type: ignore[arg-type] case "left_mouse_down": self._agent_os.mouse_down("left") case "left_mouse_up": @@ -364,12 +442,62 @@ def __call__( self._click("middle", coordinate=coordinate, key=key) case "double_click": self._click("left", count=2, coordinate=coordinate, key=key) + case "scroll": + self._scroll( + scroll_direction=scroll_direction, # type: ignore[arg-type] + scroll_amount=scroll_amount, # type: ignore[arg-type] + text=text, + coordinate=coordinate, + ) + return self._screenshot() case "triple_click": self._click("left", count=3, coordinate=coordinate, key=key) + case "wait": + self._wait(duration=duration) # type: ignore[arg-type] case _: return super().__call__(action, text, coordinate) return None + @validate_call + def _hold_key( + self, keystroke: str, duration: Annotated[float, Field(ge=0.0, le=100.0)] + ) -> None: + self._keyboard_pressed(keystroke=keystroke) + time.sleep(duration) + self._keyboard_released(keystroke=keystroke) + + @validate_call + def _scroll( + self, + scroll_direction: ScrollDirection, + scroll_amount: Annotated[int, Field(ge=0)], + text: str | None = None, + coordinate: tuple[Annotated[int, Field(ge=0)], Annotated[int, Field(ge=0)]] + | None = None, + ) -> None: + real_screen_width, real_screen_height = self._get_real_screen_resolution() + x = int(RELATIVE_SCROLL_FACTOR * scroll_amount * real_screen_width) 
+ y = int(RELATIVE_SCROLL_FACTOR * scroll_amount * real_screen_height) + if coordinate is not None: + self._mouse_move(coordinate) + if text is not None: + self._keyboard_pressed(text) + match scroll_direction: + case "up": + self._agent_os.mouse_scroll(0, y) + case "down": + self._agent_os.mouse_scroll(0, -y) + case "left": + self._agent_os.mouse_scroll(x, 0) + case "right": + self._agent_os.mouse_scroll(-x, 0) + if text is not None: + self._keyboard_released(text) + + @validate_call + def _wait(self, duration: Annotated[float, Field(ge=0.0, le=100.0)]) -> None: + time.sleep(duration) + def _click( self, button: Literal["left", "right", "middle"], @@ -381,7 +509,7 @@ def _click( if coordinate is not None: self._mouse_move(coordinate) if key is not None: - self._keyboard_pressed(key) # type: ignore[arg-type] + self._keyboard_pressed(keystroke=key) self._agent_os.click(button, count) if key is not None: - self._keyboard_released(key) # type: ignore[arg-type] + self._keyboard_released(keystroke=key) diff --git a/src/askui/tools/playwright/__init__.py b/src/askui/tools/playwright/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/askui/tools/playwright/agent_os.py b/src/askui/tools/playwright/agent_os.py new file mode 100644 index 00000000..21d3aeeb --- /dev/null +++ b/src/askui/tools/playwright/agent_os.py @@ -0,0 +1,462 @@ +from __future__ import annotations + +import io +import subprocess +from typing import Literal + +from PIL import Image +from playwright.sync_api import ( + Browser, + BrowserContext, + BrowserType, + Page, + Playwright, + ViewportSize, + sync_playwright, +) +from typing_extensions import override + +from ..agent_os import AgentOs, InputEvent, ModifierKey, PcKey + + +class PlaywrightAgentOs(AgentOs): + """ + Playwright-based implementation of AgentOs. + + This implementation uses Playwright's Python SDK to control browser automation + and simulate user interactions. 
It provides mouse control, keyboard input, + and screen capture functionality through a browser context. + + Args: + browser_type (Literal["chromium", "firefox", "webkit"], optional): The browser + type to use. Defaults to `"chromium"`. + headless (bool, optional): Whether to run the browser in headless mode. + Defaults to `False`. + viewport_size (ViewportSize | None, optional): The viewport size. + Defaults to `None` (uses default). + slow_mo (int, optional): Slows down Playwright operations by the specified + amount of milliseconds. Defaults to `0`. + install_browser (bool, optional): Whether to install browser on connection. + Defaults to `True`. + install_dependencies (bool, optional): Whether to install system dependencies + (requires root permissions). Defaults to `False`. + """ + + def __init__( + self, + browser_type: Literal["chromium", "firefox", "webkit"] = "chromium", + headless: bool = False, + viewport_size: ViewportSize | None = None, + slow_mo: int = 0, + install_browser: bool = True, + install_dependencies: bool = False, + ) -> None: + self._browser_type = browser_type + self._headless = headless + self._viewport_size = viewport_size + self._slow_mo = slow_mo + self._install_browser = install_browser + self._install_dependencies = install_dependencies + + # Playwright objects + self._playwright: Playwright | None = None + self._browser: Browser | None = None + self._context: BrowserContext | None = None + self._page: Page | None = None + + # Event listening state + self._listening = False + self._event_queue: list[InputEvent] = [] + + def _install_playwright_browser(self) -> None: + """Install Playwright browser if requested.""" + if not self._install_browser: + return + + try: + # Install the specific browser type + subprocess.run( + ["playwright", "install", self._browser_type], + check=True, + capture_output=True, + text=True, + ) + except subprocess.CalledProcessError as e: + error_msg = f"Failed to install {self._browser_type} browser: {e}" + 
raise RuntimeError(error_msg) from e + except FileNotFoundError as e: + error_msg = ( + "Playwright CLI not found. Install with `pip install playwright`" + ) + raise RuntimeError(error_msg) from e + + def _install_system_dependencies(self) -> None: + """Install system dependencies if requested (requires root permissions).""" + if not self._install_dependencies: + return + + try: + # Install system dependencies + subprocess.run( + ["playwright", "install-deps"], + check=True, + capture_output=True, + text=True, + ) + except subprocess.CalledProcessError as e: + error_msg = f"Failed to install system dependencies: {e}" + raise RuntimeError(error_msg) from e + except FileNotFoundError as e: + error_msg = ( + "Playwright CLI not found. Install with `pip install playwright`" + ) + raise RuntimeError(error_msg) from e + + @override + def connect(self) -> None: + """Establishes a synchronous connection to the browser.""" + + # Install browser and dependencies if requested + if self._install_dependencies: + self._install_system_dependencies() + + if self._install_browser: + self._install_playwright_browser() + + self._playwright = sync_playwright().start() + browser_launcher: BrowserType = getattr(self._playwright, self._browser_type) + self._browser = browser_launcher.launch( + headless=self._headless, + slow_mo=self._slow_mo, + ) + self._context = self._browser.new_context( + viewport=self._viewport_size, + ) + + self._page = self._context.new_page() + # Navigate to a blank page to ensure we have a working page + self._page.goto("data:text/html,") + + @override + def disconnect(self) -> None: + """Terminates the connection to the browser.""" + if self._listening: + self.stop_listening() + + if self._page: + self._page.close() + self._page = None + + if self._context: + self._context.close() + self._context = None + + if self._browser: + self._browser.close() + self._browser = None + + if self._playwright: + self._playwright.stop() + self._playwright = None + + @override 
+ def screenshot(self, report: bool = True) -> Image.Image: + """ + Captures a screenshot of the current page. + + Args: + report (bool, optional): Whether to include the screenshot in + reporting. Defaults to `True`. + + Returns: + Image.Image: A PIL Image object containing the screenshot. + """ + if not self._page: + error_msg = "No active page. Call connect() first." + raise RuntimeError(error_msg) + + screenshot_bytes = self._page.screenshot() + return Image.open(io.BytesIO(screenshot_bytes)) + + @override + def mouse_move(self, x: int, y: int) -> None: + """ + Moves the mouse cursor to specified coordinates on the page. + + Args: + x (int): The horizontal coordinate (in pixels) to move to. + y (int): The vertical coordinate (in pixels) to move to. + """ + if not self._page: + error_msg = "No active page. Call connect() first." + raise RuntimeError(error_msg) + + self._page.mouse.move(x, y) + + @override + def type(self, text: str, typing_speed: int = 50) -> None: + """ + Simulates typing text as if entered on a keyboard. + + Args: + text (str): The text to be typed. + typing_speed (int, optional): The speed of typing in characters per + minute. Defaults to `50`. + """ + if not self._page: + error_msg = "No active page. Call connect() first." + raise RuntimeError(error_msg) + + # Convert typing speed from CPM to delay between characters + delay = (60 / typing_speed) * 1000 if typing_speed > 0 else 0 + self._page.keyboard.type(text, delay=delay) + + @override + def click( + self, button: Literal["left", "middle", "right"] = "left", count: int = 1 + ) -> None: + """ + Simulates clicking a mouse button. + + Args: + button (Literal["left", "middle", "right"], optional): The mouse + button to click. Defaults to `"left"`. + count (int, optional): Number of times to click. Defaults to `1`. 
+ """ + for _ in range(count): + self.mouse_down(button) + self.mouse_up(button) + + @override + def mouse_down(self, button: Literal["left", "middle", "right"] = "left") -> None: + """ + Simulates pressing and holding a mouse button. + + Args: + button (Literal["left", "middle", "right"], optional): The mouse + button to press. Defaults to `"left"`. + """ + if not self._page: + error_msg = "No active page. Call connect() first." + raise RuntimeError(error_msg) + + self._page.mouse.down(button=button) + + @override + def mouse_up(self, button: Literal["left", "middle", "right"] = "left") -> None: + """ + Simulates releasing a mouse button. + + Args: + button (Literal["left", "middle", "right"], optional): The mouse + button to release. Defaults to `"left"`. + """ + if not self._page: + error_msg = "No active page. Call connect() first." + raise RuntimeError(error_msg) + + self._page.mouse.up(button=button) + + @override + def mouse_scroll(self, x: int, y: int) -> None: + """ + Simulates scrolling the mouse wheel. + + Args: + x (int): The horizontal scroll amount. Positive values scroll right, + negative values scroll left. + y (int): The vertical scroll amount. Positive values scroll down, + negative values scroll up. + """ + if not self._page: + error_msg = "No active page. Call connect() first." + raise RuntimeError(error_msg) + + self._page.mouse.wheel(delta_x=x, delta_y=y) + + @override + def keyboard_pressed( + self, key: PcKey | ModifierKey, modifier_keys: list[ModifierKey] | None = None + ) -> None: + """ + Simulates pressing and holding a keyboard key. + + Args: + key (PcKey | ModifierKey): The key to press. + modifier_keys (list[ModifierKey] | None, optional): List of modifier keys to + press along with the main key. Defaults to `None`. + """ + if not self._page: + error_msg = "No active page. Call connect() first." 
+ raise RuntimeError(error_msg) + + # Press modifier keys first + if modifier_keys: + for modifier in modifier_keys: + self._page.keyboard.down(self._convert_key(modifier)) + + # Press the main key + self._page.keyboard.down(self._convert_key(key)) + + @override + def keyboard_release( + self, key: PcKey | ModifierKey, modifier_keys: list[ModifierKey] | None = None + ) -> None: + """ + Simulates releasing a keyboard key. + + Args: + key (PcKey | ModifierKey): The key to release. + modifier_keys (list[ModifierKey] | None, optional): List of modifier keys to + release along with the main key. Defaults to `None`. + """ + if not self._page: + error_msg = "No active page. Call connect() first." + raise RuntimeError(error_msg) + + # Release the main key first + self._page.keyboard.up(self._convert_key(key)) + + # Release modifier keys + if modifier_keys: + for modifier in modifier_keys: + self._page.keyboard.up(self._convert_key(modifier)) + + @override + def keyboard_tap( + self, + key: PcKey | ModifierKey, + modifier_keys: list[ModifierKey] | None = None, + count: int = 1, + ) -> None: + """ + Simulates pressing and immediately releasing a keyboard key. + + Args: + key (PcKey | ModifierKey): The key to tap. + modifier_keys (list[ModifierKey] | None, optional): List of modifier keys to + press along with the main key. Defaults to `None`. + count (int, optional): The number of times to tap the key. Defaults to `1`. + """ + if not self._page: + error_msg = "No active page. Call connect() first." 
+ raise RuntimeError(error_msg) + + for _ in range(count): + # Press modifier keys first + if modifier_keys: + for modifier in modifier_keys: + self._page.keyboard.down(self._convert_key(modifier)) + + # Press and release the main key + self._page.keyboard.press(self._convert_key(key)) + + # Release modifier keys + if modifier_keys: + for modifier in modifier_keys: + self._page.keyboard.up(self._convert_key(modifier)) + + def _convert_key(self, key: PcKey | ModifierKey) -> str: + """ + Convert our key format to Playwright's key format. + + Args: + key (PcKey | ModifierKey): The key to convert. + + Returns: + str: The Playwright-compatible key string. + """ + # Map our modifier keys to Playwright format + modifier_map: dict[PcKey | ModifierKey, str] = { + "command": "Meta", + "alt": "Alt", + "control": "Control", + "shift": "Shift", + "right_shift": "Shift", + } + + if key in modifier_map: + return modifier_map[key] + + # For regular keys, Playwright uses similar format + # but some keys might need conversion + key_map: dict[PcKey | ModifierKey, str] = { + "backspace": "Backspace", + "delete": "Delete", + "enter": "Enter", + "tab": "Tab", + "escape": "Escape", + "up": "ArrowUp", + "down": "ArrowDown", + "right": "ArrowRight", + "left": "ArrowLeft", + "home": "Home", + "end": "End", + "pageup": "PageUp", + "pagedown": "PageDown", + "space": " ", + } + + if key in key_map: + return key_map[key] + + # Function keys + if key.startswith("f") and key[1:].isdigit(): + return key.upper() + + # For most other keys, return as-is + return key + + # --- Extra browser-oriented actions --- + def goto(self, url: str) -> None: + """ + Navigate to a specific URL. + + Args: + url (str): The URL to navigate to. + """ + if not self._page: + error_msg = "No active page. Call connect() first." + raise RuntimeError(error_msg) + + self._page.goto(url) + + def back(self) -> None: + if not self._page: + error_msg = "No active page. Call connect() first." 
+ raise RuntimeError(error_msg) + + self._page.go_back() + + def forward(self) -> None: + if not self._page: + error_msg = "No active page. Call connect() first." + raise RuntimeError(error_msg) + + self._page.go_forward() + + def get_page_title(self) -> str: + """ + Get the title of the current page. + + Returns: + str: The page title. + """ + if not self._page: + error_msg = "No active page. Call connect() first." + raise RuntimeError(error_msg) + + return self._page.title() + + def get_page_url(self) -> str: + """ + Get the URL of the current page. + + Returns: + str: The current page URL. + """ + if not self._page: + error_msg = "No active page. Call connect() first." + raise RuntimeError(error_msg) + + return self._page.url diff --git a/src/askui/tools/playwright/tools.py b/src/askui/tools/playwright/tools.py new file mode 100644 index 00000000..96aa7843 --- /dev/null +++ b/src/askui/tools/playwright/tools.py @@ -0,0 +1,140 @@ +from typing_extensions import override + +from askui.models.shared.tools import Tool +from askui.tools.playwright.agent_os import PlaywrightAgentOs + + +class PlaywrightGotoTool(Tool): + """ + Navigates to a specific URL in the browser. + """ + + def __init__(self, agent_os: PlaywrightAgentOs) -> None: + super().__init__( + name="playwright_goto_tool", + description=( + """ + Navigates the browser to a specific URL. + This will load the webpage at the given URL and make it the current + page. The browser will wait for the page to load completely before + proceeding. + """ + ), + input_schema={ + "type": "object", + "properties": { + "url": { + "type": "string", + "description": ( + "The URL to navigate to. Must be a valid URL including " + "the protocol (e.g., 'https://example.com')." 
+ ), + }, + }, + "required": ["url"], + }, + ) + self._agent_os = agent_os + + @override + def __call__(self, url: str) -> str: + self._agent_os.goto(url) + return f"Navigated to: {url}" + + +class PlaywrightBackTool(Tool): + """ + Navigates back to the previous page in the browser history. + """ + + def __init__(self, agent_os: PlaywrightAgentOs) -> None: + super().__init__( + name="playwright_back_tool", + description=( + """ + Navigates back to the previous page in the browser history. + This is equivalent to clicking the back button in a browser. + If there is no previous page in the history, this action will have no + effect. + """ + ), + ) + self._agent_os = agent_os + + @override + def __call__(self) -> str: + self._agent_os.back() + return "Navigated back to the previous page" + + +class PlaywrightForwardTool(Tool): + """ + Navigates forward to the next page in the browser history. + """ + + def __init__(self, agent_os: PlaywrightAgentOs) -> None: + super().__init__( + name="playwright_forward_tool", + description=( + """ + Navigates forward to the next page in the browser history. + This is equivalent to clicking the forward button in a browser. + If there is no next page in the history, this action will have no + effect. + """ + ), + ) + self._agent_os = agent_os + + @override + def __call__(self) -> str: + self._agent_os.forward() + return "Navigated forward to the next page" + + +class PlaywrightGetPageTitleTool(Tool): + """ + Gets the title of the current page. + """ + + def __init__(self, agent_os: PlaywrightAgentOs) -> None: + super().__init__( + name="playwright_get_page_title_tool", + description=( + """ + Retrieves the title of the currently loaded webpage. + The title is typically displayed in the browser tab and represents + the main heading or name of the page content. 
+ """ + ), + ) + self._agent_os = agent_os + + @override + def __call__(self) -> str: + title = self._agent_os.get_page_title() + return f"Page title: {title}" + + +class PlaywrightGetPageUrlTool(Tool): + """ + Gets the URL of the current page. + """ + + def __init__(self, agent_os: PlaywrightAgentOs) -> None: + super().__init__( + name="playwright_get_page_url_tool", + description=( + """ + Retrieves the URL of the currently loaded webpage. + This returns the full URL including protocol, domain, path, and query + parameters. + """ + ), + ) + self._agent_os = agent_os + + @override + def __call__(self) -> str: + url = self._agent_os.get_page_url() + return f"Current page URL: {url}" diff --git a/src/askui/web_agent.py b/src/askui/web_agent.py new file mode 100644 index 00000000..24d69e61 --- /dev/null +++ b/src/askui/web_agent.py @@ -0,0 +1,98 @@ +import logging +from datetime import datetime, timezone + +from pydantic import ConfigDict, validate_call +from typing_extensions import override + +from askui.agent import VisionAgent +from askui.container import telemetry +from askui.models.shared.settings import ( + COMPUTER_USE_20241022_BETA_FLAG, + COMPUTER_USE_20250124_BETA_FLAG, + ActSettings, + MessageSettings, +) +from askui.tools.exception_tool import ExceptionTool +from askui.tools.playwright.agent_os import PlaywrightAgentOs +from askui.tools.playwright.tools import ( + PlaywrightBackTool, + PlaywrightForwardTool, + PlaywrightGetPageTitleTool, + PlaywrightGetPageUrlTool, + PlaywrightGotoTool, +) +from askui.tools.toolbox import AgentToolbox + +from .models import ModelComposition +from .models.models import ModelChoice, ModelName, ModelRegistry +from .reporting import Reporter +from .retry import Retry + +_SYSTEM_PROMPT = f""" + +* You are utilizing a webbrowser in full-screen mode. So you are only seeing the content of the currently opened webpage (tab). +* It can be helpful to zoom in/out or scroll down/up so that you can see everything on the page. 
Make sure to do that before deciding something isn't available. + * When using your function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request. + * The current date and time is {datetime.now(timezone.utc).strftime("%A, %B %d, %Y %H:%M:%S %z")}. + + """ + +_ANTHROPIC__CLAUDE__3_5__SONNET__20241022__ACT_SETTINGS = ActSettings( + messages=MessageSettings( + model=ModelName.ANTHROPIC__CLAUDE__3_5__SONNET__20241022.value, + system=_SYSTEM_PROMPT, + betas=[COMPUTER_USE_20241022_BETA_FLAG], + ), +) + +_CLAUDE__SONNET__4__20250514__ACT_SETTINGS = ActSettings( + messages=MessageSettings( + model=ModelName.CLAUDE__SONNET__4__20250514.value, + system=_SYSTEM_PROMPT, + betas=[COMPUTER_USE_20250124_BETA_FLAG], + thinking={"type": "enabled", "budget_tokens": 2048}, + ), +) + + +class WebVisionAgent(VisionAgent): + @telemetry.record_call(exclude={"model_router", "reporters", "tools"}) + @validate_call(config=ConfigDict(arbitrary_types_allowed=True)) + def __init__( + self, + log_level: int | str = logging.INFO, + reporters: list[Reporter] | None = None, + model: ModelChoice | ModelComposition | str | None = None, + retry: Retry | None = None, + models: ModelRegistry | None = None, + ) -> None: + agent_os = PlaywrightAgentOs() + tools = AgentToolbox( + agent_os=agent_os, + ) + super().__init__( + log_level=log_level, + reporters=reporters, + model=model, + retry=retry, + models=models, + tools=tools, + act_tools=[ + PlaywrightGotoTool(agent_os=agent_os), + PlaywrightBackTool(agent_os=agent_os), + PlaywrightForwardTool(agent_os=agent_os), + PlaywrightGetPageTitleTool(agent_os=agent_os), + PlaywrightGetPageUrlTool(agent_os=agent_os), + ExceptionTool(), + ], + ) + + @override + def _get_default_settings_for_act(self, model_choice: str) -> ActSettings: + match model_choice: + case ModelName.ANTHROPIC__CLAUDE__3_5__SONNET__20241022: + return
_ANTHROPIC__CLAUDE__3_5__SONNET__20241022__ACT_SETTINGS + case ModelName.CLAUDE__SONNET__4__20250514 | ModelName.ASKUI: + return _CLAUDE__SONNET__4__20250514__ACT_SETTINGS + case _: + return ActSettings() diff --git a/src/chat/api/assistants/seeds.py b/src/chat/api/assistants/seeds.py index 5d7bf97b..42c054af 100644 --- a/src/chat/api/assistants/seeds.py +++ b/src/chat/api/assistants/seeds.py @@ -17,3 +17,16 @@ name="AskUI Android Vision Agent", avatar="data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciICB2aWV3Qm94PSIwIDAgNDggNDgiIHdpZHRoPSIyNXB4IiBoZWlnaHQ9IjI1cHgiPjxwYXRoIGQ9Ik0gMzIuNTE5NTMxIDAuOTgyNDIxODggQSAxLjUwMDE1IDEuNTAwMTUgMCAwIDAgMzEuMjc5Mjk3IDEuNjI4OTA2MiBMIDI5LjQzNzUgNC4yMDg5ODQ0IEMgMjcuNzgwMjA3IDMuNDQwNTAwNiAyNS45NDE5MSAzIDI0IDMgQyAyMi4wNTgwOSAzIDIwLjIxOTc5MyAzLjQ0MDUwMDYgMTguNTYyNSA0LjIwODk4NDQgTCAxNi43MjA3MDMgMS42Mjg5MDYyIEEgMS41MDAxNSAxLjUwMDE1IDAgMCAwIDE1LjQzNTU0NyAwLjk4NDM3NSBBIDEuNTAwMTUgMS41MDAxNSAwIDAgMCAxNC4yNzkyOTcgMy4zNzEwOTM4IEwgMTYgNS43NzkyOTY5IEMgMTMuMTM4ODk2IDguMDI0NzU4MiAxMS4yNDUxODggMTEuNDM2MDIgMTEuMDM1MTU2IDE1LjI5MTAxNiBDIDEwLjU1MzI2IDE1LjExMjgxOCAxMC4wNDA0MDggMTUgOS41IDE1IEMgNy4wMzI0OTkxIDE1IDUgMTcuMDMyNDk5IDUgMTkuNSBMIDUgMzAuNSBDIDUgMzIuOTY3NTAxIDcuMDMyNDk5MSAzNSA5LjUgMzUgQyAxMC4wOTAzMTMgMzUgMTAuNjUzMjI5IDM0Ljg3ODc0OSAxMS4xNzE4NzUgMzQuNjY3OTY5IEMgMTEuNTY0MzM2IDM2LjA3MjEwNSAxMi42MzEzMzMgMzcuMTk2OTk0IDE0IDM3LjY5MzM1OSBMIDE0IDQxLjUgQyAxNCA0My45Njc1MDEgMTYuMDMyNDk5IDQ2IDE4LjUgNDYgQyAyMC45Njc1MDEgNDYgMjMgNDMuOTY3NTAxIDIzIDQxLjUgTCAyMyAzOCBMIDI1IDM4IEwgMjUgNDEuNSBDIDI1IDQzLjk2NzUwMSAyNy4wMzI0OTkgNDYgMjkuNSA0NiBDIDMxLjk2NzUwMSA0NiAzNCA0My45Njc1MDEgMzQgNDEuNSBMIDM0IDM3LjY5MzM1OSBDIDM1LjM2ODY2NyAzNy4xOTY5OTQgMzYuNDM1NjY0IDM2LjA3MjEwNSAzNi44MjgxMjUgMzQuNjY3OTY5IEMgMzcuMzQ2NzcxIDM0Ljg3ODc0OSAzNy45MDk2ODcgMzUgMzguNSAzNSBDIDQwLjk2NzUwMSAzNSA0MyAzMi45Njc1MDEgNDMgMzAuNSBMIDQzIDE5LjUgQyA0MyAxNy4wMzI0OTkgNDAuOTY3NTAxIDE1IDM4LjUgMTUgQyAzNy45NTk1OTIgMTUgMzcuNDQ2NzQgMTUuMTEyODE4IDM2Ljk2NDg0NCAxNS4yOTEwMTYgQyAzNi43NTQ4MTIgMTEu
NDM2MDIgMzQuODYxMTA0IDguMDI0NzU4MiAzMiA1Ljc3OTI5NjkgTCAzMy43MjA3MDMgMy4zNzEwOTM4IEEgMS41MDAxNSAxLjUwMDE1IDAgMCAwIDMyLjUxOTUzMSAwLjk4MjQyMTg4IHogTSAyNCA2IEMgMjkuMTg1MTI3IDYgMzMuMjc2NzI3IDkuOTU3NTEzMiAzMy43OTg4MjggMTUgTCAxNC4yMDExNzIgMTUgQyAxNC43MjMyNzMgOS45NTc1MTMyIDE4LjgxNDg3MyA2IDI0IDYgeiBNIDE5LjUgMTAgQSAxLjUgMS41IDAgMCAwIDE5LjUgMTMgQSAxLjUgMS41IDAgMCAwIDE5LjUgMTAgeiBNIDI4LjUgMTAgQSAxLjUgMS41IDAgMCAwIDI4LjUgMTMgQSAxLjUgMS41IDAgMCAwIDI4LjUgMTAgeiBNIDkuNSAxOCBDIDEwLjM0NjQ5OSAxOCAxMSAxOC42NTM1MDEgMTEgMTkuNSBMIDExIDMwLjUgQyAxMSAzMS4zNDY0OTkgMTAuMzQ2NDk5IDMyIDkuNSAzMiBDIDguNjUzNTAwOSAzMiA4IDMxLjM0NjQ5OSA4IDMwLjUgTCA4IDE5LjUgQyA4IDE4LjY1MzUwMSA4LjY1MzUwMDkgMTggOS41IDE4IHogTSAxNCAxOCBMIDM0IDE4IEwgMzQgMTkuNSBMIDM0IDMwLjUgTCAzNCAzMy41IEMgMzQgMzQuMzQ2NDk5IDMzLjM0NjQ5OSAzNSAzMi41IDM1IEwgMjUgMzUgTCAyMyAzNSBMIDE1LjUgMzUgQyAxNC42NTM1MDEgMzUgMTQgMzQuMzQ2NDk5IDE0IDMzLjUgTCAxNCAzMC41IEwgMTQgMTkuNSBMIDE0IDE4IHogTSAzOC41IDE4IEMgMzkuMzQ2NDk5IDE4IDQwIDE4LjY1MzUwMSA0MCAxOS41IEwgNDAgMzAuNSBDIDQwIDMxLjM0NjQ5OSAzOS4zNDY0OTkgMzIgMzguNSAzMiBDIDM3LjY1MzUwMSAzMiAzNyAzMS4zNDY0OTkgMzcgMzAuNSBMIDM3IDE5LjUgQyAzNyAxOC42NTM1MDEgMzcuNjUzNTAxIDE4IDM4LjUgMTggeiBNIDE3IDM4IEwgMjAgMzggTCAyMCA0MS41IEMgMjAgNDIuMzQ2NDk5IDE5LjM0NjQ5OSA0MyAxOC41IDQzIEMgMTcuNjUzNTAxIDQzIDE3IDQyLjM0NjQ5OSAxNyA0MS41IEwgMTcgMzggeiBNIDI4IDM4IEwgMzEgMzggTCAzMSA0MS41IEMgMzEgNDIuMzQ2NDk5IDMwLjM0NjQ5OSA0MyAyOS41IDQzIEMgMjguNjUzNTAxIDQzIDI4IDQyLjM0NjQ5OSAyOCA0MS41IEwgMjggMzggeiIvPjwvc3ZnPg==", ) + +ASKUI_WEB_AGENT = Assistant( + id="asst_ge3tiojsga3dgnruge3di2u5ov36shedkcslxnmcc", + name="AskUI Web Vision Agent", + 
avatar="data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI0MDAiIGhlaWdodD0iNDAwIiB2aWV3Qm94PSIwIDAgNDAwIDQwMCIgZmlsbD0ibm9uZSI+CjxwYXRoIGQ9Ik0xMzYuNDQ0IDIyMS41NTZDMTIzLjU1OCAyMjUuMjEzIDExNS4xMDQgMjMxLjYyNSAxMDkuNTM1IDIzOC4wMzJDMTE0Ljg2OSAyMzMuMzY0IDEyMi4wMTQgMjI5LjA4IDEzMS42NTIgMjI2LjM0OEMxNDEuNTEgMjIzLjU1NCAxNDkuOTIgMjIzLjU3NCAxNTYuODY5IDIyNC45MTVWMjE5LjQ4MUMxNTAuOTQxIDIxOC45MzkgMTQ0LjE0NSAyMTkuMzcxIDEzNi40NDQgMjIxLjU1NlpNMTA4Ljk0NiAxNzUuODc2TDYxLjA4OTUgMTg4LjQ4NEM2MS4wODk1IDE4OC40ODQgNjEuOTYxNyAxODkuNzE2IDYzLjU3NjcgMTkxLjM2TDEwNC4xNTMgMTgwLjY2OEMxMDQuMTUzIDE4MC42NjggMTAzLjU3OCAxODguMDc3IDk4LjU4NDcgMTk0LjcwNUMxMDguMDMgMTg3LjU1OSAxMDguOTQ2IDE3NS44NzYgMTA4Ljk0NiAxNzUuODc2Wk0xNDkuMDA1IDI4OC4zNDdDODEuNjU4MiAzMDYuNDg2IDQ2LjAyNzIgMjI4LjQzOCAzNS4yMzk2IDE4Ny45MjhDMzAuMjU1NiAxNjkuMjI5IDI4LjA3OTkgMTU1LjA2NyAyNy41IDE0NS45MjhDMjcuNDM3NyAxNDQuOTc5IDI3LjQ2NjUgMTQ0LjE3OSAyNy41MzM2IDE0My40NDZDMjQuMDQgMTQzLjY1NyAyMi4zNjc0IDE0NS40NzMgMjIuNzA3NyAxNTAuNzIxQzIzLjI4NzYgMTU5Ljg1NSAyNS40NjMzIDE3NC4wMTYgMzAuNDQ3MyAxOTIuNzIxQzQxLjIzMDEgMjMzLjIyNSA3Ni44NjU5IDMxMS4yNzMgMTQ0LjIxMyAyOTMuMTM0QzE1OC44NzIgMjg5LjE4NSAxNjkuODg1IDI4MS45OTIgMTc4LjE1MiAyNzIuODFDMTcwLjUzMiAyNzkuNjkyIDE2MC45OTUgMjg1LjExMiAxNDkuMDA1IDI4OC4zNDdaTTE2MS42NjEgMTI4LjExVjEzMi45MDNIMTg4LjA3N0MxODcuNTM1IDEzMS4yMDYgMTg2Ljk4OSAxMjkuNjc3IDE4Ni40NDcgMTI4LjExSDE2MS42NjFaIiBmaWxsPSIjMkQ0NTUyIi8+CjxwYXRoIGQ9Ik0xOTMuOTgxIDE2Ny41ODRDMjA1Ljg2MSAxNzAuOTU4IDIxMi4xNDQgMTc5LjI4NyAyMTUuNDY1IDE4Ni42NThMMjI4LjcxMSAxOTAuNDJDMjI4LjcxMSAxOTAuNDIgMjI2LjkwNCAxNjQuNjIzIDIwMy41NyAxNTcuOTk1QzE4MS43NDEgMTUxLjc5MyAxNjguMzA4IDE3MC4xMjQgMTY2LjY3NCAxNzIuNDk2QzE3My4wMjQgMTY3Ljk3MiAxODIuMjk3IDE2NC4yNjggMTkzLjk4MSAxNjcuNTg0Wk0yOTkuNDIyIDE4Ni43NzdDMjc3LjU3MyAxODAuNTQ3IDI2NC4xNDUgMTk4LjkxNiAyNjIuNTM1IDIwMS4yNTVDMjY4Ljg5IDE5Ni43MzYgMjc4LjE1OCAxOTMuMDMxIDI4OS44MzcgMTk2LjM2MkMzMDEuNjk4IDE5OS43NDEgMzA3Ljk3NiAyMDguMDYgMzExLjMwNyAyMTUuNDM2TDMyNC41NzIgMjE5LjIxMkMzMjQuNTcyIDIxOS4yMTIgMzIyLjczNiAxOTMuNDEgMjk5LjQyMiAxODYuNzc3Wk0yODYuMj
YyIDI1NC43OTVMMTc2LjA3MiAyMjMuOTlDMTc2LjA3MiAyMjMuOTkgMTc3LjI2NSAyMzAuMDM4IDE4MS44NDIgMjM3Ljg2OUwyNzQuNjE3IDI2My44MDVDMjgyLjI1NSAyNTkuMzg2IDI4Ni4yNjIgMjU0Ljc5NSAyODYuMjYyIDI1NC43OTVaTTIwOS44NjcgMzIxLjEwMkMxMjIuNjE4IDI5Ny43MSAxMzMuMTY2IDE4Ni41NDMgMTQ3LjI4NCAxMzMuODY1QzE1My4wOTcgMTEyLjE1NiAxNTkuMDczIDk2LjAyMDMgMTY0LjAyOSA4NS4yMDRDMTYxLjA3MiA4NC41OTUzIDE1OC42MjMgODYuMTUyOSAxNTYuMjAzIDkxLjA3NDZDMTUwLjk0MSAxMDEuNzQ3IDE0NC4yMTIgMTE5LjEyNCAxMzcuNyAxNDMuNDVDMTIzLjU4NiAxOTYuMTI3IDExMy4wMzggMzA3LjI5IDIwMC4yODMgMzMwLjY4MkMyNDEuNDA2IDM0MS42OTkgMjczLjQ0MiAzMjQuOTU1IDI5Ny4zMjMgMjk4LjY1OUMyNzQuNjU1IDMxOS4xOSAyNDUuNzE0IDMzMC43MDEgMjA5Ljg2NyAzMjEuMTAyWiIgZmlsbD0iIzJENDU1MiIvPgo8cGF0aCBkPSJNMTYxLjY2MSAyNjIuMjk2VjIzOS44NjNMOTkuMzMyNCAyNTcuNTM3Qzk5LjMzMjQgMjU3LjUzNyAxMDMuOTM4IDIzMC43NzcgMTM2LjQ0NCAyMjEuNTU2QzE0Ni4zMDIgMjE4Ljc2MiAxNTQuNzEzIDIxOC43ODEgMTYxLjY2MSAyMjAuMTIzVjEyOC4xMUgxOTIuODY5QzE4OS40NzEgMTE3LjYxIDE4Ni4xODQgMTA5LjUyNiAxODMuNDIzIDEwMy45MDlDMTc4Ljg1NiA5NC42MTIgMTc0LjE3NCAxMDAuNzc1IDE2My41NDUgMTA5LjY2NUMxNTYuMDU5IDExNS45MTkgMTM3LjEzOSAxMjkuMjYxIDEwOC42NjggMTM2LjkzM0M4MC4xOTY2IDE0NC42MSA1Ny4xNzkgMTQyLjU3NCA0Ny41NzUyIDE0MC45MTFDMzMuOTYwMSAxMzguNTYyIDI2LjgzODcgMTM1LjU3MiAyNy41MDQ5IDE0NS45MjhDMjguMDg0NyAxNTUuMDYyIDMwLjI2MDUgMTY5LjIyNCAzNS4yNDQ1IDE4Ny45MjhDNDYuMDI3MiAyMjguNDMzIDgxLjY2MyAzMDYuNDgxIDE0OS4wMSAyODguMzQyQzE2Ni42MDIgMjgzLjYwMiAxNzkuMDE5IDI3NC4yMzMgMTg3LjYyNiAyNjIuMjkxSDE2MS42NjFWMjYyLjI5NlpNNjEuMDg0OCAxODguNDg0TDEwOC45NDYgMTc1Ljg3NkMxMDguOTQ2IDE3NS44NzYgMTA3LjU1MSAxOTQuMjg4IDg5LjYwODcgMTk5LjAxOEM3MS42NjE0IDIwMy43NDMgNjEuMDg0OCAxODguNDg0IDYxLjA4NDggMTg4LjQ4NFoiIGZpbGw9IiNFMjU3NEMiLz4KPHBhdGggZD0iTTM0MS43ODYgMTI5LjE3NEMzMjkuMzQ1IDEzMS4zNTUgMjk5LjQ5OCAxMzQuMDcyIDI2Mi42MTIgMTI0LjE4NUMyMjUuNzE2IDExNC4zMDQgMjAxLjIzNiA5Ny4wMjI0IDE5MS41MzcgODguODk5NEMxNzcuNzg4IDc3LjM4MzQgMTcxLjc0IDY5LjM4MDIgMTY1Ljc4OCA4MS40ODU3QzE2MC41MjYgOTIuMTYzIDE1My43OTcgMTA5LjU0IDE0Ny4yODQgMTMzLjg2NkMxMzMuMTcxIDE4Ni41NDMgMTIyLjYyMyAyOTcuNzA2IDIwOS44NjcgMzIxLjA5OEMyOTcuMDkzIDM0NC40NyAzNDMuNTMgMjQyLjkyIDM1Ny
42NDQgMTkwLjIzOEMzNjQuMTU3IDE2NS45MTcgMzY3LjAxMyAxNDcuNSAzNjcuNzk5IDEzNS42MjVDMzY4LjY5NSAxMjIuMTczIDM1OS40NTUgMTI2LjA3OCAzNDEuNzg2IDEyOS4xNzRaTTE2Ni40OTcgMTcyLjc1NkMxNjYuNDk3IDE3Mi43NTYgMTgwLjI0NiAxNTEuMzcyIDIwMy41NjUgMTU4QzIyNi44OTkgMTY0LjYyOCAyMjguNzA2IDE5MC40MjUgMjI4LjcwNiAxOTAuNDI1TDE2Ni40OTcgMTcyLjc1NlpNMjIzLjQyIDI2OC43MTNDMTgyLjQwMyAyNTYuNjk4IDE3Ni4wNzcgMjIzLjk5IDE3Ni4wNzcgMjIzLjk5TDI4Ni4yNjIgMjU0Ljc5NkMyODYuMjYyIDI1NC43OTEgMjY0LjAyMSAyODAuNTc4IDIyMy40MiAyNjguNzEzWk0yNjIuMzc3IDIwMS40OTVDMjYyLjM3NyAyMDEuNDk1IDI3Ni4xMDcgMTgwLjEyNiAyOTkuNDIyIDE4Ni43NzNDMzIyLjczNiAxOTMuNDExIDMyNC41NzIgMjE5LjIwOCAzMjQuNTcyIDIxOS4yMDhMMjYyLjM3NyAyMDEuNDk1WiIgZmlsbD0iIzJFQUQzMyIvPgo8cGF0aCBkPSJNMTM5Ljg4IDI0Ni4wNEw5OS4zMzI0IDI1Ny41MzJDOTkuMzMyNCAyNTcuNTMyIDEwMy43MzcgMjMyLjQ0IDEzMy42MDcgMjIyLjQ5NkwxMTAuNjQ3IDEzNi4zM0wxMDguNjYzIDEzNi45MzNDODAuMTkxOCAxNDQuNjExIDU3LjE3NDIgMTQyLjU3NCA0Ny41NzA0IDE0MC45MTFDMzMuOTU1NCAxMzguNTYzIDI2LjgzNCAxMzUuNTcyIDI3LjUwMDEgMTQ1LjkyOUMyOC4wOCAxNTUuMDYzIDMwLjI1NTcgMTY5LjIyNCAzNS4yMzk3IDE4Ny45MjlDNDYuMDIyNSAyMjguNDMzIDgxLjY1ODMgMzA2LjQ4MSAxNDkuMDA1IDI4OC4zNDJMMTUwLjk4OSAyODcuNzE5TDEzOS44OCAyNDYuMDRaTTYxLjA4NDggMTg4LjQ4NUwxMDguOTQ2IDE3NS44NzZDMTA4Ljk0NiAxNzUuODc2IDEwNy41NTEgMTk0LjI4OCA4OS42MDg3IDE5OS4wMThDNzEuNjYxNSAyMDMuNzQzIDYxLjA4NDggMTg4LjQ4NSA2MS4wODQ4IDE4OC40ODVaIiBmaWxsPSIjRDY1MzQ4Ii8+CjxwYXRoIGQ9Ik0yMjUuMjcgMjY5LjE2M0wyMjMuNDE1IDI2OC43MTJDMTgyLjM5OCAyNTYuNjk4IDE3Ni4wNzIgMjIzLjk5IDE3Ni4wNzIgMjIzLjk5TDIzMi44OSAyMzkuODcyTDI2Mi45NzEgMTI0LjI4MUwyNjIuNjA3IDEyNC4xODVDMjI1LjcxMSAxMTQuMzA0IDIwMS4yMzIgOTcuMDIyNCAxOTEuNTMyIDg4Ljg5OTRDMTc3Ljc4MyA3Ny4zODM0IDE3MS43MzUgNjkuMzgwMiAxNjUuNzgzIDgxLjQ4NTdDMTYwLjUyNiA5Mi4xNjMgMTUzLjc5NyAxMDkuNTQgMTQ3LjI4NCAxMzMuODY2QzEzMy4xNzEgMTg2LjU0MyAxMjIuNjIzIDI5Ny43MDYgMjA5Ljg2NyAzMjEuMDk3TDIxMS42NTUgMzIxLjVMMjI1LjI3IDI2OS4xNjNaTTE2Ni40OTcgMTcyLjc1NkMxNjYuNDk3IDE3Mi43NTYgMTgwLjI0NiAxNTEuMzcyIDIwMy41NjUgMTU4QzIyNi44OTkgMTY0LjYyOCAyMjguNzA2IDE5MC40MjUgMjI4LjcwNiAxOTAuNDI1TDE2Ni40OTcgMTcyLjc1NloiIGZpbGw9IiMxRDhEMjIiLz4KPHBhdGggZD0iTTE0MS
45NDYgMjQ1LjQ1MUwxMzEuMDcyIDI0OC41MzdDMTMzLjY0MSAyNjMuMDE5IDEzOC4xNjkgMjc2LjkxNyAxNDUuMjc2IDI4OS4xOTVDMTQ2LjUxMyAyODguOTIyIDE0Ny43NCAyODguNjg3IDE0OSAyODguMzQyQzE1Mi4zMDIgMjg3LjQ1MSAxNTUuMzY0IDI4Ni4zNDggMTU4LjMxMiAyODUuMTQ1QzE1MC4zNzEgMjczLjM2MSAxNDUuMTE4IDI1OS43ODkgMTQxLjk0NiAyNDUuNDUxWk0xMzcuNyAxNDMuNDUxQzEzMi4xMTIgMTY0LjMwNyAxMjcuMTEzIDE5NC4zMjYgMTI4LjQ4OSAyMjQuNDM2QzEzMC45NTIgMjIzLjM2NyAxMzMuNTU0IDIyMi4zNzEgMTM2LjQ0NCAyMjEuNTUxTDEzOC40NTcgMjIxLjEwMUMxMzYuMDAzIDE4OC45MzkgMTQxLjMwOCAxNTYuMTY1IDE0Ny4yODQgMTMzLjg2NkMxNDguNzk5IDEyOC4yMjUgMTUwLjMxOCAxMjIuOTc4IDE1MS44MzIgMTE4LjA4NUMxNDkuMzkzIDExOS42MzcgMTQ2Ljc2NyAxMjEuMjI4IDE0My43NzYgMTIyLjg2N0MxNDEuNzU5IDEyOS4wOTMgMTM5LjcyMiAxMzUuODk4IDEzNy43IDE0My40NTFaIiBmaWxsPSIjQzA0QjQxIi8+Cjwvc3ZnPg==", +) + +SEEDS = [ + ASKUI_VISION_AGENT, + HUMAN_DEMONSTRATION_AGENT, + ANDROID_VISION_AGENT, + ASKUI_WEB_AGENT, +] diff --git a/src/chat/api/assistants/service.py b/src/chat/api/assistants/service.py index 391135dd..bc743400 100644 --- a/src/chat/api/assistants/service.py +++ b/src/chat/api/assistants/service.py @@ -3,11 +3,7 @@ from pydantic import BaseModel, Field from chat.api.assistants.models import Assistant -from chat.api.assistants.seeds import ( - ANDROID_VISION_AGENT, - ASKUI_VISION_AGENT, - HUMAN_DEMONSTRATION_AGENT, -) +from chat.api.assistants.seeds import SEEDS from chat.api.models import DO_NOT_PATCH, DoNotPatch, ListQuery, ListResponse @@ -166,6 +162,5 @@ def delete(self, assistant_id: str) -> None: def seed(self) -> None: """Seed the assistant service with default assistants.""" - self._save(ANDROID_VISION_AGENT) - self._save(ASKUI_VISION_AGENT) - self._save(HUMAN_DEMONSTRATION_AGENT) + for seed in SEEDS: + self._save(seed) diff --git a/src/chat/api/runs/runner/runner.py b/src/chat/api/runs/runner/runner.py index ca4ed422..8f12b8d8 100644 --- a/src/chat/api/runs/runner/runner.py +++ b/src/chat/api/runs/runner/runner.py @@ -16,6 +16,13 @@ from askui.models.shared.agent_on_message_cb import OnMessageCbParam from 
askui.tools.pynput_agent_os import PynputAgentOs from askui.utils.image_utils import ImageSource +from askui.web_agent import WebVisionAgent +from chat.api.assistants.seeds import ( + ANDROID_VISION_AGENT, + ASKUI_VISION_AGENT, + ASKUI_WEB_AGENT, + HUMAN_DEMONSTRATION_AGENT, +) from chat.api.messages.service import MessageCreateRequest, MessageService from chat.api.models import MAX_MESSAGES_PER_THREAD, ListQuery from chat.api.runs.models import Run, RunError @@ -35,11 +42,6 @@ logger = logging.getLogger(__name__) -ASKUI_VISION_AGENT_ID = "asst_ge3tiojsga3dgnruge3di2u5ov36shedkcslxnmca" -ASKUI_ANDROID_AGENT_ID = "asst_78da09fbf1ed43c7826fb1686f89f541" -HUMAN_AGENT_ID = "asst_ge3tiojsga3dgnruge3di2u5ov36shedkcslxnmcb" - - class Runner: def __init__(self, run: Run, base_dir: Path) -> None: self._run = run @@ -152,8 +154,16 @@ def _run_askui_vision_agent(self, event_queue: queue.Queue[Events]) -> None: event_queue=event_queue, ) + def _run_askui_web_agent(self, event_queue: queue.Queue[Events]) -> None: + self._run_agent( + agent_type="web", + event_queue=event_queue, + ) + def _run_agent( - self, agent_type: Literal["android", "vision"], event_queue: queue.Queue[Events] + self, + agent_type: Literal["android", "vision", "web"], + event_queue: queue.Queue[Events], ) -> None: messages: list[MessageParam] = [ MessageParam( @@ -199,6 +209,14 @@ def on_message( ) return + if agent_type == "web": + with WebVisionAgent() as web_agent: + web_agent.act( + messages, + on_message=on_message, + ) + return + with VisionAgent() as agent: agent.act( messages, @@ -217,12 +235,14 @@ def run( ) ) try: - if self._run.assistant_id == HUMAN_AGENT_ID: + if self._run.assistant_id == HUMAN_DEMONSTRATION_AGENT.id: self._run_human_agent(event_queue) - elif self._run.assistant_id == ASKUI_VISION_AGENT_ID: + elif self._run.assistant_id == ASKUI_VISION_AGENT.id: self._run_askui_vision_agent(event_queue) - elif self._run.assistant_id == ASKUI_ANDROID_AGENT_ID: + elif self._run.assistant_id == 
ANDROID_VISION_AGENT.id: self._run_askui_android_agent(event_queue) + elif self._run.assistant_id == ASKUI_WEB_AGENT.id: + self._run_askui_web_agent(event_queue) updated_run = self._retrieve_run() if updated_run.status == "in_progress": updated_run.completed_at = datetime.now(tz=timezone.utc) From b717b29c01ec0be8bd23e1d3b1bf9d55b2438062 Mon Sep 17 00:00:00 2001 From: Adrian Stritzinger Date: Mon, 7 Jul 2025 12:24:45 +0200 Subject: [PATCH 2/5] chore: remove chat ui moved to and hosted on hub.askui.com --- README.md | 21 +- pyproject.toml | 2 - src/chat/ui/.gitignore | 33 - src/chat/ui/app/globals.css | 82 - src/chat/ui/app/layout.tsx | 31 - src/chat/ui/app/page.tsx | 33 - src/chat/ui/components.json | 20 - .../ui/components/chat/chat-container.tsx | 23 - src/chat/ui/components/chat/chat-header.tsx | 118 - src/chat/ui/components/chat/chat-input.tsx | 520 - src/chat/ui/components/chat/empty-state.tsx | 62 - .../components/chat/message-content-block.tsx | 126 - src/chat/ui/components/chat/message-item.tsx | 189 - src/chat/ui/components/chat/message-list.tsx | 157 - .../sidebar/rename-thread-dialog.tsx | 91 - src/chat/ui/components/sidebar/sidebar.tsx | 139 - .../components/sidebar/thread-item-menu.tsx | 106 - .../ui/components/sidebar/thread-list.tsx | 159 - src/chat/ui/components/ui/accordion.tsx | 58 - src/chat/ui/components/ui/alert-dialog.tsx | 141 - src/chat/ui/components/ui/alert.tsx | 59 - src/chat/ui/components/ui/aspect-ratio.tsx | 7 - src/chat/ui/components/ui/avatar.tsx | 50 - src/chat/ui/components/ui/badge.tsx | 36 - src/chat/ui/components/ui/breadcrumb.tsx | 115 - src/chat/ui/components/ui/button.tsx | 52 - src/chat/ui/components/ui/card.tsx | 86 - src/chat/ui/components/ui/carousel.tsx | 262 - src/chat/ui/components/ui/chart.tsx | 365 - src/chat/ui/components/ui/checkbox.tsx | 30 - src/chat/ui/components/ui/collapsible.tsx | 11 - src/chat/ui/components/ui/context-menu.tsx | 200 - src/chat/ui/components/ui/dialog.tsx | 122 - 
src/chat/ui/components/ui/dropdown-menu.tsx | 200 - src/chat/ui/components/ui/form.tsx | 179 - src/chat/ui/components/ui/hover-card.tsx | 29 - src/chat/ui/components/ui/input-otp.tsx | 71 - src/chat/ui/components/ui/input.tsx | 25 - src/chat/ui/components/ui/label.tsx | 26 - src/chat/ui/components/ui/menubar.tsx | 236 - src/chat/ui/components/ui/navigation-menu.tsx | 128 - src/chat/ui/components/ui/pagination.tsx | 117 - src/chat/ui/components/ui/popover.tsx | 31 - src/chat/ui/components/ui/progress.tsx | 28 - src/chat/ui/components/ui/radio-group.tsx | 44 - src/chat/ui/components/ui/resizable.tsx | 45 - src/chat/ui/components/ui/scroll-area.tsx | 48 - src/chat/ui/components/ui/select.tsx | 160 - src/chat/ui/components/ui/separator.tsx | 31 - src/chat/ui/components/ui/sheet.tsx | 140 - src/chat/ui/components/ui/skeleton.tsx | 15 - src/chat/ui/components/ui/slider.tsx | 28 - src/chat/ui/components/ui/sonner.tsx | 31 - src/chat/ui/components/ui/switch.tsx | 29 - src/chat/ui/components/ui/table.tsx | 117 - src/chat/ui/components/ui/tabs.tsx | 55 - src/chat/ui/components/ui/textarea.tsx | 24 - src/chat/ui/components/ui/toast.tsx | 129 - src/chat/ui/components/ui/toaster.tsx | 35 - src/chat/ui/components/ui/toggle-group.tsx | 61 - src/chat/ui/components/ui/toggle.tsx | 45 - src/chat/ui/components/ui/tooltip.tsx | 30 - src/chat/ui/hooks/use-toast.ts | 191 - src/chat/ui/lib/api.ts | 175 - src/chat/ui/lib/constants.ts | 16 - src/chat/ui/lib/store.ts | 34 - src/chat/ui/lib/types.ts | 167 - src/chat/ui/lib/utils.ts | 6 - src/chat/ui/next.config.js | 10 - src/chat/ui/package-lock.json | 8357 ----------------- src/chat/ui/package.json | 77 - src/chat/ui/postcss.config.js | 6 - src/chat/ui/tailwind.config.ts | 90 - src/chat/ui/tsconfig.json | 27 - 74 files changed, 8 insertions(+), 14791 deletions(-) delete mode 100644 src/chat/ui/.gitignore delete mode 100644 src/chat/ui/app/globals.css delete mode 100644 src/chat/ui/app/layout.tsx delete mode 100644 src/chat/ui/app/page.tsx 
delete mode 100644 src/chat/ui/components.json delete mode 100644 src/chat/ui/components/chat/chat-container.tsx delete mode 100644 src/chat/ui/components/chat/chat-header.tsx delete mode 100644 src/chat/ui/components/chat/chat-input.tsx delete mode 100644 src/chat/ui/components/chat/empty-state.tsx delete mode 100644 src/chat/ui/components/chat/message-content-block.tsx delete mode 100644 src/chat/ui/components/chat/message-item.tsx delete mode 100644 src/chat/ui/components/chat/message-list.tsx delete mode 100644 src/chat/ui/components/sidebar/rename-thread-dialog.tsx delete mode 100644 src/chat/ui/components/sidebar/sidebar.tsx delete mode 100644 src/chat/ui/components/sidebar/thread-item-menu.tsx delete mode 100644 src/chat/ui/components/sidebar/thread-list.tsx delete mode 100644 src/chat/ui/components/ui/accordion.tsx delete mode 100644 src/chat/ui/components/ui/alert-dialog.tsx delete mode 100644 src/chat/ui/components/ui/alert.tsx delete mode 100644 src/chat/ui/components/ui/aspect-ratio.tsx delete mode 100644 src/chat/ui/components/ui/avatar.tsx delete mode 100644 src/chat/ui/components/ui/badge.tsx delete mode 100644 src/chat/ui/components/ui/breadcrumb.tsx delete mode 100644 src/chat/ui/components/ui/button.tsx delete mode 100644 src/chat/ui/components/ui/card.tsx delete mode 100644 src/chat/ui/components/ui/carousel.tsx delete mode 100644 src/chat/ui/components/ui/chart.tsx delete mode 100644 src/chat/ui/components/ui/checkbox.tsx delete mode 100644 src/chat/ui/components/ui/collapsible.tsx delete mode 100644 src/chat/ui/components/ui/context-menu.tsx delete mode 100644 src/chat/ui/components/ui/dialog.tsx delete mode 100644 src/chat/ui/components/ui/dropdown-menu.tsx delete mode 100644 src/chat/ui/components/ui/form.tsx delete mode 100644 src/chat/ui/components/ui/hover-card.tsx delete mode 100644 src/chat/ui/components/ui/input-otp.tsx delete mode 100644 src/chat/ui/components/ui/input.tsx delete mode 100644 src/chat/ui/components/ui/label.tsx delete 
mode 100644 src/chat/ui/components/ui/menubar.tsx delete mode 100644 src/chat/ui/components/ui/navigation-menu.tsx delete mode 100644 src/chat/ui/components/ui/pagination.tsx delete mode 100644 src/chat/ui/components/ui/popover.tsx delete mode 100644 src/chat/ui/components/ui/progress.tsx delete mode 100644 src/chat/ui/components/ui/radio-group.tsx delete mode 100644 src/chat/ui/components/ui/resizable.tsx delete mode 100644 src/chat/ui/components/ui/scroll-area.tsx delete mode 100644 src/chat/ui/components/ui/select.tsx delete mode 100644 src/chat/ui/components/ui/separator.tsx delete mode 100644 src/chat/ui/components/ui/sheet.tsx delete mode 100644 src/chat/ui/components/ui/skeleton.tsx delete mode 100644 src/chat/ui/components/ui/slider.tsx delete mode 100644 src/chat/ui/components/ui/sonner.tsx delete mode 100644 src/chat/ui/components/ui/switch.tsx delete mode 100644 src/chat/ui/components/ui/table.tsx delete mode 100644 src/chat/ui/components/ui/tabs.tsx delete mode 100644 src/chat/ui/components/ui/textarea.tsx delete mode 100644 src/chat/ui/components/ui/toast.tsx delete mode 100644 src/chat/ui/components/ui/toaster.tsx delete mode 100644 src/chat/ui/components/ui/toggle-group.tsx delete mode 100644 src/chat/ui/components/ui/toggle.tsx delete mode 100644 src/chat/ui/components/ui/tooltip.tsx delete mode 100644 src/chat/ui/hooks/use-toast.ts delete mode 100644 src/chat/ui/lib/api.ts delete mode 100644 src/chat/ui/lib/constants.ts delete mode 100644 src/chat/ui/lib/store.ts delete mode 100644 src/chat/ui/lib/types.ts delete mode 100644 src/chat/ui/lib/utils.ts delete mode 100644 src/chat/ui/next.config.js delete mode 100644 src/chat/ui/package-lock.json delete mode 100644 src/chat/ui/package.json delete mode 100644 src/chat/ui/postcss.config.js delete mode 100644 src/chat/ui/tailwind.config.ts delete mode 100644 src/chat/ui/tsconfig.json diff --git a/README.md b/README.md index d356cd7e..bc5027a7 100644 --- a/README.md +++ b/README.md @@ -775,25 +775,28 @@ If 
you would like to disable the recording of usage data, set the `ASKUI__VA__TE ### AskUI Chat AskUI Chat is a web application that allows interacting with an AskUI Vision Agent similar how it can be -done with `VisionAgent.act()` but in a more interactive manner that involves less code. Aside from -telling the AskUI Vision Agent what to do, the user can also demonstrate what to do (currently, only +done with `VisionAgent.act()` or `AndroidVisionAgent.act()` but in a more interactive manner that involves less code. Aside from +telling the agent what to do, the user can also demonstrate what to do (currently, only clicking is supported). **⚠️ Warning:** AskUI Chat is currently in an experimental stage and has several limitations (see below). +#### Architecture + +This repository only includes the AskUI Chat API (`src/askui/chat`). The AskUI Chat UI can be accessed through the [AskUI Hub](https://hub.askui.com/) and connects to the local Chat API after it has been started. + #### Configuration To use the chat, configure the following environment variables: - `ASKUI_TOKEN`: AskUI Vision Agent behind chat uses currently the AskUI API - `ASKUI_WORKSPACE_ID`: AskUI Vision Agent behind chat uses currently the AskUI API -- `ASKUI__CHAT_API__DATA_DIR` (optional, defaults to `$(pwd)/chat`): Currently, the AskUI chat stores its data in a directory locally. You can change the default directory by setting this environment variable. +- `ASKUI__CHAT_API__DATA_DIR` (optional, defaults to `$(pwd)/chat`): Currently, the AskUI chat stores all data in a directory locally. You can change the default directory by setting this environment variable. #### Installation ```bash pdm install # is going to install the dependencies of the api -pdm run chat:ui:install # is going to install the dependencies of the ui ``` You may need to give permissions on the fast run of the Chat UI to demonstrate actions (aka record clicks). 
@@ -802,7 +805,6 @@ You may need to give permissions on the fast run of the Chat UI to demonstrate a ```bash pdm run chat:api # is going to start the api at port 8000 -pdm run chat:ui # is going to start the ui at port 3000 ``` You can use the chat to record a workflow and redo it later. For that, just tell the agent to redo all previous steps. @@ -815,7 +817,7 @@ You can use the chat to record a workflow and redo it later. For that, just tell #### Limitations - A lot of errors are not handled properly and we allow the user to do a lot of actions that can lead to errors instead of properly guiding the user. -- The chat currently only allows rerunning actions through `VisionAgent.act()` which can be expensive, slow and is not necessary the most reliable way to do it. +- The chat currently only allows rerunning actions through `VisionAgent.act()` (or `AndroidVisionAgent.act()` or `WebVisionAgent.act()`) which can be expensive, slow and is not necessary the most reliable way to do it. - A lot quirks in UI and API. - Currently, api and ui need to be run in dev mode. - When demonstrating actions, the corresponding screenshot may not reflect the correct state of the screen before the action. In this case, cancel demonstrating, delete messages and try again. @@ -824,10 +826,3 @@ You can use the chat to record a workflow and redo it later. For that, just tell - The agent is going to fail if there are no messages in the conversation, there is no tool use result message following the tool use message somewhere in the conversation, a message is too long etc. Just adding or deleting the message in this case should fix the issue. - You should not switch the conversation while waiting for an agent's answers or demonstrating actions. - - - -#### Architecture - -- The chat api/backend is a [FastAPI](https://fastapi.tiangolo.com/) application that provides a REST API similar to [OpenAI's Assistants API](https://platform.openai.com/docs/assistants/overview). 
-- The chat ui/frontend is a [Next.js](https://nextjs.org/) application that provides a web interface to the chat api. diff --git a/pyproject.toml b/pyproject.toml index bf9a71d6..f3e8008b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,8 +58,6 @@ lint = "ruff check src tests" typecheck = "mypy" "typecheck:all" = "mypy src tests" "chat:api" = "uvicorn chat.api.app:app --reload --port 8000" -"chat:ui:install" = {shell = "cd src/chat/ui && npm ci"} -"chat:ui" = {shell = "cd src/chat/ui && npm run dev"} "mcp:dev" = "mcp dev src/askui/mcp/__init__.py" [dependency-groups] diff --git a/src/chat/ui/.gitignore b/src/chat/ui/.gitignore deleted file mode 100644 index 5ab2f9bd..00000000 --- a/src/chat/ui/.gitignore +++ /dev/null @@ -1,33 +0,0 @@ -# See https://help.github.com/articles/ignoring-files/ for more about ignoring files. - -# dependencies -/node_modules -/.pnp -.pnp.js -.yarn/install-state.gz - -# testing -/coverage - -# next.js -/.next/ -/out/ - -# production -/build - -# misc -.DS_Store -*.pem - -# debug -npm-debug.log* -yarn-debug.log* -yarn-error.log* - -# vercel -.vercel - -# typescript -*.tsbuildinfo -next-env.d.ts diff --git a/src/chat/ui/app/globals.css b/src/chat/ui/app/globals.css deleted file mode 100644 index 20b1c1db..00000000 --- a/src/chat/ui/app/globals.css +++ /dev/null @@ -1,82 +0,0 @@ -@tailwind base; -@tailwind components; -@tailwind utilities; - -:root { - --foreground-rgb: 0, 0, 0; - --background-start-rgb: 214, 219, 220; - --background-end-rgb: 255, 255, 255; -} - -@media (prefers-color-scheme: dark) { - :root { - --foreground-rgb: 255, 255, 255; - --background-start-rgb: 0, 0, 0; - --background-end-rgb: 0, 0, 0; - } -} - -@layer base { - :root { - --background: 0 0% 100%; - --foreground: 0 0% 3.9%; - --card: 0 0% 100%; - --card-foreground: 0 0% 3.9%; - --popover: 0 0% 100%; - --popover-foreground: 0 0% 3.9%; - --primary: 0 0% 9%; - --primary-foreground: 0 0% 98%; - --secondary: 0 0% 96.1%; - --secondary-foreground: 0 0% 9%; - --muted: 
0 0% 96.1%; - --muted-foreground: 0 0% 45.1%; - --accent: 0 0% 96.1%; - --accent-foreground: 0 0% 9%; - --destructive: 0 84.2% 60.2%; - --destructive-foreground: 0 0% 98%; - --border: 0 0% 89.8%; - --input: 0 0% 89.8%; - --ring: 0 0% 3.9%; - --chart-1: 12 76% 61%; - --chart-2: 173 58% 39%; - --chart-3: 197 37% 24%; - --chart-4: 43 74% 66%; - --chart-5: 27 87% 67%; - --radius: 0.5rem; - } - .dark { - --background: 0 0% 3.9%; - --foreground: 0 0% 98%; - --card: 0 0% 3.9%; - --card-foreground: 0 0% 98%; - --popover: 0 0% 3.9%; - --popover-foreground: 0 0% 98%; - --primary: 0 0% 98%; - --primary-foreground: 0 0% 9%; - --secondary: 0 0% 14.9%; - --secondary-foreground: 0 0% 98%; - --muted: 0 0% 14.9%; - --muted-foreground: 0 0% 63.9%; - --accent: 0 0% 14.9%; - --accent-foreground: 0 0% 98%; - --destructive: 0 62.8% 30.6%; - --destructive-foreground: 0 0% 98%; - --border: 0 0% 14.9%; - --input: 0 0% 14.9%; - --ring: 0 0% 83.1%; - --chart-1: 220 70% 50%; - --chart-2: 160 60% 45%; - --chart-3: 30 80% 55%; - --chart-4: 280 65% 60%; - --chart-5: 340 75% 55%; - } -} - -@layer base { - * { - @apply border-border; - } - body { - @apply bg-background text-foreground; - } -} diff --git a/src/chat/ui/app/layout.tsx b/src/chat/ui/app/layout.tsx deleted file mode 100644 index 4ec63bdc..00000000 --- a/src/chat/ui/app/layout.tsx +++ /dev/null @@ -1,31 +0,0 @@ -import "./globals.css"; -import type { Metadata } from "next"; -import { Inter } from "next/font/google"; -import { ThemeProvider } from "next-themes"; - -const inter = Inter({ subsets: ["latin"] }); - -export const metadata: Metadata = { - title: "AskUI Chat", -}; - -export default function RootLayout({ - children, -}: { - children: React.ReactNode; -}) { - return ( - - - - {children} - - - - ); -} diff --git a/src/chat/ui/app/page.tsx b/src/chat/ui/app/page.tsx deleted file mode 100644 index 2f8a007a..00000000 --- a/src/chat/ui/app/page.tsx +++ /dev/null @@ -1,33 +0,0 @@ -"use client"; - -import { QueryClient, 
QueryClientProvider } from "@tanstack/react-query"; -import { Toaster } from "sonner"; -import { Sidebar } from "@/components/sidebar/sidebar"; -import { ChatContainer } from "@/components/chat/chat-container"; - -const queryClient = new QueryClient({ - defaultOptions: { - queries: { - staleTime: 1000 * 60 * 5, // 5 minutes - retry: 1, - }, - }, -}); - -function ChatApp() { - return ( -
- - -
- ); -} - -export default function Home() { - return ( - - - - - ); -} diff --git a/src/chat/ui/components.json b/src/chat/ui/components.json deleted file mode 100644 index c5974621..00000000 --- a/src/chat/ui/components.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "$schema": "https://ui.shadcn.com/schema.json", - "style": "default", - "rsc": true, - "tsx": true, - "tailwind": { - "config": "tailwind.config.ts", - "css": "app/globals.css", - "baseColor": "neutral", - "cssVariables": true, - "prefix": "" - }, - "aliases": { - "components": "@/components", - "utils": "@/lib/utils", - "ui": "@/components/ui", - "lib": "@/lib", - "hooks": "@/hooks" - } -} diff --git a/src/chat/ui/components/chat/chat-container.tsx b/src/chat/ui/components/chat/chat-container.tsx deleted file mode 100644 index 221d5460..00000000 --- a/src/chat/ui/components/chat/chat-container.tsx +++ /dev/null @@ -1,23 +0,0 @@ -"use client"; - -import { useChatStore } from "@/lib/store"; -import { EmptyState } from "./empty-state"; -import { ChatHeader } from "./chat-header"; -import { MessageList } from "./message-list"; -import { ChatInput } from "./chat-input"; - -export function ChatContainer() { - const { selectedThread } = useChatStore(); - - if (!selectedThread) { - return ; - } - - return ( -
- - - -
- ); -} diff --git a/src/chat/ui/components/chat/chat-header.tsx b/src/chat/ui/components/chat/chat-header.tsx deleted file mode 100644 index 16da6302..00000000 --- a/src/chat/ui/components/chat/chat-header.tsx +++ /dev/null @@ -1,118 +0,0 @@ -"use client"; - -import { Bot, Zap } from "lucide-react"; -import { useQuery } from "@tanstack/react-query"; -import { - Select, - SelectContent, - SelectItem, - SelectTrigger, -} from "@/components/ui/select"; -import { Avatar, AvatarFallback, AvatarImage } from "@/components/ui/avatar"; -import { Badge } from "@/components/ui/badge"; -import { Skeleton } from "@/components/ui/skeleton"; -import { useChatStore } from "@/lib/store"; -import { apiClient } from "@/lib/api"; -import { HUMAN_DEMONSTRATION_AGENT_ID } from "@/lib/constants"; - -export function ChatHeader() { - const { selectedAssistant, setSelectedAssistant, currentRun } = - useChatStore(); - - const { data: assistantsListResponse, isLoading } = useQuery({ - queryKey: ["assistants"], - queryFn: () => - apiClient.listAssistants().then((response) => { - return { - ...response, - data: response.data.filter( - (a) => a.id !== HUMAN_DEMONSTRATION_AGENT_ID - ), - }; - }), - }); - - const handleAssistantChange = (assistantId: string) => { - const assistant = assistantsListResponse?.data.find( - (a) => a.id === assistantId - ); - if (assistant) { - setSelectedAssistant(assistant); - } - }; - - if (isLoading) { - return ( -
-
- - -
- -
- ); - } - - return ( -
-
- -
- - {currentRun && ( - - - {currentRun.status === "in_progress" - ? "Thinking..." - : currentRun.status} - - )} -
- ); -} diff --git a/src/chat/ui/components/chat/chat-input.tsx b/src/chat/ui/components/chat/chat-input.tsx deleted file mode 100644 index 32ef0ce6..00000000 --- a/src/chat/ui/components/chat/chat-input.tsx +++ /dev/null @@ -1,520 +0,0 @@ -"use client"; - -import { useState, useRef, useCallback } from "react"; -import { - Send, - Plus, - X, - Paperclip, - Square, - MousePointerClick, -} from "lucide-react"; -import { motion, AnimatePresence } from "framer-motion"; -import { useMutation, useQueryClient } from "@tanstack/react-query"; -import { toast } from "sonner"; -import { Button } from "@/components/ui/button"; -import { Textarea } from "@/components/ui/textarea"; -import { - Tooltip, - TooltipContent, - TooltipProvider, - TooltipTrigger, -} from "@/components/ui/tooltip"; -import { useChatStore } from "@/lib/store"; -import { apiClient } from "@/lib/api"; -import { Event } from "@/lib/types"; -import { HUMAN_DEMONSTRATION_AGENT_ID } from "@/lib/constants"; - -interface AttachedFile { - id: string; - file: File; - preview: string; - type: "image"; -} - -let buffer = ""; - -const SseSplitterStream = (): TransformStream => - new TransformStream({ - start() {}, - transform(chunk, controller) { - buffer += chunk; - const parts = buffer.split("\n\n"); - buffer = parts.pop()!; // Keep the last partial event in buffer - - for (const part of parts) { - controller.enqueue(part); - } - }, - flush(controller) {}, - }); - -function parseSseMessage(message: string): Event { - const lines = message.split("\n"); - let type = "message"; - const dataLines: string[] = []; - - for (const line of lines) { - if (line.startsWith("event:")) { - type = line.slice(6).trim(); - } else if (line.startsWith("data:")) { - dataLines.push(line.slice(5).trim()); - } - } - - if (dataLines.length === 0) { - throw new Error("No data field in SSE message"); - } - - const rawData = dataLines.join("\n"); - - try { - switch (type) { - case "thread.run.created": - case "thread.run.queued": - case 
"thread.run.in_progress": - case "thread.run.completed": - case "thread.run.cancelling": - case "thread.run.cancelled": - case "thread.run.failed": - case "thread.run.expired": - return { type, data: JSON.parse(rawData) }; - case "thread.message.created": - return { type, data: JSON.parse(rawData) }; - case "error": - return { type, data: JSON.parse(rawData) }; - case "done": - return { type, data: "[DONE]" }; - default: - throw new Error(`Unknown event type: ${type}`); - } - } catch (e) { - throw new Error( - `Failed to parse SSE data of event "${type}": ${ - e instanceof Error ? e.message : String(e) - }: ${rawData}` - ); - } -} - -export function ChatInput() { - const [message, setMessage] = useState(""); - const [attachedFiles, setAttachedFiles] = useState([]); - const [isDragOver, setIsDragOver] = useState(false); - const [runningAction, setRunningAction] = useState<"send" | "demo" | null>( - null - ); - const textareaRef = useRef(null); - const fileInputRef = useRef(null); - const queryClient = useQueryClient(); - - const { - selectedThread, - selectedAssistant, - currentRun, - setCurrentRun, - appendMessage, - clearMessages, - } = useChatStore(); - - const createMessageMutation = useMutation({ - mutationFn: async (data: { content: any; role: "user" }) => { - if (!selectedThread) throw new Error("No thread selected"); - return apiClient.createMessage(selectedThread.id, data); - }, - onSuccess: () => { - queryClient.invalidateQueries({ - queryKey: ["messages", selectedThread?.id], - }); - }, - onError: (error) => { - toast.error(`Failed to send message: ${error}`); - }, - }); - - const createRunMutation = useMutation({ - mutationFn: async (assistantId: string) => { - if (!selectedThread || !assistantId) { - throw new Error("Thread and assistant required"); - } - - clearMessages(); - const response = await fetch( - `${ - process.env.NEXT_PUBLIC_API_URL || "http://localhost:8000" - }/v1/threads/${selectedThread.id}/runs`, - { - method: "POST", - headers: { - 
"Content-Type": "application/json", - }, - body: JSON.stringify({ - assistant_id: assistantId, - stream: true, - }), - } - ); - - if (!response.ok) { - throw new Error(`API Error: ${response.status} ${response.statusText}`); - } - - if (!response.body) { - throw new Error("No response body"); - } - - const reader = response.body - .pipeThrough(new TextDecoderStream()) - .pipeThrough(SseSplitterStream()) - .getReader(); - - while (true) { - const { done, value } = await reader.read(); - if (done) break; - const event: Event = parseSseMessage(value); - switch (event.type) { - case "thread.run.created": - case "thread.run.queued": - case "thread.run.in_progress": - case "thread.run.completed": - case "thread.run.cancelling": - case "thread.run.cancelled": - case "thread.run.failed": - setCurrentRun(event.data); - break; - case "thread.run.expired": - setCurrentRun(event.data); - throw new Error("Run expired"); - case "thread.message.created": - appendMessage(event.data); - break; - case "error": - throw new Error(event.data.error.message); - case "done": - setCurrentRun(null); - break; - } - } - }, - onSuccess: () => { - queryClient.invalidateQueries({ - queryKey: ["messages", selectedThread?.id], - }); - setCurrentRun(null); - setRunningAction(null); - }, - onError: (error) => { - toast.error(`Run failed: ${error.message}`); - queryClient.invalidateQueries({ - queryKey: ["messages", selectedThread?.id], - }); - setCurrentRun(null); - setRunningAction(null); - }, - }); - - const handleFileSelect = (files: FileList | null) => { - if (!files) return; - - Array.from(files).forEach((file) => { - if (file.type.startsWith("image/")) { - const reader = new FileReader(); - reader.onload = (e) => { - const newFile: AttachedFile = { - id: Math.random().toString(36).substr(2, 9), - file, - preview: e.target?.result as string, - type: "image", - }; - setAttachedFiles((prev) => [...prev, newFile]); - }; - reader.readAsDataURL(file); - } else { - toast.error("Only image files are 
supported"); - } - }); - }; - - const handleSubmit = async (e: React.FormEvent) => { - e.preventDefault(); - - if (!selectedThread || !selectedAssistant) { - toast.error("Please select a thread and assistant"); - return; - } - - if (message.trim() || attachedFiles.length > 0) { - const content: any[] = []; - - if (message.trim()) { - content.push({ - type: "text", - text: message.trim(), - }); - } - - attachedFiles.forEach((file) => { - const base64Data = file.preview.split(",")[1]; - content.push({ - type: "image", - source: { - type: "base64", - media_type: file.file.type, - data: base64Data, - }, - }); - }); - - await createMessageMutation.mutateAsync({ - content: - content.length === 1 && content[0].type === "text" - ? content[0].text - : content, - role: "user", - }); - - setMessage(""); - setAttachedFiles([]); - } - - if (!selectedAssistant.id) { - toast.warning( - "Select an assistant and hit the send button again if you want to receive an answer" - ); - return; - } - - setRunningAction("send"); - await createRunMutation.mutateAsync(selectedAssistant.id); - }; - - const handleCancel = () => { - if (currentRun) { - // Cancel the run - apiClient - .cancelRun(currentRun.thread_id, currentRun.id) - .then(() => { - toast.success("Send request to cancel run"); - }) - .catch(() => { - toast.error("Failed to send request to cancel run"); - }); - } - }; - - const handleDemo = async () => { - setRunningAction("demo"); - await createRunMutation.mutateAsync(HUMAN_DEMONSTRATION_AGENT_ID); - }; - - const removeFile = (fileId: string) => { - setAttachedFiles((prev) => prev.filter((f) => f.id !== fileId)); - }; - - const handleDragOver = useCallback((e: React.DragEvent) => { - e.preventDefault(); - setIsDragOver(true); - }, []); - - const handleDragLeave = useCallback((e: React.DragEvent) => { - e.preventDefault(); - setIsDragOver(false); - }, []); - - const handleDrop = useCallback((e: React.DragEvent) => { - e.preventDefault(); - setIsDragOver(false); - 
handleFileSelect(e.dataTransfer.files); - }, []); - - const isLoading = - createMessageMutation.isPending || createRunMutation.isPending; - - return ( - -
-
- {/* File Attachments */} - - {attachedFiles.length > 0 && ( - - {attachedFiles.map((file) => ( -
- {file.file.name} - -
- ))} -
- )} -
- - {/* Input Area */} -
-