diff --git a/.vscode/launch.json b/.vscode/launch.json index 7c5ddf42..b1ad64d6 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -28,7 +28,7 @@ "preLaunchTask": "Create .env.tmp file", "postDebugTask": "Delete .env.tmp file", "module": "uvicorn", - "args": ["src.chat.api.app:app","--reload","--port","8000"], + "args": ["src.askui.chat.api.app:app","--reload","--port","9261"], "envFile": "${workspaceFolder}/.env.tmp", "env": { "ASKUI_WORKSPACES__LOG__FORMAT": "logfmt", diff --git a/README.md b/README.md index d356cd7e..8fcf51f6 100644 --- a/README.md +++ b/README.md @@ -775,25 +775,31 @@ If you would like to disable the recording of usage data, set the `ASKUI__VA__TE ### AskUI Chat AskUI Chat is a web application that allows interacting with an AskUI Vision Agent similar how it can be -done with `VisionAgent.act()` but in a more interactive manner that involves less code. Aside from -telling the AskUI Vision Agent what to do, the user can also demonstrate what to do (currently, only +done with `VisionAgent.act()` or `AndroidVisionAgent.act()` but in a more interactive manner that involves less code. Aside from +telling the agent what to do, the user can also demonstrate what to do (currently, only clicking is supported). **⚠️ Warning:** AskUI Chat is currently in an experimental stage and has several limitations (see below). +#### Architecture + +This repository only includes the AskUI Chat API (`src/askui/chat`). The AskUI Chat UI can be accessed through the [AskUI Hub](https://hub.askui.com/) and connects to the local Chat API after it has been started. + #### Configuration To use the chat, configure the following environment variables: - `ASKUI_TOKEN`: AskUI Vision Agent behind chat uses currently the AskUI API - `ASKUI_WORKSPACE_ID`: AskUI Vision Agent behind chat uses currently the AskUI API -- `ASKUI__CHAT_API__DATA_DIR` (optional, defaults to `$(pwd)/chat`): Currently, the AskUI chat stores its data in a directory locally. 
You can change the default directory by setting this environment variable. +- `ASKUI__CHAT_API__DATA_DIR` (optional, defaults to `$(pwd)/chat`): Currently, the AskUI chat stores all data in a directory locally. You can change the default directory by setting this environment variable. +- `ASKUI__CHAT_API__HOST` (optional, defaults to `127.0.0.1`): The host to bind the chat API to. +- `ASKUI__CHAT_API__PORT` (optional, defaults to `9261`): The port to bind the chat API to. +- `ASKUI__CHAT_API__LOG_LEVEL` (optional, defaults to `info`): The log level to use for the chat API. #### Installation ```bash -pdm install # is going to install the dependencies of the api -pdm run chat:ui:install # is going to install the dependencies of the ui +pip install "askui[chat]" ``` You may need to give permissions on the fast run of the Chat UI to demonstrate actions (aka record clicks). @@ -801,8 +807,7 @@ You may need to give permissions on the fast run of the Chat UI to demonstrate a #### Usage ```bash -pdm run chat:api # is going to start the api at port 8000 -pdm run chat:ui # is going to start the ui at port 3000 +python -m askui.chat ``` You can use the chat to record a workflow and redo it later. For that, just tell the agent to redo all previous steps. @@ -815,7 +820,7 @@ You can use the chat to record a workflow and redo it later. For that, just tell #### Limitations - A lot of errors are not handled properly and we allow the user to do a lot of actions that can lead to errors instead of properly guiding the user. -- The chat currently only allows rerunning actions through `VisionAgent.act()` which can be expensive, slow and is not necessary the most reliable way to do it. +- The chat currently only allows rerunning actions through `VisionAgent.act()` (or `AndroidVisionAgent.act()` or `WebVisionAgent.act()`) which can be expensive, slow and is not necessarily the most reliable way to do it. - A lot quirks in UI and API. - Currently, api and ui need to be run in dev mode. 
- When demonstrating actions, the corresponding screenshot may not reflect the correct state of the screen before the action. In this case, cancel demonstrating, delete messages and try again. @@ -824,10 +829,3 @@ You can use the chat to record a workflow and redo it later. For that, just tell - The agent is going to fail if there are no messages in the conversation, there is no tool use result message following the tool use message somewhere in the conversation, a message is too long etc. Just adding or deleting the message in this case should fix the issue. - You should not switch the conversation while waiting for an agent's answers or demonstrating actions. - - - -#### Architecture - -- The chat api/backend is a [FastAPI](https://fastapi.tiangolo.com/) application that provides a REST API similar to [OpenAI's Assistants API](https://platform.openai.com/docs/assistants/overview). -- The chat ui/frontend is a [Next.js](https://nextjs.org/) application that provides a web interface to the chat api. diff --git a/pdm.lock b/pdm.lock index 86256aa0..88c11d9a 100644 --- a/pdm.lock +++ b/pdm.lock @@ -2,10 +2,10 @@ # It is not intended for manual editing. 
[metadata] -groups = ["default", "chat", "pynput", "test"] +groups = ["default", "android", "chat", "pynput", "test", "web"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:9d38242005523af1ed152c8b64eaf12264152a0ce121ca05efb3a21014c5798d" +content_hash = "sha256:cddcbf34746f07954e7be404593f3972cacc6c15aae3dcc3576937f46bf64a0a" [[metadata.targets]] requires_python = ">=3.10" @@ -151,7 +151,7 @@ name = "click" version = "8.1.8" requires_python = ">=3.7" summary = "Composable command line interface toolkit" -groups = ["default", "chat"] +groups = ["chat"] dependencies = [ "colorama; platform_system == \"Windows\"", "importlib-metadata; python_version < \"3.8\"", @@ -318,7 +318,7 @@ name = "evdev" version = "1.9.2" requires_python = ">=3.8" summary = "Bindings to the Linux input handling subsystem" -groups = ["pynput"] +groups = ["chat", "pynput"] marker = "\"linux\" in sys_platform" files = [ {file = "evdev-1.9.2.tar.gz", hash = "sha256:5d3278892ce1f92a74d6bf888cc8525d9f68af85dbe336c95d1c87fb8f423069"}, @@ -330,6 +330,7 @@ version = "1.2.2" requires_python = ">=3.7" summary = "Backport of PEP 654 (exception groups)" groups = ["default", "chat", "test"] +marker = "python_version < \"3.11\"" files = [ {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, @@ -362,27 +363,6 @@ files = [ {file = "fastapi-0.115.12.tar.gz", hash = "sha256:1e2c2a2646905f9e83d32f04a3f86aff4a286669c6c950ca95b5fd68c2602681"}, ] -[[package]] -name = "fastmcp" -version = "2.3.4" -requires_python = ">=3.10" -summary = "The fast, Pythonic way to build MCP servers." 
-groups = ["default"] -dependencies = [ - "exceptiongroup>=1.2.2", - "httpx>=0.28.1", - "mcp<2.0.0,>=1.8.1", - "openapi-pydantic>=0.5.1", - "python-dotenv>=1.1.0", - "rich>=13.9.4", - "typer>=0.15.2", - "websockets>=14.0", -] -files = [ - {file = "fastmcp-2.3.4-py3-none-any.whl", hash = "sha256:12a45f72dd95aeaa1a6a56281fff96ca46929def3ccd9f9eb125cb97b722fbab"}, - {file = "fastmcp-2.3.4.tar.gz", hash = "sha256:f3fe004b8735b365a65ec2547eeb47db8352d5613697254854bc7c9c3c360eea"}, -] - [[package]] name = "filelock" version = "3.18.0" @@ -424,6 +404,59 @@ files = [ {file = "gradio_client-1.8.0.tar.gz", hash = "sha256:a58c520c73fa7ff8bef54e41b19df2cd9071fd9d0cc00475eb397842baed19c8"}, ] +[[package]] +name = "greenlet" +version = "3.2.3" +requires_python = ">=3.9" +summary = "Lightweight in-process concurrent programming" +groups = ["chat", "test", "web"] +files = [ + {file = "greenlet-3.2.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:1afd685acd5597349ee6d7a88a8bec83ce13c106ac78c196ee9dde7c04fe87be"}, + {file = "greenlet-3.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:761917cac215c61e9dc7324b2606107b3b292a8349bdebb31503ab4de3f559ac"}, + {file = "greenlet-3.2.3-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:a433dbc54e4a37e4fff90ef34f25a8c00aed99b06856f0119dcf09fbafa16392"}, + {file = "greenlet-3.2.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:72e77ed69312bab0434d7292316d5afd6896192ac4327d44f3d613ecb85b037c"}, + {file = "greenlet-3.2.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:68671180e3849b963649254a882cd544a3c75bfcd2c527346ad8bb53494444db"}, + {file = "greenlet-3.2.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:49c8cfb18fb419b3d08e011228ef8a25882397f3a859b9fe1436946140b6756b"}, + {file = "greenlet-3.2.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = 
"sha256:efc6dc8a792243c31f2f5674b670b3a95d46fa1c6a912b8e310d6f542e7b0712"}, + {file = "greenlet-3.2.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:731e154aba8e757aedd0781d4b240f1225b075b4409f1bb83b05ff410582cf00"}, + {file = "greenlet-3.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:96c20252c2f792defe9a115d3287e14811036d51e78b3aaddbee23b69b216302"}, + {file = "greenlet-3.2.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:784ae58bba89fa1fa5733d170d42486580cab9decda3484779f4759345b29822"}, + {file = "greenlet-3.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0921ac4ea42a5315d3446120ad48f90c3a6b9bb93dd9b3cf4e4d84a66e42de83"}, + {file = "greenlet-3.2.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:d2971d93bb99e05f8c2c0c2f4aa9484a18d98c4c3bd3c62b65b7e6ae33dfcfaf"}, + {file = "greenlet-3.2.3-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c667c0bf9d406b77a15c924ef3285e1e05250948001220368e039b6aa5b5034b"}, + {file = "greenlet-3.2.3-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:592c12fb1165be74592f5de0d70f82bc5ba552ac44800d632214b76089945147"}, + {file = "greenlet-3.2.3-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:29e184536ba333003540790ba29829ac14bb645514fbd7e32af331e8202a62a5"}, + {file = "greenlet-3.2.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:93c0bb79844a367782ec4f429d07589417052e621aa39a5ac1fb99c5aa308edc"}, + {file = "greenlet-3.2.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:751261fc5ad7b6705f5f76726567375bb2104a059454e0226e1eef6c756748ba"}, + {file = "greenlet-3.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:83a8761c75312361aa2b5b903b79da97f13f556164a7dd2d5448655425bd4c34"}, + {file = "greenlet-3.2.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:25ad29caed5783d4bd7a85c9251c651696164622494c00802a139c00d639242d"}, + {file = 
"greenlet-3.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:88cd97bf37fe24a6710ec6a3a7799f3f81d9cd33317dcf565ff9950c83f55e0b"}, + {file = "greenlet-3.2.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:baeedccca94880d2f5666b4fa16fc20ef50ba1ee353ee2d7092b383a243b0b0d"}, + {file = "greenlet-3.2.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:be52af4b6292baecfa0f397f3edb3c6092ce071b499dd6fe292c9ac9f2c8f264"}, + {file = "greenlet-3.2.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0cc73378150b8b78b0c9fe2ce56e166695e67478550769536a6742dca3651688"}, + {file = "greenlet-3.2.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:706d016a03e78df129f68c4c9b4c4f963f7d73534e48a24f5f5a7101ed13dbbb"}, + {file = "greenlet-3.2.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:419e60f80709510c343c57b4bb5a339d8767bf9aef9b8ce43f4f143240f88b7c"}, + {file = "greenlet-3.2.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:93d48533fade144203816783373f27a97e4193177ebaaf0fc396db19e5d61163"}, + {file = "greenlet-3.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:7454d37c740bb27bdeddfc3f358f26956a07d5220818ceb467a483197d84f849"}, + {file = "greenlet-3.2.3-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:500b8689aa9dd1ab26872a34084503aeddefcb438e2e7317b89b11eaea1901ad"}, + {file = "greenlet-3.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a07d3472c2a93117af3b0136f246b2833fdc0b542d4a9799ae5f41c28323faef"}, + {file = "greenlet-3.2.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:8704b3768d2f51150626962f4b9a9e4a17d2e37c8a8d9867bbd9fa4eb938d3b3"}, + {file = "greenlet-3.2.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:5035d77a27b7c62db6cf41cf786cfe2242644a7a337a0e155c80960598baab95"}, + {file = 
"greenlet-3.2.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2d8aa5423cd4a396792f6d4580f88bdc6efcb9205891c9d40d20f6e670992efb"}, + {file = "greenlet-3.2.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2c724620a101f8170065d7dded3f962a2aea7a7dae133a009cada42847e04a7b"}, + {file = "greenlet-3.2.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:873abe55f134c48e1f2a6f53f7d1419192a3d1a4e873bace00499a4e45ea6af0"}, + {file = "greenlet-3.2.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:024571bbce5f2c1cfff08bf3fbaa43bbc7444f580ae13b0099e95d0e6e67ed36"}, + {file = "greenlet-3.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:5195fb1e75e592dd04ce79881c8a22becdfa3e6f500e7feb059b1e6fdd54d3e3"}, + {file = "greenlet-3.2.3-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:3d04332dddb10b4a211b68111dabaee2e1a073663d117dc10247b5b1642bac86"}, + {file = "greenlet-3.2.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8186162dffde068a465deab08fc72c767196895c39db26ab1c17c0b77a6d8b97"}, + {file = "greenlet-3.2.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f4bfbaa6096b1b7a200024784217defedf46a07c2eee1a498e94a1b5f8ec5728"}, + {file = "greenlet-3.2.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:ed6cfa9200484d234d8394c70f5492f144b20d4533f69262d530a1a082f6ee9a"}, + {file = "greenlet-3.2.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:02b0df6f63cd15012bed5401b47829cfd2e97052dc89da3cfaf2c779124eb892"}, + {file = "greenlet-3.2.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:86c2d68e87107c1792e2e8d5399acec2487a4e993ab76c792408e59394d52141"}, + {file = "greenlet-3.2.3-cp314-cp314-win_amd64.whl", hash = "sha256:8c47aae8fbbfcf82cc13327ae802ba13c9c36753b67e760023fd116bc124a62a"}, + {file = "greenlet-3.2.3.tar.gz", hash = 
"sha256:8b0dd8ae4c0d6f5e54ee55ba935eeb3d735a9b58a8a1e5b5cbab64e01a39f365"}, +] + [[package]] name = "grpc-stubs" version = "1.53.0.6" @@ -589,17 +622,6 @@ files = [ {file = "httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc"}, ] -[[package]] -name = "httpx-sse" -version = "0.4.0" -requires_python = ">=3.8" -summary = "Consume Server-Sent Event (SSE) messages with HTTPX." -groups = ["default"] -files = [ - {file = "httpx-sse-0.4.0.tar.gz", hash = "sha256:1e81a3a3070ce322add1d3529ed42eb5f70817f45ed6ec915ab753f961139721"}, - {file = "httpx_sse-0.4.0-py3-none-any.whl", hash = "sha256:f329af6eae57eaa2bdfd962b42524764af68075ea87370a2de920af5341e318f"}, -] - [[package]] name = "huggingface-hub" version = "0.30.1" @@ -791,28 +813,6 @@ files = [ {file = "markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0"}, ] -[[package]] -name = "mcp" -version = "1.9.4" -requires_python = ">=3.10" -summary = "Model Context Protocol SDK" -groups = ["default"] -dependencies = [ - "anyio>=4.5", - "httpx-sse>=0.4", - "httpx>=0.27", - "pydantic-settings>=2.5.2", - "pydantic<3.0.0,>=2.7.2", - "python-multipart>=0.0.9", - "sse-starlette>=1.6.1", - "starlette>=0.27", - "uvicorn>=0.23.1; sys_platform != \"emscripten\"", -] -files = [ - {file = "mcp-1.9.4-py3-none-any.whl", hash = "sha256:7fcf36b62936adb8e63f89346bccca1268eeca9bf6dfb562ee10b1dfbda9dac0"}, - {file = "mcp-1.9.4.tar.gz", hash = "sha256:cfb0bcd1a9535b42edaef89947b9e18a8feb49362e1cc059d6e7fc636f2cb09f"}, -] - [[package]] name = "mdurl" version = "0.1.2" @@ -829,7 +829,7 @@ name = "mss" version = "10.0.0" requires_python = ">=3.9" summary = "An ultra fast cross-platform multiple screenshots module in pure python using ctypes." 
-groups = ["pynput"] +groups = ["chat", "pynput"] files = [ {file = "mss-10.0.0-py3-none-any.whl", hash = "sha256:82cf6460a53d09e79b7b6d871163c982e6c7e9649c426e7b7591b74956d5cb64"}, {file = "mss-10.0.0.tar.gz", hash = "sha256:d903e0d51262bf0f8782841cf16eaa6d7e3e1f12eae35ab41c2e318837c6637f"}, @@ -907,20 +907,6 @@ files = [ {file = "openai-1.85.0.tar.gz", hash = "sha256:6ba76e4ebc5725f71f2f6126c7cb5169ca8de60dd5aa61f350f9448ad162c913"}, ] -[[package]] -name = "openapi-pydantic" -version = "0.5.1" -requires_python = "<4.0,>=3.8" -summary = "Pydantic OpenAPI schema implementation" -groups = ["default"] -dependencies = [ - "pydantic>=1.8", -] -files = [ - {file = "openapi_pydantic-0.5.1-py3-none-any.whl", hash = "sha256:a3a09ef4586f5bd760a8df7f43028b60cafb6d9f61de2acba9574766255ab146"}, - {file = "openapi_pydantic-0.5.1.tar.gz", hash = "sha256:ff6835af6bde7a459fb93eb93bb92b8749b754fc6e51b2f1590a19dc3005ee0d"}, -] - [[package]] name = "packaging" version = "24.2" @@ -1001,6 +987,27 @@ files = [ {file = "pillow-11.1.0.tar.gz", hash = "sha256:368da70808b36d73b4b390a8ffac11069f8a5c85f29eff1f1b01bcf3ef5b2a20"}, ] +[[package]] +name = "playwright" +version = "1.53.0" +requires_python = ">=3.9" +summary = "A high-level API to automate web browsers" +groups = ["chat", "test", "web"] +dependencies = [ + "greenlet<4.0.0,>=3.1.1", + "pyee<14,>=13", +] +files = [ + {file = "playwright-1.53.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:48a1a15ce810f0ffe512b6050de9871ea193b41dd3cc1bbed87b8431012419ba"}, + {file = "playwright-1.53.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:a701f9498a5b87e3f929ec01cea3109fbde75821b19c7ba4bba54f6127b94f76"}, + {file = "playwright-1.53.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:f765498341c4037b4c01e742ae32dd335622f249488ccd77ca32d301d7c82c61"}, + {file = "playwright-1.53.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:db19cb5b58f3b15cad3e2419f4910c053e889202fc202461ee183f1530d1db60"}, + {file = 
"playwright-1.53.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9276c9c935fc062f51f4f5107e56420afd6d9a524348dc437793dc2e34c742e3"}, + {file = "playwright-1.53.0-py3-none-win32.whl", hash = "sha256:36eedec101724ff5a000cddab87dd9a72a39f9b3e65a687169c465484e667c06"}, + {file = "playwright-1.53.0-py3-none-win_amd64.whl", hash = "sha256:d68975807a0fd997433537f1dcf2893cda95884a39dc23c6f591b8d5f691e9e8"}, + {file = "playwright-1.53.0-py3-none-win_arm64.whl", hash = "sha256:fcfd481f76568d7b011571160e801b47034edd9e2383c43d83a5fb3f35c67885"}, +] + [[package]] name = "pluggy" version = "1.5.0" @@ -1032,7 +1039,7 @@ files = [ name = "pure-python-adb" version = "0.3.0.dev0" summary = "Pure python implementation of the adb client" -groups = ["default"] +groups = ["android", "chat"] files = [ {file = "pure-python-adb-0.3.0.dev0.tar.gz", hash = "sha256:0ecc89d780160cfe03260ba26df2c471a05263b2cad0318363573ee8043fb94d"}, ] @@ -1172,6 +1179,20 @@ files = [ {file = "pydantic_settings-2.9.1.tar.gz", hash = "sha256:c509bf79d27563add44e8446233359004ed85066cd096d8b510f715e6ef5d268"}, ] +[[package]] +name = "pyee" +version = "13.0.0" +requires_python = ">=3.8" +summary = "A rough port of Node.js's EventEmitter to Python with a few tricks of its own" +groups = ["chat", "test", "web"] +dependencies = [ + "typing-extensions", +] +files = [ + {file = "pyee-13.0.0-py3-none-any.whl", hash = "sha256:48195a3cddb3b1515ce0695ed76036b5ccc2ef3a9f963ff9f77aec0139845498"}, + {file = "pyee-13.0.0.tar.gz", hash = "sha256:b391e3c5a434d1f5118a25615001dbc8f669cf410ab67d04c4d4e07c55481c37"}, +] + [[package]] name = "pygments" version = "2.19.1" @@ -1198,7 +1219,7 @@ files = [ name = "pynput" version = "1.8.1" summary = "Monitor and control user input devices" -groups = ["pynput"] +groups = ["chat", "pynput"] dependencies = [ "enum34; python_version == \"2.7\"", "evdev>=1.3; \"linux\" in sys_platform", @@ -1217,7 +1238,7 @@ name = "pyobjc-core" version = "11.0" requires_python = 
">=3.8" summary = "Python<->ObjC Interoperability Module" -groups = ["pynput"] +groups = ["chat", "pynput"] marker = "sys_platform == \"darwin\"" files = [ {file = "pyobjc_core-11.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:10866b3a734d47caf48e456eea0d4815c2c9b21856157db5917b61dee06893a1"}, @@ -1233,7 +1254,7 @@ name = "pyobjc-framework-applicationservices" version = "11.0" requires_python = ">=3.9" summary = "Wrappers for the framework ApplicationServices on macOS" -groups = ["pynput"] +groups = ["chat", "pynput"] marker = "sys_platform == \"darwin\"" dependencies = [ "pyobjc-core>=11.0", @@ -1255,7 +1276,7 @@ name = "pyobjc-framework-cocoa" version = "11.0" requires_python = ">=3.9" summary = "Wrappers for the Cocoa frameworks on macOS" -groups = ["pynput"] +groups = ["chat", "pynput"] marker = "sys_platform == \"darwin\"" dependencies = [ "pyobjc-core>=11.0", @@ -1274,7 +1295,7 @@ name = "pyobjc-framework-coretext" version = "11.0" requires_python = ">=3.9" summary = "Wrappers for the framework CoreText on macOS" -groups = ["pynput"] +groups = ["chat", "pynput"] marker = "sys_platform == \"darwin\"" dependencies = [ "pyobjc-core>=11.0", @@ -1295,7 +1316,7 @@ name = "pyobjc-framework-quartz" version = "11.0" requires_python = ">=3.9" summary = "Wrappers for the Quartz frameworks on macOS" -groups = ["pynput"] +groups = ["chat", "pynput"] marker = "sys_platform == \"darwin\"" dependencies = [ "pyobjc-core>=11.0", @@ -1421,22 +1442,11 @@ files = [ {file = "python_dotenv-1.1.0.tar.gz", hash = "sha256:41f90bc6f5f177fb41f53e87666db362025010eb28f60a01c9143bfa33a2b2d5"}, ] -[[package]] -name = "python-multipart" -version = "0.0.20" -requires_python = ">=3.8" -summary = "A streaming multipart parser for Python" -groups = ["default"] -files = [ - {file = "python_multipart-0.0.20-py3-none-any.whl", hash = "sha256:8a62d3a8335e06589fe01f2a3e178cdcc632f3fbe0d492ad9ee0ec35aab1f104"}, - {file = "python_multipart-0.0.20.tar.gz", hash = 
"sha256:8dd0cab45b8e23064ae09147625994d090fa46f5b0d1e13af944c331a7fa9d13"}, -] - [[package]] name = "python-xlib" version = "0.33" summary = "Python X Library" -groups = ["pynput"] +groups = ["chat", "pynput"] marker = "\"linux\" in sys_platform" dependencies = [ "six>=1.10.0", @@ -1580,23 +1590,12 @@ files = [ {file = "setuptools-78.1.0.tar.gz", hash = "sha256:18fd474d4a82a5f83dac888df697af65afa82dec7323d09c3e37d1f14288da54"}, ] -[[package]] -name = "shellingham" -version = "1.5.4" -requires_python = ">=3.7" -summary = "Tool to Detect Surrounding Shell" -groups = ["default"] -files = [ - {file = "shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686"}, - {file = "shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de"}, -] - [[package]] name = "six" version = "1.17.0" requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" summary = "Python 2 and 3 compatibility utilities" -groups = ["default", "pynput"] +groups = ["default", "chat", "pynput"] files = [ {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, @@ -1613,27 +1612,12 @@ files = [ {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, ] -[[package]] -name = "sse-starlette" -version = "2.3.5" -requires_python = ">=3.9" -summary = "SSE plugin for Starlette" -groups = ["default"] -dependencies = [ - "anyio>=4.7.0", - "starlette>=0.41.3", -] -files = [ - {file = "sse_starlette-2.3.5-py3-none-any.whl", hash = "sha256:251708539a335570f10eaaa21d1848a10c42ee6dc3a9cf37ef42266cdb1c52a8"}, - {file = "sse_starlette-2.3.5.tar.gz", hash = "sha256:228357b6e42dcc73a427990e2b4a03c023e2495ecee82e14f07ba15077e334b2"}, -] - [[package]] name = "starlette" version = "0.46.2" 
requires_python = ">=3.9" summary = "The little ASGI library that shines." -groups = ["default", "chat"] +groups = ["chat"] dependencies = [ "anyio<5,>=3.6.2", "typing-extensions>=3.10.0; python_version < \"3.10\"", @@ -1710,23 +1694,6 @@ files = [ {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, ] -[[package]] -name = "typer" -version = "0.15.4" -requires_python = ">=3.7" -summary = "Typer, build great CLIs. Easy to code. Based on Python type hints." -groups = ["default"] -dependencies = [ - "click<8.2,>=8.0.0", - "rich>=10.11.0", - "shellingham>=1.3.0", - "typing-extensions>=3.7.4.3", -] -files = [ - {file = "typer-0.15.4-py3-none-any.whl", hash = "sha256:eb0651654dcdea706780c466cf06d8f174405a659ffff8f163cfbfee98c0e173"}, - {file = "typer-0.15.4.tar.gz", hash = "sha256:89507b104f9b6a0730354f27c39fae5b63ccd0c95b1ce1f1a6ba0cfd329997c3"}, -] - [[package]] name = "types-pillow" version = "10.2.0.20240822" @@ -1801,7 +1768,7 @@ name = "typing-extensions" version = "4.13.1" requires_python = ">=3.8" summary = "Backported and Experimental Type Hints for Python 3.8+" -groups = ["default", "chat", "test"] +groups = ["default", "chat", "test", "web"] files = [ {file = "typing_extensions-4.13.1-py3-none-any.whl", hash = "sha256:4b6cf02909eb5495cfbc3f6e8fd49217e6cc7944e145cdda8caa3734777f9e69"}, {file = "typing_extensions-4.13.1.tar.gz", hash = "sha256:98795af00fb9640edec5b8e31fc647597b4691f099ad75f469a2616be1a76dff"}, @@ -1837,7 +1804,7 @@ name = "uvicorn" version = "0.34.3" requires_python = ">=3.9" summary = "The lightning-fast ASGI server." 
-groups = ["default", "chat"] +groups = ["chat"] dependencies = [ "click>=7.0", "h11>=0.8", diff --git a/pyproject.toml b/pyproject.toml index 9fe2e7f8..9b1b5663 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,25 +5,23 @@ authors = [ {name = "askui GmbH", email = "info@askui.com"}, ] dependencies = [ - "grpcio>=1.67.0", - "grpcio-tools>=1.67.0", - "pillow>=11.0.0", - "pydantic>=2.11.0", "anthropic>=0.54.0", - "rich>=13.9.4", - "pyperclip>=1.9.0", "gradio-client>=1.4.3", - "requests>=2.32.3", + "grpcio-tools>=1.67.0", + "grpcio>=1.67.0", + "httpx>=0.28.1", "Jinja2>=3.1.4", - "tenacity>=9.1.2", + "openai>=1.61.1", + "pillow>=11.0.0", + "py-machineid>=0.7.0", "pydantic-settings>=2.9.1", + "pydantic>=2.11.0", + "pyperclip>=1.9.0", "python-dateutil>=2.9.0.post0", - "openai>=1.61.1", + "requests>=2.32.3", + "rich>=13.9.4", "segment-analytics-python>=2.3.4", - "py-machineid>=0.7.0", - "httpx>=0.28.1", - "fastmcp>=2.3.4", - "pure-python-adb>=0.3.0.dev0", + "tenacity>=9.1.2", ] requires-python = ">=3.10" readme = "README.md" @@ -56,20 +54,10 @@ lint = "ruff check src tests" "lint:fix" = "ruff check --fix src tests" typecheck = "mypy" "typecheck:all" = "mypy src tests" -"chat:api" = "uvicorn chat.api.app:app --reload --port 8000" -"chat:ui:install" = {shell = "cd src/chat/ui && npm ci"} -"chat:ui" = {shell = "cd src/chat/ui && npm run dev"} +"chat:api" = "uvicorn askui.chat.api.app:app --reload --port 9261" "mcp:dev" = "mcp dev src/askui/mcp/__init__.py" [dependency-groups] -chat = [ - "fastapi>=0.115.12", - "uvicorn>=0.34.3", -] -pynput = [ - "mss>=10.0.0", - "pynput>=1.8.1", -] test = [ "pytest>=8.3.4", "ruff>=0.9.5", @@ -85,6 +73,7 @@ test = [ "types-pyperclip>=1.8.2.20240311", "pytest-timeout>=2.4.0", "types-pynput>=1.8.1.20250318", + "playwright>=1.41.0", ] @@ -111,7 +100,6 @@ warn_unreachable = true strict_optional = true plugins = ["pydantic.mypy"] exclude = [ - "src/askui/chat/.*", "src/askui/models/ui_tars_ep/ui_tars_api.py", 
"src/askui/tools/anthropic/computer.py", "src/askui/tools/askui/askui_ui_controller_grpc/.*", @@ -195,6 +183,7 @@ dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" [tool.ruff.lint.per-file-ignores] "src/askui/agent.py" = ["E501"] "src/askui/android_agent.py" = ["E501"] +"src/askui/web_agent.py" = ["E501"] "src/askui/models/shared/android_agent.py" = ["E501"] "src/askui/chat/*" = ["E501", "F401", "F403"] "src/askui/tools/askui/askui_workspaces/*" = ["ALL"] @@ -225,3 +214,24 @@ multiline-quotes = "double" [tool.ruff.lint.isort] known-first-party = ["askui"] known-third-party = ["pytest", "mypy"] + +[project.optional-dependencies] +all = ["askui[android,chat,mcp,pynput,web]"] +android = [ + "pure-python-adb>=0.3.0.dev0" +] +chat = [ + "askui[android,pynput,web]", + "fastapi>=0.115.12", + "uvicorn>=0.34.3", +] +mcp = [ + "fastmcp>=2.3.4", +] +pynput = [ + "mss>=10.0.0", + "pynput>=1.8.1", +] +web = [ + "playwright>=1.41.0", +] diff --git a/src/askui/agent.py b/src/askui/agent.py index 582b2c3d..30dbce32 100644 --- a/src/askui/agent.py +++ b/src/askui/agent.py @@ -28,136 +28,12 @@ from .tools import AgentToolbox, ModifierKey, PcKey from .tools.askui import AskUiControllerClient -_PC_KEY = [ - "backspace", - "delete", - "enter", - "tab", - "escape", - "up", - "down", - "right", - "left", - "home", - "end", - "pageup", - "pagedown", - "f1", - "f2", - "f3", - "f4", - "f5", - "f6", - "f7", - "f8", - "f9", - "f10", - "f11", - "f12", - "space", - "0", - "1", - "2", - "3", - "4", - "5", - "6", - "7", - "8", - "9", - "a", - "b", - "c", - "d", - "e", - "f", - "g", - "h", - "i", - "j", - "k", - "l", - "m", - "n", - "o", - "p", - "q", - "r", - "s", - "t", - "u", - "v", - "w", - "x", - "y", - "z", - "A", - "B", - "C", - "D", - "E", - "F", - "G", - "H", - "I", - "J", - "K", - "L", - "M", - "N", - "O", - "P", - "Q", - "R", - "S", - "T", - "U", - "V", - "W", - "X", - "Y", - "Z", - "!", - '"', - "#", - "$", - "%", - "&", - "'", - "(", - ")", - "*", - "+", - ",", - "-", - 
".", - "/", - ":", - ";", - "<", - "=", - ">", - "?", - "@", - "[", - "\\", - "]", - "^", - "_", - "`", - "{", - "|", - "}", - "~", -] - _SYSTEM_PROMPT = f""" * You are utilising a {sys.platform} machine using {platform.machine()} architecture with internet access. * When asked to perform web tasks try to open the browser (firefox, chrome, safari, ...) if not already open. Often you can find the browser icons in the toolbars of the operating systems. * When viewing a page it can be helpful to zoom out so that you can see everything on the page. Either that, or make sure you scroll down to see everything before deciding something isn't available. -* When using your computer function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request. -* Valid keyboard keys available are {", ".join(_PC_KEY)} -* The current date is {datetime.now(timezone.utc).strftime("%A, %B %d, %Y").replace(" 0", " ")}. +* When using your function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request. +* The current date and time is {datetime.now(timezone.utc).strftime("%A, %B %d, %Y %H:%M:%S %z")}. 
@@ -211,7 +87,7 @@ class VisionAgent(AgentBase): ``` """ - @telemetry.record_call(exclude={"model_router", "reporters", "tools"}) + @telemetry.record_call(exclude={"model_router", "reporters", "tools", "act_tools"}) @validate_call(config=ConfigDict(arbitrary_types_allowed=True)) def __init__( self, @@ -222,6 +98,7 @@ def __init__( model: ModelChoice | ModelComposition | str | None = None, retry: Retry | None = None, models: ModelRegistry | None = None, + act_tools: list[Tool] | None = None, ) -> None: reporter = CompositeReporter(reporters=reporters) self.tools = tools or AgentToolbox( @@ -238,7 +115,8 @@ def __init__( models=models, tools=[ ExceptionTool(), - ], + ] + + (act_tools or []), agent_os=self.tools.os, ) diff --git a/src/chat/__init__.py b/src/askui/chat/__init__.py similarity index 100% rename from src/chat/__init__.py rename to src/askui/chat/__init__.py diff --git a/src/askui/chat/__main__.py b/src/askui/chat/__main__.py new file mode 100644 index 00000000..c03101f7 --- /dev/null +++ b/src/askui/chat/__main__.py @@ -0,0 +1,15 @@ +import uvicorn + +from askui.chat.api.app import app +from askui.chat.api.dependencies import get_settings + +if __name__ == "__main__": + settings = get_settings() + uvicorn.run( + app, + host=settings.host, + port=settings.port, + log_level=settings.log_level, + reload=False, + workers=1, + ) diff --git a/src/chat/api/__init__.py b/src/askui/chat/api/__init__.py similarity index 100% rename from src/chat/api/__init__.py rename to src/askui/chat/api/__init__.py diff --git a/src/chat/api/app.py b/src/askui/chat/api/app.py similarity index 67% rename from src/chat/api/app.py rename to src/askui/chat/api/app.py index 4387fbb0..ebc4f826 100644 --- a/src/chat/api/app.py +++ b/src/askui/chat/api/app.py @@ -4,13 +4,13 @@ from fastapi import APIRouter, FastAPI from fastapi.middleware.cors import CORSMiddleware -from chat.api.assistants.dependencies import get_assistant_service -from chat.api.assistants.router import router as 
assistants_router -from chat.api.dependencies import get_settings -from chat.api.health.router import router as health_router -from chat.api.messages.router import router as messages_router -from chat.api.runs.router import router as runs_router -from chat.api.threads.router import router as threads_router +from askui.chat.api.assistants.dependencies import get_assistant_service +from askui.chat.api.assistants.router import router as assistants_router +from askui.chat.api.dependencies import get_settings +from askui.chat.api.health.router import router as health_router +from askui.chat.api.messages.router import router as messages_router +from askui.chat.api.runs.router import router as runs_router +from askui.chat.api.threads.router import router as threads_router @asynccontextmanager diff --git a/src/chat/api/assistants/__init__.py b/src/askui/chat/api/assistants/__init__.py similarity index 100% rename from src/chat/api/assistants/__init__.py rename to src/askui/chat/api/assistants/__init__.py diff --git a/src/chat/api/assistants/dependencies.py b/src/askui/chat/api/assistants/dependencies.py similarity index 61% rename from src/chat/api/assistants/dependencies.py rename to src/askui/chat/api/assistants/dependencies.py index 014f21a5..d0d99dfb 100644 --- a/src/chat/api/assistants/dependencies.py +++ b/src/askui/chat/api/assistants/dependencies.py @@ -1,8 +1,8 @@ from fastapi import Depends -from chat.api.assistants.service import AssistantService -from chat.api.dependencies import SettingsDep -from chat.api.settings import Settings +from askui.chat.api.assistants.service import AssistantService +from askui.chat.api.dependencies import SettingsDep +from askui.chat.api.settings import Settings def get_assistant_service(settings: Settings = SettingsDep) -> AssistantService: diff --git a/src/chat/api/assistants/models.py b/src/askui/chat/api/assistants/models.py similarity index 84% rename from src/chat/api/assistants/models.py rename to 
src/askui/chat/api/assistants/models.py index fcfb9d6d..af9645c8 100644 --- a/src/chat/api/assistants/models.py +++ b/src/askui/chat/api/assistants/models.py @@ -3,8 +3,8 @@ from pydantic import BaseModel, Field -from chat.api.models import UnixDatetime -from chat.api.utils import generate_time_ordered_id +from askui.chat.api.models import UnixDatetime +from askui.chat.api.utils import generate_time_ordered_id class Assistant(BaseModel): diff --git a/src/chat/api/assistants/router.py b/src/askui/chat/api/assistants/router.py similarity index 85% rename from src/chat/api/assistants/router.py rename to src/askui/chat/api/assistants/router.py index 37140e46..6b136c8d 100644 --- a/src/chat/api/assistants/router.py +++ b/src/askui/chat/api/assistants/router.py @@ -1,12 +1,12 @@ from fastapi import APIRouter, HTTPException # from fastapi import status -from chat.api.assistants.dependencies import AssistantServiceDep -from chat.api.assistants.models import Assistant -from chat.api.assistants.service import ( - AssistantService, # AssistantModifyRequest, CreateAssistantRequest, -) -from chat.api.models import ListQuery, ListQueryDep, ListResponse +from askui.chat.api.assistants.dependencies import AssistantServiceDep +from askui.chat.api.assistants.models import Assistant +from askui.chat.api.assistants.service import ( + AssistantService, +) # AssistantModifyRequest, CreateAssistantRequest, +from askui.chat.api.models import ListQuery, ListQueryDep, ListResponse router = APIRouter(prefix="/assistants", tags=["assistants"]) diff --git a/src/askui/chat/api/assistants/seeds.py b/src/askui/chat/api/assistants/seeds.py new file mode 100644 index 00000000..8e5b2db0 --- /dev/null +++ b/src/askui/chat/api/assistants/seeds.py @@ -0,0 +1,32 @@ +from askui.chat.api.assistants.models import Assistant + +ASKUI_VISION_AGENT = Assistant( + id="asst_ge3tiojsga3dgnruge3di2u5ov36shedkcslxnmca", + name="AskUI Vision Agent", + 
avatar="data:image/svg+xml;base64,PHN2ZyAgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIgogIHdpZHRoPSIyNCIKICBoZWlnaHQ9IjI0IgogIHZpZXdCb3g9IjAgMCAyNCAyNCIKICBmaWxsPSJub25lIgogIHN0cm9rZT0iIzAwMCIgc3R5bGU9ImJhY2tncm91bmQtY29sb3I6ICNmZmY7IGJvcmRlci1yYWRpdXM6IDJweCIKICBzdHJva2Utd2lkdGg9IjIiCiAgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIgogIHN0cm9rZS1saW5lam9pbj0icm91bmQiCj4KICA8cGF0aCBkPSJNMTIgOFY0SDgiIC8+CiAgPHJlY3Qgd2lkdGg9IjE2IiBoZWlnaHQ9IjEyIiB4PSI0IiB5PSI4IiByeD0iMiIgLz4KICA8cGF0aCBkPSJNMiAxNGgyIiAvPgogIDxwYXRoIGQ9Ik0yMCAxNGgyIiAvPgogIDxwYXRoIGQ9Ik0xNSAxM3YyIiAvPgogIDxwYXRoIGQ9Ik05IDEzdjIiIC8+Cjwvc3ZnPgo=", +) + +HUMAN_DEMONSTRATION_AGENT = Assistant( + id="asst_ge3tiojsga3dgnruge3di2u5ov36shedkcslxnmcb", + name="Human DemonstrationAgent", + avatar="data:image/svg+xml;base64,PHN2ZyAgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIgogIHdpZHRoPSIyNCIKICBoZWlnaHQ9IjI0IgogIHZpZXdCb3g9IjAgMCAyNCAyNCIKICBmaWxsPSJub25lIgogIHN0cm9rZT0iIzAwMCIgc3R5bGU9ImJhY2tncm91bmQtY29sb3I6ICNmZmY7IGJvcmRlci1yYWRpdXM6IDJweCIKICBzdHJva2Utd2lkdGg9IjIiCiAgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIgogIHN0cm9rZS1saW5lam9pbj0icm91bmQiCj4KICA8cGF0aCBkPSJNMTkgMjF2LTJhNCA0IDAgMCAwLTQtNEg5YTQgNCAwIDAgMC00IDR2MiIgLz4KICA8Y2lyY2xlIGN4PSIxMiIgY3k9IjciIHI9IjQiIC8+Cjwvc3ZnPgo=", +) + +ANDROID_VISION_AGENT = Assistant( + id="asst_78da09fbf1ed43c7826fb1686f89f541", + name="AskUI Android Vision Agent", + 
avatar="data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciICB2aWV3Qm94PSIwIDAgNDggNDgiIHdpZHRoPSIyNXB4IiBoZWlnaHQ9IjI1cHgiPjxwYXRoIGQ9Ik0gMzIuNTE5NTMxIDAuOTgyNDIxODggQSAxLjUwMDE1IDEuNTAwMTUgMCAwIDAgMzEuMjc5Mjk3IDEuNjI4OTA2MiBMIDI5LjQzNzUgNC4yMDg5ODQ0IEMgMjcuNzgwMjA3IDMuNDQwNTAwNiAyNS45NDE5MSAzIDI0IDMgQyAyMi4wNTgwOSAzIDIwLjIxOTc5MyAzLjQ0MDUwMDYgMTguNTYyNSA0LjIwODk4NDQgTCAxNi43MjA3MDMgMS42Mjg5MDYyIEEgMS41MDAxNSAxLjUwMDE1IDAgMCAwIDE1LjQzNTU0NyAwLjk4NDM3NSBBIDEuNTAwMTUgMS41MDAxNSAwIDAgMCAxNC4yNzkyOTcgMy4zNzEwOTM4IEwgMTYgNS43NzkyOTY5IEMgMTMuMTM4ODk2IDguMDI0NzU4MiAxMS4yNDUxODggMTEuNDM2MDIgMTEuMDM1MTU2IDE1LjI5MTAxNiBDIDEwLjU1MzI2IDE1LjExMjgxOCAxMC4wNDA0MDggMTUgOS41IDE1IEMgNy4wMzI0OTkxIDE1IDUgMTcuMDMyNDk5IDUgMTkuNSBMIDUgMzAuNSBDIDUgMzIuOTY3NTAxIDcuMDMyNDk5MSAzNSA5LjUgMzUgQyAxMC4wOTAzMTMgMzUgMTAuNjUzMjI5IDM0Ljg3ODc0OSAxMS4xNzE4NzUgMzQuNjY3OTY5IEMgMTEuNTY0MzM2IDM2LjA3MjEwNSAxMi42MzEzMzMgMzcuMTk2OTk0IDE0IDM3LjY5MzM1OSBMIDE0IDQxLjUgQyAxNCA0My45Njc1MDEgMTYuMDMyNDk5IDQ2IDE4LjUgNDYgQyAyMC45Njc1MDEgNDYgMjMgNDMuOTY3NTAxIDIzIDQxLjUgTCAyMyAzOCBMIDI1IDM4IEwgMjUgNDEuNSBDIDI1IDQzLjk2NzUwMSAyNy4wMzI0OTkgNDYgMjkuNSA0NiBDIDMxLjk2NzUwMSA0NiAzNCA0My45Njc1MDEgMzQgNDEuNSBMIDM0IDM3LjY5MzM1OSBDIDM1LjM2ODY2NyAzNy4xOTY5OTQgMzYuNDM1NjY0IDM2LjA3MjEwNSAzNi44MjgxMjUgMzQuNjY3OTY5IEMgMzcuMzQ2NzcxIDM0Ljg3ODc0OSAzNy45MDk2ODcgMzUgMzguNSAzNSBDIDQwLjk2NzUwMSAzNSA0MyAzMi45Njc1MDEgNDMgMzAuNSBMIDQzIDE5LjUgQyA0MyAxNy4wMzI0OTkgNDAuOTY3NTAxIDE1IDM4LjUgMTUgQyAzNy45NTk1OTIgMTUgMzcuNDQ2NzQgMTUuMTEyODE4IDM2Ljk2NDg0NCAxNS4yOTEwMTYgQyAzNi43NTQ4MTIgMTEuNDM2MDIgMzQuODYxMTA0IDguMDI0NzU4MiAzMiA1Ljc3OTI5NjkgTCAzMy43MjA3MDMgMy4zNzEwOTM4IEEgMS41MDAxNSAxLjUwMDE1IDAgMCAwIDMyLjUxOTUzMSAwLjk4MjQyMTg4IHogTSAyNCA2IEMgMjkuMTg1MTI3IDYgMzMuMjc2NzI3IDkuOTU3NTEzMiAzMy43OTg4MjggMTUgTCAxNC4yMDExNzIgMTUgQyAxNC43MjMyNzMgOS45NTc1MTMyIDE4LjgxNDg3MyA2IDI0IDYgeiBNIDE5LjUgMTAgQSAxLjUgMS41IDAgMCAwIDE5LjUgMTMgQSAxLjUgMS41IDAgMCAwIDE5LjUgMTAgeiBNIDI4LjUgMTAgQSAxLjUgMS41IDAgMCAwIDI4LjUgMTMgQSAxLjUgMS41IDAgMCAwIDI4LjUgMTAgei
BNIDkuNSAxOCBDIDEwLjM0NjQ5OSAxOCAxMSAxOC42NTM1MDEgMTEgMTkuNSBMIDExIDMwLjUgQyAxMSAzMS4zNDY0OTkgMTAuMzQ2NDk5IDMyIDkuNSAzMiBDIDguNjUzNTAwOSAzMiA4IDMxLjM0NjQ5OSA4IDMwLjUgTCA4IDE5LjUgQyA4IDE4LjY1MzUwMSA4LjY1MzUwMDkgMTggOS41IDE4IHogTSAxNCAxOCBMIDM0IDE4IEwgMzQgMTkuNSBMIDM0IDMwLjUgTCAzNCAzMy41IEMgMzQgMzQuMzQ2NDk5IDMzLjM0NjQ5OSAzNSAzMi41IDM1IEwgMjUgMzUgTCAyMyAzNSBMIDE1LjUgMzUgQyAxNC42NTM1MDEgMzUgMTQgMzQuMzQ2NDk5IDE0IDMzLjUgTCAxNCAzMC41IEwgMTQgMTkuNSBMIDE0IDE4IHogTSAzOC41IDE4IEMgMzkuMzQ2NDk5IDE4IDQwIDE4LjY1MzUwMSA0MCAxOS41IEwgNDAgMzAuNSBDIDQwIDMxLjM0NjQ5OSAzOS4zNDY0OTkgMzIgMzguNSAzMiBDIDM3LjY1MzUwMSAzMiAzNyAzMS4zNDY0OTkgMzcgMzAuNSBMIDM3IDE5LjUgQyAzNyAxOC42NTM1MDEgMzcuNjUzNTAxIDE4IDM4LjUgMTggeiBNIDE3IDM4IEwgMjAgMzggTCAyMCA0MS41IEMgMjAgNDIuMzQ2NDk5IDE5LjM0NjQ5OSA0MyAxOC41IDQzIEMgMTcuNjUzNTAxIDQzIDE3IDQyLjM0NjQ5OSAxNyA0MS41IEwgMTcgMzggeiBNIDI4IDM4IEwgMzEgMzggTCAzMSA0MS41IEMgMzEgNDIuMzQ2NDk5IDMwLjM0NjQ5OSA0MyAyOS41IDQzIEMgMjguNjUzNTAxIDQzIDI4IDQyLjM0NjQ5OSAyOCA0MS41IEwgMjggMzggeiIvPjwvc3ZnPg==", +) + +ASKUI_WEB_AGENT = Assistant( + id="asst_ge3tiojsga3dgnruge3di2u5ov36shedkcslxnmcc", + name="AskUI Web Vision Agent", + 
avatar="data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI0MDAiIGhlaWdodD0iNDAwIiB2aWV3Qm94PSIwIDAgNDAwIDQwMCIgZmlsbD0ibm9uZSI+CjxwYXRoIGQ9Ik0xMzYuNDQ0IDIyMS41NTZDMTIzLjU1OCAyMjUuMjEzIDExNS4xMDQgMjMxLjYyNSAxMDkuNTM1IDIzOC4wMzJDMTE0Ljg2OSAyMzMuMzY0IDEyMi4wMTQgMjI5LjA4IDEzMS42NTIgMjI2LjM0OEMxNDEuNTEgMjIzLjU1NCAxNDkuOTIgMjIzLjU3NCAxNTYuODY5IDIyNC45MTVWMjE5LjQ4MUMxNTAuOTQxIDIxOC45MzkgMTQ0LjE0NSAyMTkuMzcxIDEzNi40NDQgMjIxLjU1NlpNMTA4Ljk0NiAxNzUuODc2TDYxLjA4OTUgMTg4LjQ4NEM2MS4wODk1IDE4OC40ODQgNjEuOTYxNyAxODkuNzE2IDYzLjU3NjcgMTkxLjM2TDEwNC4xNTMgMTgwLjY2OEMxMDQuMTUzIDE4MC42NjggMTAzLjU3OCAxODguMDc3IDk4LjU4NDcgMTk0LjcwNUMxMDguMDMgMTg3LjU1OSAxMDguOTQ2IDE3NS44NzYgMTA4Ljk0NiAxNzUuODc2Wk0xNDkuMDA1IDI4OC4zNDdDODEuNjU4MiAzMDYuNDg2IDQ2LjAyNzIgMjI4LjQzOCAzNS4yMzk2IDE4Ny45MjhDMzAuMjU1NiAxNjkuMjI5IDI4LjA3OTkgMTU1LjA2NyAyNy41IDE0NS45MjhDMjcuNDM3NyAxNDQuOTc5IDI3LjQ2NjUgMTQ0LjE3OSAyNy41MzM2IDE0My40NDZDMjQuMDQgMTQzLjY1NyAyMi4zNjc0IDE0NS40NzMgMjIuNzA3NyAxNTAuNzIxQzIzLjI4NzYgMTU5Ljg1NSAyNS40NjMzIDE3NC4wMTYgMzAuNDQ3MyAxOTIuNzIxQzQxLjIzMDEgMjMzLjIyNSA3Ni44NjU5IDMxMS4yNzMgMTQ0LjIxMyAyOTMuMTM0QzE1OC44NzIgMjg5LjE4NSAxNjkuODg1IDI4MS45OTIgMTc4LjE1MiAyNzIuODFDMTcwLjUzMiAyNzkuNjkyIDE2MC45OTUgMjg1LjExMiAxNDkuMDA1IDI4OC4zNDdaTTE2MS42NjEgMTI4LjExVjEzMi45MDNIMTg4LjA3N0MxODcuNTM1IDEzMS4yMDYgMTg2Ljk4OSAxMjkuNjc3IDE4Ni40NDcgMTI4LjExSDE2MS42NjFaIiBmaWxsPSIjMkQ0NTUyIi8+CjxwYXRoIGQ9Ik0xOTMuOTgxIDE2Ny41ODRDMjA1Ljg2MSAxNzAuOTU4IDIxMi4xNDQgMTc5LjI4NyAyMTUuNDY1IDE4Ni42NThMMjI4LjcxMSAxOTAuNDJDMjI4LjcxMSAxOTAuNDIgMjI2LjkwNCAxNjQuNjIzIDIwMy41NyAxNTcuOTk1QzE4MS43NDEgMTUxLjc5MyAxNjguMzA4IDE3MC4xMjQgMTY2LjY3NCAxNzIuNDk2QzE3My4wMjQgMTY3Ljk3MiAxODIuMjk3IDE2NC4yNjggMTkzLjk4MSAxNjcuNTg0Wk0yOTkuNDIyIDE4Ni43NzdDMjc3LjU3MyAxODAuNTQ3IDI2NC4xNDUgMTk4LjkxNiAyNjIuNTM1IDIwMS4yNTVDMjY4Ljg5IDE5Ni43MzYgMjc4LjE1OCAxOTMuMDMxIDI4OS44MzcgMTk2LjM2MkMzMDEuNjk4IDE5OS43NDEgMzA3Ljk3NiAyMDguMDYgMzExLjMwNyAyMTUuNDM2TDMyNC41NzIgMjE5LjIxMkMzMjQuNTcyIDIxOS4yMTIgMzIyLjczNiAxOTMuNDEgMjk5LjQyMiAxODYuNzc3Wk0yODYuMj
YyIDI1NC43OTVMMTc2LjA3MiAyMjMuOTlDMTc2LjA3MiAyMjMuOTkgMTc3LjI2NSAyMzAuMDM4IDE4MS44NDIgMjM3Ljg2OUwyNzQuNjE3IDI2My44MDVDMjgyLjI1NSAyNTkuMzg2IDI4Ni4yNjIgMjU0Ljc5NSAyODYuMjYyIDI1NC43OTVaTTIwOS44NjcgMzIxLjEwMkMxMjIuNjE4IDI5Ny43MSAxMzMuMTY2IDE4Ni41NDMgMTQ3LjI4NCAxMzMuODY1QzE1My4wOTcgMTEyLjE1NiAxNTkuMDczIDk2LjAyMDMgMTY0LjAyOSA4NS4yMDRDMTYxLjA3MiA4NC41OTUzIDE1OC42MjMgODYuMTUyOSAxNTYuMjAzIDkxLjA3NDZDMTUwLjk0MSAxMDEuNzQ3IDE0NC4yMTIgMTE5LjEyNCAxMzcuNyAxNDMuNDVDMTIzLjU4NiAxOTYuMTI3IDExMy4wMzggMzA3LjI5IDIwMC4yODMgMzMwLjY4MkMyNDEuNDA2IDM0MS42OTkgMjczLjQ0MiAzMjQuOTU1IDI5Ny4zMjMgMjk4LjY1OUMyNzQuNjU1IDMxOS4xOSAyNDUuNzE0IDMzMC43MDEgMjA5Ljg2NyAzMjEuMTAyWiIgZmlsbD0iIzJENDU1MiIvPgo8cGF0aCBkPSJNMTYxLjY2MSAyNjIuMjk2VjIzOS44NjNMOTkuMzMyNCAyNTcuNTM3Qzk5LjMzMjQgMjU3LjUzNyAxMDMuOTM4IDIzMC43NzcgMTM2LjQ0NCAyMjEuNTU2QzE0Ni4zMDIgMjE4Ljc2MiAxNTQuNzEzIDIxOC43ODEgMTYxLjY2MSAyMjAuMTIzVjEyOC4xMUgxOTIuODY5QzE4OS40NzEgMTE3LjYxIDE4Ni4xODQgMTA5LjUyNiAxODMuNDIzIDEwMy45MDlDMTc4Ljg1NiA5NC42MTIgMTc0LjE3NCAxMDAuNzc1IDE2My41NDUgMTA5LjY2NUMxNTYuMDU5IDExNS45MTkgMTM3LjEzOSAxMjkuMjYxIDEwOC42NjggMTM2LjkzM0M4MC4xOTY2IDE0NC42MSA1Ny4xNzkgMTQyLjU3NCA0Ny41NzUyIDE0MC45MTFDMzMuOTYwMSAxMzguNTYyIDI2LjgzODcgMTM1LjU3MiAyNy41MDQ5IDE0NS45MjhDMjguMDg0NyAxNTUuMDYyIDMwLjI2MDUgMTY5LjIyNCAzNS4yNDQ1IDE4Ny45MjhDNDYuMDI3MiAyMjguNDMzIDgxLjY2MyAzMDYuNDgxIDE0OS4wMSAyODguMzQyQzE2Ni42MDIgMjgzLjYwMiAxNzkuMDE5IDI3NC4yMzMgMTg3LjYyNiAyNjIuMjkxSDE2MS42NjFWMjYyLjI5NlpNNjEuMDg0OCAxODguNDg0TDEwOC45NDYgMTc1Ljg3NkMxMDguOTQ2IDE3NS44NzYgMTA3LjU1MSAxOTQuMjg4IDg5LjYwODcgMTk5LjAxOEM3MS42NjE0IDIwMy43NDMgNjEuMDg0OCAxODguNDg0IDYxLjA4NDggMTg4LjQ4NFoiIGZpbGw9IiNFMjU3NEMiLz4KPHBhdGggZD0iTTM0MS43ODYgMTI5LjE3NEMzMjkuMzQ1IDEzMS4zNTUgMjk5LjQ5OCAxMzQuMDcyIDI2Mi42MTIgMTI0LjE4NUMyMjUuNzE2IDExNC4zMDQgMjAxLjIzNiA5Ny4wMjI0IDE5MS41MzcgODguODk5NEMxNzcuNzg4IDc3LjM4MzQgMTcxLjc0IDY5LjM4MDIgMTY1Ljc4OCA4MS40ODU3QzE2MC41MjYgOTIuMTYzIDE1My43OTcgMTA5LjU0IDE0Ny4yODQgMTMzLjg2NkMxMzMuMTcxIDE4Ni41NDMgMTIyLjYyMyAyOTcuNzA2IDIwOS44NjcgMzIxLjA5OEMyOTcuMDkzIDM0NC40NyAzNDMuNTMgMjQyLjkyIDM1Ny
42NDQgMTkwLjIzOEMzNjQuMTU3IDE2NS45MTcgMzY3LjAxMyAxNDcuNSAzNjcuNzk5IDEzNS42MjVDMzY4LjY5NSAxMjIuMTczIDM1OS40NTUgMTI2LjA3OCAzNDEuNzg2IDEyOS4xNzRaTTE2Ni40OTcgMTcyLjc1NkMxNjYuNDk3IDE3Mi43NTYgMTgwLjI0NiAxNTEuMzcyIDIwMy41NjUgMTU4QzIyNi44OTkgMTY0LjYyOCAyMjguNzA2IDE5MC40MjUgMjI4LjcwNiAxOTAuNDI1TDE2Ni40OTcgMTcyLjc1NlpNMjIzLjQyIDI2OC43MTNDMTgyLjQwMyAyNTYuNjk4IDE3Ni4wNzcgMjIzLjk5IDE3Ni4wNzcgMjIzLjk5TDI4Ni4yNjIgMjU0Ljc5NkMyODYuMjYyIDI1NC43OTEgMjY0LjAyMSAyODAuNTc4IDIyMy40MiAyNjguNzEzWk0yNjIuMzc3IDIwMS40OTVDMjYyLjM3NyAyMDEuNDk1IDI3Ni4xMDcgMTgwLjEyNiAyOTkuNDIyIDE4Ni43NzNDMzIyLjczNiAxOTMuNDExIDMyNC41NzIgMjE5LjIwOCAzMjQuNTcyIDIxOS4yMDhMMjYyLjM3NyAyMDEuNDk1WiIgZmlsbD0iIzJFQUQzMyIvPgo8cGF0aCBkPSJNMTM5Ljg4IDI0Ni4wNEw5OS4zMzI0IDI1Ny41MzJDOTkuMzMyNCAyNTcuNTMyIDEwMy43MzcgMjMyLjQ0IDEzMy42MDcgMjIyLjQ5NkwxMTAuNjQ3IDEzNi4zM0wxMDguNjYzIDEzNi45MzNDODAuMTkxOCAxNDQuNjExIDU3LjE3NDIgMTQyLjU3NCA0Ny41NzA0IDE0MC45MTFDMzMuOTU1NCAxMzguNTYzIDI2LjgzNCAxMzUuNTcyIDI3LjUwMDEgMTQ1LjkyOUMyOC4wOCAxNTUuMDYzIDMwLjI1NTcgMTY5LjIyNCAzNS4yMzk3IDE4Ny45MjlDNDYuMDIyNSAyMjguNDMzIDgxLjY1ODMgMzA2LjQ4MSAxNDkuMDA1IDI4OC4zNDJMMTUwLjk4OSAyODcuNzE5TDEzOS44OCAyNDYuMDRaTTYxLjA4NDggMTg4LjQ4NUwxMDguOTQ2IDE3NS44NzZDMTA4Ljk0NiAxNzUuODc2IDEwNy41NTEgMTk0LjI4OCA4OS42MDg3IDE5OS4wMThDNzEuNjYxNSAyMDMuNzQzIDYxLjA4NDggMTg4LjQ4NSA2MS4wODQ4IDE4OC40ODVaIiBmaWxsPSIjRDY1MzQ4Ii8+CjxwYXRoIGQ9Ik0yMjUuMjcgMjY5LjE2M0wyMjMuNDE1IDI2OC43MTJDMTgyLjM5OCAyNTYuNjk4IDE3Ni4wNzIgMjIzLjk5IDE3Ni4wNzIgMjIzLjk5TDIzMi44OSAyMzkuODcyTDI2Mi45NzEgMTI0LjI4MUwyNjIuNjA3IDEyNC4xODVDMjI1LjcxMSAxMTQuMzA0IDIwMS4yMzIgOTcuMDIyNCAxOTEuNTMyIDg4Ljg5OTRDMTc3Ljc4MyA3Ny4zODM0IDE3MS43MzUgNjkuMzgwMiAxNjUuNzgzIDgxLjQ4NTdDMTYwLjUyNiA5Mi4xNjMgMTUzLjc5NyAxMDkuNTQgMTQ3LjI4NCAxMzMuODY2QzEzMy4xNzEgMTg2LjU0MyAxMjIuNjIzIDI5Ny43MDYgMjA5Ljg2NyAzMjEuMDk3TDIxMS42NTUgMzIxLjVMMjI1LjI3IDI2OS4xNjNaTTE2Ni40OTcgMTcyLjc1NkMxNjYuNDk3IDE3Mi43NTYgMTgwLjI0NiAxNTEuMzcyIDIwMy41NjUgMTU4QzIyNi44OTkgMTY0LjYyOCAyMjguNzA2IDE5MC40MjUgMjI4LjcwNiAxOTAuNDI1TDE2Ni40OTcgMTcyLjc1NloiIGZpbGw9IiMxRDhEMjIiLz4KPHBhdGggZD0iTTE0MS
45NDYgMjQ1LjQ1MUwxMzEuMDcyIDI0OC41MzdDMTMzLjY0MSAyNjMuMDE5IDEzOC4xNjkgMjc2LjkxNyAxNDUuMjc2IDI4OS4xOTVDMTQ2LjUxMyAyODguOTIyIDE0Ny43NCAyODguNjg3IDE0OSAyODguMzQyQzE1Mi4zMDIgMjg3LjQ1MSAxNTUuMzY0IDI4Ni4zNDggMTU4LjMxMiAyODUuMTQ1QzE1MC4zNzEgMjczLjM2MSAxNDUuMTE4IDI1OS43ODkgMTQxLjk0NiAyNDUuNDUxWk0xMzcuNyAxNDMuNDUxQzEzMi4xMTIgMTY0LjMwNyAxMjcuMTEzIDE5NC4zMjYgMTI4LjQ4OSAyMjQuNDM2QzEzMC45NTIgMjIzLjM2NyAxMzMuNTU0IDIyMi4zNzEgMTM2LjQ0NCAyMjEuNTUxTDEzOC40NTcgMjIxLjEwMUMxMzYuMDAzIDE4OC45MzkgMTQxLjMwOCAxNTYuMTY1IDE0Ny4yODQgMTMzLjg2NkMxNDguNzk5IDEyOC4yMjUgMTUwLjMxOCAxMjIuOTc4IDE1MS44MzIgMTE4LjA4NUMxNDkuMzkzIDExOS42MzcgMTQ2Ljc2NyAxMjEuMjI4IDE0My43NzYgMTIyLjg2N0MxNDEuNzU5IDEyOS4wOTMgMTM5LjcyMiAxMzUuODk4IDEzNy43IDE0My40NTFaIiBmaWxsPSIjQzA0QjQxIi8+Cjwvc3ZnPg==", +) + +SEEDS = [ + ASKUI_VISION_AGENT, + HUMAN_DEMONSTRATION_AGENT, + ANDROID_VISION_AGENT, + ASKUI_WEB_AGENT, +] diff --git a/src/chat/api/assistants/service.py b/src/askui/chat/api/assistants/service.py similarity index 93% rename from src/chat/api/assistants/service.py rename to src/askui/chat/api/assistants/service.py index 391135dd..0b674312 100644 --- a/src/chat/api/assistants/service.py +++ b/src/askui/chat/api/assistants/service.py @@ -2,13 +2,9 @@ from pydantic import BaseModel, Field -from chat.api.assistants.models import Assistant -from chat.api.assistants.seeds import ( - ANDROID_VISION_AGENT, - ASKUI_VISION_AGENT, - HUMAN_DEMONSTRATION_AGENT, -) -from chat.api.models import DO_NOT_PATCH, DoNotPatch, ListQuery, ListResponse +from askui.chat.api.assistants.models import Assistant +from askui.chat.api.assistants.seeds import SEEDS +from askui.chat.api.models import DO_NOT_PATCH, DoNotPatch, ListQuery, ListResponse class CreateAssistantRequest(BaseModel): @@ -166,6 +162,5 @@ def delete(self, assistant_id: str) -> None: def seed(self) -> None: """Seed the assistant service with default assistants.""" - self._save(ANDROID_VISION_AGENT) - self._save(ASKUI_VISION_AGENT) - self._save(HUMAN_DEMONSTRATION_AGENT) + for seed in 
SEEDS: + self._save(seed) diff --git a/src/chat/api/dependencies.py b/src/askui/chat/api/dependencies.py similarity index 78% rename from src/chat/api/dependencies.py rename to src/askui/chat/api/dependencies.py index ef35a3d7..a9c78c2f 100644 --- a/src/chat/api/dependencies.py +++ b/src/askui/chat/api/dependencies.py @@ -1,6 +1,6 @@ from fastapi import Depends -from chat.api.settings import Settings +from askui.chat.api.settings import Settings def get_settings() -> Settings: diff --git a/src/chat/api/health/__init__.py b/src/askui/chat/api/health/__init__.py similarity index 100% rename from src/chat/api/health/__init__.py rename to src/askui/chat/api/health/__init__.py diff --git a/src/chat/api/health/router.py b/src/askui/chat/api/health/router.py similarity index 100% rename from src/chat/api/health/router.py rename to src/askui/chat/api/health/router.py diff --git a/src/chat/api/messages/__init__.py b/src/askui/chat/api/messages/__init__.py similarity index 100% rename from src/chat/api/messages/__init__.py rename to src/askui/chat/api/messages/__init__.py diff --git a/src/chat/api/messages/dependencies.py b/src/askui/chat/api/messages/dependencies.py similarity index 62% rename from src/chat/api/messages/dependencies.py rename to src/askui/chat/api/messages/dependencies.py index 59ec54d2..51bff5af 100644 --- a/src/chat/api/messages/dependencies.py +++ b/src/askui/chat/api/messages/dependencies.py @@ -1,8 +1,8 @@ from fastapi import Depends -from chat.api.dependencies import SettingsDep -from chat.api.messages.service import MessageService -from chat.api.settings import Settings +from askui.chat.api.dependencies import SettingsDep +from askui.chat.api.messages.service import MessageService +from askui.chat.api.settings import Settings def get_message_service( diff --git a/src/chat/api/messages/router.py b/src/askui/chat/api/messages/router.py similarity index 88% rename from src/chat/api/messages/router.py rename to src/askui/chat/api/messages/router.py index 
f35e8e36..d7b7462e 100644 --- a/src/chat/api/messages/router.py +++ b/src/askui/chat/api/messages/router.py @@ -1,8 +1,18 @@ from fastapi import APIRouter, HTTPException, status -from chat.api.messages.dependencies import MessageServiceDep -from chat.api.messages.service import Message, MessageCreateRequest, MessageService -from chat.api.models import ListQuery, ListQueryDep, ListResponse, MessageId, ThreadId +from askui.chat.api.messages.dependencies import MessageServiceDep +from askui.chat.api.messages.service import ( + Message, + MessageCreateRequest, + MessageService, +) +from askui.chat.api.models import ( + ListQuery, + ListQueryDep, + ListResponse, + MessageId, + ThreadId, +) router = APIRouter(prefix="/threads/{thread_id}/messages", tags=["messages"]) diff --git a/src/chat/api/messages/service.py b/src/askui/chat/api/messages/service.py similarity index 97% rename from src/chat/api/messages/service.py rename to src/askui/chat/api/messages/service.py index 4e30c426..517f8456 100644 --- a/src/chat/api/messages/service.py +++ b/src/askui/chat/api/messages/service.py @@ -4,8 +4,7 @@ from pydantic import Field -from askui.models.shared.agent_message_param import MessageParam -from chat.api.models import ( +from askui.chat.api.models import ( MAX_MESSAGES_PER_THREAD, AssistantId, ListQuery, @@ -14,7 +13,8 @@ ThreadId, UnixDatetime, ) -from chat.api.utils import generate_time_ordered_id +from askui.chat.api.utils import generate_time_ordered_id +from askui.models.shared.agent_message_param import MessageParam class MessageBase(MessageParam): diff --git a/src/chat/api/models.py b/src/askui/chat/api/models.py similarity index 100% rename from src/chat/api/models.py rename to src/askui/chat/api/models.py diff --git a/src/chat/api/runs/__init__.py b/src/askui/chat/api/runs/__init__.py similarity index 100% rename from src/chat/api/runs/__init__.py rename to src/askui/chat/api/runs/__init__.py diff --git a/src/chat/api/runs/dependencies.py 
b/src/askui/chat/api/runs/dependencies.py similarity index 72% rename from src/chat/api/runs/dependencies.py rename to src/askui/chat/api/runs/dependencies.py index 440f07d4..772c2545 100644 --- a/src/chat/api/runs/dependencies.py +++ b/src/askui/chat/api/runs/dependencies.py @@ -1,7 +1,7 @@ from fastapi import Depends -from chat.api.dependencies import SettingsDep -from chat.api.settings import Settings +from askui.chat.api.dependencies import SettingsDep +from askui.chat.api.settings import Settings from .service import RunService diff --git a/src/chat/api/runs/models.py b/src/askui/chat/api/runs/models.py similarity index 92% rename from src/chat/api/runs/models.py rename to src/askui/chat/api/runs/models.py index a2a45190..ef2b3a15 100644 --- a/src/chat/api/runs/models.py +++ b/src/askui/chat/api/runs/models.py @@ -3,8 +3,8 @@ from pydantic import BaseModel, Field, computed_field -from chat.api.models import AssistantId, RunId, ThreadId, UnixDatetime -from chat.api.utils import generate_time_ordered_id +from askui.chat.api.models import AssistantId, RunId, ThreadId, UnixDatetime +from askui.chat.api.utils import generate_time_ordered_id RunStatus = Literal[ "queued", diff --git a/src/chat/api/runs/router.py b/src/askui/chat/api/runs/router.py similarity index 94% rename from src/chat/api/runs/router.py rename to src/askui/chat/api/runs/router.py index c6def410..c56d3025 100644 --- a/src/chat/api/runs/router.py +++ b/src/askui/chat/api/runs/router.py @@ -5,8 +5,8 @@ from fastapi.responses import JSONResponse, StreamingResponse from pydantic import BaseModel -from chat.api.models import ListQuery, ListQueryDep, ListResponse, RunId, ThreadId -from chat.api.runs.service import CreateRunRequest +from askui.chat.api.models import ListQuery, ListQueryDep, ListResponse, RunId, ThreadId +from askui.chat.api.runs.service import CreateRunRequest from .dependencies import RunServiceDep from .models import Run diff --git a/src/chat/api/runs/runner/__init__.py 
b/src/askui/chat/api/runs/runner/__init__.py similarity index 100% rename from src/chat/api/runs/runner/__init__.py rename to src/askui/chat/api/runs/runner/__init__.py diff --git a/src/askui/chat/api/runs/runner/events/__init__.py b/src/askui/chat/api/runs/runner/events/__init__.py new file mode 100644 index 00000000..11475a34 --- /dev/null +++ b/src/askui/chat/api/runs/runner/events/__init__.py @@ -0,0 +1,15 @@ +from askui.chat.api.runs.runner.events.done_events import DoneEvent +from askui.chat.api.runs.runner.events.error_events import ErrorEvent +from askui.chat.api.runs.runner.events.event_base import EventBase +from askui.chat.api.runs.runner.events.events import Events +from askui.chat.api.runs.runner.events.message_events import MessageEvent +from askui.chat.api.runs.runner.events.run_events import RunEvent + +__all__ = [ + "DoneEvent", + "ErrorEvent", + "EventBase", + "Events", + "MessageEvent", + "RunEvent", +] diff --git a/src/chat/api/runs/runner/events/done_events.py b/src/askui/chat/api/runs/runner/events/done_events.py similarity index 66% rename from src/chat/api/runs/runner/events/done_events.py rename to src/askui/chat/api/runs/runner/events/done_events.py index 88b86a82..64be93fd 100644 --- a/src/chat/api/runs/runner/events/done_events.py +++ b/src/askui/chat/api/runs/runner/events/done_events.py @@ -1,6 +1,6 @@ from typing import Literal -from chat.api.runs.runner.events.event_base import EventBase +from askui.chat.api.runs.runner.events.event_base import EventBase class DoneEvent(EventBase): diff --git a/src/chat/api/runs/runner/events/error_events.py b/src/askui/chat/api/runs/runner/events/error_events.py similarity index 80% rename from src/chat/api/runs/runner/events/error_events.py rename to src/askui/chat/api/runs/runner/events/error_events.py index efb536be..82688d54 100644 --- a/src/chat/api/runs/runner/events/error_events.py +++ b/src/askui/chat/api/runs/runner/events/error_events.py @@ -2,7 +2,7 @@ from pydantic import BaseModel -from 
chat.api.runs.runner.events.event_base import EventBase +from askui.chat.api.runs.runner.events.event_base import EventBase class ErrorEventDataError(BaseModel): diff --git a/src/chat/api/runs/runner/events/event_base.py b/src/askui/chat/api/runs/runner/events/event_base.py similarity index 100% rename from src/chat/api/runs/runner/events/event_base.py rename to src/askui/chat/api/runs/runner/events/event_base.py diff --git a/src/askui/chat/api/runs/runner/events/events.py b/src/askui/chat/api/runs/runner/events/events.py new file mode 100644 index 00000000..dac1d80f --- /dev/null +++ b/src/askui/chat/api/runs/runner/events/events.py @@ -0,0 +1,6 @@ +from askui.chat.api.runs.runner.events.done_events import DoneEvent +from askui.chat.api.runs.runner.events.error_events import ErrorEvent +from askui.chat.api.runs.runner.events.message_events import MessageEvent +from askui.chat.api.runs.runner.events.run_events import RunEvent + +Events = DoneEvent | ErrorEvent | MessageEvent | RunEvent diff --git a/src/chat/api/runs/runner/events/message_events.py b/src/askui/chat/api/runs/runner/events/message_events.py similarity index 51% rename from src/chat/api/runs/runner/events/message_events.py rename to src/askui/chat/api/runs/runner/events/message_events.py index 3a1b2d88..51807d84 100644 --- a/src/chat/api/runs/runner/events/message_events.py +++ b/src/askui/chat/api/runs/runner/events/message_events.py @@ -1,7 +1,7 @@ from typing import Literal -from chat.api.messages.service import Message -from chat.api.runs.runner.events.event_base import EventBase +from askui.chat.api.messages.service import Message +from askui.chat.api.runs.runner.events.event_base import EventBase class MessageEvent(EventBase): diff --git a/src/chat/api/runs/runner/events/run_events.py b/src/askui/chat/api/runs/runner/events/run_events.py similarity index 75% rename from src/chat/api/runs/runner/events/run_events.py rename to src/askui/chat/api/runs/runner/events/run_events.py index 
033ef15f..84952e34 100644 --- a/src/chat/api/runs/runner/events/run_events.py +++ b/src/askui/chat/api/runs/runner/events/run_events.py @@ -1,7 +1,7 @@ from typing import Literal -from chat.api.runs.models import Run -from chat.api.runs.runner.events.event_base import EventBase +from askui.chat.api.runs.models import Run +from askui.chat.api.runs.runner.events.event_base import EventBase class RunEvent(EventBase): diff --git a/src/chat/api/runs/runner/runner.py b/src/askui/chat/api/runs/runner/runner.py similarity index 86% rename from src/chat/api/runs/runner/runner.py rename to src/askui/chat/api/runs/runner/runner.py index ca4ed422..9eb5449a 100644 --- a/src/chat/api/runs/runner/runner.py +++ b/src/askui/chat/api/runs/runner/runner.py @@ -7,6 +7,24 @@ from askui.agent import VisionAgent from askui.android_agent import AndroidVisionAgent +from askui.chat.api.assistants.seeds import ( + ANDROID_VISION_AGENT, + ASKUI_VISION_AGENT, + ASKUI_WEB_AGENT, + HUMAN_DEMONSTRATION_AGENT, +) +from askui.chat.api.messages.service import MessageCreateRequest, MessageService +from askui.chat.api.models import MAX_MESSAGES_PER_THREAD, ListQuery +from askui.chat.api.runs.models import Run, RunError +from askui.chat.api.runs.runner.events.done_events import DoneEvent +from askui.chat.api.runs.runner.events.error_events import ( + ErrorEvent, + ErrorEventData, + ErrorEventDataError, +) +from askui.chat.api.runs.runner.events.events import Events +from askui.chat.api.runs.runner.events.message_events import MessageEvent +from askui.chat.api.runs.runner.events.run_events import RunEvent from askui.models.shared.agent_message_param import ( Base64ImageSourceParam, ImageBlockParam, @@ -16,18 +34,7 @@ from askui.models.shared.agent_on_message_cb import OnMessageCbParam from askui.tools.pynput_agent_os import PynputAgentOs from askui.utils.image_utils import ImageSource -from chat.api.messages.service import MessageCreateRequest, MessageService -from chat.api.models import 
MAX_MESSAGES_PER_THREAD, ListQuery -from chat.api.runs.models import Run, RunError -from chat.api.runs.runner.events.done_events import DoneEvent -from chat.api.runs.runner.events.error_events import ( - ErrorEvent, - ErrorEventData, - ErrorEventDataError, -) -from chat.api.runs.runner.events.events import Events -from chat.api.runs.runner.events.message_events import MessageEvent -from chat.api.runs.runner.events.run_events import RunEvent +from askui.web_agent import WebVisionAgent if TYPE_CHECKING: from askui.tools.agent_os import InputEvent @@ -35,11 +42,6 @@ logger = logging.getLogger(__name__) -ASKUI_VISION_AGENT_ID = "asst_ge3tiojsga3dgnruge3di2u5ov36shedkcslxnmca" -ASKUI_ANDROID_AGENT_ID = "asst_78da09fbf1ed43c7826fb1686f89f541" -HUMAN_AGENT_ID = "asst_ge3tiojsga3dgnruge3di2u5ov36shedkcslxnmcb" - - class Runner: def __init__(self, run: Run, base_dir: Path) -> None: self._run = run @@ -152,8 +154,16 @@ def _run_askui_vision_agent(self, event_queue: queue.Queue[Events]) -> None: event_queue=event_queue, ) + def _run_askui_web_agent(self, event_queue: queue.Queue[Events]) -> None: + self._run_agent( + agent_type="web", + event_queue=event_queue, + ) + def _run_agent( - self, agent_type: Literal["android", "vision"], event_queue: queue.Queue[Events] + self, + agent_type: Literal["android", "vision", "web"], + event_queue: queue.Queue[Events], ) -> None: messages: list[MessageParam] = [ MessageParam( @@ -199,6 +209,14 @@ def on_message( ) return + if agent_type == "web": + with WebVisionAgent() as web_agent: + web_agent.act( + messages, + on_message=on_message, + ) + return + with VisionAgent() as agent: agent.act( messages, @@ -217,12 +235,14 @@ def run( ) ) try: - if self._run.assistant_id == HUMAN_AGENT_ID: + if self._run.assistant_id == HUMAN_DEMONSTRATION_AGENT.id: self._run_human_agent(event_queue) - elif self._run.assistant_id == ASKUI_VISION_AGENT_ID: + elif self._run.assistant_id == ASKUI_VISION_AGENT.id: self._run_askui_vision_agent(event_queue) - elif 
self._run.assistant_id == ASKUI_ANDROID_AGENT_ID: + elif self._run.assistant_id == ANDROID_VISION_AGENT.id: self._run_askui_android_agent(event_queue) + elif self._run.assistant_id == ASKUI_WEB_AGENT.id: + self._run_askui_web_agent(event_queue) updated_run = self._retrieve_run() if updated_run.status == "in_progress": updated_run.completed_at = datetime.now(tz=timezone.utc) diff --git a/src/chat/api/runs/service.py b/src/askui/chat/api/runs/service.py similarity index 92% rename from src/chat/api/runs/service.py rename to src/askui/chat/api/runs/service.py index a3469896..e68311f0 100644 --- a/src/chat/api/runs/service.py +++ b/src/askui/chat/api/runs/service.py @@ -8,13 +8,13 @@ from pydantic import BaseModel -from chat.api.models import AssistantId, ListQuery, ListResponse, RunId, ThreadId -from chat.api.runs.models import Run -from chat.api.runs.runner.events import Events -from chat.api.runs.runner.events.done_events import DoneEvent -from chat.api.runs.runner.events.error_events import ErrorEvent -from chat.api.runs.runner.events.run_events import RunEvent -from chat.api.runs.runner.runner import Runner +from askui.chat.api.models import AssistantId, ListQuery, ListResponse, RunId, ThreadId +from askui.chat.api.runs.models import Run +from askui.chat.api.runs.runner.events import Events +from askui.chat.api.runs.runner.events.done_events import DoneEvent +from askui.chat.api.runs.runner.events.error_events import ErrorEvent +from askui.chat.api.runs.runner.events.run_events import RunEvent +from askui.chat.api.runs.runner.runner import Runner class CreateRunRequest(BaseModel): diff --git a/src/chat/api/settings.py b/src/askui/chat/api/settings.py similarity index 56% rename from src/chat/api/settings.py rename to src/askui/chat/api/settings.py index c091a6cf..7db215ac 100644 --- a/src/chat/api/settings.py +++ b/src/askui/chat/api/settings.py @@ -15,3 +15,17 @@ class Settings(BaseSettings): default_factory=lambda: Path.cwd() / "chat", description="Base 
directory for storing chat data", ) + host: str = Field( + default="127.0.0.1", + description="Host for the chat API", + ) + log_level: str | int = Field( + default="info", + description="Log level for the chat API", + ) + port: int = Field( + default=9261, + description="Port for the chat API", + ge=1024, + le=65535, + ) diff --git a/src/chat/api/threads/__init__.py b/src/askui/chat/api/threads/__init__.py similarity index 100% rename from src/chat/api/threads/__init__.py rename to src/askui/chat/api/threads/__init__.py diff --git a/src/chat/api/threads/dependencies.py b/src/askui/chat/api/threads/dependencies.py similarity index 56% rename from src/chat/api/threads/dependencies.py rename to src/askui/chat/api/threads/dependencies.py index 926fdf9e..46f0840d 100644 --- a/src/chat/api/threads/dependencies.py +++ b/src/askui/chat/api/threads/dependencies.py @@ -1,10 +1,10 @@ from fastapi import Depends -from chat.api.dependencies import SettingsDep -from chat.api.messages.dependencies import MessageServiceDep -from chat.api.messages.service import MessageService -from chat.api.settings import Settings -from chat.api.threads.service import ThreadService +from askui.chat.api.dependencies import SettingsDep +from askui.chat.api.messages.dependencies import MessageServiceDep +from askui.chat.api.messages.service import MessageService +from askui.chat.api.settings import Settings +from askui.chat.api.threads.service import ThreadService def get_thread_service( diff --git a/src/chat/api/threads/router.py b/src/askui/chat/api/threads/router.py similarity index 89% rename from src/chat/api/threads/router.py rename to src/askui/chat/api/threads/router.py index e018440c..e29fc336 100644 --- a/src/chat/api/threads/router.py +++ b/src/askui/chat/api/threads/router.py @@ -1,8 +1,8 @@ from fastapi import APIRouter, HTTPException, status -from chat.api.models import ListQuery, ListQueryDep, ListResponse, ThreadId -from chat.api.threads.dependencies import ThreadServiceDep -from 
chat.api.threads.service import ( +from askui.chat.api.models import ListQuery, ListQueryDep, ListResponse, ThreadId +from askui.chat.api.threads.dependencies import ThreadServiceDep +from askui.chat.api.threads.service import ( Thread, ThreadCreateRequest, ThreadModifyRequest, diff --git a/src/chat/api/threads/service.py b/src/askui/chat/api/threads/service.py similarity index 94% rename from src/chat/api/threads/service.py rename to src/askui/chat/api/threads/service.py index 1ce88aef..1c1fb4f2 100644 --- a/src/chat/api/threads/service.py +++ b/src/askui/chat/api/threads/service.py @@ -4,9 +4,15 @@ from pydantic import BaseModel, Field -from chat.api.messages.service import MessageCreateRequest, MessageService -from chat.api.models import DoNotPatch, ListQuery, ListResponse, ThreadId, UnixDatetime -from chat.api.utils import generate_time_ordered_id +from askui.chat.api.messages.service import MessageCreateRequest, MessageService +from askui.chat.api.models import ( + DoNotPatch, + ListQuery, + ListResponse, + ThreadId, + UnixDatetime, +) +from askui.chat.api.utils import generate_time_ordered_id class Thread(BaseModel): diff --git a/src/chat/api/utils.py b/src/askui/chat/api/utils.py similarity index 100% rename from src/chat/api/utils.py rename to src/askui/chat/api/utils.py diff --git a/src/askui/tools/agent_os.py b/src/askui/tools/agent_os.py index d0e36af9..bd845130 100644 --- a/src/askui/tools/agent_os.py +++ b/src/askui/tools/agent_os.py @@ -4,11 +4,15 @@ from PIL import Image from pydantic import BaseModel -ModifierKey = Literal["command", "alt", "control", "shift", "right_shift"] +ModifierKey = Literal[ + "command", + "alt", + "control", + "shift", + "right_shift", +] """Modifier keys for keyboard actions.""" -ModifierKeys: list[ModifierKey] = ["command", "alt", "control", "shift", "right_shift"] - PcKey = Literal[ "backspace", "delete", @@ -308,7 +312,6 @@ def keyboard_tap( """ raise NotImplementedError - @abstractmethod def set_display(self, display: 
int = 1) -> None: """ Sets the active display for screen interactions. @@ -319,7 +322,6 @@ def set_display(self, display: int = 1) -> None: """ raise NotImplementedError - @abstractmethod def run_command(self, command: str, timeout_ms: int = 30000) -> None: """ Executes a shell command. diff --git a/src/askui/tools/computer.py b/src/askui/tools/computer.py index 780eca1b..8289b591 100644 --- a/src/askui/tools/computer.py +++ b/src/askui/tools/computer.py @@ -1,5 +1,8 @@ +import sys +import time from abc import ABC -from typing import Annotated, Literal, TypedDict, get_args +from dataclasses import dataclass +from typing import Annotated, Literal, TypedDict, cast, get_args from anthropic.types.beta import ( BetaToolComputerUse20241022Param, @@ -7,102 +10,167 @@ ) from PIL import Image from pydantic import Field, validate_call -from typing_extensions import override +from typing_extensions import Self, override -from askui.tools.agent_os import AgentOs, PcKey -from askui.utils.dict_utils import IdentityDefaultDict +from askui.tools.agent_os import AgentOs, ModifierKey, PcKey from askui.utils.image_utils import scale_coordinates_back, scale_image_with_padding from ..models.shared.tools import InputSchema, Tool Action20241022 = Literal[ + "cursor_position", + "double_click", "key", - "type", - "mouse_move", "left_click", "left_click_drag", - "right_click", "middle_click", - "double_click", + "mouse_move", + "right_click", "screenshot", - "cursor_position", + "type", ] Action20250124 = ( Action20241022 | Literal[ + "hold_key", "left_mouse_down", "left_mouse_up", "scroll", - "hold_key", - "wait", "triple_click", + "wait", ] ) ScrollDirection = Literal["up", "down", "left", "right"] -KeysToMap = Literal[ - "BackSpace", - "Delete", - "Return", - "Enter", - "Tab", - "Escape", - "Up", - "Down", - "Right", - "Left", - "Home", - "End", - "Page_Up", - "Page_Down", - "F1", - "F2", - "F3", - "F4", - "F5", - "F6", - "F7", - "F8", - "F9", - "F10", - "F11", - "F12", -] 
+XDOTOOL_TO_MODIFIER_KEY_MAP: dict[str, ModifierKey] = { + # Aliases + "alt": "alt", + "ctrl": "command" if sys.platform == "darwin" else "control", + "cmd": "command", + "shift": "shift", + "super": "command", + "meta": "command", + # Real keys + "Control_L": "control", + "Control_R": "control", + "Shift_L": "shift", + "Shift_R": "right_shift", + "Alt_L": "alt", + "Alt_R": "alt", + "Super_L": "command", + "Super_R": "command", + "Meta_L": "command", + "Meta_R": "command", +} + +XDOTOOL_TO_PC_KEY_MAP: dict[str, PcKey] = { + "space": "space", + # Navigation and control + "BackSpace": "backspace", + "Delete": "delete", + "Return": "enter", + "Tab": "tab", + "Escape": "escape", + "Up": "up", + "Down": "down", + "Right": "right", + "Left": "left", + "Home": "home", + "End": "end", + "Page_Up": "pageup", + "Page_Down": "pagedown", + # Function keys + **{f"F{i}": cast("PcKey", f"f{i}") for i in range(1, 13)}, + # Symbols + "exclam": "!", + "quotedbl": '"', + "numbersign": "#", + "dollar": "$", + "percent": "%", + "ampersand": "&", + "apostrophe": "'", + "parenleft": "(", + "parenright": ")", + "asterisk": "*", + "plus": "+", + "comma": ",", + "minus": "-", + "period": ".", + "slash": "/", + "colon": ":", + "semicolon": ";", + "less": "<", + "equal": "=", + "greater": ">", + "question": "?", + "at": "@", + "bracketleft": "[", + "backslash": "\\", + "bracketright": "]", + "asciicircum": "^", + "underscore": "_", + "grave": "`", + "braceleft": "{", + "bar": "|", + "braceright": "}", + "asciitilde": "~", + # Digits and letters + **{ + ch: cast("PcKey", ch) + for ch in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + }, +} + +XDOTOOL_TO_KEY_MAP = XDOTOOL_TO_MODIFIER_KEY_MAP | XDOTOOL_TO_PC_KEY_MAP + +RELATIVE_SCROLL_FACTOR = 0.1 +""" +The factor by which the scroll amount is multiplied together with the real screen +resolution to get the actual scroll amount. 
Represents the relative height/width +of the screen (e.g., 0.1 means that 1 scroll amount equals 10% of the screen height +or width) that equals 1 scroll amount. + +Example of how the scroll amount is calculated: +- real screen resolution: 1920x1080 +- scroll amount: 1 +- relative scroll factor: 0.1 +- actual scroll amount: 1 * 0.1 * 1920 = 192 or 1 * 0.1 * 1080 = 108 +""" + + +@dataclass +class KeyboardParam: + key: PcKey | ModifierKey + modifier_keys: list[ModifierKey] | None = None + + @classmethod + def from_xdotool(cls, keystroke: str) -> Self: + """ + Convert an xdotool keystroke (see + [xdotool documentation](https://www.mankier.com/1/xdotool#Keyboard_Commands)) + to a `KeyboardParam`. -Key = PcKey | KeysToMap - -KEYS_MAPPING: IdentityDefaultDict[Key, PcKey] = IdentityDefaultDict( - { - "BackSpace": "backspace", - "Delete": "delete", - "Return": "enter", - "Enter": "enter", - "Tab": "tab", - "Escape": "escape", - "Up": "up", - "Down": "down", - "Right": "right", - "Left": "left", - "Home": "home", - "End": "end", - "Page_Up": "pageup", - "Page_Down": "pagedown", - "F1": "f1", - "F2": "f2", - "F3": "f3", - "F4": "f4", - "F5": "f5", - "F6": "f6", - "F7": "f7", - "F8": "f8", - "F9": "f9", - "F10": "f10", - "F11": "f11", - "F12": "f12", - } -) + Args: + keystroke (str): The xdotool keystroke to convert. 
+ + Example: + `"ctrl+shift+a"` -> `KeyboardParam(key="a", modifier_keys=["control", "shift"])` + """ + keys = keystroke.split("+") + key = keys.pop() + if key not in XDOTOOL_TO_KEY_MAP: + err_msg = ( + f"Unknown key: {key} " + f"(expected one of {list(XDOTOOL_TO_KEY_MAP.keys())})" + ) + raise ValueError(err_msg) + + return cls( + key=XDOTOOL_TO_KEY_MAP[key], + modifier_keys=[XDOTOOL_TO_MODIFIER_KEY_MAP[k] for k in keys], + ) class ActionNotImplementedError(NotImplementedError): @@ -149,72 +217,80 @@ def params_base( @override @validate_call - def __call__( + def __call__( # noqa: C901 self, - action: Action20250124, + action: Action20241022, text: str | None = None, coordinate: tuple[Annotated[int, Field(ge=0)], Annotated[int, Field(ge=0)]] | None = None, ) -> Image.Image | None: match action: - case "mouse_move": - self._mouse_move(coordinate) # type: ignore[arg-type] + case "cursor_position": + raise ActionNotImplementedError(action, self.name) + case "double_click": + return self._agent_os.click("left", 2) + case "key": + return self._key(keystroke=text) # type: ignore[arg-type] + case "left_click": + return self._agent_os.click("left") case "left_click_drag": - # does not seem to work - self._left_click_drag(coordinate) # type: ignore[arg-type] + return self._left_click_drag(coordinate) # type: ignore[arg-type] + case "middle_click": + return self._agent_os.click("middle") + case "mouse_move": + return self._mouse_move(coordinate) # type: ignore[arg-type] + case "right_click": + return self._agent_os.click("right") case "screenshot": return self._screenshot() - case "left_click": - self._agent_os.click("left") - case "right_click": - self._agent_os.click("right") - case "middle_click": - self._agent_os.click("middle") - case "double_click": - self._agent_os.click("left", 2) case "type": - self._type(text) # type: ignore[arg-type] - case "key": - # we do not seem to support all kinds of key nor modifier keys - # + key combinations - self._key(text) # type: 
ignore[arg-type] - case _: - raise ActionNotImplementedError(action, self.name) - return None + return self._type(text) # type: ignore[arg-type] @validate_call def _type(self, text: str) -> None: self._agent_os.type(text) @validate_call - def _key(self, key: Key) -> None: - _key = KEYS_MAPPING[key] - self._agent_os.keyboard_pressed(_key) - self._agent_os.keyboard_release(_key) + def _key(self, keystroke: str) -> None: + keyboard_param = KeyboardParam.from_xdotool(keystroke) + self._agent_os.keyboard_pressed( + key=keyboard_param.key, modifier_keys=keyboard_param.modifier_keys + ) + self._agent_os.keyboard_release( + key=keyboard_param.key, modifier_keys=keyboard_param.modifier_keys + ) @validate_call - def _keyboard_pressed(self, key: Key) -> None: - _key = KEYS_MAPPING[key] - self._agent_os.keyboard_pressed(_key) + def _keyboard_pressed(self, keystroke: str) -> None: + keyboard_param = KeyboardParam.from_xdotool(keystroke) + self._agent_os.keyboard_pressed( + key=keyboard_param.key, modifier_keys=keyboard_param.modifier_keys + ) @validate_call - def _keyboard_released(self, key: Key) -> None: - _key = KEYS_MAPPING[key] - self._agent_os.keyboard_release(_key) + def _keyboard_released(self, keystroke: str) -> None: + keyboard_param = KeyboardParam.from_xdotool(keystroke) + self._agent_os.keyboard_release( + key=keyboard_param.key, modifier_keys=keyboard_param.modifier_keys + ) - def _scale_coordinates_back( - self, - coordinate: tuple[Annotated[int, Field(ge=0)], Annotated[int, Field(ge=0)]], - ) -> tuple[int, int]: + def _get_real_screen_resolution(self) -> tuple[int, int]: if self._real_screen_width is None or self._real_screen_height is None: screenshot = self._agent_os.screenshot() self._real_screen_width = screenshot.width self._real_screen_height = screenshot.height + return self._real_screen_width, self._real_screen_height + + def _scale_coordinates_back( + self, + coordinate: tuple[Annotated[int, Field(ge=0)], Annotated[int, Field(ge=0)]], + ) -> tuple[int, 
int]: + real_screen_width, real_screen_height = self._get_real_screen_resolution() x, y = scale_coordinates_back( coordinate[0], coordinate[1], - self._real_screen_width, # - self._real_screen_height, + real_screen_width, + real_screen_height, self._width, self._height, ) @@ -340,7 +416,7 @@ def to_params( @override @validate_call - def __call__( + def __call__( # noqa: C901 self, action: Action20250124, text: str | None = None, @@ -352,6 +428,8 @@ def __call__( key: str | None = None, # maybe not all keys supported ) -> Image.Image | None: match action: + case "hold_key": + self._hold_key(keystroke=text, duration=duration) # type: ignore[arg-type] case "left_mouse_down": self._agent_os.mouse_down("left") case "left_mouse_up": @@ -364,12 +442,62 @@ def __call__( self._click("middle", coordinate=coordinate, key=key) case "double_click": self._click("left", count=2, coordinate=coordinate, key=key) + case "scroll": + self._scroll( + scroll_direction=scroll_direction, # type: ignore[arg-type] + scroll_amount=scroll_amount, # type: ignore[arg-type] + text=text, + coordinate=coordinate, + ) + return self._screenshot() case "triple_click": self._click("left", count=3, coordinate=coordinate, key=key) + case "wait": + self._wait(duration=duration) # type: ignore[arg-type] case _: return super().__call__(action, text, coordinate) return None + @validate_call + def _hold_key( + self, keystroke: str, duration: Annotated[float, Field(ge=0.0, le=100.0)] + ) -> None: + self._keyboard_pressed(keystroke=keystroke) + time.sleep(duration) + self._keyboard_released(keystroke=keystroke) + + @validate_call + def _scroll( + self, + scroll_direction: ScrollDirection, + scroll_amount: Annotated[int, Field(ge=0)], + text: str | None = None, + coordinate: tuple[Annotated[int, Field(ge=0)], Annotated[int, Field(ge=0)]] + | None = None, + ) -> None: + real_screen_width, real_screen_height = self._get_real_screen_resolution() + x = int(RELATIVE_SCROLL_FACTOR * scroll_amount * real_screen_width) 
+ y = int(RELATIVE_SCROLL_FACTOR * scroll_amount * real_screen_height) + if coordinate is not None: + self._mouse_move(coordinate) + if text is not None: + self._keyboard_pressed(text) + match scroll_direction: + case "up": + self._agent_os.mouse_scroll(0, y) + case "down": + self._agent_os.mouse_scroll(0, -y) + case "left": + self._agent_os.mouse_scroll(x, 0) + case "right": + self._agent_os.mouse_scroll(-x, 0) + if text is not None: + self._keyboard_released(text) + + @validate_call + def _wait(self, duration: Annotated[float, Field(ge=0.0, le=100.0)]) -> None: + time.sleep(duration) + def _click( self, button: Literal["left", "right", "middle"], @@ -381,7 +509,7 @@ def _click( if coordinate is not None: self._mouse_move(coordinate) if key is not None: - self._keyboard_pressed(key) # type: ignore[arg-type] + self._keyboard_pressed(keystroke=key) self._agent_os.click(button, count) if key is not None: - self._keyboard_released(key) # type: ignore[arg-type] + self._keyboard_released(keystroke=key) diff --git a/src/askui/tools/playwright/__init__.py b/src/askui/tools/playwright/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/askui/tools/playwright/agent_os.py b/src/askui/tools/playwright/agent_os.py new file mode 100644 index 00000000..21d3aeeb --- /dev/null +++ b/src/askui/tools/playwright/agent_os.py @@ -0,0 +1,462 @@ +from __future__ import annotations + +import io +import subprocess +from typing import Literal + +from PIL import Image +from playwright.sync_api import ( + Browser, + BrowserContext, + BrowserType, + Page, + Playwright, + ViewportSize, + sync_playwright, +) +from typing_extensions import override + +from ..agent_os import AgentOs, InputEvent, ModifierKey, PcKey + + +class PlaywrightAgentOs(AgentOs): + """ + Playwright-based implementation of AgentOs. + + This implementation uses Playwright's Python SDK to control browser automation + and simulate user interactions. 
It provides mouse control, keyboard input, + and screen capture functionality through a browser context. + + Args: + browser_type (Literal["chromium", "firefox", "webkit"], optional): The browser + type to use. Defaults to `"chromium"`. + headless (bool, optional): Whether to run the browser in headless mode. + Defaults to `False`. + viewport_size (ViewportSize | None, optional): The viewport size. + Defaults to `None` (uses default). + slow_mo (int, optional): Slows down Playwright operations by the specified + amount of milliseconds. Defaults to `0`. + install_browser (bool, optional): Whether to install browser on connection. + Defaults to `True`. + install_dependencies (bool, optional): Whether to install system dependencies + (requires root permissions). Defaults to `False`. + """ + + def __init__( + self, + browser_type: Literal["chromium", "firefox", "webkit"] = "chromium", + headless: bool = False, + viewport_size: ViewportSize | None = None, + slow_mo: int = 0, + install_browser: bool = True, + install_dependencies: bool = False, + ) -> None: + self._browser_type = browser_type + self._headless = headless + self._viewport_size = viewport_size + self._slow_mo = slow_mo + self._install_browser = install_browser + self._install_dependencies = install_dependencies + + # Playwright objects + self._playwright: Playwright | None = None + self._browser: Browser | None = None + self._context: BrowserContext | None = None + self._page: Page | None = None + + # Event listening state + self._listening = False + self._event_queue: list[InputEvent] = [] + + def _install_playwright_browser(self) -> None: + """Install Playwright browser if requested.""" + if not self._install_browser: + return + + try: + # Install the specific browser type + subprocess.run( + ["playwright", "install", self._browser_type], + check=True, + capture_output=True, + text=True, + ) + except subprocess.CalledProcessError as e: + error_msg = f"Failed to install {self._browser_type} browser: {e}" + 
raise RuntimeError(error_msg) from e + except FileNotFoundError as e: + error_msg = ( + "Playwright CLI not found. Install with `pip install playwright`" + ) + raise RuntimeError(error_msg) from e + + def _install_system_dependencies(self) -> None: + """Install system dependencies if requested (requires root permissions).""" + if not self._install_dependencies: + return + + try: + # Install system dependencies + subprocess.run( + ["playwright", "install-deps"], + check=True, + capture_output=True, + text=True, + ) + except subprocess.CalledProcessError as e: + error_msg = f"Failed to install system dependencies: {e}" + raise RuntimeError(error_msg) from e + except FileNotFoundError as e: + error_msg = ( + "Playwright CLI not found. Install with `pip install playwright`" + ) + raise RuntimeError(error_msg) from e + + @override + def connect(self) -> None: + """Establishes a synchronous connection to the browser.""" + + # Install browser and dependencies if requested + if self._install_dependencies: + self._install_system_dependencies() + + if self._install_browser: + self._install_playwright_browser() + + self._playwright = sync_playwright().start() + browser_launcher: BrowserType = getattr(self._playwright, self._browser_type) + self._browser = browser_launcher.launch( + headless=self._headless, + slow_mo=self._slow_mo, + ) + self._context = self._browser.new_context( + viewport=self._viewport_size, + ) + + self._page = self._context.new_page() + # Navigate to a blank page to ensure we have a working page + self._page.goto("data:text/html,") + + @override + def disconnect(self) -> None: + """Terminates the connection to the browser.""" + if self._listening: + self.stop_listening() + + if self._page: + self._page.close() + self._page = None + + if self._context: + self._context.close() + self._context = None + + if self._browser: + self._browser.close() + self._browser = None + + if self._playwright: + self._playwright.stop() + self._playwright = None + + @override 
+ def screenshot(self, report: bool = True) -> Image.Image: + """ + Captures a screenshot of the current page. + + Args: + report (bool, optional): Whether to include the screenshot in + reporting. Defaults to `True`. + + Returns: + Image.Image: A PIL Image object containing the screenshot. + """ + if not self._page: + error_msg = "No active page. Call connect() first." + raise RuntimeError(error_msg) + + screenshot_bytes = self._page.screenshot() + return Image.open(io.BytesIO(screenshot_bytes)) + + @override + def mouse_move(self, x: int, y: int) -> None: + """ + Moves the mouse cursor to specified coordinates on the page. + + Args: + x (int): The horizontal coordinate (in pixels) to move to. + y (int): The vertical coordinate (in pixels) to move to. + """ + if not self._page: + error_msg = "No active page. Call connect() first." + raise RuntimeError(error_msg) + + self._page.mouse.move(x, y) + + @override + def type(self, text: str, typing_speed: int = 50) -> None: + """ + Simulates typing text as if entered on a keyboard. + + Args: + text (str): The text to be typed. + typing_speed (int, optional): The speed of typing in characters per + minute. Defaults to `50`. + """ + if not self._page: + error_msg = "No active page. Call connect() first." + raise RuntimeError(error_msg) + + # Convert typing speed from CPM to delay between characters + delay = (60 / typing_speed) * 1000 if typing_speed > 0 else 0 + self._page.keyboard.type(text, delay=delay) + + @override + def click( + self, button: Literal["left", "middle", "right"] = "left", count: int = 1 + ) -> None: + """ + Simulates clicking a mouse button. + + Args: + button (Literal["left", "middle", "right"], optional): The mouse + button to click. Defaults to `"left"`. + count (int, optional): Number of times to click. Defaults to `1`. 
+ """ + for _ in range(count): + self.mouse_down(button) + self.mouse_up(button) + + @override + def mouse_down(self, button: Literal["left", "middle", "right"] = "left") -> None: + """ + Simulates pressing and holding a mouse button. + + Args: + button (Literal["left", "middle", "right"], optional): The mouse + button to press. Defaults to `"left"`. + """ + if not self._page: + error_msg = "No active page. Call connect() first." + raise RuntimeError(error_msg) + + self._page.mouse.down(button=button) + + @override + def mouse_up(self, button: Literal["left", "middle", "right"] = "left") -> None: + """ + Simulates releasing a mouse button. + + Args: + button (Literal["left", "middle", "right"], optional): The mouse + button to release. Defaults to `"left"`. + """ + if not self._page: + error_msg = "No active page. Call connect() first." + raise RuntimeError(error_msg) + + self._page.mouse.up(button=button) + + @override + def mouse_scroll(self, x: int, y: int) -> None: + """ + Simulates scrolling the mouse wheel. + + Args: + x (int): The horizontal scroll amount. Positive values scroll right, + negative values scroll left. + y (int): The vertical scroll amount. Positive values scroll down, + negative values scroll up. + """ + if not self._page: + error_msg = "No active page. Call connect() first." + raise RuntimeError(error_msg) + + self._page.mouse.wheel(delta_x=x, delta_y=y) + + @override + def keyboard_pressed( + self, key: PcKey | ModifierKey, modifier_keys: list[ModifierKey] | None = None + ) -> None: + """ + Simulates pressing and holding a keyboard key. + + Args: + key (PcKey | ModifierKey): The key to press. + modifier_keys (list[ModifierKey] | None, optional): List of modifier keys to + press along with the main key. Defaults to `None`. + """ + if not self._page: + error_msg = "No active page. Call connect() first." 
+ raise RuntimeError(error_msg) + + # Press modifier keys first + if modifier_keys: + for modifier in modifier_keys: + self._page.keyboard.down(self._convert_key(modifier)) + + # Press the main key + self._page.keyboard.down(self._convert_key(key)) + + @override + def keyboard_release( + self, key: PcKey | ModifierKey, modifier_keys: list[ModifierKey] | None = None + ) -> None: + """ + Simulates releasing a keyboard key. + + Args: + key (PcKey | ModifierKey): The key to release. + modifier_keys (list[ModifierKey] | None, optional): List of modifier keys to + release along with the main key. Defaults to `None`. + """ + if not self._page: + error_msg = "No active page. Call connect() first." + raise RuntimeError(error_msg) + + # Release the main key first + self._page.keyboard.up(self._convert_key(key)) + + # Release modifier keys + if modifier_keys: + for modifier in modifier_keys: + self._page.keyboard.up(self._convert_key(modifier)) + + @override + def keyboard_tap( + self, + key: PcKey | ModifierKey, + modifier_keys: list[ModifierKey] | None = None, + count: int = 1, + ) -> None: + """ + Simulates pressing and immediately releasing a keyboard key. + + Args: + key (PcKey | ModifierKey): The key to tap. + modifier_keys (list[ModifierKey] | None, optional): List of modifier keys to + press along with the main key. Defaults to `None`. + count (int, optional): The number of times to tap the key. Defaults to `1`. + """ + if not self._page: + error_msg = "No active page. Call connect() first." 
+ raise RuntimeError(error_msg) + + for _ in range(count): + # Press modifier keys first + if modifier_keys: + for modifier in modifier_keys: + self._page.keyboard.down(self._convert_key(modifier)) + + # Press and release the main key + self._page.keyboard.press(self._convert_key(key)) + + # Release modifier keys + if modifier_keys: + for modifier in modifier_keys: + self._page.keyboard.up(self._convert_key(modifier)) + + def _convert_key(self, key: PcKey | ModifierKey) -> str: + """ + Convert our key format to Playwright's key format. + + Args: + key (PcKey | ModifierKey): The key to convert. + + Returns: + str: The Playwright-compatible key string. + """ + # Map our modifier keys to Playwright format + modifier_map: dict[PcKey | ModifierKey, str] = { + "command": "Meta", + "alt": "Alt", + "control": "Control", + "shift": "Shift", + "right_shift": "Shift", + } + + if key in modifier_map: + return modifier_map[key] + + # For regular keys, Playwright uses similar format + # but some keys might need conversion + key_map: dict[PcKey | ModifierKey, str] = { + "backspace": "Backspace", + "delete": "Delete", + "enter": "Enter", + "tab": "Tab", + "escape": "Escape", + "up": "ArrowUp", + "down": "ArrowDown", + "right": "ArrowRight", + "left": "ArrowLeft", + "home": "Home", + "end": "End", + "pageup": "PageUp", + "pagedown": "PageDown", + "space": " ", + } + + if key in key_map: + return key_map[key] + + # Function keys + if key.startswith("f") and key[1:].isdigit(): + return key.upper() + + # For most other keys, return as-is + return key + + # --- Extra browser-oriented actions --- + def goto(self, url: str) -> None: + """ + Navigate to a specific URL. + + Args: + url (str): The URL to navigate to. + """ + if not self._page: + error_msg = "No active page. Call connect() first." + raise RuntimeError(error_msg) + + self._page.goto(url) + + def back(self) -> None: + if not self._page: + error_msg = "No active page. Call connect() first." 
+ raise RuntimeError(error_msg) + + self._page.go_back() + + def forward(self) -> None: + if not self._page: + error_msg = "No active page. Call connect() first." + raise RuntimeError(error_msg) + + self._page.go_forward() + + def get_page_title(self) -> str: + """ + Get the title of the current page. + + Returns: + str: The page title. + """ + if not self._page: + error_msg = "No active page. Call connect() first." + raise RuntimeError(error_msg) + + return self._page.title() + + def get_page_url(self) -> str: + """ + Get the URL of the current page. + + Returns: + str: The current page URL. + """ + if not self._page: + error_msg = "No active page. Call connect() first." + raise RuntimeError(error_msg) + + return self._page.url diff --git a/src/askui/tools/playwright/tools.py b/src/askui/tools/playwright/tools.py new file mode 100644 index 00000000..96aa7843 --- /dev/null +++ b/src/askui/tools/playwright/tools.py @@ -0,0 +1,140 @@ +from typing_extensions import override + +from askui.models.shared.tools import Tool +from askui.tools.playwright.agent_os import PlaywrightAgentOs + + +class PlaywrightGotoTool(Tool): + """ + Navigates to a specific URL in the browser. + """ + + def __init__(self, agent_os: PlaywrightAgentOs) -> None: + super().__init__( + name="playwright_goto_tool", + description=( + """ + Navigates the browser to a specific URL. + This will load the webpage at the given URL and make it the current + page. The browser will wait for the page to load completely before + proceeding. + """ + ), + input_schema={ + "type": "object", + "properties": { + "url": { + "type": "string", + "description": ( + "The URL to navigate to. Must be a valid URL including " + "the protocol (e.g., 'https://example.com')." 
+ ), + }, + }, + "required": ["url"], + }, + ) + self._agent_os = agent_os + + @override + def __call__(self, url: str) -> str: + self._agent_os.goto(url) + return f"Navigated to: {url}" + + +class PlaywrightBackTool(Tool): + """ + Navigates back to the previous page in the browser history. + """ + + def __init__(self, agent_os: PlaywrightAgentOs) -> None: + super().__init__( + name="playwright_back_tool", + description=( + """ + Navigates back to the previous page in the browser history. + This is equivalent to clicking the back button in a browser. + If there is no previous page in the history, this action will have no + effect. + """ + ), + ) + self._agent_os = agent_os + + @override + def __call__(self) -> str: + self._agent_os.back() + return "Navigated back to the previous page" + + +class PlaywrightForwardTool(Tool): + """ + Navigates forward to the next page in the browser history. + """ + + def __init__(self, agent_os: PlaywrightAgentOs) -> None: + super().__init__( + name="playwright_forward_tool", + description=( + """ + Navigates forward to the next page in the browser history. + This is equivalent to clicking the forward button in a browser. + If there is no next page in the history, this action will have no + effect. + """ + ), + ) + self._agent_os = agent_os + + @override + def __call__(self) -> str: + self._agent_os.forward() + return "Navigated forward to the next page" + + +class PlaywrightGetPageTitleTool(Tool): + """ + Gets the title of the current page. + """ + + def __init__(self, agent_os: PlaywrightAgentOs) -> None: + super().__init__( + name="playwright_get_page_title_tool", + description=( + """ + Retrieves the title of the currently loaded webpage. + The title is typically displayed in the browser tab and represents + the main heading or name of the page content. 
+ """ + ), + ) + self._agent_os = agent_os + + @override + def __call__(self) -> str: + title = self._agent_os.get_page_title() + return f"Page title: {title}" + + +class PlaywrightGetPageUrlTool(Tool): + """ + Gets the URL of the current page. + """ + + def __init__(self, agent_os: PlaywrightAgentOs) -> None: + super().__init__( + name="playwright_get_page_url_tool", + description=( + """ + Retrieves the URL of the currently loaded webpage. + This returns the full URL including protocol, domain, path, and query + parameters. + """ + ), + ) + self._agent_os = agent_os + + @override + def __call__(self) -> str: + url = self._agent_os.get_page_url() + return f"Current page URL: {url}" diff --git a/src/askui/web_agent.py b/src/askui/web_agent.py new file mode 100644 index 00000000..24d69e61 --- /dev/null +++ b/src/askui/web_agent.py @@ -0,0 +1,98 @@ +import logging +from datetime import datetime, timezone + +from pydantic import ConfigDict, validate_call +from typing_extensions import override + +from askui.agent import VisionAgent +from askui.container import telemetry +from askui.models.shared.settings import ( + COMPUTER_USE_20241022_BETA_FLAG, + COMPUTER_USE_20250124_BETA_FLAG, + ActSettings, + MessageSettings, +) +from askui.tools.exception_tool import ExceptionTool +from askui.tools.playwright.agent_os import PlaywrightAgentOs +from askui.tools.playwright.tools import ( + PlaywrightBackTool, + PlaywrightForwardTool, + PlaywrightGetPageTitleTool, + PlaywrightGetPageUrlTool, + PlaywrightGotoTool, +) +from askui.tools.toolbox import AgentToolbox + +from .models import ModelComposition +from .models.models import ModelChoice, ModelName, ModelRegistry +from .reporting import Reporter +from .retry import Retry + +_SYSTEM_PROMPT = f""" + +* You are utilizing a webbrowser in full-screen mode. So you are only seeing the content of the currently opened webpage (tab). +* It can be helpful to zoom in/out or scroll down/up so that you can see everything on the page. 
Make sure to that before deciding something isn't available. +* When using your function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request. +* The current date and time is {datetime.now(timezone.utc).strftime("%A, %B %d, %Y %H:%M:%S %z")}. + +""" + +_ANTHROPIC__CLAUDE__3_5__SONNET__20241022__ACT_SETTINGS = ActSettings( + messages=MessageSettings( + model=ModelName.ANTHROPIC__CLAUDE__3_5__SONNET__20241022.value, + system=_SYSTEM_PROMPT, + betas=[COMPUTER_USE_20241022_BETA_FLAG], + ), +) + +_CLAUDE__SONNET__4__20250514__ACT_SETTINGS = ActSettings( + messages=MessageSettings( + model=ModelName.CLAUDE__SONNET__4__20250514.value, + system=_SYSTEM_PROMPT, + betas=[COMPUTER_USE_20250124_BETA_FLAG], + thinking={"type": "enabled", "budget_tokens": 2048}, + ), +) + + +class WebVisionAgent(VisionAgent): + @telemetry.record_call(exclude={"model_router", "reporters", "tools"}) + @validate_call(config=ConfigDict(arbitrary_types_allowed=True)) + def __init__( + self, + log_level: int | str = logging.INFO, + reporters: list[Reporter] | None = None, + model: ModelChoice | ModelComposition | str | None = None, + retry: Retry | None = None, + models: ModelRegistry | None = None, + ) -> None: + agent_os = PlaywrightAgentOs() + tools = AgentToolbox( + agent_os=agent_os, + ) + super().__init__( + log_level=log_level, + reporters=reporters, + model=model, + retry=retry, + models=models, + tools=tools, + act_tools=[ + PlaywrightGotoTool(agent_os=agent_os), + PlaywrightBackTool(agent_os=agent_os), + PlaywrightForwardTool(agent_os=agent_os), + PlaywrightGetPageTitleTool(agent_os=agent_os), + PlaywrightGetPageUrlTool(agent_os=agent_os), + ExceptionTool(), + ], + ) + + @override + def _get_default_settings_for_act(self, model_choice: str) -> ActSettings: + match model_choice: + case ModelName.ANTHROPIC__CLAUDE__3_5__SONNET__20241022: + return 
_ANTHROPIC__CLAUDE__3_5__SONNET__20241022__ACT_SETTINGS + case ModelName.CLAUDE__SONNET__4__20250514 | ModelName.ASKUI: + return _CLAUDE__SONNET__4__20250514__ACT_SETTINGS + case _: + return ActSettings() diff --git a/src/chat/api/assistants/seeds.py b/src/chat/api/assistants/seeds.py deleted file mode 100644 index 5d7bf97b..00000000 --- a/src/chat/api/assistants/seeds.py +++ /dev/null @@ -1,19 +0,0 @@ -from chat.api.assistants.models import Assistant - -ASKUI_VISION_AGENT = Assistant( - id="asst_ge3tiojsga3dgnruge3di2u5ov36shedkcslxnmca", - name="AskUI Vision Agent", - avatar="data:image/svg+xml;base64,PHN2ZyAgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIgogIHdpZHRoPSIyNCIKICBoZWlnaHQ9IjI0IgogIHZpZXdCb3g9IjAgMCAyNCAyNCIKICBmaWxsPSJub25lIgogIHN0cm9rZT0iIzAwMCIgc3R5bGU9ImJhY2tncm91bmQtY29sb3I6ICNmZmY7IGJvcmRlci1yYWRpdXM6IDJweCIKICBzdHJva2Utd2lkdGg9IjIiCiAgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIgogIHN0cm9rZS1saW5lam9pbj0icm91bmQiCj4KICA8cGF0aCBkPSJNMTIgOFY0SDgiIC8+CiAgPHJlY3Qgd2lkdGg9IjE2IiBoZWlnaHQ9IjEyIiB4PSI0IiB5PSI4IiByeD0iMiIgLz4KICA8cGF0aCBkPSJNMiAxNGgyIiAvPgogIDxwYXRoIGQ9Ik0yMCAxNGgyIiAvPgogIDxwYXRoIGQ9Ik0xNSAxM3YyIiAvPgogIDxwYXRoIGQ9Ik05IDEzdjIiIC8+Cjwvc3ZnPgo=", -) - -HUMAN_DEMONSTRATION_AGENT = Assistant( - id="asst_ge3tiojsga3dgnruge3di2u5ov36shedkcslxnmcb", - name="Human DemonstrationAgent", - avatar="data:image/svg+xml;base64,PHN2ZyAgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIgogIHdpZHRoPSIyNCIKICBoZWlnaHQ9IjI0IgogIHZpZXdCb3g9IjAgMCAyNCAyNCIKICBmaWxsPSJub25lIgogIHN0cm9rZT0iIzAwMCIgc3R5bGU9ImJhY2tncm91bmQtY29sb3I6ICNmZmY7IGJvcmRlci1yYWRpdXM6IDJweCIKICBzdHJva2Utd2lkdGg9IjIiCiAgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIgogIHN0cm9rZS1saW5lam9pbj0icm91bmQiCj4KICA8cGF0aCBkPSJNMTkgMjF2LTJhNCA0IDAgMCAwLTQtNEg5YTQgNCAwIDAgMC00IDR2MiIgLz4KICA8Y2lyY2xlIGN4PSIxMiIgY3k9IjciIHI9IjQiIC8+Cjwvc3ZnPgo=", -) - -ANDROID_VISION_AGENT = Assistant( - id="asst_78da09fbf1ed43c7826fb1686f89f541", - name="AskUI Android Vision Agent", - 
avatar="data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciICB2aWV3Qm94PSIwIDAgNDggNDgiIHdpZHRoPSIyNXB4IiBoZWlnaHQ9IjI1cHgiPjxwYXRoIGQ9Ik0gMzIuNTE5NTMxIDAuOTgyNDIxODggQSAxLjUwMDE1IDEuNTAwMTUgMCAwIDAgMzEuMjc5Mjk3IDEuNjI4OTA2MiBMIDI5LjQzNzUgNC4yMDg5ODQ0IEMgMjcuNzgwMjA3IDMuNDQwNTAwNiAyNS45NDE5MSAzIDI0IDMgQyAyMi4wNTgwOSAzIDIwLjIxOTc5MyAzLjQ0MDUwMDYgMTguNTYyNSA0LjIwODk4NDQgTCAxNi43MjA3MDMgMS42Mjg5MDYyIEEgMS41MDAxNSAxLjUwMDE1IDAgMCAwIDE1LjQzNTU0NyAwLjk4NDM3NSBBIDEuNTAwMTUgMS41MDAxNSAwIDAgMCAxNC4yNzkyOTcgMy4zNzEwOTM4IEwgMTYgNS43NzkyOTY5IEMgMTMuMTM4ODk2IDguMDI0NzU4MiAxMS4yNDUxODggMTEuNDM2MDIgMTEuMDM1MTU2IDE1LjI5MTAxNiBDIDEwLjU1MzI2IDE1LjExMjgxOCAxMC4wNDA0MDggMTUgOS41IDE1IEMgNy4wMzI0OTkxIDE1IDUgMTcuMDMyNDk5IDUgMTkuNSBMIDUgMzAuNSBDIDUgMzIuOTY3NTAxIDcuMDMyNDk5MSAzNSA5LjUgMzUgQyAxMC4wOTAzMTMgMzUgMTAuNjUzMjI5IDM0Ljg3ODc0OSAxMS4xNzE4NzUgMzQuNjY3OTY5IEMgMTEuNTY0MzM2IDM2LjA3MjEwNSAxMi42MzEzMzMgMzcuMTk2OTk0IDE0IDM3LjY5MzM1OSBMIDE0IDQxLjUgQyAxNCA0My45Njc1MDEgMTYuMDMyNDk5IDQ2IDE4LjUgNDYgQyAyMC45Njc1MDEgNDYgMjMgNDMuOTY3NTAxIDIzIDQxLjUgTCAyMyAzOCBMIDI1IDM4IEwgMjUgNDEuNSBDIDI1IDQzLjk2NzUwMSAyNy4wMzI0OTkgNDYgMjkuNSA0NiBDIDMxLjk2NzUwMSA0NiAzNCA0My45Njc1MDEgMzQgNDEuNSBMIDM0IDM3LjY5MzM1OSBDIDM1LjM2ODY2NyAzNy4xOTY5OTQgMzYuNDM1NjY0IDM2LjA3MjEwNSAzNi44MjgxMjUgMzQuNjY3OTY5IEMgMzcuMzQ2NzcxIDM0Ljg3ODc0OSAzNy45MDk2ODcgMzUgMzguNSAzNSBDIDQwLjk2NzUwMSAzNSA0MyAzMi45Njc1MDEgNDMgMzAuNSBMIDQzIDE5LjUgQyA0MyAxNy4wMzI0OTkgNDAuOTY3NTAxIDE1IDM4LjUgMTUgQyAzNy45NTk1OTIgMTUgMzcuNDQ2NzQgMTUuMTEyODE4IDM2Ljk2NDg0NCAxNS4yOTEwMTYgQyAzNi43NTQ4MTIgMTEuNDM2MDIgMzQuODYxMTA0IDguMDI0NzU4MiAzMiA1Ljc3OTI5NjkgTCAzMy43MjA3MDMgMy4zNzEwOTM4IEEgMS41MDAxNSAxLjUwMDE1IDAgMCAwIDMyLjUxOTUzMSAwLjk4MjQyMTg4IHogTSAyNCA2IEMgMjkuMTg1MTI3IDYgMzMuMjc2NzI3IDkuOTU3NTEzMiAzMy43OTg4MjggMTUgTCAxNC4yMDExNzIgMTUgQyAxNC43MjMyNzMgOS45NTc1MTMyIDE4LjgxNDg3MyA2IDI0IDYgeiBNIDE5LjUgMTAgQSAxLjUgMS41IDAgMCAwIDE5LjUgMTMgQSAxLjUgMS41IDAgMCAwIDE5LjUgMTAgeiBNIDI4LjUgMTAgQSAxLjUgMS41IDAgMCAwIDI4LjUgMTMgQSAxLjUgMS41IDAgMCAwIDI4LjUgMTAgei
BNIDkuNSAxOCBDIDEwLjM0NjQ5OSAxOCAxMSAxOC42NTM1MDEgMTEgMTkuNSBMIDExIDMwLjUgQyAxMSAzMS4zNDY0OTkgMTAuMzQ2NDk5IDMyIDkuNSAzMiBDIDguNjUzNTAwOSAzMiA4IDMxLjM0NjQ5OSA4IDMwLjUgTCA4IDE5LjUgQyA4IDE4LjY1MzUwMSA4LjY1MzUwMDkgMTggOS41IDE4IHogTSAxNCAxOCBMIDM0IDE4IEwgMzQgMTkuNSBMIDM0IDMwLjUgTCAzNCAzMy41IEMgMzQgMzQuMzQ2NDk5IDMzLjM0NjQ5OSAzNSAzMi41IDM1IEwgMjUgMzUgTCAyMyAzNSBMIDE1LjUgMzUgQyAxNC42NTM1MDEgMzUgMTQgMzQuMzQ2NDk5IDE0IDMzLjUgTCAxNCAzMC41IEwgMTQgMTkuNSBMIDE0IDE4IHogTSAzOC41IDE4IEMgMzkuMzQ2NDk5IDE4IDQwIDE4LjY1MzUwMSA0MCAxOS41IEwgNDAgMzAuNSBDIDQwIDMxLjM0NjQ5OSAzOS4zNDY0OTkgMzIgMzguNSAzMiBDIDM3LjY1MzUwMSAzMiAzNyAzMS4zNDY0OTkgMzcgMzAuNSBMIDM3IDE5LjUgQyAzNyAxOC42NTM1MDEgMzcuNjUzNTAxIDE4IDM4LjUgMTggeiBNIDE3IDM4IEwgMjAgMzggTCAyMCA0MS41IEMgMjAgNDIuMzQ2NDk5IDE5LjM0NjQ5OSA0MyAxOC41IDQzIEMgMTcuNjUzNTAxIDQzIDE3IDQyLjM0NjQ5OSAxNyA0MS41IEwgMTcgMzggeiBNIDI4IDM4IEwgMzEgMzggTCAzMSA0MS41IEMgMzEgNDIuMzQ2NDk5IDMwLjM0NjQ5OSA0MyAyOS41IDQzIEMgMjguNjUzNTAxIDQzIDI4IDQyLjM0NjQ5OSAyOCA0MS41IEwgMjggMzggeiIvPjwvc3ZnPg==", -) diff --git a/src/chat/api/runs/runner/events/__init__.py b/src/chat/api/runs/runner/events/__init__.py deleted file mode 100644 index 89dabf59..00000000 --- a/src/chat/api/runs/runner/events/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -from chat.api.runs.runner.events.done_events import DoneEvent -from chat.api.runs.runner.events.error_events import ErrorEvent -from chat.api.runs.runner.events.event_base import EventBase -from chat.api.runs.runner.events.events import Events -from chat.api.runs.runner.events.message_events import MessageEvent -from chat.api.runs.runner.events.run_events import RunEvent - -__all__ = [ - "DoneEvent", - "ErrorEvent", - "EventBase", - "Events", - "MessageEvent", - "RunEvent", -] diff --git a/src/chat/api/runs/runner/events/events.py b/src/chat/api/runs/runner/events/events.py deleted file mode 100644 index b11e2b12..00000000 --- a/src/chat/api/runs/runner/events/events.py +++ /dev/null @@ -1,6 +0,0 @@ -from chat.api.runs.runner.events.done_events import 
DoneEvent -from chat.api.runs.runner.events.error_events import ErrorEvent -from chat.api.runs.runner.events.message_events import MessageEvent -from chat.api.runs.runner.events.run_events import RunEvent - -Events = DoneEvent | ErrorEvent | MessageEvent | RunEvent diff --git a/src/chat/ui/.gitignore b/src/chat/ui/.gitignore deleted file mode 100644 index 5ab2f9bd..00000000 --- a/src/chat/ui/.gitignore +++ /dev/null @@ -1,33 +0,0 @@ -# See https://help.github.com/articles/ignoring-files/ for more about ignoring files. - -# dependencies -/node_modules -/.pnp -.pnp.js -.yarn/install-state.gz - -# testing -/coverage - -# next.js -/.next/ -/out/ - -# production -/build - -# misc -.DS_Store -*.pem - -# debug -npm-debug.log* -yarn-debug.log* -yarn-error.log* - -# vercel -.vercel - -# typescript -*.tsbuildinfo -next-env.d.ts diff --git a/src/chat/ui/app/globals.css b/src/chat/ui/app/globals.css deleted file mode 100644 index 20b1c1db..00000000 --- a/src/chat/ui/app/globals.css +++ /dev/null @@ -1,82 +0,0 @@ -@tailwind base; -@tailwind components; -@tailwind utilities; - -:root { - --foreground-rgb: 0, 0, 0; - --background-start-rgb: 214, 219, 220; - --background-end-rgb: 255, 255, 255; -} - -@media (prefers-color-scheme: dark) { - :root { - --foreground-rgb: 255, 255, 255; - --background-start-rgb: 0, 0, 0; - --background-end-rgb: 0, 0, 0; - } -} - -@layer base { - :root { - --background: 0 0% 100%; - --foreground: 0 0% 3.9%; - --card: 0 0% 100%; - --card-foreground: 0 0% 3.9%; - --popover: 0 0% 100%; - --popover-foreground: 0 0% 3.9%; - --primary: 0 0% 9%; - --primary-foreground: 0 0% 98%; - --secondary: 0 0% 96.1%; - --secondary-foreground: 0 0% 9%; - --muted: 0 0% 96.1%; - --muted-foreground: 0 0% 45.1%; - --accent: 0 0% 96.1%; - --accent-foreground: 0 0% 9%; - --destructive: 0 84.2% 60.2%; - --destructive-foreground: 0 0% 98%; - --border: 0 0% 89.8%; - --input: 0 0% 89.8%; - --ring: 0 0% 3.9%; - --chart-1: 12 76% 61%; - --chart-2: 173 58% 39%; - --chart-3: 197 37% 
24%; - --chart-4: 43 74% 66%; - --chart-5: 27 87% 67%; - --radius: 0.5rem; - } - .dark { - --background: 0 0% 3.9%; - --foreground: 0 0% 98%; - --card: 0 0% 3.9%; - --card-foreground: 0 0% 98%; - --popover: 0 0% 3.9%; - --popover-foreground: 0 0% 98%; - --primary: 0 0% 98%; - --primary-foreground: 0 0% 9%; - --secondary: 0 0% 14.9%; - --secondary-foreground: 0 0% 98%; - --muted: 0 0% 14.9%; - --muted-foreground: 0 0% 63.9%; - --accent: 0 0% 14.9%; - --accent-foreground: 0 0% 98%; - --destructive: 0 62.8% 30.6%; - --destructive-foreground: 0 0% 98%; - --border: 0 0% 14.9%; - --input: 0 0% 14.9%; - --ring: 0 0% 83.1%; - --chart-1: 220 70% 50%; - --chart-2: 160 60% 45%; - --chart-3: 30 80% 55%; - --chart-4: 280 65% 60%; - --chart-5: 340 75% 55%; - } -} - -@layer base { - * { - @apply border-border; - } - body { - @apply bg-background text-foreground; - } -} diff --git a/src/chat/ui/app/layout.tsx b/src/chat/ui/app/layout.tsx deleted file mode 100644 index 4ec63bdc..00000000 --- a/src/chat/ui/app/layout.tsx +++ /dev/null @@ -1,31 +0,0 @@ -import "./globals.css"; -import type { Metadata } from "next"; -import { Inter } from "next/font/google"; -import { ThemeProvider } from "next-themes"; - -const inter = Inter({ subsets: ["latin"] }); - -export const metadata: Metadata = { - title: "AskUI Chat", -}; - -export default function RootLayout({ - children, -}: { - children: React.ReactNode; -}) { - return ( - - - - {children} - - - - ); -} diff --git a/src/chat/ui/app/page.tsx b/src/chat/ui/app/page.tsx deleted file mode 100644 index 2f8a007a..00000000 --- a/src/chat/ui/app/page.tsx +++ /dev/null @@ -1,33 +0,0 @@ -"use client"; - -import { QueryClient, QueryClientProvider } from "@tanstack/react-query"; -import { Toaster } from "sonner"; -import { Sidebar } from "@/components/sidebar/sidebar"; -import { ChatContainer } from "@/components/chat/chat-container"; - -const queryClient = new QueryClient({ - defaultOptions: { - queries: { - staleTime: 1000 * 60 * 5, // 5 minutes - 
retry: 1, - }, - }, -}); - -function ChatApp() { - return ( -
- - -
- ); -} - -export default function Home() { - return ( - - - - - ); -} diff --git a/src/chat/ui/components.json b/src/chat/ui/components.json deleted file mode 100644 index c5974621..00000000 --- a/src/chat/ui/components.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "$schema": "https://ui.shadcn.com/schema.json", - "style": "default", - "rsc": true, - "tsx": true, - "tailwind": { - "config": "tailwind.config.ts", - "css": "app/globals.css", - "baseColor": "neutral", - "cssVariables": true, - "prefix": "" - }, - "aliases": { - "components": "@/components", - "utils": "@/lib/utils", - "ui": "@/components/ui", - "lib": "@/lib", - "hooks": "@/hooks" - } -} diff --git a/src/chat/ui/components/chat/chat-container.tsx b/src/chat/ui/components/chat/chat-container.tsx deleted file mode 100644 index 221d5460..00000000 --- a/src/chat/ui/components/chat/chat-container.tsx +++ /dev/null @@ -1,23 +0,0 @@ -"use client"; - -import { useChatStore } from "@/lib/store"; -import { EmptyState } from "./empty-state"; -import { ChatHeader } from "./chat-header"; -import { MessageList } from "./message-list"; -import { ChatInput } from "./chat-input"; - -export function ChatContainer() { - const { selectedThread } = useChatStore(); - - if (!selectedThread) { - return ; - } - - return ( -
- - - -
- ); -} diff --git a/src/chat/ui/components/chat/chat-header.tsx b/src/chat/ui/components/chat/chat-header.tsx deleted file mode 100644 index 16da6302..00000000 --- a/src/chat/ui/components/chat/chat-header.tsx +++ /dev/null @@ -1,118 +0,0 @@ -"use client"; - -import { Bot, Zap } from "lucide-react"; -import { useQuery } from "@tanstack/react-query"; -import { - Select, - SelectContent, - SelectItem, - SelectTrigger, -} from "@/components/ui/select"; -import { Avatar, AvatarFallback, AvatarImage } from "@/components/ui/avatar"; -import { Badge } from "@/components/ui/badge"; -import { Skeleton } from "@/components/ui/skeleton"; -import { useChatStore } from "@/lib/store"; -import { apiClient } from "@/lib/api"; -import { HUMAN_DEMONSTRATION_AGENT_ID } from "@/lib/constants"; - -export function ChatHeader() { - const { selectedAssistant, setSelectedAssistant, currentRun } = - useChatStore(); - - const { data: assistantsListResponse, isLoading } = useQuery({ - queryKey: ["assistants"], - queryFn: () => - apiClient.listAssistants().then((response) => { - return { - ...response, - data: response.data.filter( - (a) => a.id !== HUMAN_DEMONSTRATION_AGENT_ID - ), - }; - }), - }); - - const handleAssistantChange = (assistantId: string) => { - const assistant = assistantsListResponse?.data.find( - (a) => a.id === assistantId - ); - if (assistant) { - setSelectedAssistant(assistant); - } - }; - - if (isLoading) { - return ( -
-
- - -
- -
- ); - } - - return ( -
-
- -
- - {currentRun && ( - - - {currentRun.status === "in_progress" - ? "Thinking..." - : currentRun.status} - - )} -
- ); -} diff --git a/src/chat/ui/components/chat/chat-input.tsx b/src/chat/ui/components/chat/chat-input.tsx deleted file mode 100644 index 32ef0ce6..00000000 --- a/src/chat/ui/components/chat/chat-input.tsx +++ /dev/null @@ -1,520 +0,0 @@ -"use client"; - -import { useState, useRef, useCallback } from "react"; -import { - Send, - Plus, - X, - Paperclip, - Square, - MousePointerClick, -} from "lucide-react"; -import { motion, AnimatePresence } from "framer-motion"; -import { useMutation, useQueryClient } from "@tanstack/react-query"; -import { toast } from "sonner"; -import { Button } from "@/components/ui/button"; -import { Textarea } from "@/components/ui/textarea"; -import { - Tooltip, - TooltipContent, - TooltipProvider, - TooltipTrigger, -} from "@/components/ui/tooltip"; -import { useChatStore } from "@/lib/store"; -import { apiClient } from "@/lib/api"; -import { Event } from "@/lib/types"; -import { HUMAN_DEMONSTRATION_AGENT_ID } from "@/lib/constants"; - -interface AttachedFile { - id: string; - file: File; - preview: string; - type: "image"; -} - -let buffer = ""; - -const SseSplitterStream = (): TransformStream => - new TransformStream({ - start() {}, - transform(chunk, controller) { - buffer += chunk; - const parts = buffer.split("\n\n"); - buffer = parts.pop()!; // Keep the last partial event in buffer - - for (const part of parts) { - controller.enqueue(part); - } - }, - flush(controller) {}, - }); - -function parseSseMessage(message: string): Event { - const lines = message.split("\n"); - let type = "message"; - const dataLines: string[] = []; - - for (const line of lines) { - if (line.startsWith("event:")) { - type = line.slice(6).trim(); - } else if (line.startsWith("data:")) { - dataLines.push(line.slice(5).trim()); - } - } - - if (dataLines.length === 0) { - throw new Error("No data field in SSE message"); - } - - const rawData = dataLines.join("\n"); - - try { - switch (type) { - case "thread.run.created": - case "thread.run.queued": - case 
"thread.run.in_progress": - case "thread.run.completed": - case "thread.run.cancelling": - case "thread.run.cancelled": - case "thread.run.failed": - case "thread.run.expired": - return { type, data: JSON.parse(rawData) }; - case "thread.message.created": - return { type, data: JSON.parse(rawData) }; - case "error": - return { type, data: JSON.parse(rawData) }; - case "done": - return { type, data: "[DONE]" }; - default: - throw new Error(`Unknown event type: ${type}`); - } - } catch (e) { - throw new Error( - `Failed to parse SSE data of event "${type}": ${ - e instanceof Error ? e.message : String(e) - }: ${rawData}` - ); - } -} - -export function ChatInput() { - const [message, setMessage] = useState(""); - const [attachedFiles, setAttachedFiles] = useState([]); - const [isDragOver, setIsDragOver] = useState(false); - const [runningAction, setRunningAction] = useState<"send" | "demo" | null>( - null - ); - const textareaRef = useRef(null); - const fileInputRef = useRef(null); - const queryClient = useQueryClient(); - - const { - selectedThread, - selectedAssistant, - currentRun, - setCurrentRun, - appendMessage, - clearMessages, - } = useChatStore(); - - const createMessageMutation = useMutation({ - mutationFn: async (data: { content: any; role: "user" }) => { - if (!selectedThread) throw new Error("No thread selected"); - return apiClient.createMessage(selectedThread.id, data); - }, - onSuccess: () => { - queryClient.invalidateQueries({ - queryKey: ["messages", selectedThread?.id], - }); - }, - onError: (error) => { - toast.error(`Failed to send message: ${error}`); - }, - }); - - const createRunMutation = useMutation({ - mutationFn: async (assistantId: string) => { - if (!selectedThread || !assistantId) { - throw new Error("Thread and assistant required"); - } - - clearMessages(); - const response = await fetch( - `${ - process.env.NEXT_PUBLIC_API_URL || "http://localhost:8000" - }/v1/threads/${selectedThread.id}/runs`, - { - method: "POST", - headers: { - 
"Content-Type": "application/json", - }, - body: JSON.stringify({ - assistant_id: assistantId, - stream: true, - }), - } - ); - - if (!response.ok) { - throw new Error(`API Error: ${response.status} ${response.statusText}`); - } - - if (!response.body) { - throw new Error("No response body"); - } - - const reader = response.body - .pipeThrough(new TextDecoderStream()) - .pipeThrough(SseSplitterStream()) - .getReader(); - - while (true) { - const { done, value } = await reader.read(); - if (done) break; - const event: Event = parseSseMessage(value); - switch (event.type) { - case "thread.run.created": - case "thread.run.queued": - case "thread.run.in_progress": - case "thread.run.completed": - case "thread.run.cancelling": - case "thread.run.cancelled": - case "thread.run.failed": - setCurrentRun(event.data); - break; - case "thread.run.expired": - setCurrentRun(event.data); - throw new Error("Run expired"); - case "thread.message.created": - appendMessage(event.data); - break; - case "error": - throw new Error(event.data.error.message); - case "done": - setCurrentRun(null); - break; - } - } - }, - onSuccess: () => { - queryClient.invalidateQueries({ - queryKey: ["messages", selectedThread?.id], - }); - setCurrentRun(null); - setRunningAction(null); - }, - onError: (error) => { - toast.error(`Run failed: ${error.message}`); - queryClient.invalidateQueries({ - queryKey: ["messages", selectedThread?.id], - }); - setCurrentRun(null); - setRunningAction(null); - }, - }); - - const handleFileSelect = (files: FileList | null) => { - if (!files) return; - - Array.from(files).forEach((file) => { - if (file.type.startsWith("image/")) { - const reader = new FileReader(); - reader.onload = (e) => { - const newFile: AttachedFile = { - id: Math.random().toString(36).substr(2, 9), - file, - preview: e.target?.result as string, - type: "image", - }; - setAttachedFiles((prev) => [...prev, newFile]); - }; - reader.readAsDataURL(file); - } else { - toast.error("Only image files are 
supported"); - } - }); - }; - - const handleSubmit = async (e: React.FormEvent) => { - e.preventDefault(); - - if (!selectedThread || !selectedAssistant) { - toast.error("Please select a thread and assistant"); - return; - } - - if (message.trim() || attachedFiles.length > 0) { - const content: any[] = []; - - if (message.trim()) { - content.push({ - type: "text", - text: message.trim(), - }); - } - - attachedFiles.forEach((file) => { - const base64Data = file.preview.split(",")[1]; - content.push({ - type: "image", - source: { - type: "base64", - media_type: file.file.type, - data: base64Data, - }, - }); - }); - - await createMessageMutation.mutateAsync({ - content: - content.length === 1 && content[0].type === "text" - ? content[0].text - : content, - role: "user", - }); - - setMessage(""); - setAttachedFiles([]); - } - - if (!selectedAssistant.id) { - toast.warning( - "Select an assistant and hit the send button again if you want to receive an answer" - ); - return; - } - - setRunningAction("send"); - await createRunMutation.mutateAsync(selectedAssistant.id); - }; - - const handleCancel = () => { - if (currentRun) { - // Cancel the run - apiClient - .cancelRun(currentRun.thread_id, currentRun.id) - .then(() => { - toast.success("Send request to cancel run"); - }) - .catch(() => { - toast.error("Failed to send request to cancel run"); - }); - } - }; - - const handleDemo = async () => { - setRunningAction("demo"); - await createRunMutation.mutateAsync(HUMAN_DEMONSTRATION_AGENT_ID); - }; - - const removeFile = (fileId: string) => { - setAttachedFiles((prev) => prev.filter((f) => f.id !== fileId)); - }; - - const handleDragOver = useCallback((e: React.DragEvent) => { - e.preventDefault(); - setIsDragOver(true); - }, []); - - const handleDragLeave = useCallback((e: React.DragEvent) => { - e.preventDefault(); - setIsDragOver(false); - }, []); - - const handleDrop = useCallback((e: React.DragEvent) => { - e.preventDefault(); - setIsDragOver(false); - 
handleFileSelect(e.dataTransfer.files); - }, []); - - const isLoading = - createMessageMutation.isPending || createRunMutation.isPending; - - return ( - -
-
- {/* File Attachments */} - - {attachedFiles.length > 0 && ( - - {attachedFiles.map((file) => ( -
- {file.file.name} - -
- ))} -
- )} -
- - {/* Input Area */} -
-