diff --git a/pyproject.toml b/pyproject.toml index 2cca01f2..737e9b37 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -180,26 +180,27 @@ dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" [tool.ruff.lint.per-file-ignores] "src/askui/agent.py" = ["E501"] "src/askui/android_agent.py" = ["E501"] -"src/askui/web_agent.py" = ["E501"] -"src/askui/models/shared/android_agent.py" = ["E501"] "src/askui/chat/*" = ["E501", "F401", "F403"] -"src/askui/tools/askui/askui_ui_controller_grpc/*" = ["ALL"] "src/askui/locators/locators.py" = ["E501"] "src/askui/locators/relatable.py" = ["E501", "SLF001"] "src/askui/locators/serializers.py" = ["E501", "SLF001"] "src/askui/models/anthropic/computer_agent.py" = ["E501"] "src/askui/models/askui/ai_element_utils.py" = ["E501"] "src/askui/models/huggingface/spaces_api.py" = ["E501"] +"src/askui/models/shared/android_agent.py" = ["E501"] "src/askui/models/ui_tars_ep/ui_tars_api.py" = ["E501"] +"src/askui/prompts/system.py" = ["E501"] "src/askui/reporting.py" = ["E501"] "src/askui/telemetry/telemetry.py" = ["E501"] +"src/askui/tools/askui/askui_ui_controller_grpc/*" = ["ALL"] "src/askui/utils/image_utils.py" = ["E501"] +"src/askui/web_agent.py" = ["E501"] "tests/*" = ["S101", "PLR2004", "SLF001"] "tests/e2e/agent/test_get.py" = ["E501"] "tests/e2e/agent/test_locate_with_relations.py" = ["E501"] -"tests/unit/locators/test_locators.py" = ["E501"] "tests/unit/locators/serializers/test_askui_locator_serializer.py" = ["E501"] "tests/unit/locators/serializers/test_locator_string_representation.py" = ["E501"] +"tests/unit/locators/test_locators.py" = ["E501"] "tests/unit/utils/test_image_utils.py" = ["E501"] [tool.ruff.lint.flake8-quotes] diff --git a/src/askui/agent.py b/src/askui/agent.py index fd53cd2e..4e35283a 100644 --- a/src/askui/agent.py +++ b/src/askui/agent.py @@ -1,7 +1,4 @@ import logging -import platform -import sys -from datetime import datetime, timezone from typing import Annotated, Literal, Optional from pydantic 
import ConfigDict, Field, validate_call @@ -17,6 +14,7 @@ MessageSettings, ) from askui.models.shared.tools import Tool +from askui.prompts.system import COMPUTER_AGENT_SYSTEM_PROMPT from askui.tools.computer import Computer20241022Tool, Computer20250124Tool from askui.tools.exception_tool import ExceptionTool from askui.tools.list_displays_tool import ListDisplaysTool @@ -31,25 +29,10 @@ from .tools import AgentToolbox, ModifierKey, PcKey from .tools.askui import AskUiControllerClient -_SYSTEM_PROMPT = f""" -* You are utilising a {sys.platform} machine using {platform.machine()} architecture with internet access. -* When you cannot find something (application window, ui element etc.) on the currently selected/active displa/screen, check the other available displays by listing them and checking which one is currently active and then going through the other displays one by one until you find it or you have checked all of them. -* When asked to perform web tasks try to open the browser (firefox, chrome, safari, ...) if not already open. Often you can find the browser icons in the toolbars of the operating systems. -* When viewing a page it can be helpful to zoom out/in so that you can see everything on the page. Either that, or make sure you scroll down/up to see everything before deciding something isn't available. -* When using your function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request. -* The current date and time is {datetime.now(timezone.utc).strftime("%A, %B %d, %Y %H:%M:%S %z")}. - - - -* When using Firefox, if a startup wizard appears, IGNORE IT. Do not even click "skip this step". Instead, click on the address bar where it says "Search or enter address", and enter the appropriate search term or URL there. 
-* If the item you are looking at is a pdf, if after taking a single screenshot of the pdf it seems that you want to read the entire document instead of trying to continue to read the pdf from your screenshots + navigation, determine the URL, use curl to download the pdf, install and use pdftotext to convert it to a text file, and then read that text file directly with your StrReplaceEditTool. -""" # noqa: DTZ002, E501 - - _ANTHROPIC__CLAUDE__3_5__SONNET__20241022__ACT_SETTINGS = ActSettings( messages=MessageSettings( model=ModelName.ANTHROPIC__CLAUDE__3_5__SONNET__20241022, - system=_SYSTEM_PROMPT, + system=COMPUTER_AGENT_SYSTEM_PROMPT, betas=[COMPUTER_USE_20241022_BETA_FLAG], ), ) @@ -57,7 +40,7 @@ _CLAUDE__SONNET__4__20250514__ACT_SETTINGS = ActSettings( messages=MessageSettings( model=ModelName.CLAUDE__SONNET__4__20250514, - system=_SYSTEM_PROMPT, + system=COMPUTER_AGENT_SYSTEM_PROMPT, betas=[COMPUTER_USE_20250124_BETA_FLAG], thinking={"type": "enabled", "budget_tokens": 2048}, ), diff --git a/src/askui/android_agent.py b/src/askui/android_agent.py index 41b96720..a673c64d 100644 --- a/src/askui/android_agent.py +++ b/src/askui/android_agent.py @@ -8,6 +8,7 @@ from askui.container import telemetry from askui.locators.locators import Locator from askui.models.shared.settings import ActSettings, MessageSettings +from askui.prompts.system import ANDROID_AGENT_SYSTEM_PROMPT from askui.tools.android.agent_os import ANDROID_KEY from askui.tools.android.agent_os_facade import AndroidAgentOsFacade from askui.tools.android.ppadb_agent_os import PpadbAgentOs @@ -29,82 +30,10 @@ from .reporting import CompositeReporter, Reporter from .retry import Retry -_SYSTEM_PROMPT = """ -You are an autonomous Android device control agent operating via ADB on a test device with full system access. -Your primary goal is to execute tasks efficiently and reliably while maintaining system stability. 
- - -* Autonomy: Operate independently and make informed decisions without requiring user input. -* Never ask for other tasks to be done, only do the task you are given. -* Reliability: Ensure actions are repeatable and maintain system stability. -* Efficiency: Optimize operations to minimize latency and resource usage. -* Safety: Always verify actions before execution, even with full system access. - - - -1. Tool Usage: - * Verify tool availability before starting any operation - * Use the most direct and efficient tool for each task - * Combine tools strategically for complex operations - * Prefer built-in tools over shell commands when possible - -2. Error Handling: - * Assess failures systematically: check tool availability, permissions, and device state - * Implement retry logic with exponential backoff for transient failures - * Use fallback strategies when primary approaches fail - * Provide clear, actionable error messages with diagnostic information - -3. Performance Optimization: - * Use one-liner shell commands with inline filtering (grep, cut, awk, jq) for efficiency - * Minimize screen captures and coordinate calculations - * Cache device state information when appropriate - * Batch related operations when possible - -4. Screen Interaction: - * Ensure all coordinates are integers and within screen bounds - * Implement smart scrolling for off-screen elements - * Use appropriate gestures (tap, swipe, drag) based on context - * Verify element visibility before interaction - -5. System Access: - * Leverage full system access responsibly - * Use shell commands for system-level operations - * Monitor system state and resource usage - * Maintain system stability during operations - -6. 
Recovery Strategies: - * If an element is not visible, try: - - Scrolling in different directions - - Adjusting view parameters - - Using alternative interaction methods - * If a tool fails: - - Check device connection and state - - Verify tool availability and permissions - - Try alternative tools or approaches - * If stuck: - - Provide clear diagnostic information - - Suggest potential solutions - - Request user intervention only if necessary - -7. Best Practices: - * Document all significant operations - * Maintain operation logs for debugging - * Implement proper cleanup after operations - * Follow Android best practices for UI interaction - - -* This is a test device with full system access - use this capability responsibly -* Always verify the success of critical operations -* Maintain system stability as the highest priority -* Provide clear, actionable feedback for all operations -* Use the most efficient method for each task - -""" - _ANTHROPIC__CLAUDE__3_5__SONNET__20241022__ACT_SETTINGS = ActSettings( messages=MessageSettings( model=ModelName.ANTHROPIC__CLAUDE__3_5__SONNET__20241022, - system=_SYSTEM_PROMPT, + system=ANDROID_AGENT_SYSTEM_PROMPT, betas=[], ), ) @@ -112,7 +41,7 @@ _CLAUDE__SONNET__4__20250514__ACT_SETTINGS = ActSettings( messages=MessageSettings( model=ModelName.CLAUDE__SONNET__4__20250514, - system=_SYSTEM_PROMPT, + system=ANDROID_AGENT_SYSTEM_PROMPT, thinking={"type": "enabled", "budget_tokens": 2048}, betas=[], ), diff --git a/src/askui/chat/api/assistants/seeds.py b/src/askui/chat/api/assistants/seeds.py index 7a873315..cc7397f2 100644 --- a/src/askui/chat/api/assistants/seeds.py +++ b/src/askui/chat/api/assistants/seeds.py @@ -1,45 +1,55 @@ from askui.chat.api.assistants.models import Assistant +from askui.prompts.system import ( + ANDROID_AGENT_SYSTEM_PROMPT, + COMPUTER_AGENT_SYSTEM_PROMPT, + TESTING_AGENT_SYSTEM_PROMPT, + WEB_AGENT_SYSTEM_PROMPT, +) from askui.utils.datetime_utils import now -ASKUI_VISION_AGENT = Assistant( 
+COMPUTER_AGENT = Assistant( id="asst_ge3tiojsga3dgnruge3di2u5ov36shedkcslxnmca", created_at=now(), - name="AskUI Vision Agent", + name="Computer Agent", avatar="data:image/svg+xml;base64,PHN2ZyAgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIgogIHdpZHRoPSIyNCIKICBoZWlnaHQ9IjI0IgogIHZpZXdCb3g9IjAgMCAyNCAyNCIKICBmaWxsPSJub25lIgogIHN0cm9rZT0iIzAwMCIgc3R5bGU9ImJhY2tncm91bmQtY29sb3I6ICNmZmY7IGJvcmRlci1yYWRpdXM6IDJweCIKICBzdHJva2Utd2lkdGg9IjIiCiAgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIgogIHN0cm9rZS1saW5lam9pbj0icm91bmQiCj4KICA8cGF0aCBkPSJNMTIgOFY0SDgiIC8+CiAgPHJlY3Qgd2lkdGg9IjE2IiBoZWlnaHQ9IjEyIiB4PSI0IiB5PSI4IiByeD0iMiIgLz4KICA8cGF0aCBkPSJNMiAxNGgyIiAvPgogIDxwYXRoIGQ9Ik0yMCAxNGgyIiAvPgogIDxwYXRoIGQ9Ik0xNSAxM3YyIiAvPgogIDxwYXRoIGQ9Ik05IDEzdjIiIC8+Cjwvc3ZnPgo=", + system=COMPUTER_AGENT_SYSTEM_PROMPT, ) HUMAN_DEMONSTRATION_AGENT = Assistant( id="asst_ge3tiojsga3dgnruge3di2u5ov36shedkcslxnmcb", created_at=now(), - name="Human DemonstrationAgent", + name="Human Demonstration Agent", avatar="data:image/svg+xml;base64,PHN2ZyAgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIgogIHdpZHRoPSIyNCIKICBoZWlnaHQ9IjI0IgogIHZpZXdCb3g9IjAgMCAyNCAyNCIKICBmaWxsPSJub25lIgogIHN0cm9rZT0iIzAwMCIgc3R5bGU9ImJhY2tncm91bmQtY29sb3I6ICNmZmY7IGJvcmRlci1yYWRpdXM6IDJweCIKICBzdHJva2Utd2lkdGg9IjIiCiAgc3Ryb2tlLWxpbmVjYXA9InJvdW5kIgogIHN0cm9rZS1saW5lam9pbj0icm91bmQiCj4KICA8cGF0aCBkPSJNMTkgMjF2LTJhNCA0IDAgMCAwLTQtNEg5YTQgNCAwIDAgMC00IDR2MiIgLz4KICA8Y2lyY2xlIGN4PSIxMiIgY3k9IjciIHI9IjQiIC8+Cjwvc3ZnPgo=", ) -ANDROID_VISION_AGENT = Assistant( +ANDROID_AGENT = Assistant( id="asst_78da09fbf1ed43c7826fb1686f89f541", created_at=now(), - name="AskUI Android Vision Agent", + name="Android Agent", 
avatar="data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciICB2aWV3Qm94PSIwIDAgNDggNDgiIHdpZHRoPSIyNXB4IiBoZWlnaHQ9IjI1cHgiPjxwYXRoIGQ9Ik0gMzIuNTE5NTMxIDAuOTgyNDIxODggQSAxLjUwMDE1IDEuNTAwMTUgMCAwIDAgMzEuMjc5Mjk3IDEuNjI4OTA2MiBMIDI5LjQzNzUgNC4yMDg5ODQ0IEMgMjcuNzgwMjA3IDMuNDQwNTAwNiAyNS45NDE5MSAzIDI0IDMgQyAyMi4wNTgwOSAzIDIwLjIxOTc5MyAzLjQ0MDUwMDYgMTguNTYyNSA0LjIwODk4NDQgTCAxNi43MjA3MDMgMS42Mjg5MDYyIEEgMS41MDAxNSAxLjUwMDE1IDAgMCAwIDE1LjQzNTU0NyAwLjk4NDM3NSBBIDEuNTAwMTUgMS41MDAxNSAwIDAgMCAxNC4yNzkyOTcgMy4zNzEwOTM4IEwgMTYgNS43NzkyOTY5IEMgMTMuMTM4ODk2IDguMDI0NzU4MiAxMS4yNDUxODggMTEuNDM2MDIgMTEuMDM1MTU2IDE1LjI5MTAxNiBDIDEwLjU1MzI2IDE1LjExMjgxOCAxMC4wNDA0MDggMTUgOS41IDE1IEMgNy4wMzI0OTkxIDE1IDUgMTcuMDMyNDk5IDUgMTkuNSBMIDUgMzAuNSBDIDUgMzIuOTY3NTAxIDcuMDMyNDk5MSAzNSA5LjUgMzUgQyAxMC4wOTAzMTMgMzUgMTAuNjUzMjI5IDM0Ljg3ODc0OSAxMS4xNzE4NzUgMzQuNjY3OTY5IEMgMTEuNTY0MzM2IDM2LjA3MjEwNSAxMi42MzEzMzMgMzcuMTk2OTk0IDE0IDM3LjY5MzM1OSBMIDE0IDQxLjUgQyAxNCA0My45Njc1MDEgMTYuMDMyNDk5IDQ2IDE4LjUgNDYgQyAyMC45Njc1MDEgNDYgMjMgNDMuOTY3NTAxIDIzIDQxLjUgTCAyMyAzOCBMIDI1IDM4IEwgMjUgNDEuNSBDIDI1IDQzLjk2NzUwMSAyNy4wMzI0OTkgNDYgMjkuNSA0NiBDIDMxLjk2NzUwMSA0NiAzNCA0My45Njc1MDEgMzQgNDEuNSBMIDM0IDM3LjY5MzM1OSBDIDM1LjM2ODY2NyAzNy4xOTY5OTQgMzYuNDM1NjY0IDM2LjA3MjEwNSAzNi44MjgxMjUgMzQuNjY3OTY5IEMgMzcuMzQ2NzcxIDM0Ljg3ODc0OSAzNy45MDk2ODcgMzUgMzguNSAzNSBDIDQwLjk2NzUwMSAzNSA0MyAzMi45Njc1MDEgNDMgMzAuNSBMIDQzIDE5LjUgQyA0MyAxNy4wMzI0OTkgNDAuOTY3NTAxIDE1IDM4LjUgMTUgQyAzNy45NTk1OTIgMTUgMzcuNDQ2NzQgMTUuMTEyODE4IDM2Ljk2NDg0NCAxNS4yOTEwMTYgQyAzNi43NTQ4MTIgMTEuNDM2MDIgMzQuODYxMTA0IDguMDI0NzU4MiAzMiA1Ljc3OTI5NjkgTCAzMy43MjA3MDMgMy4zNzEwOTM4IEEgMS41MDAxNSAxLjUwMDE1IDAgMCAwIDMyLjUxOTUzMSAwLjk4MjQyMTg4IHogTSAyNCA2IEMgMjkuMTg1MTI3IDYgMzMuMjc2NzI3IDkuOTU3NTEzMiAzMy43OTg4MjggMTUgTCAxNC4yMDExNzIgMTUgQyAxNC43MjMyNzMgOS45NTc1MTMyIDE4LjgxNDg3MyA2IDI0IDYgeiBNIDE5LjUgMTAgQSAxLjUgMS41IDAgMCAwIDE5LjUgMTMgQSAxLjUgMS41IDAgMCAwIDE5LjUgMTAgeiBNIDI4LjUgMTAgQSAxLjUgMS41IDAgMCAwIDI4LjUgMTMgQSAxLjUgMS41IDAgMCAwIDI4LjUgMTAgei
BNIDkuNSAxOCBDIDEwLjM0NjQ5OSAxOCAxMSAxOC42NTM1MDEgMTEgMTkuNSBMIDExIDMwLjUgQyAxMSAzMS4zNDY0OTkgMTAuMzQ2NDk5IDMyIDkuNSAzMiBDIDguNjUzNTAwOSAzMiA4IDMxLjM0NjQ5OSA4IDMwLjUgTCA4IDE5LjUgQyA4IDE4LjY1MzUwMSA4LjY1MzUwMDkgMTggOS41IDE4IHogTSAxNCAxOCBMIDM0IDE4IEwgMzQgMTkuNSBMIDM0IDMwLjUgTCAzNCAzMy41IEMgMzQgMzQuMzQ2NDk5IDMzLjM0NjQ5OSAzNSAzMi41IDM1IEwgMjUgMzUgTCAyMyAzNSBMIDE1LjUgMzUgQyAxNC42NTM1MDEgMzUgMTQgMzQuMzQ2NDk5IDE0IDMzLjUgTCAxNCAzMC41IEwgMTQgMTkuNSBMIDE0IDE4IHogTSAzOC41IDE4IEMgMzkuMzQ2NDk5IDE4IDQwIDE4LjY1MzUwMSA0MCAxOS41IEwgNDAgMzAuNSBDIDQwIDMxLjM0NjQ5OSAzOS4zNDY0OTkgMzIgMzguNSAzMiBDIDM3LjY1MzUwMSAzMiAzNyAzMS4zNDY0OTkgMzcgMzAuNSBMIDM3IDE5LjUgQyAzNyAxOC42NTM1MDEgMzcuNjUzNTAxIDE4IDM4LjUgMTggeiBNIDE3IDM4IEwgMjAgMzggTCAyMCA0MS41IEMgMjAgNDIuMzQ2NDk5IDE5LjM0NjQ5OSA0MyAxOC41IDQzIEMgMTcuNjUzNTAxIDQzIDE3IDQyLjM0NjQ5OSAxNyA0MS41IEwgMTcgMzggeiBNIDI4IDM4IEwgMzEgMzggTCAzMSA0MS41IEMgMzEgNDIuMzQ2NDk5IDMwLjM0NjQ5OSA0MyAyOS41IDQzIEMgMjguNjUzNTAxIDQzIDI4IDQyLjM0NjQ5OSAyOCA0MS41IEwgMjggMzggeiIvPjwvc3ZnPg==", + system=ANDROID_AGENT_SYSTEM_PROMPT, ) -ASKUI_WEB_AGENT = Assistant( +WEB_AGENT = Assistant( id="asst_ge3tiojsga3dgnruge3di2u5ov36shedkcslxnmcc", created_at=now(), - name="AskUI Web Vision Agent", + name="Web Agent", 
avatar="data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI0MDAiIGhlaWdodD0iNDAwIiB2aWV3Qm94PSIwIDAgNDAwIDQwMCIgZmlsbD0ibm9uZSI+CjxwYXRoIGQ9Ik0xMzYuNDQ0IDIyMS41NTZDMTIzLjU1OCAyMjUuMjEzIDExNS4xMDQgMjMxLjYyNSAxMDkuNTM1IDIzOC4wMzJDMTE0Ljg2OSAyMzMuMzY0IDEyMi4wMTQgMjI5LjA4IDEzMS42NTIgMjI2LjM0OEMxNDEuNTEgMjIzLjU1NCAxNDkuOTIgMjIzLjU3NCAxNTYuODY5IDIyNC45MTVWMjE5LjQ4MUMxNTAuOTQxIDIxOC45MzkgMTQ0LjE0NSAyMTkuMzcxIDEzNi40NDQgMjIxLjU1NlpNMTA4Ljk0NiAxNzUuODc2TDYxLjA4OTUgMTg4LjQ4NEM2MS4wODk1IDE4OC40ODQgNjEuOTYxNyAxODkuNzE2IDYzLjU3NjcgMTkxLjM2TDEwNC4xNTMgMTgwLjY2OEMxMDQuMTUzIDE4MC42NjggMTAzLjU3OCAxODguMDc3IDk4LjU4NDcgMTk0LjcwNUMxMDguMDMgMTg3LjU1OSAxMDguOTQ2IDE3NS44NzYgMTA4Ljk0NiAxNzUuODc2Wk0xNDkuMDA1IDI4OC4zNDdDODEuNjU4MiAzMDYuNDg2IDQ2LjAyNzIgMjI4LjQzOCAzNS4yMzk2IDE4Ny45MjhDMzAuMjU1NiAxNjkuMjI5IDI4LjA3OTkgMTU1LjA2NyAyNy41IDE0NS45MjhDMjcuNDM3NyAxNDQuOTc5IDI3LjQ2NjUgMTQ0LjE3OSAyNy41MzM2IDE0My40NDZDMjQuMDQgMTQzLjY1NyAyMi4zNjc0IDE0NS40NzMgMjIuNzA3NyAxNTAuNzIxQzIzLjI4NzYgMTU5Ljg1NSAyNS40NjMzIDE3NC4wMTYgMzAuNDQ3MyAxOTIuNzIxQzQxLjIzMDEgMjMzLjIyNSA3Ni44NjU5IDMxMS4yNzMgMTQ0LjIxMyAyOTMuMTM0QzE1OC44NzIgMjg5LjE4NSAxNjkuODg1IDI4MS45OTIgMTc4LjE1MiAyNzIuODFDMTcwLjUzMiAyNzkuNjkyIDE2MC45OTUgMjg1LjExMiAxNDkuMDA1IDI4OC4zNDdaTTE2MS42NjEgMTI4LjExVjEzMi45MDNIMTg4LjA3N0MxODcuNTM1IDEzMS4yMDYgMTg2Ljk4OSAxMjkuNjc3IDE4Ni40NDcgMTI4LjExSDE2MS42NjFaIiBmaWxsPSIjMkQ0NTUyIi8+CjxwYXRoIGQ9Ik0xOTMuOTgxIDE2Ny41ODRDMjA1Ljg2MSAxNzAuOTU4IDIxMi4xNDQgMTc5LjI4NyAyMTUuNDY1IDE4Ni42NThMMjI4LjcxMSAxOTAuNDJDMjI4LjcxMSAxOTAuNDIgMjI2LjkwNCAxNjQuNjIzIDIwMy41NyAxNTcuOTk1QzE4MS43NDEgMTUxLjc5MyAxNjguMzA4IDE3MC4xMjQgMTY2LjY3NCAxNzIuNDk2QzE3My4wMjQgMTY3Ljk3MiAxODIuMjk3IDE2NC4yNjggMTkzLjk4MSAxNjcuNTg0Wk0yOTkuNDIyIDE4Ni43NzdDMjc3LjU3MyAxODAuNTQ3IDI2NC4xNDUgMTk4LjkxNiAyNjIuNTM1IDIwMS4yNTVDMjY4Ljg5IDE5Ni43MzYgMjc4LjE1OCAxOTMuMDMxIDI4OS44MzcgMTk2LjM2MkMzMDEuNjk4IDE5OS43NDEgMzA3Ljk3NiAyMDguMDYgMzExLjMwNyAyMTUuNDM2TDMyNC41NzIgMjE5LjIxMkMzMjQuNTcyIDIxOS4yMTIgMzIyLjczNiAxOTMuNDEgMjk5LjQyMiAxODYuNzc3Wk0yODYuMj
YyIDI1NC43OTVMMTc2LjA3MiAyMjMuOTlDMTc2LjA3MiAyMjMuOTkgMTc3LjI2NSAyMzAuMDM4IDE4MS44NDIgMjM3Ljg2OUwyNzQuNjE3IDI2My44MDVDMjgyLjI1NSAyNTkuMzg2IDI4Ni4yNjIgMjU0Ljc5NSAyODYuMjYyIDI1NC43OTVaTTIwOS44NjcgMzIxLjEwMkMxMjIuNjE4IDI5Ny43MSAxMzMuMTY2IDE4Ni41NDMgMTQ3LjI4NCAxMzMuODY1QzE1My4wOTcgMTEyLjE1NiAxNTkuMDczIDk2LjAyMDMgMTY0LjAyOSA4NS4yMDRDMTYxLjA3MiA4NC41OTUzIDE1OC42MjMgODYuMTUyOSAxNTYuMjAzIDkxLjA3NDZDMTUwLjk0MSAxMDEuNzQ3IDE0NC4yMTIgMTE5LjEyNCAxMzcuNyAxNDMuNDVDMTIzLjU4NiAxOTYuMTI3IDExMy4wMzggMzA3LjI5IDIwMC4yODMgMzMwLjY4MkMyNDEuNDA2IDM0MS42OTkgMjczLjQ0MiAzMjQuOTU1IDI5Ny4zMjMgMjk4LjY1OUMyNzQuNjU1IDMxOS4xOSAyNDUuNzE0IDMzMC43MDEgMjA5Ljg2NyAzMjEuMTAyWiIgZmlsbD0iIzJENDU1MiIvPgo8cGF0aCBkPSJNMTYxLjY2MSAyNjIuMjk2VjIzOS44NjNMOTkuMzMyNCAyNTcuNTM3Qzk5LjMzMjQgMjU3LjUzNyAxMDMuOTM4IDIzMC43NzcgMTM2LjQ0NCAyMjEuNTU2QzE0Ni4zMDIgMjE4Ljc2MiAxNTQuNzEzIDIxOC43ODEgMTYxLjY2MSAyMjAuMTIzVjEyOC4xMUgxOTIuODY5QzE4OS40NzEgMTE3LjYxIDE4Ni4xODQgMTA5LjUyNiAxODMuNDIzIDEwMy45MDlDMTc4Ljg1NiA5NC42MTIgMTc0LjE3NCAxMDAuNzc1IDE2My41NDUgMTA5LjY2NUMxNTYuMDU5IDExNS45MTkgMTM3LjEzOSAxMjkuMjYxIDEwOC42NjggMTM2LjkzM0M4MC4xOTY2IDE0NC42MSA1Ny4xNzkgMTQyLjU3NCA0Ny41NzUyIDE0MC45MTFDMzMuOTYwMSAxMzguNTYyIDI2LjgzODcgMTM1LjU3MiAyNy41MDQ5IDE0NS45MjhDMjguMDg0NyAxNTUuMDYyIDMwLjI2MDUgMTY5LjIyNCAzNS4yNDQ1IDE4Ny45MjhDNDYuMDI3MiAyMjguNDMzIDgxLjY2MyAzMDYuNDgxIDE0OS4wMSAyODguMzQyQzE2Ni42MDIgMjgzLjYwMiAxNzkuMDE5IDI3NC4yMzMgMTg3LjYyNiAyNjIuMjkxSDE2MS42NjFWMjYyLjI5NlpNNjEuMDg0OCAxODguNDg0TDEwOC45NDYgMTc1Ljg3NkMxMDguOTQ2IDE3NS44NzYgMTA3LjU1MSAxOTQuMjg4IDg5LjYwODcgMTk5LjAxOEM3MS42NjE0IDIwMy43NDMgNjEuMDg0OCAxODguNDg0IDYxLjA4NDggMTg4LjQ4NFoiIGZpbGw9IiNFMjU3NEMiLz4KPHBhdGggZD0iTTM0MS43ODYgMTI5LjE3NEMzMjkuMzQ1IDEzMS4zNTUgMjk5LjQ5OCAxMzQuMDcyIDI2Mi42MTIgMTI0LjE4NUMyMjUuNzE2IDExNC4zMDQgMjAxLjIzNiA5Ny4wMjI0IDE5MS41MzcgODguODk5NEMxNzcuNzg4IDc3LjM4MzQgMTcxLjc0IDY5LjM4MDIgMTY1Ljc4OCA4MS40ODU3QzE2MC41MjYgOTIuMTYzIDE1My43OTcgMTA5LjU0IDE0Ny4yODQgMTMzLjg2NkMxMzMuMTcxIDE4Ni41NDMgMTIyLjYyMyAyOTcuNzA2IDIwOS44NjcgMzIxLjA5OEMyOTcuMDkzIDM0NC40NyAzNDMuNTMgMjQyLjkyIDM1Ny
42NDQgMTkwLjIzOEMzNjQuMTU3IDE2NS45MTcgMzY3LjAxMyAxNDcuNSAzNjcuNzk5IDEzNS42MjVDMzY4LjY5NSAxMjIuMTczIDM1OS40NTUgMTI2LjA3OCAzNDEuNzg2IDEyOS4xNzRaTTE2Ni40OTcgMTcyLjc1NkMxNjYuNDk3IDE3Mi43NTYgMTgwLjI0NiAxNTEuMzcyIDIwMy41NjUgMTU4QzIyNi44OTkgMTY0LjYyOCAyMjguNzA2IDE5MC40MjUgMjI4LjcwNiAxOTAuNDI1TDE2Ni40OTcgMTcyLjc1NlpNMjIzLjQyIDI2OC43MTNDMTgyLjQwMyAyNTYuNjk4IDE3Ni4wNzcgMjIzLjk5IDE3Ni4wNzcgMjIzLjk5TDI4Ni4yNjIgMjU0Ljc5NkMyODYuMjYyIDI1NC43OTEgMjY0LjAyMSAyODAuNTc4IDIyMy40MiAyNjguNzEzWk0yNjIuMzc3IDIwMS40OTVDMjYyLjM3NyAyMDEuNDk1IDI3Ni4xMDcgMTgwLjEyNiAyOTkuNDIyIDE4Ni43NzNDMzIyLjczNiAxOTMuNDExIDMyNC41NzIgMjE5LjIwOCAzMjQuNTcyIDIxOS4yMDhMMjYyLjM3NyAyMDEuNDk1WiIgZmlsbD0iIzJFQUQzMyIvPgo8cGF0aCBkPSJNMTM5Ljg4IDI0Ni4wNEw5OS4zMzI0IDI1Ny41MzJDOTkuMzMyNCAyNTcuNTMyIDEwMy43MzcgMjMyLjQ0IDEzMy42MDcgMjIyLjQ5NkwxMTAuNjQ3IDEzNi4zM0wxMDguNjYzIDEzNi45MzNDODAuMTkxOCAxNDQuNjExIDU3LjE3NDIgMTQyLjU3NCA0Ny41NzA0IDE0MC45MTFDMzMuOTU1NCAxMzguNTYzIDI2LjgzNCAxMzUuNTcyIDI3LjUwMDEgMTQ1LjkyOUMyOC4wOCAxNTUuMDYzIDMwLjI1NTcgMTY5LjIyNCAzNS4yMzk3IDE4Ny45MjlDNDYuMDIyNSAyMjguNDMzIDgxLjY1ODMgMzA2LjQ4MSAxNDkuMDA1IDI4OC4zNDJMMTUwLjk4OSAyODcuNzE5TDEzOS44OCAyNDYuMDRaTTYxLjA4NDggMTg4LjQ4NUwxMDguOTQ2IDE3NS44NzZDMTA4Ljk0NiAxNzUuODc2IDEwNy41NTEgMTk0LjI4OCA4OS42MDg3IDE5OS4wMThDNzEuNjYxNSAyMDMuNzQzIDYxLjA4NDggMTg4LjQ4NSA2MS4wODQ4IDE4OC40ODVaIiBmaWxsPSIjRDY1MzQ4Ii8+CjxwYXRoIGQ9Ik0yMjUuMjcgMjY5LjE2M0wyMjMuNDE1IDI2OC43MTJDMTgyLjM5OCAyNTYuNjk4IDE3Ni4wNzIgMjIzLjk5IDE3Ni4wNzIgMjIzLjk5TDIzMi44OSAyMzkuODcyTDI2Mi45NzEgMTI0LjI4MUwyNjIuNjA3IDEyNC4xODVDMjI1LjcxMSAxMTQuMzA0IDIwMS4yMzIgOTcuMDIyNCAxOTEuNTMyIDg4Ljg5OTRDMTc3Ljc4MyA3Ny4zODM0IDE3MS43MzUgNjkuMzgwMiAxNjUuNzgzIDgxLjQ4NTdDMTYwLjUyNiA5Mi4xNjMgMTUzLjc5NyAxMDkuNTQgMTQ3LjI4NCAxMzMuODY2QzEzMy4xNzEgMTg2LjU0MyAxMjIuNjIzIDI5Ny43MDYgMjA5Ljg2NyAzMjEuMDk3TDIxMS42NTUgMzIxLjVMMjI1LjI3IDI2OS4xNjNaTTE2Ni40OTcgMTcyLjc1NkMxNjYuNDk3IDE3Mi43NTYgMTgwLjI0NiAxNTEuMzcyIDIwMy41NjUgMTU4QzIyNi44OTkgMTY0LjYyOCAyMjguNzA2IDE5MC40MjUgMjI4LjcwNiAxOTAuNDI1TDE2Ni40OTcgMTcyLjc1NloiIGZpbGw9IiMxRDhEMjIiLz4KPHBhdGggZD0iTTE0MS
45NDYgMjQ1LjQ1MUwxMzEuMDcyIDI0OC41MzdDMTMzLjY0MSAyNjMuMDE5IDEzOC4xNjkgMjc2LjkxNyAxNDUuMjc2IDI4OS4xOTVDMTQ2LjUxMyAyODguOTIyIDE0Ny43NCAyODguNjg3IDE0OSAyODguMzQyQzE1Mi4zMDIgMjg3LjQ1MSAxNTUuMzY0IDI4Ni4zNDggMTU4LjMxMiAyODUuMTQ1QzE1MC4zNzEgMjczLjM2MSAxNDUuMTE4IDI1OS43ODkgMTQxLjk0NiAyNDUuNDUxWk0xMzcuNyAxNDMuNDUxQzEzMi4xMTIgMTY0LjMwNyAxMjcuMTEzIDE5NC4zMjYgMTI4LjQ4OSAyMjQuNDM2QzEzMC45NTIgMjIzLjM2NyAxMzMuNTU0IDIyMi4zNzEgMTM2LjQ0NCAyMjEuNTUxTDEzOC40NTcgMjIxLjEwMUMxMzYuMDAzIDE4OC45MzkgMTQxLjMwOCAxNTYuMTY1IDE0Ny4yODQgMTMzLjg2NkMxNDguNzk5IDEyOC4yMjUgMTUwLjMxOCAxMjIuOTc4IDE1MS44MzIgMTE4LjA4NUMxNDkuMzkzIDExOS42MzcgMTQ2Ljc2NyAxMjEuMjI4IDE0My43NzYgMTIyLjg2N0MxNDEuNzU5IDEyOS4wOTMgMTM5LjcyMiAxMzUuODk4IDEzNy43IDE0My40NTFaIiBmaWxsPSIjQzA0QjQxIi8+Cjwvc3ZnPg==", + system=WEB_AGENT_SYSTEM_PROMPT, ) -ASKUI_WEB_TESTING_AGENT = Assistant( +TESTING_AGENT = Assistant( id="asst_ge3tiojsga3dgnruge3di2u5ov36shedkcslxnmcd", created_at=now(), - name="AskUI Web Testing Agent", + name="Testing Agent", avatar="data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHhtbG5zOnhsaW5rPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5L3hsaW5rIiB2aWV3Qm94PSIwIDAgMjcgMjciIGFyaWEtaGlkZGVuPSJ0cnVlIiByb2xlPSJpbWciIGNsYXNzPSJpY29uaWZ5IGljb25pZnktLXR3ZW1vamkiIHByZXNlcnZlQXNwZWN0UmF0aW89InhNaWRZTWlkIG1lZXQiPjxwYXRoIGZpbGw9IiNDQ0Q2REQiIGQ9Ik0xMC45MjIgMTAuODEgMTkuMTAyIDIuNjI5bDUuMjIxIDUuMjIxIC04LjE4MSA4LjE4MXoiLz48cGF0aCBmaWxsPSIjNjhFMDkwIiBkPSJNNi4wNzcgMjUuNzk5QzEuODc1IDI1LjUgMS4xMjUgMjIuNTQ3IDEuMjI2IDIwLjk0OWMwLjI0MSAtMy44MDMgMTEuNzAxIC0xMi40MTMgMTEuNzAxIC0xMi40MTNsOS4zODggMS40NDhjMC4wMDEgMCAtMTMuMDQyIDE2LjA0NCAtMTYuMjM3IDE1LjgxNiIvPjxwYXRoIGZpbGw9IiM4ODk5QTYiIGQ9Ik0yNC4yNDUgMi43ODFDMjIuMDU0IDAuNTkgMTkuNTc4IC0wLjQ4NyAxOC43MTUgMC4zNzdjLTAuMDEgMC4wMSAtMC4wMTcgMC4wMjMgLTAuMDI2IDAuMDMzIC0wLjAwNSAwLjAwNSAtMC4wMTEgMC4wMDYgLTAuMDE2IDAuMDExTDEuNzIxIDE3LjM3M2E1LjU3MiA1LjU3MiAwIDAgMCAtMS42NDMgMy45NjZjMCAxLjQ5OCAwLjU4NCAyLjkwNiAxLjY0MyAzLjk2NWE1LjU3MiA1LjU3MiAwIDAgMCAzLjk2NiAxLjY0MyA1LjU3MiA1LjU3MiAwIDAgMCAzLjk2NSAtMS42ND
JsMTYuOTUzIC0xNi45NTNjMC4wMDUgLTAuMDA1IDAuMDA3IC0wLjAxMiAwLjAxMSAtMC4wMTcgMC4wMSAtMC4wMDkgMC4wMjIgLTAuMDE1IDAuMDMyIC0wLjAyNSAwLjg2MyAtMC44NjIgLTAuMjE0IC0zLjMzOCAtMi40MDUgLTUuNTI5TTguMDYzIDIzLjcxNGMtMC42MzQgMC42MzQgLTEuNDc4IDAuOTgzIC0yLjM3NCAwLjk4M3MtMS43NDEgLTAuMzUgLTIuMzc1IC0wLjk4NGEzLjMzOCAzLjMzOCAwIDAgMSAtMC45ODQgLTIuMzc1YzAgLTAuODk3IDAuMzUgLTEuNzQgMC45ODMgLTIuMzc0TDE5LjA1OSAzLjIxOGMwLjQ2NyAwLjg1OCAxLjE3IDEuNzk2IDIuMDYyIDIuNjg4czEuODMgMS41OTUgMi42ODggMi4wNjJ6Ii8+PHBhdGggZmlsbD0iIzE3QkY2MyIgZD0iTTIxLjg5NyA5Ljg1OGMtMC4wNDQgMC4yODQgLTEuOTcgMC41NjMgLTQuMjY4IDAuMjU3cy00LjExMiAtMC45MTcgLTQuMDUyIC0xLjM2NSAxLjk3IC0wLjU2MyA0LjI2OCAtMC4yNTcgNC4xMjEgMC45MTggNC4wNTIgMS4zNjVNOC4xMyAxNy40MzVhMC41OTYgMC41OTYgMCAxIDEgLTAuODQyIC0wLjg0MyAwLjU5NiAwLjU5NiAwIDAgMSAwLjg0MiAwLjg0M20yLjQ4OCAxLjk2MWEwLjk3NCAwLjk3NCAwIDEgMSAtMS4zNzYgLTEuMzc3IDAuOTc0IDAuOTc0IDAgMCAxIDEuMzc2IDEuMzc3bTEuMjU4IC0zLjk5M2EwLjkxNiAwLjkxNiAwIDAgMSAtMS4yOTQgLTEuMjk0IDAuOTE1IDAuOTE1IDAgMSAxIDEuMjk0IDEuMjk0bS01LjE1MSA2LjY0NGExLjExNyAxLjExNyAwIDEgMSAtMS41NzkgLTEuNTc5IDEuMTE3IDEuMTE3IDAgMCAxIDEuNTc5IDEuNTc5bTguNTQ3IC02Ljg2OGEwLjc5NCAwLjc5NCAwIDEgMSAtMS4xMjIgLTEuMTIzIDAuNzk0IDAuNzk0IDAgMCAxIDEuMTIyIDEuMTIzbS0wLjkwNSAtMy4yMTZhMC41MiAwLjUyIDAgMSAxIC0wLjczNCAtMC43MzUgMC41MiAwLjUyIDAgMCAxIDAuNzM0IDAuNzM1Ii8+PHBhdGggdHJhbnNmb3JtPSJyb3RhdGUoLTQ1LjAwMSAzMC44MTcgNS4yMjMpIiBmaWxsPSIjQ0NENkREIiBjeD0iMzAuODE3IiBjeT0iNS4yMjMiIHJ4PSIxLjE4NCIgcnk9IjQuODQ3IiBkPSJNMjQuMDAxIDMuOTE3QTAuODg4IDMuNjM1IDAgMCAxIDIzLjExMyA3LjU1M0EwLjg4OCAzLjYzNSAwIDAgMSAyMi4yMjUgMy45MTdBMC44ODggMy42MzUgMCAwIDEgMjQuMDAxIDMuOTE3eiIvPjwvc3ZnPg==", + system=TESTING_AGENT_SYSTEM_PROMPT, ) SEEDS = [ - ASKUI_VISION_AGENT, + COMPUTER_AGENT, HUMAN_DEMONSTRATION_AGENT, - ANDROID_VISION_AGENT, - ASKUI_WEB_AGENT, - ASKUI_WEB_TESTING_AGENT, + ANDROID_AGENT, + WEB_AGENT, + TESTING_AGENT, ] diff --git a/src/askui/chat/api/runs/runner/runner.py b/src/askui/chat/api/runs/runner/runner.py index b527d696..57a33d48 100644 --- a/src/askui/chat/api/runs/runner/runner.py +++ 
b/src/askui/chat/api/runs/runner/runner.py @@ -15,11 +15,11 @@ from askui.android_agent import AndroidVisionAgent from askui.chat.api.assistants.models import Assistant from askui.chat.api.assistants.seeds import ( - ANDROID_VISION_AGENT, - ASKUI_VISION_AGENT, - ASKUI_WEB_AGENT, - ASKUI_WEB_TESTING_AGENT, + ANDROID_AGENT, + COMPUTER_AGENT, HUMAN_DEMONSTRATION_AGENT, + TESTING_AGENT, + WEB_AGENT, ) from askui.chat.api.mcp_configs.models import McpConfig from askui.chat.api.mcp_configs.service import McpConfigService @@ -362,22 +362,22 @@ async def run( try: if self._run.assistant_id == HUMAN_DEMONSTRATION_AGENT.id: await self._run_human_agent(send_stream) - elif self._run.assistant_id == ASKUI_VISION_AGENT.id: + elif self._run.assistant_id == COMPUTER_AGENT.id: await self._run_askui_vision_agent( send_stream, mcp_client, ) - elif self._run.assistant_id == ANDROID_VISION_AGENT.id: + elif self._run.assistant_id == ANDROID_AGENT.id: await self._run_askui_android_agent( send_stream, mcp_client, ) - elif self._run.assistant_id == ASKUI_WEB_AGENT.id: + elif self._run.assistant_id == WEB_AGENT.id: await self._run_askui_web_agent( send_stream, mcp_client, ) - elif self._run.assistant_id == ASKUI_WEB_TESTING_AGENT.id: + elif self._run.assistant_id == TESTING_AGENT.id: await self._run_askui_web_testing_agent( send_stream, mcp_client, diff --git a/src/askui/prompts/__init__.py b/src/askui/prompts/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/askui/prompts/system.py b/src/askui/prompts/system.py new file mode 100644 index 00000000..2ae6b82b --- /dev/null +++ b/src/askui/prompts/system.py @@ -0,0 +1,125 @@ +import platform +import sys +from datetime import datetime, timezone + +COMPUTER_AGENT_SYSTEM_PROMPT = f""" +* You are utilising a {sys.platform} machine using {platform.machine()} architecture with internet access. +* When you cannot find something (application window, ui element etc.) 
on the currently selected/active display/screen, check the other available displays by listing them and checking which one is currently active and then going through the other displays one by one until you find it or you have checked all of them. +* When asked to perform web tasks try to open the browser (firefox, chrome, safari, ...) if not already open. Often you can find the browser icons in the toolbars of the operating systems. +* When viewing a page it can be helpful to zoom out/in so that you can see everything on the page. Either that, or make sure you scroll down/up to see everything before deciding something isn't available. +* When using your function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request. +* The current date and time is {datetime.now(timezone.utc).strftime("%A, %B %d, %Y %H:%M:%S %z")}. + + + +* When using Firefox, if a startup wizard appears, IGNORE IT. Do not even click "skip this step". Instead, click on the address bar where it says "Search or enter address", and enter the appropriate search term or URL there. +* If the item you are looking at is a pdf, if after taking a single screenshot of the pdf it seems that you want to read the entire document instead of trying to continue to read the pdf from your screenshots + navigation, determine the URL, use curl to download the pdf, install and use pdftotext to convert it to a text file, and then read that text file directly with your StrReplaceEditTool. +""" # noqa: DTZ002, E501 + +ANDROID_AGENT_SYSTEM_PROMPT = """ +You are an autonomous Android device control agent operating via ADB on a test device with full system access. +Your primary goal is to execute tasks efficiently and reliably while maintaining system stability. + + +* Autonomy: Operate independently and make informed decisions without requiring user input. +* Never ask for other tasks to be done, only do the task you are given. 
+* Reliability: Ensure actions are repeatable and maintain system stability. +* Efficiency: Optimize operations to minimize latency and resource usage. +* Safety: Always verify actions before execution, even with full system access. + + + +1. Tool Usage: + * Verify tool availability before starting any operation + * Use the most direct and efficient tool for each task + * Combine tools strategically for complex operations + * Prefer built-in tools over shell commands when possible + +2. Error Handling: + * Assess failures systematically: check tool availability, permissions, and device state + * Implement retry logic with exponential backoff for transient failures + * Use fallback strategies when primary approaches fail + * Provide clear, actionable error messages with diagnostic information + +3. Performance Optimization: + * Use one-liner shell commands with inline filtering (grep, cut, awk, jq) for efficiency + * Minimize screen captures and coordinate calculations + * Cache device state information when appropriate + * Batch related operations when possible + +4. Screen Interaction: + * Ensure all coordinates are integers and within screen bounds + * Implement smart scrolling for off-screen elements + * Use appropriate gestures (tap, swipe, drag) based on context + * Verify element visibility before interaction + +5. System Access: + * Leverage full system access responsibly + * Use shell commands for system-level operations + * Monitor system state and resource usage + * Maintain system stability during operations + +6. 
Recovery Strategies: + * If an element is not visible, try: + - Scrolling in different directions + - Adjusting view parameters + - Using alternative interaction methods + * If a tool fails: + - Check device connection and state + - Verify tool availability and permissions + - Try alternative tools or approaches + * If stuck: + - Provide clear diagnostic information + - Suggest potential solutions + - Request user intervention only if necessary + +7. Best Practices: + * Document all significant operations + * Maintain operation logs for debugging + * Implement proper cleanup after operations + * Follow Android best practices for UI interaction + + +* This is a test device with full system access - use this capability responsibly +* Always verify the success of critical operations +* Maintain system stability as the highest priority +* Provide clear, actionable feedback for all operations +* Use the most efficient method for each task + +""" + +WEB_AGENT_SYSTEM_PROMPT = f""" + +* You are utilizing a webbrowser in full-screen mode. So you are only seeing the content of the currently opened webpage (tab). +* It can be helpful to zoom in/out or scroll down/up so that you can see everything on the page. Make sure to do that before deciding something isn't available. +* When using your function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request. +* The current date and time is {datetime.now(timezone.utc).strftime("%A, %B %d, %Y %H:%M:%S %z")}. + +""" + +TESTING_AGENT_SYSTEM_PROMPT = f""" + +* You are an autonomous exploratory web testing agent. Your job is to: + - Analyze the application under test (AUT) at the given URL. + - Use the provided user instructions to guide your testing focus. + - Discover features and scenarios of the AUT, create and update test features and + scenarios as you explore. + - Execute scenarios and create/update test executions, recording results. 
+ - Identify gaps in feature/scenario coverage and prioritize the next most important + feature/scenario for testing. + - Use all available tools to create, retrieve, list, modify, and delete features, + scenarios, and executions. + - Use browser navigation and information tools to explore the AUT. + - Be thorough, systematic, and creative in your exploration. Prioritize critical + paths and user flows. +* You are utilizing a webbrowser in full-screen mode. So you are only seeing the + content of the currently opened webpage (tab). +* It can be helpful to zoom in/out or scroll down/up so that you can see everything + on the page. Make sure to do that before deciding something isn't available. +* When using your function calls, they take a while to run and send back to you. + Where possible/feasible, try to chain multiple of these calls all into one function + calls request. +* The current date and time is \ + {datetime.now(timezone.utc).strftime("%A, %B %d, %Y %H:%M:%S %z")}. + +""" diff --git a/src/askui/web_agent.py b/src/askui/web_agent.py index ce1631a3..7f41464a 100644 --- a/src/askui/web_agent.py +++ b/src/askui/web_agent.py @@ -1,5 +1,4 @@ import logging -from datetime import datetime, timezone from pydantic import ConfigDict, validate_call from typing_extensions import override @@ -12,6 +11,7 @@ MessageSettings, ) from askui.models.shared.tools import Tool +from askui.prompts.system import WEB_AGENT_SYSTEM_PROMPT from askui.tools.exception_tool import ExceptionTool from askui.tools.playwright.agent_os import PlaywrightAgentOs from askui.tools.playwright.tools import ( @@ -28,19 +28,10 @@ from .reporting import Reporter from .retry import Retry -_SYSTEM_PROMPT = f""" - -* You are utilizing a webbrowser in full-screen mode. So you are only seeing the content of the currently opened webpage (tab). -* It can be helpful to zoom in/out or scroll down/up so that you can see everything on the page. Make sure to that before deciding something isn't available. 
-* When using your function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request. -* The current date and time is {datetime.now(timezone.utc).strftime("%A, %B %d, %Y %H:%M:%S %z")}. - -""" - _ANTHROPIC__CLAUDE__3_5__SONNET__20241022__ACT_SETTINGS = ActSettings( messages=MessageSettings( model=ModelName.ANTHROPIC__CLAUDE__3_5__SONNET__20241022, - system=_SYSTEM_PROMPT, + system=WEB_AGENT_SYSTEM_PROMPT, betas=[COMPUTER_USE_20241022_BETA_FLAG], ), ) @@ -48,7 +39,7 @@ _CLAUDE__SONNET__4__20250514__ACT_SETTINGS = ActSettings( messages=MessageSettings( model=ModelName.CLAUDE__SONNET__4__20250514, - system=_SYSTEM_PROMPT, + system=WEB_AGENT_SYSTEM_PROMPT, betas=[COMPUTER_USE_20250124_BETA_FLAG], thinking={"type": "enabled", "budget_tokens": 2048}, ), diff --git a/src/askui/web_testing_agent.py b/src/askui/web_testing_agent.py index ca994f29..b23f3019 100644 --- a/src/askui/web_testing_agent.py +++ b/src/askui/web_testing_agent.py @@ -1,5 +1,4 @@ import logging -from datetime import datetime, timezone from pathlib import Path from pydantic import ConfigDict, validate_call @@ -11,6 +10,7 @@ ActSettings, MessageSettings, ) +from askui.prompts.system import TESTING_AGENT_SYSTEM_PROMPT from askui.tools.testing.execution_tools import ( CreateExecutionTool, DeleteExecutionTool, @@ -38,37 +38,10 @@ from .reporting import Reporter from .retry import Retry -_TESTING_SYSTEM_PROMPT = f""" - -* You are an autonomous exploratory web testing agent. Your job is to: - - Analyze the application under test (AUT) at the given URL. - - Use the provided user instructions to guide your testing focus. - - Discover features and scenarios of the AUT, create and update test features and - scenarios as you explore. - - Execute scenarios and create/update test executions, recording results. 
- - Identify gaps in feature/scenario coverage and prioritize the next most important - feature/scenario for testing. - - Use all available tools to create, retrieve, list, modify, and delete features, - scenarios, and executions. - - Use browser navigation and information tools to explore the AUT. - - Be thorough, systematic, and creative in your exploration. Prioritize critical - paths and user flows. -* You are utilizing a webbrowser in full-screen mode. So you are only seeing the - content of the currently opened webpage (tab). -* It can be helpful to zoom in/out or scroll down/up so that you can see everything - on the page. Make sure to that before deciding something isn't available. -* When using your function calls, they take a while to run and send back to you. - Where possible/feasible, try to chain multiple of these calls all into one function - calls request. -* The current date and time is \ - {datetime.now(timezone.utc).strftime("%A, %B %d, %Y %H:%M:%S %z")}. - -""" - _ANTHROPIC__CLAUDE__3_5__SONNET__20241022__ACT_SETTINGS = ActSettings( messages=MessageSettings( model=ModelName.ANTHROPIC__CLAUDE__3_5__SONNET__20241022, - system=_TESTING_SYSTEM_PROMPT, + system=TESTING_AGENT_SYSTEM_PROMPT, betas=[COMPUTER_USE_20241022_BETA_FLAG], ), ) @@ -76,7 +49,7 @@ _CLAUDE__SONNET__4__20250514__ACT_SETTINGS = ActSettings( messages=MessageSettings( model=ModelName.CLAUDE__SONNET__4__20250514, - system=_TESTING_SYSTEM_PROMPT, + system=TESTING_AGENT_SYSTEM_PROMPT, betas=[COMPUTER_USE_20250124_BETA_FLAG], thinking={"type": "enabled", "budget_tokens": 2048}, ),