diff --git a/.env.template b/.env.template index 91545f8f..cd3b5ae2 100644 --- a/.env.template +++ b/.env.template @@ -10,5 +10,8 @@ ASKUI_WORKSPACE_ID= TARS_URL= TARS_API_KEY= +# OpenRouter +OPEN_ROUTER_API_KEY= + # Telemetry ASKUI__VA__TELEMETRY__ENABLED=True # Set to "False" to disable telemetry diff --git a/README.md b/README.md index 717751bd..34b28c15 100644 --- a/README.md +++ b/README.md @@ -420,6 +420,53 @@ with VisionAgent(models=custom_models, model="dynamic-model") as agent: agent.act("do something", model="dynamic-model-cached") # reuses cached instance ``` +### 🔀 OpenRouter **AI Models** + +You can use Vision Agent with [OpenRouter](https://openrouter.ai/) to access a wide variety of models via a unified API. + +**Set your OpenRouter API key:** + +
+ Linux & MacOS + + ```shell + export OPEN_ROUTER_API_KEY= + ``` +
+
+ Windows PowerShell + + ```shell + $env:OPEN_ROUTER_API_KEY="" + ``` +
+ +**Example: Using OpenRouter with a custom model registry** + +```python +from askui import VisionAgent +from askui.models import ( + OpenRouterGetModel, + OpenRouterSettings, + ModelRegistry, +) + + +# Register OpenRouter model in the registry +custom_models: ModelRegistry = { + "my-custom-model": OpenRouterGetModel( + OpenRouterSettings( + model="anthropic/claude-opus-4", + ) + ), +} + +with VisionAgent(models=custom_models, model={"get":"my-custom-model"}) as agent: + agent.click("search field") + result = agent.get("What is the main heading on the screen?") + print(result) +``` + ### 🛠️ Direct Tool Use diff --git a/src/askui/agent.py b/src/askui/agent.py index 30a93944..1a9cfae0 100644 --- a/src/askui/agent.py +++ b/src/askui/agent.py @@ -12,9 +12,9 @@ from askui.models.shared.computer_agent_message_param import MessageParam from askui.utils.image_utils import ImageSource, Img -from .exceptions import ElementNotFoundError from .logger import configure_logging, logger from .models import ModelComposition +from .models.exceptions import ElementNotFoundError from .models.model_router import ModelRouter from .models.models import ( ModelChoice, diff --git a/src/askui/exceptions.py b/src/askui/exceptions.py index 101ab2fa..d68b662c 100644 --- a/src/askui/exceptions.py +++ b/src/askui/exceptions.py @@ -1,108 +1,13 @@ -from typing import Any - -from askui.locators.locators import Locator - from .models.askui.ai_element_utils import AiElementNotFound from .models.askui.exceptions import AskUiApiError, AskUiApiRequestFailedError - - -class AutomationError(Exception): - """Exception raised when the automation step cannot complete. - - Args: - message (str): The error message. - """ - - def __init__(self, message: str): - self.message = message - super().__init__(self.message) - - -class ElementNotFoundError(AutomationError): - """Exception raised when an element cannot be located. - - Args: - locator (str | Locator): The locator that was used. 
- locator_serialized (Any): The locator serialized for the specific model - """ - - def __init__(self, locator: str | Locator, locator_serialized: Any) -> None: - self.locator = locator - self.locator_serialized = locator_serialized - super().__init__(f"Element not found: {self.locator}") - - -class ModelNotFoundError(AutomationError): - """Exception raised when a model could not be found within available models. - - Args: - model_choice (str): The model choice. - """ - - def __init__( - self, - model_choice: str, - message: str | None = None, - ): - self.model_choice = model_choice - super().__init__( - f"Model not found: {model_choice}" if message is None else message - ) - - -class ModelTypeMismatchError(ModelNotFoundError): - """Exception raised when a model is not of the expected type. - - Args: - model_choice (str): The model choice. - expected_type (type): The expected type. - actual_type (type): The actual type. - """ - - def __init__( - self, - model_choice: str, - expected_type: type, - actual_type: type, - ): - self.expected_type = expected_type - self.actual_type = actual_type - super().__init__( - model_choice=model_choice, - message=f'Model "{model_choice}" is an instance of {actual_type.mro()}, ' - f"expected it to be an instance of {expected_type.mro()}", - ) - - -class QueryNoResponseError(AutomationError): - """Exception raised when a query does not return a response. - - Args: - message (str): The error message. - query (str): The query that was made. - """ - - def __init__(self, message: str, query: str): - self.message = message - self.query = query - super().__init__(self.message) - - -class QueryUnexpectedResponseError(AutomationError): - """Exception raised when a query returns an unexpected response. - - Args: - message (str): The error message. - query (str): The query that was made. - response (Any): The response that was received. 
- """ - - def __init__(self, message: str, query: str, response: Any): - self.message = message - self.query = query - self.response = response - super().__init__(self.message) - +from .models.exceptions import ( + AutomationError, + ElementNotFoundError, + ModelNotFoundError, + ModelTypeMismatchError, + QueryNoResponseError, + QueryUnexpectedResponseError, +) __all__ = [ "AiElementNotFound", diff --git a/src/askui/models/__init__.py b/src/askui/models/__init__.py index ec531fb0..5cdcdd61 100644 --- a/src/askui/models/__init__.py +++ b/src/askui/models/__init__.py @@ -11,6 +11,8 @@ OnMessageCb, Point, ) +from .openrouter.handler import OpenRouterGetModel +from .openrouter.settings import OpenRouterSettings from .shared.computer_agent_message_param import ( Base64ImageSourceParam, CacheControlEphemeralParam, @@ -52,4 +54,6 @@ "ToolResultBlockParam", "ToolUseBlockParam", "UrlImageSourceParam", + "OpenRouterGetModel", + "OpenRouterSettings", ] diff --git a/src/askui/models/anthropic/handler.py b/src/askui/models/anthropic/handler.py index ca7240c6..2856c0df 100644 --- a/src/askui/models/anthropic/handler.py +++ b/src/askui/models/anthropic/handler.py @@ -4,15 +4,15 @@ import anthropic from typing_extensions import override -from askui.exceptions import ( - ElementNotFoundError, - QueryNoResponseError, - QueryUnexpectedResponseError, -) from askui.locators.locators import Locator from askui.locators.serializers import VlmLocatorSerializer from askui.logger import logger from askui.models.anthropic.settings import ClaudeSettings +from askui.models.exceptions import ( + ElementNotFoundError, + QueryNoResponseError, + QueryUnexpectedResponseError, +) from askui.models.models import ( ANTHROPIC_MODEL_NAME_MAPPING, GetModel, diff --git a/src/askui/models/askui/inference_api.py b/src/askui/models/askui/inference_api.py index 331796d7..6221a8f6 100644 --- a/src/askui/models/askui/inference_api.py +++ b/src/askui/models/askui/inference_api.py @@ -5,11 +5,11 @@ from pydantic 
import RootModel from typing_extensions import override -from askui.exceptions import ElementNotFoundError from askui.locators.locators import Locator from askui.locators.serializers import AskUiLocatorSerializer, AskUiSerializedLocator from askui.logger import logger from askui.models.askui.settings import AskUiSettings +from askui.models.exceptions import ElementNotFoundError from askui.models.models import GetModel, LocateModel, ModelComposition, Point from askui.models.types.response_schemas import ResponseSchema from askui.utils.image_utils import ImageSource diff --git a/src/askui/models/askui/model_router.py b/src/askui/models/askui/model_router.py index ff0e0495..d2bf857f 100644 --- a/src/askui/models/askui/model_router.py +++ b/src/askui/models/askui/model_router.py @@ -1,9 +1,13 @@ from typing_extensions import override -from askui.exceptions import AutomationError, ElementNotFoundError, ModelNotFoundError from askui.locators.locators import AiElement, Locator, Prompt, Text from askui.logger import logger from askui.models.askui.inference_api import AskUiInferenceApi +from askui.models.exceptions import ( + AutomationError, + ElementNotFoundError, + ModelNotFoundError, +) from askui.models.models import LocateModel, ModelComposition, ModelName, Point from askui.utils.image_utils import ImageSource diff --git a/src/askui/models/exceptions.py b/src/askui/models/exceptions.py new file mode 100644 index 00000000..fe3c3f84 --- /dev/null +++ b/src/askui/models/exceptions.py @@ -0,0 +1,101 @@ +from typing import Any + +from askui.locators.locators import Locator + + +class AutomationError(Exception): + """Exception raised when the automation step cannot complete. + + Args: + message (str): The error message. + """ + + def __init__(self, message: str): + self.message = message + super().__init__(self.message) + + +class QueryNoResponseError(AutomationError): + """Exception raised when a query does not return a response. 
+ + Args: + message (str): The error message. + query (str): The query that was made. + """ + + def __init__(self, message: str, query: str): + self.message = message + self.query = query + super().__init__(self.message) + + +class ElementNotFoundError(AutomationError): + """Exception raised when an element cannot be located. + + Args: + locator (str | Locator): The locator that was used. + locator_serialized (Any): The locator serialized for the specific model + """ + + def __init__(self, locator: str | Locator, locator_serialized: Any) -> None: + self.locator = locator + self.locator_serialized = locator_serialized + super().__init__(f"Element not found: {self.locator}") + + +class QueryUnexpectedResponseError(AutomationError): + """Exception raised when a query returns an unexpected response. + + Args: + message (str): The error message. + query (str): The query that was made. + response (Any): The response that was received. + """ + + def __init__(self, message: str, query: str, response: Any): + self.message = message + self.query = query + self.response = response + super().__init__(self.message) + + +class ModelNotFoundError(AutomationError): + """Exception raised when a model could not be found within available models. + + Args: + model_choice (str): The model choice. + """ + + def __init__( + self, + model_choice: str, + message: str | None = None, + ): + self.model_choice = model_choice + super().__init__( + f"Model not found: {model_choice}" if message is None else message + ) + + +class ModelTypeMismatchError(ModelNotFoundError): + """Exception raised when a model is not of the expected type. + + Args: + model_choice (str): The model choice. + expected_type (type): The expected type. + actual_type (type): The actual type. 
+ """ + + def __init__( + self, + model_choice: str, + expected_type: type, + actual_type: type, + ): + self.expected_type = expected_type + self.actual_type = actual_type + super().__init__( + model_choice=model_choice, + message=f'Model "{model_choice}" is an instance of {actual_type.mro()}, ' + f"expected it to be an instance of {expected_type.mro()}", + ) diff --git a/src/askui/models/model_router.py b/src/askui/models/model_router.py index fbc49fe1..2f14c0a1 100644 --- a/src/askui/models/model_router.py +++ b/src/askui/models/model_router.py @@ -3,7 +3,6 @@ from typing_extensions import Literal -from askui.exceptions import ModelNotFoundError, ModelTypeMismatchError from askui.locators.locators import Locator from askui.locators.serializers import AskUiLocatorSerializer, VlmLocatorSerializer from askui.models.anthropic.settings import ( @@ -15,6 +14,7 @@ from askui.models.askui.computer_agent import AskUiComputerAgent from askui.models.askui.model_router import AskUiModelRouter from askui.models.askui.settings import AskUiComputerAgentSettings +from askui.models.exceptions import ModelNotFoundError, ModelTypeMismatchError from askui.models.huggingface.spaces_api import HFSpacesHandler from askui.models.models import ( MODEL_TYPES, diff --git a/src/askui/models/openrouter/__init__.py b/src/askui/models/openrouter/__init__.py new file mode 100644 index 00000000..b71cf18a --- /dev/null +++ b/src/askui/models/openrouter/__init__.py @@ -0,0 +1 @@ +"""OpenRouter model implementations.""" diff --git a/src/askui/models/openrouter/handler.py b/src/askui/models/openrouter/handler.py new file mode 100644 index 00000000..f9f7f905 --- /dev/null +++ b/src/askui/models/openrouter/handler.py @@ -0,0 +1,77 @@ +import os +from typing import Type + +from openai import OpenAI +from typing_extensions import override + +from askui.models.exceptions import QueryNoResponseError +from askui.models.models import GetModel +from askui.models.types.response_schemas import ResponseSchema 
+from askui.utils.image_utils import ImageSource + +from .prompts import PROMPT_QA +from .settings import OpenRouterSettings + + +class OpenRouterGetModel(GetModel): + def __init__(self, settings: OpenRouterSettings): + self._settings = settings + + _open_router_key = os.getenv("OPEN_ROUTER_API_KEY") + if _open_router_key is None: + error_msg = "OPEN_ROUTER_API_KEY is not set" + raise ValueError(error_msg) + + self._client = OpenAI( + api_key=_open_router_key, + base_url="https://openrouter.ai/api/v1", + ) + + def _predict(self, image_url: str, instruction: str, prompt: str) -> str | None: + chat_completion = self._client.chat.completions.create( + model=self._settings.model, + messages=[ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": { + "url": image_url, + }, + }, + {"type": "text", "text": prompt + instruction}, + ], + } + ], + top_p=None, + temperature=None, + max_tokens=150, + stream=False, + seed=None, + stop=None, + frequency_penalty=None, + presence_penalty=None, + ) + return chat_completion.choices[0].message.content + + @override + def get( + self, + query: str, + image: ImageSource, + response_schema: Type[ResponseSchema] | None, + model_choice: str, + ) -> ResponseSchema | str: + if response_schema is not None: + error_msg = f'Response schema is not supported for model "{model_choice}"' + raise NotImplementedError(error_msg) + response = self._predict( + image_url=image.to_data_url(), + instruction=query, + prompt=PROMPT_QA, + ) + if response is None: + error_msg = f'No response from model "{model_choice}" to query: "{query}"' + raise QueryNoResponseError(error_msg, query) + return response diff --git a/src/askui/models/openrouter/prompts.py b/src/askui/models/openrouter/prompts.py new file mode 100644 index 00000000..2ed79fd4 --- /dev/null +++ b/src/askui/models/openrouter/prompts.py @@ -0,0 +1 @@ +PROMPT_QA = "You are an agent to process screenshots and answer questions about things on the screen or extract information 
from it. Answer only with the response to the question and keep it short and precise." # noqa: E501 diff --git a/src/askui/models/openrouter/settings.py b/src/askui/models/openrouter/settings.py new file mode 100644 index 00000000..569a8854 --- /dev/null +++ b/src/askui/models/openrouter/settings.py @@ -0,0 +1,5 @@ +from pydantic import BaseModel, Field + + +class OpenRouterSettings(BaseModel): + model: str = Field(..., description="OpenRouter model name") diff --git a/src/askui/models/ui_tars_ep/ui_tars_api.py b/src/askui/models/ui_tars_ep/ui_tars_api.py index e1c39306..98116400 100644 --- a/src/askui/models/ui_tars_ep/ui_tars_api.py +++ b/src/askui/models/ui_tars_ep/ui_tars_api.py @@ -8,9 +8,9 @@ from pydantic_settings import BaseSettings from typing_extensions import override -from askui.exceptions import ElementNotFoundError, QueryNoResponseError from askui.locators.locators import Locator from askui.locators.serializers import VlmLocatorSerializer +from askui.models.exceptions import ElementNotFoundError, QueryNoResponseError from askui.models.models import ActModel, GetModel, LocateModel, ModelComposition, Point from askui.models.shared.computer_agent_cb_param import OnMessageCb from askui.models.shared.computer_agent_message_param import MessageParam diff --git a/tests/e2e/agent/test_locate.py b/tests/e2e/agent/test_locate.py index 04bf14b0..46a44a5d 100644 --- a/tests/e2e/agent/test_locate.py +++ b/tests/e2e/agent/test_locate.py @@ -6,10 +6,10 @@ from PIL import Image as PILImage from askui.agent import VisionAgent -from askui.exceptions import ElementNotFoundError from askui.locators import AiElement, Element, Prompt, Text from askui.locators.locators import Image from askui.models import ModelName +from askui.models.exceptions import ElementNotFoundError @pytest.mark.parametrize( diff --git a/tests/e2e/agent/test_locate_with_relations.py b/tests/e2e/agent/test_locate_with_relations.py index 8d70d4f2..25d7256c 100644 --- 
a/tests/e2e/agent/test_locate_with_relations.py +++ b/tests/e2e/agent/test_locate_with_relations.py @@ -6,14 +6,9 @@ from PIL import Image as PILImage from askui.agent import VisionAgent -from askui.exceptions import ElementNotFoundError -from askui.locators import ( - Element, - Image, - Prompt, - Text, -) +from askui.locators import Element, Image, Prompt, Text from askui.locators.locators import AiElement +from askui.models.exceptions import ElementNotFoundError @pytest.mark.parametrize( diff --git a/tests/integration/agent/test_retry.py b/tests/integration/agent/test_retry.py index c4652076..f6a698d6 100644 --- a/tests/integration/agent/test_retry.py +++ b/tests/integration/agent/test_retry.py @@ -3,9 +3,9 @@ import pytest from askui import ConfigurableRetry, LocateModel, VisionAgent -from askui.exceptions import ElementNotFoundError from askui.locators.locators import Locator from askui.models import ModelComposition +from askui.models.exceptions import ElementNotFoundError from askui.tools.toolbox import AgentToolbox from askui.utils.image_utils import ImageSource diff --git a/tests/unit/models/test_model_router.py b/tests/unit/models/test_model_router.py index 258d7292..5cb6f192 100644 --- a/tests/unit/models/test_model_router.py +++ b/tests/unit/models/test_model_router.py @@ -8,7 +8,7 @@ from PIL import Image from pytest_mock import MockerFixture -from askui.exceptions import ModelNotFoundError +from askui.models.exceptions import ModelNotFoundError from askui.models.huggingface.spaces_api import HFSpacesHandler from askui.models.model_router import ModelRouter from askui.models.models import ModelName