Skip to content

Commit c71b89e

Browse files
committed
feat!: introduce Claude 4 Sonnet
- Updated model references in the documentation and codebase from `anthropic-claude-3-5-sonnet-20241022` to `claude-sonnet-4-20250514`.
- Added support for `stop_reason` to the `BaseAgent` class to manage stop reasons and raise appropriate exceptions.
- Added new exceptions `MaxTokensExceededError` and `ModelRefusalError` to handle specific model response scenarios via `stop_reason` on `act()`.
- Added support for thinking and tool choice via `AskUiComputerAgentSettings` and `ClaudeComputerAgentSettings`.
- Updated tests to reflect the new model references.
1 parent 1254b98 commit c71b89e

File tree

17 files changed

+200
-48
lines changed

17 files changed

+200
-48
lines changed

README.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -128,8 +128,8 @@ with VisionAgent() as agent:
128128

129129
agent.click("search field")
130130

131-
# Use Anthropic (Claude 3.5 Sonnet V2) as model
132-
agent.click("search field", model="anthropic-claude-3-5-sonnet-20241022")
131+
# Use Anthropic (Claude 4 Sonnet) as model
132+
agent.click("search field", model="claude-sonnet-4-20250514")
133133
```
134134

135135

@@ -201,7 +201,7 @@ with VisionAgent(model="askui-combo") as agent:
201201

202202
# Use different models for different tasks
203203
with VisionAgent(model={
204-
"act": "anthropic-claude-3-5-sonnet-20241022", # Use Claude for act()
204+
"act": "claude-sonnet-4-20250514", # Use Claude for act()
205205
"get": "askui", # Use AskUI for get()
206206
"locate": "askui-combo", # Use AskUI combo for locate() (and click(), mouse_move())
207207
}) as agent:
@@ -240,7 +240,7 @@ Supported commands are: `act()`, `click()`, `get()`, `locate()`, `mouse_move()`
240240
Supported commands are: `act()`, `get()`, `click()`, `locate()`, `mouse_move()`
241241
| Model Name | Info | Execution Speed | Security | Cost | Reliability |
242242
|-------------|--------------------|--------------|--------------|--------------|--------------|
243-
| `anthropic-claude-3-5-sonnet-20241022` | The [Computer Use](https://docs.anthropic.com/en/docs/agents-and-tools/computer-use) model from Antrophic is a Large Action Model (LAM), which can autonomously achieve goals. e.g. `"Book me a flight from Berlin to Rom"` | slow, >1s per step | Model hosting by Anthropic | High, up to 1,5$ per act | Not recommended for production usage |
243+
| `claude-sonnet-4-20250514` | The [Computer Use](https://docs.anthropic.com/en/docs/agents-and-tools/computer-use) model from Anthropic is a Large Action Model (LAM), which can autonomously achieve goals. e.g. `"Book me a flight from Berlin to Rome"` | slow, >1s per step | Model hosting by Anthropic | High, up to 1,5$ per act | Not recommended for production usage |
244244
> **Note:** Configure your Anthropic Model Provider [here](#3a-authenticate-with-an-ai-model-provider)
245245
246246

@@ -409,7 +409,7 @@ custom_models: ModelRegistry = {
409409
"dynamic-model": lambda: create_custom_model("your-api-key"),
410410
"dynamic-model-cached": lambda: create_custom_model_cached("your-api-key"),
411411
"askui": lambda: create_custom_model_cached("your-api-key"), # overrides default model
412-
"anthropic-claude-3-5-sonnet-20241022": lambda: create_custom_model_cached("your-api-key"), # overrides model
412+
"claude-sonnet-4-20250514": lambda: create_custom_model_cached("your-api-key"), # overrides model
413413
}
414414

415415

@@ -735,7 +735,7 @@ with VisionAgent() as agent:
735735

736736
**AskUI Vision Agent** is a versatile AI powered framework that enables you to automate computer tasks in Python.
737737

738-
It connects Agent OS with powerful computer use models like Anthropic's Claude Sonnet 3.5 v2 and the AskUI Prompt-to-Action series. It is your entry point for building complex automation scenarios with detailed instructions or let the agent explore new challenges on its own.
738+
It connects Agent OS with powerful computer use models like Anthropic's Claude Sonnet 4 and the AskUI Prompt-to-Action series. It is your entry point for building complex automation scenarios with detailed instructions or for letting the agent explore new challenges on its own.
739739

740740

741741
![image](docs/assets/Architecture.svg)

src/askui/agent.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from askui.models.shared.computer_agent_cb_param import OnMessageCb
1212
from askui.models.shared.computer_agent_message_param import MessageParam
1313
from askui.models.shared.tools import ToolCollection
14-
from askui.tools.computer import Computer20241022Tool
14+
from askui.tools.computer import Computer20250124Tool
1515
from askui.tools.exception_tool import ExceptionTool
1616
from askui.utils.image_utils import ImageSource, Img
1717

@@ -83,7 +83,7 @@ def __init__(
8383
)
8484
_models = initialize_default_model_registry(
8585
tool_collection=ToolCollection(
86-
tools=[Computer20241022Tool(self.tools.os), ExceptionTool()]
86+
tools=[Computer20250124Tool(self.tools.os), ExceptionTool()]
8787
),
8888
reporter=self._reporter,
8989
)
@@ -598,6 +598,11 @@ def act(
598598
Returns:
599599
None
600600
601+
Raises:
602+
MaxTokensExceededError: If the model reaches the maximum token limit
603+
defined in the agent settings.
604+
ModelRefusalError: If the model refuses to process the request.
605+
601606
Example:
602607
```python
603608
from askui import VisionAgent

src/askui/models/anthropic/computer_agent.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
from typing import TYPE_CHECKING, cast
22

33
from anthropic import Anthropic
4+
from anthropic.types import (
5+
ThinkingConfigDisabledParam,
6+
ThinkingConfigEnabledParam,
7+
ThinkingConfigParam,
8+
)
49
from typing_extensions import override
510

611
from askui.models.anthropic.settings import ClaudeComputerAgentSettings
@@ -30,16 +35,23 @@ def __init__(
3035
def _create_message(
3136
self, messages: list[MessageParam], model_choice: str
3237
) -> MessageParam:
38+
_thinking: ThinkingConfigParam = ThinkingConfigDisabledParam(type="disabled")
39+
40+
if self._settings.thinking:
41+
_thinking = ThinkingConfigEnabledParam(
42+
type="enabled",
43+
budget_tokens=self._settings.thinking.budget_tokens,
44+
)
45+
3346
response = self._client.beta.messages.with_raw_response.create(
3447
max_tokens=self._settings.max_tokens,
35-
messages=[
36-
cast("BetaMessageParam", message.model_dump(mode="json"))
37-
for message in messages
38-
],
48+
messages=cast("list[BetaMessageParam]", messages),
3949
model=ANTHROPIC_MODEL_NAME_MAPPING[ModelName(model_choice)],
4050
system=[self._system],
4151
tools=self._tool_collection.to_params(),
4252
betas=self._settings.betas,
53+
thinking=_thinking,
54+
tool_choice=self._settings.tool_choice,
4355
)
4456
parsed_response = response.parse()
4557
return MessageParam.model_validate(parsed_response.model_dump())

src/askui/models/anthropic/settings.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1+
from anthropic import NotGiven
2+
from anthropic.types.beta import BetaToolChoiceAutoParam, BetaToolChoiceParam
13
from pydantic import BaseModel, Field, SecretStr
24
from pydantic_settings import BaseSettings
35

46
from askui.models.shared.computer_agent import ComputerAgentSettingsBase
5-
from askui.models.shared.settings import ChatCompletionsCreateSettings
7+
from askui.models.shared.settings import ChatCompletionsCreateSettings, ThinkingSettings
68

7-
COMPUTER_USE_BETA_FLAG = "computer-use-2024-10-22"
9+
NOT_GIVEN = NotGiven()
810

911

1012
class AnthropicSettings(BaseSettings):
@@ -28,4 +30,7 @@ class ClaudeSettings(ClaudeSettingsBase):
2830

2931

3032
class ClaudeComputerAgentSettings(ComputerAgentSettingsBase, ClaudeSettingsBase):
31-
pass
33+
thinking: ThinkingSettings | None = None
34+
tool_choice: BetaToolChoiceParam | BetaToolChoiceAutoParam = (
35+
BetaToolChoiceAutoParam(type="auto")
36+
)

src/askui/models/askui/computer_agent.py

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,15 @@
11
import httpx
2+
from anthropic import NotGiven
3+
from anthropic.types import ThinkingConfigEnabledParam
4+
from anthropic.types.beta import (
5+
BetaTextBlockParam,
6+
BetaThinkingConfigParam,
7+
BetaToolChoiceParam,
8+
BetaToolUnionParam,
9+
)
10+
from pydantic import BaseModel, ConfigDict
211
from tenacity import retry, retry_if_exception, stop_after_attempt, wait_exponential
3-
from typing_extensions import override
12+
from typing_extensions import Literal, override
413

514
from askui.models.askui.settings import AskUiComputerAgentSettings
615
from askui.models.shared.computer_agent import ComputerAgent
@@ -10,6 +19,21 @@
1019

1120
from ...logger import logger
1221

22+
NOT_GIVEN = NotGiven()
23+
24+
25+
class RequestBody(BaseModel):
26+
model_config = ConfigDict(arbitrary_types_allowed=True)
27+
max_tokens: int
28+
messages: list[MessageParam]
29+
provider: Literal["gcp_vertex"] = "gcp_vertex"
30+
model: str
31+
tools: list[BetaToolUnionParam]
32+
betas: list[str]
33+
system: list[BetaTextBlockParam]
34+
thinking: BetaThinkingConfigParam | NotGiven = NOT_GIVEN
35+
tool_choice: BetaToolChoiceParam | NotGiven = NOT_GIVEN
36+
1337

1438
def is_retryable_error(exception: BaseException) -> bool:
1539
"""Check if the exception is a retryable error (status codes 429 or 529)."""
@@ -47,20 +71,26 @@ def _create_message(
4771
model_choice: str, # noqa: ARG002
4872
) -> MessageParam:
4973
try:
50-
request_body = {
51-
"max_tokens": self._settings.max_tokens,
52-
"messages": [msg.model_dump(mode="json") for msg in messages],
53-
"model": self._settings.model,
54-
"tools": self._tool_collection.to_params(),
55-
"betas": self._settings.betas,
56-
"system": [self._system],
57-
}
74+
request_body = RequestBody(
75+
max_tokens=self._settings.max_tokens,
76+
messages=messages,
77+
model=self._settings.model,
78+
tools=self._tool_collection.to_params(),
79+
betas=self._settings.betas,
80+
system=[self._system],
81+
tool_choice=self._settings.tool_choice,
82+
)
83+
if self._settings.thinking:
84+
request_body.thinking = ThinkingConfigEnabledParam(
85+
budget_tokens=self._settings.thinking.budget_tokens,
86+
type="enabled",
87+
)
88+
5889
response = self._client.post(
59-
"/act/inference", json=request_body, timeout=300.0
90+
"/act/inference", json=request_body.model_dump(), timeout=300.0
6091
)
6192
response.raise_for_status()
62-
response_data = response.json()
63-
return MessageParam.model_validate(response_data)
93+
return MessageParam.model_validate(response.json())
6494
except Exception as e: # noqa: BLE001
6595
if is_retryable_error(e):
6696
logger.debug(e)

src/askui/models/askui/settings.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,20 @@
11
import base64
22
from functools import cached_property
33

4+
from anthropic import NotGiven
5+
from anthropic.types.beta import (
6+
BetaToolChoiceAutoParam,
7+
BetaToolChoiceParam,
8+
)
49
from pydantic import UUID4, Field, HttpUrl, SecretStr
510
from pydantic_settings import BaseSettings
611

712
from askui.models.models import ModelName
813
from askui.models.shared.base_agent import AgentSettingsBase
914
from askui.models.shared.computer_agent import ComputerAgentSettingsBase
15+
from askui.models.shared.settings import ThinkingSettings
16+
17+
NOT_GIVEN = NotGiven()
1018

1119

1220
class AskUiSettings(BaseSettings):
@@ -40,12 +48,16 @@ def base_url(self) -> str:
4048

4149

4250
class AskUiComputerAgentSettings(ComputerAgentSettingsBase):
43-
model: str = ModelName.ANTHROPIC__CLAUDE__3_5__SONNET__20241022
51+
model: str = ModelName.ANTHROPIC__CLAUDE__SONNET__4__20250514
4452
askui: AskUiSettings = Field(default_factory=AskUiSettings)
53+
thinking: ThinkingSettings | None = None
54+
tool_choice: BetaToolChoiceParam | BetaToolChoiceAutoParam = (
55+
BetaToolChoiceAutoParam(type="auto")
56+
)
4557

4658

4759
class AskUiAndroidAgentSettings(AgentSettingsBase):
4860
"""Settings for AskUI Android agent."""
4961

50-
model: str = ModelName.ANTHROPIC__CLAUDE__3_5__SONNET__20241022
62+
model: str = ModelName.ANTHROPIC__CLAUDE__SONNET__4__20250514
5163
askui: AskUiSettings = Field(default_factory=AskUiSettings)

src/askui/models/exceptions.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,3 +99,41 @@ def __init__(
9999
message=f'Model "{model_choice}" is an instance of {actual_type.mro()}, '
100100
f"expected it to be an instance of {expected_type.mro()}",
101101
)
102+
103+
104+
class MaxTokensExceededError(AutomationError):
105+
"""Exception raised when the model stops due to reaching the maximum token limit.
106+
107+
Args:
108+
max_tokens (int): The maximum token limit that was exceeded.
109+
message (str, optional): Custom error message. If not provided, a default
110+
message will be generated.
111+
"""
112+
113+
def __init__(self, max_tokens: int, message: str | None = None):
114+
self.max_tokens = max_tokens
115+
error_msg = (
116+
f"Model stopped due to reaching maximum token limit of {max_tokens} tokens"
117+
if message is None
118+
else message
119+
)
120+
super().__init__(error_msg)
121+
122+
123+
class ModelRefusalError(AutomationError):
124+
"""Exception raised when the model refuses to process the request.
125+
126+
Args:
127+
reason (str, optional): The reason for the refusal if provided by the model.
128+
message (str, optional): Custom error message. If not provided, a default
129+
message will be generated.
130+
"""
131+
132+
def __init__(self, reason: str | None = None, message: str | None = None):
133+
self.reason = reason
134+
error_msg = (
135+
f"Model refused to process the request{f': {reason}' if reason else ''}"
136+
if message is None
137+
else message
138+
)
139+
super().__init__(error_msg)

src/askui/models/model_router.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import functools
22
from typing import Type, overload
33

4+
from anthropic.types.beta import BetaToolChoiceAutoParam
45
from typing_extensions import Literal
56

67
from askui.locators.locators import Locator
@@ -34,6 +35,7 @@
3435
from askui.models.shared.computer_agent_cb_param import OnMessageCb
3536
from askui.models.shared.computer_agent_message_param import MessageParam
3637
from askui.models.shared.facade import ModelFacade
38+
from askui.models.shared.settings import ThinkingSettings
3739
from askui.models.shared.tools import ToolCollection
3840
from askui.models.types.response_schemas import ResponseSchema
3941
from askui.reporting import CompositeReporter, Reporter
@@ -102,6 +104,12 @@ def askui_facade() -> ModelFacade:
102104
reporter=reporter,
103105
settings=AskUiComputerAgentSettings(
104106
askui=askui_settings(),
107+
tool_choice=BetaToolChoiceAutoParam(
108+
type="auto", disable_parallel_tool_use=False
109+
),
110+
thinking=ThinkingSettings(
111+
budget_tokens=2000,
112+
),
105113
),
106114
)
107115
return ModelFacade(
@@ -122,7 +130,7 @@ def hf_spaces_handler() -> HFSpacesHandler:
122130
ModelName.ASKUI__COMBO: askui_model_router,
123131
ModelName.ASKUI__OCR: askui_model_router,
124132
ModelName.ASKUI__PTA: askui_model_router,
125-
ModelName.ANTHROPIC__CLAUDE__3_5__SONNET__20241022: anthropic_facade,
133+
ModelName.ANTHROPIC__CLAUDE__SONNET__4__20250514: anthropic_facade,
126134
ModelName.HF__SPACES__ASKUI__PTA_1: hf_spaces_handler,
127135
ModelName.HF__SPACES__QWEN__QWEN2_VL_2B_INSTRUCT: hf_spaces_handler,
128136
ModelName.HF__SPACES__QWEN__QWEN2_VL_7B_INSTRUCT: hf_spaces_handler,

src/askui/models/models.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ class ModelName(str, Enum):
2323
elements.
2424
"""
2525

26-
ANTHROPIC__CLAUDE__3_5__SONNET__20241022 = "anthropic-claude-3-5-sonnet-20241022"
26+
ANTHROPIC__CLAUDE__SONNET__4__20250514 = "claude-sonnet-4-20250514"
2727
ASKUI = "askui"
2828
ASKUI__AI_ELEMENT = "askui-ai-element"
2929
ASKUI__COMBO = "askui-combo"
@@ -38,7 +38,7 @@ class ModelName(str, Enum):
3838

3939

4040
ANTHROPIC_MODEL_NAME_MAPPING = {
41-
ModelName.ANTHROPIC__CLAUDE__3_5__SONNET__20241022: "claude-3-5-sonnet-20241022",
41+
ModelName.ANTHROPIC__CLAUDE__SONNET__4__20250514: "claude-sonnet-4-20250514",
4242
}
4343

4444

src/askui/models/shared/base_agent.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from pydantic import BaseModel
66
from typing_extensions import TypeVar, override
77

8+
from askui.models.exceptions import MaxTokensExceededError, ModelRefusalError
89
from askui.models.models import ActModel
910
from askui.models.shared.computer_agent_cb_param import OnMessageCb, OnMessageCbParam
1011
from askui.models.shared.computer_agent_message_param import (
@@ -117,6 +118,8 @@ def _step(
117118
)
118119
else:
119120
message_by_assistant = messages[-1]
121+
122+
self._handle_stop_reason(message_by_assistant)
120123
if tool_result_message := self._use_tools(message_by_assistant):
121124
if tool_result_message := self._call_on_message(
122125
on_message, tool_result_message, messages
@@ -238,3 +241,11 @@ def _maybe_filter_to_n_most_recent_images(
238241
new_content.append(content)
239242
tool_result.content = new_content
240243
return messages
244+
245+
def _handle_stop_reason(self, message: MessageParam) -> None:
246+
if message.stop_reason == "max_tokens":
247+
error_msg = f"Model stopped due to reaching maximum token limit of {self._settings.max_tokens} tokens" # noqa: E501
248+
raise MaxTokensExceededError(self._settings.max_tokens, error_msg)
249+
if message.stop_reason == "refusal":
250+
error_msg = "Model refused to process the request"
251+
raise ModelRefusalError(message=error_msg)

0 commit comments

Comments
 (0)