111 changes: 109 additions & 2 deletions docs/tracing.md
@@ -7,6 +7,8 @@ Traces give us the big picture of what happens when a request is made to an application.
- [Core Concepts](#core-concepts)
- [Components](#components)
- [Configuration](#configuration)
- [Setup for VisionAgent](#setup-for-visionagent)
- [What Gets Traced](#what-gets-traced)
- [Usage](#usage)
- [Create a new span](#create-a-new-span)
- [Context Manager](#context-manager)
@@ -40,13 +42,118 @@ Automatic instrumentors (like opentelemetry-instrumentation-fastapi) handle cont
## Configuration

This feature is entirely behind a feature flag and is controlled via environment variables; see [.env.template](https://github.com/askui/vision-agent/blob/main/.env.template).
To enable tracing we need to set the following flags:
- `ASKUI__CHAT_API__OTEL__ENABLED=True`
- `ASKUI__CHAT_API__OTEL__ENDPOINT=http://localhost/v1/traces`
- `ASKUI__CHAT_API__OTEL__SECRET=***`

For further configuration options, please refer to [OtelSettings](https://github.com/askui/vision-agent/blob/feat/otel-tracing/src/askui/telemetry/otel.py).

## Setup for VisionAgent

To enable tracing in your VisionAgent application, you need to:

1. **Set up environment variables** for your OTLP endpoint and credentials:
```bash
export OTEL_ENDPOINT="https://your-otlp-endpoint.com/v1/traces"
export OTEL_B64_SECRET="<your-base64-encoded-secret>"
```

2. **Create an `OtelSettings` instance** with your configuration:
```python
import os
from askui import VisionAgent
from askui.telemetry.otel import OtelSettings

def get_tracing_settings() -> OtelSettings:
return OtelSettings(
enabled=True,
secret=os.environ.get("OTEL_B64_SECRET", ""),
endpoint=os.environ.get("OTEL_ENDPOINT", ""),

service_name="vision-agent-sdk", # Optional: defaults to "chat-api"
service_version="1.0.0", # Optional: defaults to package version
cluster_name="my-cluster", # Optional: defaults to "askui-dev"
)
```

3. **Pass the tracing settings to the `act()` method**:
```python
def main() -> None:
agent = VisionAgent(display=1, model="askui/claude-haiku-4-5-20251001")
tracing_settings = get_tracing_settings()

with agent:
agent.act(
goal="Open Chrome and navigate to www.askui.com",
tracing_settings=tracing_settings,
)
```

### OtelSettings Configuration Options

The `OtelSettings` class accepts the following parameters:

- **`enabled`** (bool): Enable/disable tracing. Default: `False`
- **`secret`** (SecretStr | None): Base64-encoded authentication secret for OTLP. Required when `enabled=True`
- **`endpoint`** (str | None): OTLP endpoint URL (e.g., `https://tempo.example.com/v1/traces`)
- **`service_name`** (str): Name of your service in traces. Default: `"chat-api"`
- **`service_version`** (str): Version of your service. Default: package version
- **`cluster_name`** (str): Name of the cluster/environment. Default: `"askui-dev"`
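
For illustration only, the shape of these settings can be sketched as a plain dataclass. This is a simplified stand-in: the real `OtelSettings` lives in `askui.telemetry.otel`, is presumably a pydantic settings class that validates its fields (and, per the review discussion, may read environment variables itself), and `service_version` really defaults to the installed package version rather than a literal.

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class OtelSettingsSketch:
    """Simplified stand-in mirroring the documented OtelSettings fields."""

    enabled: bool = False
    secret: Optional[str] = None    # base64-encoded; required when enabled=True
    endpoint: Optional[str] = None  # e.g. "https://tempo.example.com/v1/traces"
    service_name: str = "chat-api"
    service_version: str = "0.0.0"  # real default: installed package version
    cluster_name: str = "askui-dev"


# Constructed with no arguments, the sketch reproduces the documented defaults.
settings = OtelSettingsSketch()
print(settings.enabled, settings.service_name, settings.cluster_name)
```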

## What Gets Traced

VisionAgent automatically creates spans for key operations during agent execution. Here's what gets traced:

### Span Hierarchy

```
act (root span)
├── _step (one per conversation turn)
│ ├── _call_on_message (for assistant messages)
│ ├── _handle_stop_reason
│ ├── _use_tools (if tools are used)
│ └── _call_on_message (for tool results)
└── (additional _step spans for recursive calls)
```
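
To make the nesting concrete, the hierarchy above can be replayed with a stdlib-only toy recorder. This is illustrative only: the real spans are created with the OpenTelemetry tracer, and the recursive `_step` opens as a sibling because the previous step span is ended before the recursive call.

```python
from contextlib import contextmanager

events: list[str] = []  # records span start/end order


@contextmanager
def span(name: str):
    events.append(f"start {name}")
    try:
        yield
    finally:
        events.append(f"end {name}")


# One conversation turn with a tool call, followed by a recursive _step.
with span("act"):
    with span("_step"):
        with span("_call_on_message"):    # assistant message
            pass
        with span("_handle_stop_reason"):
            pass
        with span("_use_tools"):
            pass
        with span("_call_on_message"):    # tool results
            pass
    with span("_step"):                   # recursive call after tool results
        pass

print(events[0], "...", events[-1])  # → start act ... end act
```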

### Span Details

#### `act` Span
The root span for the entire conversation.

**Attributes:**
- `input_tokens` (int): Total input tokens consumed across all API calls
- `output_tokens` (int): Total output tokens generated across all API calls
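
The accumulation behind these totals can be sketched as follows. The field names mirror the `UsageParam` model added in `agent_message_param.py`, but this is a simplified stand-in (the real class is a pydantic model with optional fields and cache-token counters as well):

```python
from dataclasses import dataclass


@dataclass
class Usage:
    input_tokens: int = 0
    output_tokens: int = 0


def accumulate(total: Usage, step: Usage) -> Usage:
    # Each _step adds its per-call usage to the running total,
    # which is finally written onto the root `act` span.
    total.input_tokens += step.input_tokens
    total.output_tokens += step.output_tokens
    return total


total = Usage()
for step_usage in [Usage(1200, 300), Usage(900, 150)]:  # two API calls
    accumulate(total, step_usage)

print(total.input_tokens, total.output_tokens)  # → 2100 450
```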

#### `_step` Span
Represents a single conversation turn (one API call to the LLM).

**Attributes:**
- `input_tokens` (int): Input tokens for this specific API call
- `output_tokens` (int): Output tokens for this specific API call

#### `_use_tools` Span
Created when the agent uses tools (e.g., taking screenshots, clicking, typing).

**Attributes (per tool use):**
- `id_{n}` (str): Tool use block ID
- `input_{n}` (str): JSON-encoded tool input parameters
- `name_{n}` (str): Tool name (e.g., "computer", "bash")
- `type_{n}` (str): Always "tool_use"
- `caching_control_{n}` (str): Cache control settings

Where `{n}` is the tool index (1, 2, 3, ...).
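
The indexed attribute naming can be illustrated with a small sketch. The tool-use blocks below are hypothetical; in the real code the attributes are set on the current OpenTelemetry span inside `_use_tools`:

```python
import json

# Hypothetical tool_use blocks, shaped like the documented attributes.
tool_uses = [
    {"id": "toolu_01", "name": "computer", "type": "tool_use",
     "input": {"action": "screenshot"}},
    {"id": "toolu_02", "name": "bash", "type": "tool_use",
     "input": {"command": "ls"}},
]

attributes: dict[str, str] = {}
for idx, block in enumerate(tool_uses, 1):  # 1-based index, as documented
    attributes[f"id_{idx}"] = block["id"]
    attributes[f"input_{idx}"] = json.dumps(block["input"])
    attributes[f"name_{idx}"] = block["name"]
    attributes[f"type_{idx}"] = block["type"]

print(attributes["name_1"], attributes["input_2"])
```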

#### `_call_on_message` Span
Tracks callbacks for new messages from the assistant or user.

#### `_handle_stop_reason` Span
Handles conversation stop reasons (e.g., max_tokens, tool_use, end_turn).

### Automatic Instrumentation

When tracing is enabled, VisionAgent also automatically instruments:
- **HTTPX**: All HTTP client requests (including Anthropic API calls)

## Usage

9 changes: 9 additions & 0 deletions src/askui/agent_base.py
@@ -18,6 +18,10 @@
from askui.models.shared.settings import ActSettings, CachingSettings
from askui.models.shared.tools import Tool, ToolCollection
from askui.prompts.caching import CACHE_USE_PROMPT
from askui.telemetry.otel import (
OtelSettings,
setup_opentelemetry_tracing_for_vision_agent,
)
from askui.tools.agent_os import AgentOs
from askui.tools.android.agent_os import AndroidAgentOs
from askui.tools.caching_tools import (
@@ -188,6 +192,7 @@ def act(
tools: list[Tool] | ToolCollection | None = None,
settings: ActSettings | None = None,
caching_settings: CachingSettings | None = None,
tracing_settings: OtelSettings | None = None,
) -> None:
"""
Instructs the agent to achieve a specified goal through autonomous actions.
@@ -213,6 +218,7 @@
sequences (trajectories). Available strategies: "no" (default, no
caching), "write" (record actions to cache file), "read" (replay from
cached trajectories), "both" (read and write). Defaults to no caching.
tracing_settings (OtelSettings | None, optional): Settings for tracing.

Returns:
None
@@ -314,6 +320,9 @@
if cached_execution_tool:
cached_execution_tool.set_toolbox(_tools)

if tracing_settings is not None:
setup_opentelemetry_tracing_for_vision_agent(tracing_settings)
Comment from @onur-askui (Contributor), Dec 11, 2025: I think we can set up OTel a little earlier. WDYT? Maybe in `__init__.py`?

Reply from the author (Contributor): Similar to `act_settings` and `caching_settings`, users provide the `tracing_settings` only when calling `agent.act()`. Hence, this is the earliest the tracing settings are available. This also allows users to specify individual tracing settings for multiple `act` commands.
self._model_router.act(
messages=messages,
model=_model,
5 changes: 4 additions & 1 deletion src/askui/models/anthropic/messages_api.py
@@ -67,7 +67,10 @@ def create_message(
temperature: float | Omit = omit,
) -> MessageParam:
_messages = [
cast("BetaMessageParam", message.model_dump(exclude={"stop_reason"}))
cast(
"BetaMessageParam",
message.model_dump(exclude={"stop_reason", "usage"}),
)
for message in messages
]
response = self._client.beta.messages.create( # type: ignore[misc]
100 changes: 94 additions & 6 deletions src/askui/models/shared/agent.py
@@ -1,10 +1,12 @@
import json
import logging

from opentelemetry import context, trace
from typing_extensions import override

from askui.models.exceptions import MaxTokensExceededError, ModelRefusalError
from askui.models.models import ActModel
from askui.models.shared.agent_message_param import MessageParam
from askui.models.shared.agent_message_param import MessageParam, UsageParam
from askui.models.shared.agent_on_message_cb import (
NULL_ON_MESSAGE_CB,
OnMessageCb,
@@ -21,6 +23,7 @@
from askui.reporting import NULL_REPORTER, Reporter

logger = logging.getLogger(__name__)
tracer = trace.get_tracer(__name__)


class Agent(ActModel):
@@ -58,7 +61,8 @@ def _step(
settings: ActSettings,
tool_collection: ToolCollection,
truncation_strategy: TruncationStrategy,
) -> None:
accumulated_usage: UsageParam | None = None,
) -> UsageParam:
"""Execute a single step in the conversation.

If the last message is an assistant's message and does not contain tool use
@@ -72,10 +76,22 @@
tool_collection (ToolCollection): The tools to use for the step.
truncation_strategy (TruncationStrategy): The truncation strategy to use
for the step.
accumulated_usage (UsageParam, optional): UsageParam to accumulate
token usage across steps.

Returns:
None
UsageParam: Accumulated token usage with input_tokens and output_tokens.
"""
if accumulated_usage is None:
accumulated_usage = UsageParam(
input_tokens=0,
output_tokens=0,
cache_creation_input_tokens=0,
cache_read_input_tokens=0,
)
step_span = tracer.start_span("_step")
ctx = trace.set_span_in_context(step_span)
token = context.attach(ctx)
Comment (Contributor): Are we detaching safely if an exception occurs?

Reply (Collaborator): I'm not 100% sure if this is the right place. I need to check out the source code.

if truncation_strategy.messages[-1].role == "user":
response_message = self._messages_api.create_message(
messages=truncation_strategy.messages,
@@ -88,11 +104,48 @@
tool_choice=settings.messages.tool_choice,
temperature=settings.messages.temperature,
)
# Accumulate token usage
if response_message.usage:
accumulated_usage.input_tokens = (
accumulated_usage.input_tokens or 0
) + (response_message.usage.input_tokens or 0)
accumulated_usage.output_tokens = (
accumulated_usage.output_tokens or 0
) + (response_message.usage.output_tokens or 0)
accumulated_usage.cache_creation_input_tokens = (
accumulated_usage.cache_creation_input_tokens or 0
) + (response_message.usage.cache_creation_input_tokens or 0)
accumulated_usage.cache_read_input_tokens = (
accumulated_usage.cache_read_input_tokens or 0
) + (response_message.usage.cache_read_input_tokens or 0)

step_span.set_attributes(
{
"cache_creation_input_tokens": (
response_message.usage.cache_creation_input_tokens or 0
if response_message.usage
else 0
),
"cache_read_input_tokens": (
response_message.usage.cache_read_input_tokens or 0
if response_message.usage
else 0
),
"input_tokens": response_message.usage.input_tokens or 0
if response_message.usage
else 0,
"output_tokens": response_message.usage.output_tokens or 0
if response_message.usage
else 0,
Comment on lines +107 to +139 (Collaborator): Can we not delegate this to a UsageManager, which we call as `usage_manager.add(usage_params)`?
}
)
message_by_assistant = self._call_on_message(
on_message, response_message, truncation_strategy.messages
)
if message_by_assistant is None:
return
context.detach(token)
Comment (Collaborator): Can we not use a context manager here, so that in case of an error the detach happens automatically?
step_span.end()
return accumulated_usage
message_by_assistant_dict = message_by_assistant.model_dump(mode="json")
logger.debug(message_by_assistant_dict)
truncation_strategy.append_message(message_by_assistant)
@@ -111,14 +164,21 @@ def _step(
tool_result_message_dict = tool_result_message.model_dump(mode="json")
logger.debug(tool_result_message_dict)
truncation_strategy.append_message(tool_result_message)
self._step(
context.detach(token)
step_span.end()
return self._step(
model=model,
tool_collection=tool_collection,
on_message=on_message,
settings=settings,
truncation_strategy=truncation_strategy,
accumulated_usage=accumulated_usage,
)
context.detach(token)
step_span.end()
return accumulated_usage

@tracer.start_as_current_span("_call_on_message")
def _call_on_message(
self,
on_message: OnMessageCb | None,
@@ -130,6 +190,7 @@ def _call_on_message(
return on_message(OnMessageCbParam(message=message, messages=messages))

@override
@tracer.start_as_current_span("act")
def act(
self,
messages: list[MessageParam],
@@ -148,14 +209,27 @@
model=model,
)
)
self._step(
accumulated_usage = self._step(
model=model,
on_message=on_message or NULL_ON_MESSAGE_CB,
settings=_settings,
tool_collection=_tool_collection,
truncation_strategy=truncation_strategy,
)
current_span = trace.get_current_span()
current_span.set_attributes(
{
"input_tokens": accumulated_usage.input_tokens or 0,
"output_tokens": accumulated_usage.output_tokens or 0,
"cache_creation_input_tokens": (
accumulated_usage.cache_creation_input_tokens or 0
),
"cache_read_input_tokens": accumulated_usage.cache_read_input_tokens
or 0,
}
)

@tracer.start_as_current_span("_use_tools")
def _use_tools(
self,
message: MessageParam,
@@ -178,6 +252,19 @@
for content_block in message.content
if content_block.type == "tool_use"
]

current_span = trace.get_current_span()
for idx, tool_use_block in enumerate(tool_use_content_blocks, 1):
current_span.set_attributes(
{
f"id_{idx}": tool_use_block.id,
f"input_{idx}": json.dumps(tool_use_block.input),
f"name_{idx}": tool_use_block.name,
f"type_{idx}": tool_use_block.type,
f"caching_control_{idx}": str(tool_use_block.cache_control),
}
)

content = tool_collection.run(tool_use_content_blocks)
if len(content) == 0:
return None
@@ -187,6 +274,7 @@
role="user",
)

@tracer.start_as_current_span("_handle_stop_reason")
def _handle_stop_reason(self, message: MessageParam, max_tokens: int) -> None:
if message.stop_reason == "max_tokens":
raise MaxTokensExceededError(max_tokens)
8 changes: 8 additions & 0 deletions src/askui/models/shared/agent_message_param.py
@@ -105,10 +105,18 @@ class BetaRedactedThinkingBlock(BaseModel):
]


class UsageParam(BaseModel):
input_tokens: int | None = None
output_tokens: int | None = None
cache_creation_input_tokens: int | None = None
cache_read_input_tokens: int | None = None


class MessageParam(BaseModel):
role: Literal["user", "assistant"]
content: str | list[ContentBlockParam]
stop_reason: StopReason | None = None
usage: UsageParam | None = None


__all__ = [
Expand Down