From 8467f3705db6b8b298658cb28264aa9f85940f4c Mon Sep 17 00:00:00 2001 From: openhands Date: Fri, 15 May 2026 20:02:57 +0000 Subject: [PATCH 1/5] Add conversation observability metadata Co-authored-by: openhands --- .../openhands/agent_server/event_service.py | 2 + .../openhands/sdk/conversation/base.py | 12 ++++- .../sdk/conversation/conversation.py | 11 +++++ .../conversation/impl/local_conversation.py | 11 ++++- .../conversation/impl/remote_conversation.py | 15 +++++- .../openhands/sdk/conversation/request.py | 16 ++++++- .../openhands/sdk/conversation/types.py | 48 ++++++++++++++++++- .../openhands/sdk/observability/laminar.py | 30 +++++++++--- openhands-sdk/openhands/sdk/settings/model.py | 15 ++++++ .../conversation/test_base_span_management.py | 30 +++++++++++- tests/sdk/conversation/test_tags.py | 38 +++++++++++++++ tests/sdk/observability/test_laminar.py | 28 +++++++++++ 12 files changed, 241 insertions(+), 15 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/event_service.py b/openhands-agent-server/openhands/agent_server/event_service.py index 79baab56ce..5c35db9efe 100644 --- a/openhands-agent-server/openhands/agent_server/event_service.py +++ b/openhands-agent-server/openhands/agent_server/event_service.py @@ -645,6 +645,8 @@ def _token_streaming_callback(chunk: LLMStreamChunk) -> None: cipher=self.cipher, hook_config=self.stored.hook_config, tags=self.stored.tags, + observability_metadata=self.stored.observability_metadata, + observability_tags=self.stored.observability_tags, ) conversation.set_confirmation_policy(self.stored.confirmation_policy) diff --git a/openhands-sdk/openhands/sdk/conversation/base.py b/openhands-sdk/openhands/sdk/conversation/base.py index 1e50239bd3..3900489af6 100644 --- a/openhands-sdk/openhands/sdk/conversation/base.py +++ b/openhands-sdk/openhands/sdk/conversation/base.py @@ -10,6 +10,7 @@ ConversationCallbackType, ConversationID, ConversationTokenCallbackType, + TraceMetadataValue, ) from openhands.sdk.llm.llm import LLM from openhands.sdk.llm.message import Message @@ -127,11 +128,18 @@ def __init__(self) -> None: # that constructed the conversation. self._observability_root_span: RootSpan | None = None - def _start_observability_span(self, session_id: str) -> None: + def _start_observability_span( + self, + session_id: str, + metadata: dict[str, TraceMetadataValue] | None = None, + tags: list[str] | None = None, + ) -> None: """Start a per-conversation observability root span. Args: session_id: The session ID to associate with the trace + metadata: Optional trace-level metadata to attach to observability backends + tags: Optional span tags to attach to the conversation root span """ if not should_enable_observability(): return @@ -139,7 +147,7 @@ def _start_observability_span(self, session_id: str) -> None: # Idempotent: never start two roots for one conversation. return self._observability_root_span = start_root_span( - "conversation", session_id=session_id + "conversation", session_id=session_id, metadata=metadata, tags=tags ) def _end_observability_span(self) -> None: diff --git a/openhands-sdk/openhands/sdk/conversation/conversation.py b/openhands-sdk/openhands/sdk/conversation/conversation.py index 0c9e51a386..0c536281f7 100644 --- a/openhands-sdk/openhands/sdk/conversation/conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/conversation.py @@ -9,6 +9,7 @@ ConversationID, ConversationTokenCallbackType, StuckDetectionThresholds, + TraceMetadataValue, ) from openhands.sdk.conversation.visualizer import ( ConversationVisualizerBase, @@ -80,6 +81,8 @@ def __new__( secrets: dict[str, SecretValue] | dict[str, str] | None = None, delete_on_close: bool = True, tags: dict[str, str] | None = None, + observability_metadata: dict[str, TraceMetadataValue] | None = None, + observability_tags: list[str] | None = None, ) -> "LocalConversation": ... @overload @@ -104,6 +107,8 @@ def __new__( secrets: dict[str, SecretValue] | dict[str, str] | None = None, delete_on_close: bool = True, tags: dict[str, str] | None = None, + observability_metadata: dict[str, TraceMetadataValue] | None = None, + observability_tags: list[str] | None = None, ) -> "RemoteConversation": ... def __new__( @@ -128,6 +133,8 @@ def __new__( secrets: dict[str, SecretValue] | dict[str, str] | None = None, delete_on_close: bool = True, tags: dict[str, str] | None = None, + observability_metadata: dict[str, TraceMetadataValue] | None = None, + observability_tags: list[str] | None = None, ) -> BaseConversation: from openhands.sdk.conversation.impl.local_conversation import LocalConversation from openhands.sdk.conversation.impl.remote_conversation import ( @@ -181,6 +188,8 @@ def __new__( secrets=secrets, delete_on_close=delete_on_close, tags=effective_tags if effective_tags else None, + observability_metadata=observability_metadata, + observability_tags=observability_tags, ) return LocalConversation( @@ -199,4 +208,6 @@ def __new__( secrets=secrets, delete_on_close=delete_on_close, tags=tags, + observability_metadata=observability_metadata, + observability_tags=observability_tags, ) diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index 0ab9dc1cb9..2a517e44f5 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -23,6 +23,7 @@ ConversationID, ConversationTokenCallbackType, StuckDetectionThresholds, + TraceMetadataValue, ) from openhands.sdk.conversation.visualizer import ( ConversationVisualizerBase, @@ -109,6 +110,8 @@ def __init__( delete_on_close: bool = True, cipher: Cipher | None = None, tags: dict[str, str] | None = None, + observability_metadata: dict[str, TraceMetadataValue] | None = None, + observability_tags: list[str] | None = None, **_: object, ): """Initialize the conversation. @@ -150,6 +153,8 @@ def __init__( (lost) on serialization. tags: Optional key-value tags for the conversation. Keys must be lowercase alphanumeric, values up to 256 characters. + observability_metadata: Optional trace metadata for observability backends. + observability_tags: Optional root span tags for observability backends. """ super().__init__() # Initialize with span tracking # Mark cleanup as initiated as early as possible to avoid races or partially @@ -275,7 +280,11 @@ def _default_callback(e): self.update_secrets(secret_values) atexit.register(self.close) - self._start_observability_span(str(desired_id)) + self._start_observability_span( + str(desired_id), + metadata=observability_metadata, + tags=observability_tags, + ) self.delete_on_close = delete_on_close @property diff --git a/openhands-sdk/openhands/sdk/conversation/impl/remote_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/remote_conversation.py index 17bc680bb1..3f5d4a1ef5 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/remote_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/remote_conversation.py @@ -32,6 +32,7 @@ ConversationCallbackType, ConversationID, StuckDetectionThresholds, + TraceMetadataValue, ) from openhands.sdk.conversation.visualizer import ( ConversationVisualizerBase, @@ -665,6 +666,8 @@ def __init__( secrets: Mapping[str, SecretValue] | None = None, delete_on_close: bool = False, tags: dict[str, str] | None = None, + observability_metadata: dict[str, TraceMetadataValue] | None = None, + observability_tags: list[str] | None = None, **_: object, ) -> None: """Remote conversation proxy that talks to an agent server. @@ -693,6 +696,8 @@ def __init__( secrets: Optional secrets to initialize the conversation with tags: Optional key-value tags for the conversation. Keys must be lowercase alphanumeric, values up to 256 characters. + observability_metadata: Optional trace metadata for observability backends. + observability_tags: Optional root span tags for observability backends. """ super().__init__() # Initialize base class with span tracking self.agent = agent @@ -757,8 +762,10 @@ def __init__( "plugins": [p.model_dump() for p in plugins] if plugins else None, # Include hook_config for server-side hooks "hook_config": hook_config.model_dump() if hook_config else None, - # Include tags if provided + # Include tags and observability metadata if provided "tags": tags or {}, + "observability_metadata": observability_metadata or {}, + "observability_tags": observability_tags or [], } if stuck_detection_thresholds is not None: # Convert to StuckDetectionThresholds if dict, then serialize @@ -882,7 +889,11 @@ def run_complete_callback(event: Event) -> None: secret_values: dict[str, SecretValue] = {k: v for k, v in secrets.items()} self.update_secrets(secret_values) - self._start_observability_span(str(self._id)) + self._start_observability_span( + str(self._id), + metadata=observability_metadata, + tags=observability_tags, + ) # All hooks (including SessionStart/SessionEnd) are executed server-side. # hook_config is sent in the creation payload. self.delete_on_close = delete_on_close diff --git a/openhands-sdk/openhands/sdk/conversation/request.py b/openhands-sdk/openhands/sdk/conversation/request.py index 2611845f2c..ff5d31cc43 100644 --- a/openhands-sdk/openhands/sdk/conversation/request.py +++ b/openhands-sdk/openhands/sdk/conversation/request.py @@ -16,7 +16,10 @@ from openhands.sdk.agent.acp_agent import ACPAgent as ACPAgent from openhands.sdk.agent.agent import Agent as Agent from openhands.sdk.agent.base import AgentBase -from openhands.sdk.conversation.types import ConversationTags +from openhands.sdk.conversation.types import ( + ConversationObservabilityMetadata, + ConversationTags, +) from openhands.sdk.hooks import HookConfig from openhands.sdk.llm.message import ImageContent, Message, TextContent from openhands.sdk.plugin import PluginSource @@ -171,6 +174,17 @@ class StartConversationRequest(BaseModel): "alphanumeric. Values are arbitrary strings up to 256 characters." ), ) + observability_metadata: ConversationObservabilityMetadata = Field( + default_factory=dict, + description=( + "Trace-level metadata to attach to observability backends. Values must " + "be scalars or homogeneous scalar lists supported by OpenTelemetry." + ), + ) + observability_tags: list[str] = Field( + default_factory=list, + description="Tags to attach to the conversation root observability span.", + ) autotitle: bool = Field( default=True, description=( diff --git a/openhands-sdk/openhands/sdk/conversation/types.py b/openhands-sdk/openhands/sdk/conversation/types.py index a2b8b7fddb..884cb4a35f 100644 --- a/openhands-sdk/openhands/sdk/conversation/types.py +++ b/openhands-sdk/openhands/sdk/conversation/types.py @@ -1,6 +1,6 @@ import re import uuid -from collections.abc import Callable +from collections.abc import Callable, Sequence from typing import Annotated from pydantic import BaseModel, BeforeValidator, Field @@ -44,6 +44,52 @@ def _validate_tags(v: dict[str, str] | None) -> dict[str, str]: Keys must be lowercase alphanumeric. Values are arbitrary strings up to 256 chars. """ +type TraceMetadataValue = ( + str + | bool + | int + | float + | Sequence[str] + | Sequence[bool] + | Sequence[int] + | Sequence[float] +) + + +def _validate_observability_metadata( + v: dict[str, TraceMetadataValue] | None, +) -> dict[str, TraceMetadataValue]: + if v is None: + return {} + for key, value in v.items(): + if not isinstance(key, str) or not key: + raise ValueError("Observability metadata keys must be non-empty strings") + if isinstance(value, str | bool | int | float): + continue + if isinstance(value, Sequence) and not isinstance(value, bytes | bytearray): + if all(isinstance(item, str) for item in value): + continue + if all(isinstance(item, bool) for item in value): + continue + if all( + isinstance(item, int) and not isinstance(item, bool) for item in value + ): + continue + if all(isinstance(item, float) for item in value): + continue + raise ValueError( + f"Observability metadata value for '{key}' must be a scalar " + "or a sequence of strings, booleans, integers, or floats" + ) + return v + + +ConversationObservabilityMetadata = Annotated[ + dict[str, TraceMetadataValue], + BeforeValidator(_validate_observability_metadata), +] +"""Validated dict of Laminar/OTel trace metadata for a conversation.""" + class StuckDetectionThresholds(BaseModel): """Configuration for stuck detection thresholds. diff --git a/openhands-sdk/openhands/sdk/observability/laminar.py b/openhands-sdk/openhands/sdk/observability/laminar.py index 18b2e5b017..e846ee4f8b 100644 --- a/openhands-sdk/openhands/sdk/observability/laminar.py +++ b/openhands-sdk/openhands/sdk/observability/laminar.py @@ -248,18 +248,29 @@ class RootSpan: traces with no ``session_id``), so we switched to the recommended pattern. """ - def __init__(self, name: str, session_id: str | None = None) -> None: + def __init__( + self, + name: str, + session_id: str | None = None, + metadata: dict[str, Any] | None = None, + tags: list[str] | None = None, + ) -> None: from lmnr import Laminar # ``start_span`` returns a span without attaching it as the current # OTel context; we'll restore it on every entry point via ``use_span``. self.span = Laminar.start_span(name) - if session_id: - # ``set_trace_session_id`` requires an active span; briefly enter - # the span context to apply the session id to the trace metadata. + if session_id or metadata or tags: + # These trace/span helpers require an active span; briefly enter + # the span context to apply conversation-level observability data. with contextlib.suppress(Exception): with Laminar.use_span(self.span): - Laminar.set_trace_session_id(session_id) + if session_id: + Laminar.set_trace_session_id(session_id) + if metadata: + Laminar.set_trace_metadata(metadata) + if tags: + Laminar.set_span_tags(tags) self._ended = False def end(self) -> None: @@ -273,7 +284,12 @@ def end(self) -> None: logger.debug("Error ending observability root span", exc_info=True) -def start_root_span(name: str, session_id: str | None = None) -> RootSpan | None: +def start_root_span( + name: str, + session_id: str | None = None, + metadata: dict[str, Any] | None = None, + tags: list[str] | None = None, +) -> RootSpan | None: """Create a long-lived root span for an owning object. Returns ``None`` if observability is not enabled. @@ -281,7 +297,7 @@ def start_root_span(name: str, session_id: str | None = None) -> RootSpan | None if not should_enable_observability(): return None try: - return RootSpan(name, session_id=session_id) + return RootSpan(name, session_id=session_id, metadata=metadata, tags=tags) except Exception: logger.debug("Failed to create observability root span", exc_info=True) return None diff --git a/openhands-sdk/openhands/sdk/settings/model.py b/openhands-sdk/openhands/sdk/settings/model.py index 6fff5b7f45..bdd3845887 100644 --- a/openhands-sdk/openhands/sdk/settings/model.py +++ b/openhands-sdk/openhands/sdk/settings/model.py @@ -33,6 +33,7 @@ from openhands.sdk.context.agent_context import AgentContext from openhands.sdk.conversation.request import SendMessageRequest +from openhands.sdk.conversation.types import TraceMetadataValue from openhands.sdk.hooks import HookConfig from openhands.sdk.llm import LLM from openhands.sdk.logger import get_logger @@ -530,6 +531,16 @@ class ConversationSettings(BaseModel): exclude=True, description="Repository selected for the conversation.", ) + observability_metadata: dict[str, TraceMetadataValue] | None = Field( + default=None, + exclude=True, + description="Trace-level metadata for observability backends.", + ) + observability_tags: list[str] | None = Field( + default=None, + exclude=True, + description="Tags for the conversation root observability span.", + ) # --- persisted fields --------------------------------------------------- max_iterations: int = Field( @@ -650,6 +661,10 @@ def _start_request_kwargs(self, **kwargs: Any) -> dict[str, Any]: payload.setdefault("plugins", self.plugins) if self.hook_config is not None: payload.setdefault("hook_config", self.hook_config) + if self.observability_metadata is not None: + payload.setdefault("observability_metadata", self.observability_metadata) + if self.observability_tags is not None: + payload.setdefault("observability_tags", self.observability_tags) # --- persisted defaults --------------------------------------------- payload.setdefault("confirmation_policy", self._build_confirmation_policy()) diff --git a/tests/sdk/conversation/test_base_span_management.py b/tests/sdk/conversation/test_base_span_management.py index e70e13d7c8..f6b7b249ca 100644 --- a/tests/sdk/conversation/test_base_span_management.py +++ b/tests/sdk/conversation/test_base_span_management.py @@ -94,7 +94,10 @@ def test_base_conversation_span_management(): # Start span conversation._start_observability_span("test-session-id") mock_start_span.assert_called_once_with( - "conversation", session_id="test-session-id" + "conversation", + session_id="test-session-id", + metadata=None, + tags=None, ) assert conversation._span_ended is False assert conversation._observability_root_span is fake_root @@ -116,6 +119,31 @@ def test_base_conversation_span_management(): assert conversation._span_ended is True +def test_base_conversation_passes_observability_metadata(): + conversation = MockConversation() + + with ( + patch( + "openhands.sdk.conversation.base.should_enable_observability", + return_value=True, + ), + patch("openhands.sdk.conversation.base.start_root_span") as mock_start_span, + ): + metadata = {"repo_name": "OpenHands/software-agent-sdk"} + tags = ["repo:OpenHands/software-agent-sdk"] + + conversation._start_observability_span( + "test-session-id", metadata=metadata, tags=tags + ) + + mock_start_span.assert_called_once_with( + "conversation", + session_id="test-session-id", + metadata=metadata, + tags=tags, + ) + + def test_base_conversation_span_management_disabled(): """Test that BaseConversation doesn't perform span operations when observability is disabled.""" # noqa: E501 diff --git a/tests/sdk/conversation/test_tags.py b/tests/sdk/conversation/test_tags.py index a38b379e3f..8662588a55 100644 --- a/tests/sdk/conversation/test_tags.py +++ b/tests/sdk/conversation/test_tags.py @@ -5,7 +5,9 @@ from openhands.sdk.conversation.types import ( TAG_VALUE_MAX_LENGTH, + ConversationObservabilityMetadata, ConversationTags, + _validate_observability_metadata, _validate_tags, ) @@ -84,3 +86,39 @@ class TestModel(BaseModel): # Invalid key rejected with pytest.raises(ValidationError): TestModel(tags={"BAD": "value"}) + + +def test_validate_observability_metadata_valid(): + metadata = { + "repo_name": "OpenHands/software-agent-sdk", + "private": True, + "retry_count": 3, + "cost": 1.5, + "labels": ["repo", "cloud"], + "flags": [True, False], + "counts": [1, 2], + "scores": [0.1, 0.2], + } + + assert _validate_observability_metadata(metadata) == metadata + + +def test_validate_observability_metadata_rejects_nested_values(): + with pytest.raises(ValueError, match="must be a scalar"): + _validate_observability_metadata({"repo": {"name": "openhands"}}) # type: ignore[dict-item] + + +def test_observability_metadata_in_pydantic_model(): + from pydantic import BaseModel + + class TestModel(BaseModel): + observability_metadata: ConversationObservabilityMetadata = {} + + m = TestModel(observability_metadata={"repo_name": "OpenHands/OpenHands"}) + assert m.observability_metadata == {"repo_name": "OpenHands/OpenHands"} + + m = TestModel.model_validate({"observability_metadata": None}) + assert m.observability_metadata == {} + + with pytest.raises(ValidationError): + TestModel(observability_metadata={"nested": {"bad": True}}) diff --git a/tests/sdk/observability/test_laminar.py b/tests/sdk/observability/test_laminar.py index bb22d736a1..7f125ed474 100644 --- a/tests/sdk/observability/test_laminar.py +++ b/tests/sdk/observability/test_laminar.py @@ -438,6 +438,34 @@ def contextlib_compat(): return contextlib.contextmanager +def test_root_span_sets_trace_metadata_and_tags(): + from openhands.sdk.observability.laminar import RootSpan + + fake_span = MagicMock() + fake_context = MagicMock() + + with patch("lmnr.Laminar") as mock_laminar: + mock_laminar.start_span.return_value = fake_span + mock_laminar.use_span.return_value.__enter__.return_value = fake_context + + RootSpan( + "conversation", + session_id="session-1", + metadata={"repo_name": "OpenHands/software-agent-sdk"}, + tags=["repo:OpenHands/software-agent-sdk"], + ) + + mock_laminar.start_span.assert_called_once_with("conversation") + mock_laminar.use_span.assert_called_once_with(fake_span) + mock_laminar.set_trace_session_id.assert_called_once_with("session-1") + mock_laminar.set_trace_metadata.assert_called_once_with( + {"repo_name": "OpenHands/software-agent-sdk"} + ) + mock_laminar.set_span_tags.assert_called_once_with( + ["repo:OpenHands/software-agent-sdk"] + ) + + def test_deprecated_shims_emit_warnings(): """The legacy global-stack API must emit DeprecationWarning so external callers (none found in the org-wide audit, but still) are alerted before From 9d8db8f72fe4057d590e57da97f0f56c2f950176 Mon Sep 17 00:00:00 2001 From: Graham Neubig <398875+neubig@users.noreply.github.com> Date: Sun, 24 May 2026 16:39:18 -0400 Subject: [PATCH 2/5] test(observability): satisfy pyright for metadata tests --- tests/sdk/conversation/test_base_span_management.py | 5 ++++- tests/sdk/conversation/test_tags.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/sdk/conversation/test_base_span_management.py b/tests/sdk/conversation/test_base_span_management.py index afb3353b70..7cf66fd08f 100644 --- a/tests/sdk/conversation/test_base_span_management.py +++ b/tests/sdk/conversation/test_base_span_management.py @@ -7,6 +7,7 @@ from openhands.sdk.conversation.base import BaseConversation from openhands.sdk.conversation.conversation_stats import ConversationStats +from openhands.sdk.conversation.types import TraceMetadataValue from openhands.sdk.llm.llm import LLM from openhands.sdk.tool.schema import Action, Observation @@ -130,7 +131,9 @@ def test_base_conversation_passes_observability_metadata(): ), patch("openhands.sdk.conversation.base.start_root_span") as mock_start_span, ): - metadata = {"repo_name": "OpenHands/software-agent-sdk"} + metadata: dict[str, TraceMetadataValue] = { + "repo_name": "OpenHands/software-agent-sdk" + } tags = ["repo:OpenHands/software-agent-sdk"] conversation._start_observability_span( diff --git a/tests/sdk/conversation/test_tags.py b/tests/sdk/conversation/test_tags.py index 8662588a55..2efbe558a7 100644 --- a/tests/sdk/conversation/test_tags.py +++ b/tests/sdk/conversation/test_tags.py @@ -121,4 +121,4 @@ class TestModel(BaseModel): assert m.observability_metadata == {} with pytest.raises(ValidationError): - TestModel(observability_metadata={"nested": {"bad": True}}) + TestModel.model_validate({"observability_metadata": {"nested": {"bad": True}}}) From 015b17cb7322cc85cb10a2d3416dedecf48b57ec Mon Sep 17 00:00:00 2001 From: openhands Date: Wed, 27 May 2026 04:32:08 +0000 Subject: [PATCH 3/5] chore: address PR review feedback (#3270) Co-authored-by: openhands --- openhands-sdk/openhands/sdk/conversation/types.py | 6 ++++-- openhands-sdk/openhands/sdk/settings/model.py | 7 +++++-- tests/sdk/conversation/test_tags.py | 5 +++++ tests/sdk/test_settings.py | 13 ++++++++++++- 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/openhands-sdk/openhands/sdk/conversation/types.py b/openhands-sdk/openhands/sdk/conversation/types.py index 884cb4a35f..5a5ab485b0 100644 --- a/openhands-sdk/openhands/sdk/conversation/types.py +++ b/openhands-sdk/openhands/sdk/conversation/types.py @@ -1,7 +1,7 @@ import re import uuid from collections.abc import Callable, Sequence -from typing import Annotated +from typing import Annotated, Any from pydantic import BaseModel, BeforeValidator, Field @@ -57,10 +57,12 @@ def _validate_tags(v: dict[str, str] | None) -> dict[str, str]: def _validate_observability_metadata( - v: dict[str, TraceMetadataValue] | None, + v: Any, ) -> dict[str, TraceMetadataValue]: if v is None: return {} + if not isinstance(v, dict): + raise ValueError("Observability metadata must be a dictionary") for key, value in v.items(): if not isinstance(key, str) or not key: raise ValueError("Observability metadata keys must be non-empty strings") diff --git a/openhands-sdk/openhands/sdk/settings/model.py b/openhands-sdk/openhands/sdk/settings/model.py index ef48c7e4bc..ee13a816e4 100644 --- a/openhands-sdk/openhands/sdk/settings/model.py +++ b/openhands-sdk/openhands/sdk/settings/model.py @@ -33,7 +33,10 @@ from openhands.sdk.context.agent_context import AgentContext from openhands.sdk.conversation.request import SendMessageRequest -from openhands.sdk.conversation.types import TraceMetadataValue +from openhands.sdk.conversation.types import ( + ConversationObservabilityMetadata, + TraceMetadataValue, +) from openhands.sdk.hooks import HookConfig from openhands.sdk.llm import LLM from openhands.sdk.logger import get_logger @@ -531,7 +534,7 @@ class ConversationSettings(BaseModel): exclude=True, description="Repository selected for the conversation.", ) - observability_metadata: dict[str, TraceMetadataValue] | None = Field( + observability_metadata: ConversationObservabilityMetadata | None = Field( default=None, exclude=True, description="Trace-level metadata for observability backends.", diff --git a/tests/sdk/conversation/test_tags.py b/tests/sdk/conversation/test_tags.py index 2efbe558a7..7b05f4cfca 100644 --- a/tests/sdk/conversation/test_tags.py +++ b/tests/sdk/conversation/test_tags.py @@ -103,6 +103,11 @@ def test_validate_observability_metadata_valid(): assert _validate_observability_metadata(metadata) == metadata +def test_validate_observability_metadata_rejects_non_dict(): + with pytest.raises(ValueError, match="must be a dictionary"): + _validate_observability_metadata([]) + + def test_validate_observability_metadata_rejects_nested_values(): with pytest.raises(ValueError, match="must be a scalar"): _validate_observability_metadata({"repo": {"name": "openhands"}}) # type: ignore[dict-item] diff --git a/tests/sdk/test_settings.py b/tests/sdk/test_settings.py index ff27ca5a1b..9cc511532d 100644 --- a/tests/sdk/test_settings.py +++ b/tests/sdk/test_settings.py @@ -3,7 +3,7 @@ import pytest from fastmcp.mcp_config import MCPConfig -from pydantic import SecretStr +from pydantic import SecretStr, ValidationError from openhands.agent_server.models import StartConversationRequest from openhands.sdk import ( @@ -175,6 +175,17 @@ def test_conversation_settings_export_schema_groups_sections() -> None: assert verification_fields["security_analyzer"].depends_on == ["confirmation_mode"] +def test_conversation_settings_validates_observability_metadata() -> None: + settings = ConversationSettings(observability_metadata={"repo": "OpenHands/sdk"}) + assert settings.observability_metadata == {"repo": "OpenHands/sdk"} + + with pytest.raises(ValidationError): + ConversationSettings(observability_metadata={"": "missing-key"}) + + with pytest.raises(ValidationError): + ConversationSettings(observability_metadata=[]) # type: ignore[arg-type] + + def test_conversation_settings_model_dump_roundtrip() -> None: settings = ConversationSettings( max_iterations=42, From dd37fffb334201c7a33493910b1992eb47298fa6 Mon Sep 17 00:00:00 2001 From: openhands Date: Wed, 27 May 2026 04:36:38 +0000 Subject: [PATCH 4/5] fix: remove unused metadata type import Co-authored-by: openhands --- openhands-sdk/openhands/sdk/settings/model.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/openhands-sdk/openhands/sdk/settings/model.py b/openhands-sdk/openhands/sdk/settings/model.py index ee13a816e4..7d82039d6a 100644 --- a/openhands-sdk/openhands/sdk/settings/model.py +++ b/openhands-sdk/openhands/sdk/settings/model.py @@ -33,10 +33,7 @@ from openhands.sdk.context.agent_context import AgentContext from openhands.sdk.conversation.request import SendMessageRequest -from openhands.sdk.conversation.types import ( - ConversationObservabilityMetadata, - TraceMetadataValue, -) +from openhands.sdk.conversation.types import ConversationObservabilityMetadata from openhands.sdk.hooks import HookConfig from openhands.sdk.llm import LLM from openhands.sdk.logger import get_logger From 725d8f5bfab6e37d54c3f7edd47cd33fde8609d7 Mon Sep 17 00:00:00 2001 From: openhands Date: Wed, 27 May 2026 04:48:02 +0000 Subject: [PATCH 5/5] fix: make observability validation API-safe Co-authored-by: openhands --- .../openhands/agent_server/api.py | 4 +++ .../openhands/sdk/conversation/request.py | 3 ++- .../openhands/sdk/conversation/types.py | 17 ++++++++++++ openhands-sdk/openhands/sdk/settings/model.py | 7 +++-- .../test_validation_error_sanitization.py | 14 ++++++++++ .../remote/test_remote_conversation.py | 26 +++++++++++++++++++ tests/sdk/conversation/test_tags.py | 17 ++++++++++++ tests/sdk/test_settings.py | 3 +++ 8 files changed, 88 insertions(+), 3 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/api.py b/openhands-agent-server/openhands/agent_server/api.py index 070080ed32..d8221c50db 100644 --- a/openhands-agent-server/openhands/agent_server/api.py +++ b/openhands-agent-server/openhands/agent_server/api.py @@ -374,6 +374,10 @@ def _sanitize_validation_errors(errors: Sequence[Any]) -> list[dict]: error = dict(error) # shallow copy so we don't mutate the original if "input" in error: error["input"] = sanitize_dict(error["input"]) + if isinstance(error.get("ctx"), dict) and isinstance( + error["ctx"].get("error"), Exception + ): + error["ctx"] = {**error["ctx"], "error": str(error["ctx"]["error"])} sanitized.append(error) return sanitized diff --git a/openhands-sdk/openhands/sdk/conversation/request.py b/openhands-sdk/openhands/sdk/conversation/request.py index b6ccfcc346..402ad5efa0 100644 --- a/openhands-sdk/openhands/sdk/conversation/request.py +++ b/openhands-sdk/openhands/sdk/conversation/request.py @@ -18,6 +18,7 @@ from openhands.sdk.agent.base import AgentBase from openhands.sdk.conversation.types import ( ConversationObservabilityMetadata, + ConversationObservabilityTags, ConversationTags, ) from openhands.sdk.hooks import HookConfig @@ -189,7 +190,7 @@ class StartConversationRequest(BaseModel): "be scalars or homogeneous scalar lists supported by OpenTelemetry." ), ) - observability_tags: list[str] = Field( + observability_tags: ConversationObservabilityTags = Field( default_factory=list, description="Tags to attach to the conversation root observability span.", ) diff --git a/openhands-sdk/openhands/sdk/conversation/types.py b/openhands-sdk/openhands/sdk/conversation/types.py index 5a5ab485b0..39036a1293 100644 --- a/openhands-sdk/openhands/sdk/conversation/types.py +++ b/openhands-sdk/openhands/sdk/conversation/types.py @@ -93,6 +93,23 @@ def _validate_observability_metadata( """Validated dict of Laminar/OTel trace metadata for a conversation.""" +def _validate_observability_tags(v: Any) -> list[str]: + if v is None: + return [] + if not isinstance(v, list): + raise ValueError("Observability tags must be a list") + if not all(isinstance(tag, str) for tag in v): + raise ValueError("Observability tags must contain only strings") + return v + + +ConversationObservabilityTags = Annotated[ + list[str], + BeforeValidator(_validate_observability_tags), +] +"""Validated list of Laminar/OTel span tags for a conversation.""" + + class StuckDetectionThresholds(BaseModel): """Configuration for stuck detection thresholds. diff --git a/openhands-sdk/openhands/sdk/settings/model.py b/openhands-sdk/openhands/sdk/settings/model.py index 7d82039d6a..86b953a653 100644 --- a/openhands-sdk/openhands/sdk/settings/model.py +++ b/openhands-sdk/openhands/sdk/settings/model.py @@ -33,7 +33,10 @@ from openhands.sdk.context.agent_context import AgentContext from openhands.sdk.conversation.request import SendMessageRequest -from openhands.sdk.conversation.types import ConversationObservabilityMetadata +from openhands.sdk.conversation.types import ( + ConversationObservabilityMetadata, + ConversationObservabilityTags, +) from openhands.sdk.hooks import HookConfig from openhands.sdk.llm import LLM from openhands.sdk.logger import get_logger @@ -536,7 +539,7 @@ class ConversationSettings(BaseModel): exclude=True, description="Trace-level metadata for observability backends.", ) - observability_tags: list[str] | None = Field( + observability_tags: ConversationObservabilityTags | None = Field( default=None, exclude=True, description="Tags for the conversation root observability span.", diff --git a/tests/agent_server/test_validation_error_sanitization.py b/tests/agent_server/test_validation_error_sanitization.py index fa5af7c9e2..38d577e343 100644 --- a/tests/agent_server/test_validation_error_sanitization.py +++ b/tests/agent_server/test_validation_error_sanitization.py @@ -165,6 +165,20 @@ def test_redacts_multiple_secret_patterns(self): assert inp["x_session_id"] == "" assert inp["name"] == "safe_value" + def test_stringifies_value_error_context(self): + """ValueError in ctx should not break JSONResponse rendering.""" + errors = [ + { + "type": "value_error", + "loc": ["body", "observability_metadata"], + "msg": "Value error, bad metadata", + "input": {"nested": {"bad": True}}, + "ctx": {"error": ValueError("bad metadata")}, + } + ] + result = _sanitize_validation_errors(errors) + assert result[0]["ctx"]["error"] == "bad metadata" + def test_empty_errors_list(self): """An empty error list should return an empty list.""" assert _sanitize_validation_errors([]) == [] diff --git a/tests/sdk/conversation/remote/test_remote_conversation.py b/tests/sdk/conversation/remote/test_remote_conversation.py index 4a4de21c81..761d573dd6 100644 --- a/tests/sdk/conversation/remote/test_remote_conversation.py +++ b/tests/sdk/conversation/remote/test_remote_conversation.py @@ -173,6 +173,32 @@ def test_remote_conversation_initialization_new_conversation(self, mock_ws_clien "to fetch initial events" ) + @patch( + "openhands.sdk.conversation.impl.remote_conversation.WebSocketCallbackClient" + ) + def test_remote_conversation_sends_observability_fields(self, mock_ws_client): + conversation_id = str(uuid.uuid4()) + mock_client_instance = self.setup_mock_client(conversation_id=conversation_id) + mock_ws_client.return_value = Mock() + + RemoteConversation( + agent=self.agent, + workspace=self.workspace, + observability_metadata={"repo": "OpenHands/software-agent-sdk"}, + observability_tags=["sdk", "remote"], + ) + + create_call = next( + call + for call in mock_client_instance.request.call_args_list + if call[0][0] == "POST" and call[0][1] == "/api/conversations" + ) + payload = create_call.kwargs["json"] + assert payload["observability_metadata"] == { + "repo": "OpenHands/software-agent-sdk" + } + assert payload["observability_tags"] == ["sdk", "remote"] + @patch( "openhands.sdk.conversation.impl.remote_conversation.WebSocketCallbackClient" ) diff --git a/tests/sdk/conversation/test_tags.py b/tests/sdk/conversation/test_tags.py index 7b05f4cfca..eba6ba8964 100644 --- a/tests/sdk/conversation/test_tags.py +++ b/tests/sdk/conversation/test_tags.py @@ -6,6 +6,7 @@ from openhands.sdk.conversation.types import ( TAG_VALUE_MAX_LENGTH, ConversationObservabilityMetadata, + ConversationObservabilityTags, ConversationTags, _validate_observability_metadata, _validate_tags, @@ -127,3 +128,19 @@ class TestModel(BaseModel): with pytest.raises(ValidationError): TestModel.model_validate({"observability_metadata": {"nested": {"bad": True}}}) + + +def test_observability_tags_in_pydantic_model(): + from pydantic import BaseModel + + class TestModel(BaseModel): + observability_tags: ConversationObservabilityTags = [] + + m = TestModel(observability_tags=["repo", "cloud"]) + assert m.observability_tags == ["repo", "cloud"] + + m = TestModel.model_validate({"observability_tags": None}) + assert m.observability_tags == [] + + with pytest.raises(ValidationError): + TestModel.model_validate({"observability_tags": [1, 2]}) diff --git a/tests/sdk/test_settings.py b/tests/sdk/test_settings.py index 9cc511532d..68aab573dd 100644 --- a/tests/sdk/test_settings.py +++ b/tests/sdk/test_settings.py @@ -185,6 +185,9 @@ def test_conversation_settings_validates_observability_metadata() -> None: with pytest.raises(ValidationError): ConversationSettings(observability_metadata=[]) # type: ignore[arg-type] + with pytest.raises(ValidationError): + ConversationSettings(observability_tags=[1, 2]) # type: ignore[list-item] + def test_conversation_settings_model_dump_roundtrip() -> None: settings = ConversationSettings(