diff --git a/openhands-agent-server/openhands/agent_server/api.py b/openhands-agent-server/openhands/agent_server/api.py index 070080ed32..d8221c50db 100644 --- a/openhands-agent-server/openhands/agent_server/api.py +++ b/openhands-agent-server/openhands/agent_server/api.py @@ -374,6 +374,10 @@ def _sanitize_validation_errors(errors: Sequence[Any]) -> list[dict]: error = dict(error) # shallow copy so we don't mutate the original if "input" in error: error["input"] = sanitize_dict(error["input"]) + if isinstance(error.get("ctx"), dict) and isinstance( + error["ctx"].get("error"), Exception + ): + error["ctx"] = {**error["ctx"], "error": str(error["ctx"]["error"])} sanitized.append(error) return sanitized diff --git a/openhands-agent-server/openhands/agent_server/event_service.py b/openhands-agent-server/openhands/agent_server/event_service.py index 0a5a501008..3e2135d72a 100644 --- a/openhands-agent-server/openhands/agent_server/event_service.py +++ b/openhands-agent-server/openhands/agent_server/event_service.py @@ -676,6 +676,8 @@ def _token_streaming_callback(chunk: LLMStreamChunk) -> None: hook_config=self.stored.hook_config, tags=self.stored.tags, user_id=self.stored.user_id, + observability_metadata=self.stored.observability_metadata, + observability_tags=self.stored.observability_tags, ) conversation.set_confirmation_policy(self.stored.confirmation_policy) diff --git a/openhands-sdk/openhands/sdk/conversation/base.py b/openhands-sdk/openhands/sdk/conversation/base.py index c6f56da5ee..1a400356f6 100644 --- a/openhands-sdk/openhands/sdk/conversation/base.py +++ b/openhands-sdk/openhands/sdk/conversation/base.py @@ -10,6 +10,7 @@ ConversationCallbackType, ConversationID, ConversationTokenCallbackType, + TraceMetadataValue, ) from openhands.sdk.llm.llm import LLM from openhands.sdk.llm.message import Message @@ -128,13 +129,19 @@ def __init__(self) -> None: self._observability_root_span: RootSpan | None = None def _start_observability_span( - self, session_id: str, user_id: str | None = None + self, + session_id: str, + user_id: str | None = None, + metadata: dict[str, TraceMetadataValue] | None = None, + tags: list[str] | None = None, ) -> None: """Start a per-conversation observability root span. Args: session_id: The session ID to associate with the trace user_id: Optional user ID to associate with the trace + metadata: Optional trace-level metadata to attach to observability backends + tags: Optional span tags to attach to the conversation root span """ if not should_enable_observability(): return @@ -142,7 +149,11 @@ def _start_observability_span( # Idempotent: never start two roots for one conversation. return self._observability_root_span = start_root_span( - "conversation", session_id=session_id, user_id=user_id + "conversation", + session_id=session_id, + user_id=user_id, + metadata=metadata, + tags=tags, ) def _end_observability_span(self) -> None: diff --git a/openhands-sdk/openhands/sdk/conversation/conversation.py b/openhands-sdk/openhands/sdk/conversation/conversation.py index 3c298fa8c7..778bab4b4e 100644 --- a/openhands-sdk/openhands/sdk/conversation/conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/conversation.py @@ -9,6 +9,7 @@ ConversationID, ConversationTokenCallbackType, StuckDetectionThresholds, + TraceMetadataValue, ) from openhands.sdk.conversation.visualizer import ( ConversationVisualizerBase, @@ -81,6 +82,8 @@ def __new__( delete_on_close: bool = True, tags: dict[str, str] | None = None, user_id: str | None = None, + observability_metadata: dict[str, TraceMetadataValue] | None = None, + observability_tags: list[str] | None = None, ) -> "LocalConversation": ... @overload @@ -106,6 +109,8 @@ def __new__( delete_on_close: bool = True, tags: dict[str, str] | None = None, user_id: str | None = None, + observability_metadata: dict[str, TraceMetadataValue] | None = None, + observability_tags: list[str] | None = None, ) -> "RemoteConversation": ... def __new__( @@ -131,6 +136,8 @@ def __new__( delete_on_close: bool = True, tags: dict[str, str] | None = None, user_id: str | None = None, + observability_metadata: dict[str, TraceMetadataValue] | None = None, + observability_tags: list[str] | None = None, ) -> BaseConversation: from openhands.sdk.conversation.impl.local_conversation import LocalConversation from openhands.sdk.conversation.impl.remote_conversation import ( @@ -185,6 +192,8 @@ def __new__( delete_on_close=delete_on_close, tags=effective_tags if effective_tags else None, user_id=user_id, + observability_metadata=observability_metadata, + observability_tags=observability_tags, ) return LocalConversation( @@ -204,4 +213,6 @@ def __new__( delete_on_close=delete_on_close, tags=tags, user_id=user_id, + observability_metadata=observability_metadata, + observability_tags=observability_tags, ) diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index a6bb1ec60e..a0341ab6f0 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -25,6 +25,7 @@ ConversationID, ConversationTokenCallbackType, StuckDetectionThresholds, + TraceMetadataValue, ) from openhands.sdk.conversation.visualizer import ( ConversationVisualizerBase, @@ -116,6 +117,8 @@ def __init__( cipher: Cipher | None = None, tags: dict[str, str] | None = None, user_id: str | None = None, + observability_metadata: dict[str, TraceMetadataValue] | None = None, + observability_tags: list[str] | None = None, **_: object, ): """Initialize the conversation. @@ -157,6 +160,9 @@ def __init__( (lost) on serialization. tags: Optional key-value tags for the conversation. Keys must be lowercase alphanumeric, values up to 256 characters. + user_id: Optional user ID to associate with observability traces. + observability_metadata: Optional trace metadata for observability backends. + observability_tags: Optional root span tags for observability backends. """ super().__init__() # Initialize with span tracking # Mark cleanup as initiated as early as possible to avoid races or partially @@ -284,7 +290,12 @@ def _default_callback(e): self.update_secrets(secret_values) atexit.register(self.close) - self._start_observability_span(str(desired_id), user_id=user_id) + self._start_observability_span( + str(desired_id), + user_id=user_id, + metadata=observability_metadata, + tags=observability_tags, + ) self.delete_on_close = delete_on_close @property diff --git a/openhands-sdk/openhands/sdk/conversation/impl/remote_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/remote_conversation.py index 4b40f357c9..f4db95b113 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/remote_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/remote_conversation.py @@ -32,6 +32,7 @@ ConversationCallbackType, ConversationID, StuckDetectionThresholds, + TraceMetadataValue, ) from openhands.sdk.conversation.visualizer import ( ConversationVisualizerBase, @@ -666,6 +667,8 @@ def __init__( delete_on_close: bool = False, tags: dict[str, str] | None = None, user_id: str | None = None, + observability_metadata: dict[str, TraceMetadataValue] | None = None, + observability_tags: list[str] | None = None, **_: object, ) -> None: """Remote conversation proxy that talks to an agent server. @@ -695,6 +698,8 @@ def __init__( tags: Optional key-value tags for the conversation. Keys must be lowercase alphanumeric, values up to 256 characters. user_id: Optional user ID to associate with observability traces + observability_metadata: Optional trace metadata for observability backends. + observability_tags: Optional root span tags for observability backends. """ super().__init__() # Initialize base class with span tracking self.agent = agent @@ -759,8 +764,10 @@ def __init__( "plugins": [p.model_dump() for p in plugins] if plugins else None, # Include hook_config for server-side hooks "hook_config": hook_config.model_dump() if hook_config else None, - # Include tags if provided + # Include tags and observability metadata if provided "tags": tags or {}, + "observability_metadata": observability_metadata or {}, + "observability_tags": observability_tags or [], } if stuck_detection_thresholds is not None: # Convert to StuckDetectionThresholds if dict, then serialize @@ -884,7 +891,12 @@ def run_complete_callback(event: Event) -> None: secret_values: dict[str, SecretValue] = {k: v for k, v in secrets.items()} self.update_secrets(secret_values) - self._start_observability_span(str(self._id), user_id=user_id) + self._start_observability_span( + str(self._id), + user_id=user_id, + metadata=observability_metadata, + tags=observability_tags, + ) # All hooks (including SessionStart/SessionEnd) are executed server-side. # hook_config is sent in the creation payload. self.delete_on_close = delete_on_close diff --git a/openhands-sdk/openhands/sdk/conversation/request.py b/openhands-sdk/openhands/sdk/conversation/request.py index 1cd8266925..402ad5efa0 100644 --- a/openhands-sdk/openhands/sdk/conversation/request.py +++ b/openhands-sdk/openhands/sdk/conversation/request.py @@ -16,7 +16,11 @@ from openhands.sdk.agent.acp_agent import ACPAgent as ACPAgent from openhands.sdk.agent.agent import Agent as Agent from openhands.sdk.agent.base import AgentBase -from openhands.sdk.conversation.types import ConversationTags +from openhands.sdk.conversation.types import ( + ConversationObservabilityMetadata, + ConversationObservabilityTags, + ConversationTags, +) from openhands.sdk.hooks import HookConfig from openhands.sdk.llm.message import ImageContent, Message, TextContent from openhands.sdk.plugin import PluginSource @@ -179,6 +183,17 @@ class StartConversationRequest(BaseModel): "traces can be queried by user." ), ) + observability_metadata: ConversationObservabilityMetadata = Field( + default_factory=dict, + description=( + "Trace-level metadata to attach to observability backends. Values must " + "be scalars or homogeneous scalar lists supported by OpenTelemetry." + ), + ) + observability_tags: ConversationObservabilityTags = Field( + default_factory=list, + description="Tags to attach to the conversation root observability span.", + ) autotitle: bool = Field( default=True, description=( diff --git a/openhands-sdk/openhands/sdk/conversation/types.py b/openhands-sdk/openhands/sdk/conversation/types.py index a2b8b7fddb..39036a1293 100644 --- a/openhands-sdk/openhands/sdk/conversation/types.py +++ b/openhands-sdk/openhands/sdk/conversation/types.py @@ -1,7 +1,7 @@ import re import uuid -from collections.abc import Callable -from typing import Annotated +from collections.abc import Callable, Sequence +from typing import Annotated, Any from pydantic import BaseModel, BeforeValidator, Field @@ -44,6 +44,71 @@ def _validate_tags(v: dict[str, str] | None) -> dict[str, str]: Keys must be lowercase alphanumeric. Values are arbitrary strings up to 256 chars. """ +type TraceMetadataValue = ( + str + | bool + | int + | float + | Sequence[str] + | Sequence[bool] + | Sequence[int] + | Sequence[float] +) + + +def _validate_observability_metadata( + v: Any, +) -> dict[str, TraceMetadataValue]: + if v is None: + return {} + if not isinstance(v, dict): + raise ValueError("Observability metadata must be a dictionary") + for key, value in v.items(): + if not isinstance(key, str) or not key: + raise ValueError("Observability metadata keys must be non-empty strings") + if isinstance(value, str | bool | int | float): + continue + if isinstance(value, Sequence) and not isinstance(value, bytes | bytearray): + if all(isinstance(item, str) for item in value): + continue + if all(isinstance(item, bool) for item in value): + continue + if all( + isinstance(item, int) and not isinstance(item, bool) for item in value + ): + continue + if all(isinstance(item, float) for item in value): + continue + raise ValueError( + f"Observability metadata value for '{key}' must be a scalar " + "or a sequence of strings, booleans, integers, or floats" + ) + return v + + +ConversationObservabilityMetadata = Annotated[ + dict[str, TraceMetadataValue], + BeforeValidator(_validate_observability_metadata), +] +"""Validated dict of Laminar/OTel trace metadata for a conversation.""" + + +def _validate_observability_tags(v: Any) -> list[str]: + if v is None: + return [] + if not isinstance(v, list): + raise ValueError("Observability tags must be a list") + if not all(isinstance(tag, str) for tag in v): + raise ValueError("Observability tags must contain only strings") + return v + + +ConversationObservabilityTags = Annotated[ + list[str], + BeforeValidator(_validate_observability_tags), +] +"""Validated list of Laminar/OTel span tags for a conversation.""" + class StuckDetectionThresholds(BaseModel): """Configuration for stuck detection thresholds. diff --git a/openhands-sdk/openhands/sdk/observability/laminar.py b/openhands-sdk/openhands/sdk/observability/laminar.py index fd9701eccb..e449b08858 100644 --- a/openhands-sdk/openhands/sdk/observability/laminar.py +++ b/openhands-sdk/openhands/sdk/observability/laminar.py @@ -253,21 +253,27 @@ def __init__( name: str, session_id: str | None = None, user_id: str | None = None, + metadata: dict[str, Any] | None = None, + tags: list[str] | None = None, ) -> None: from lmnr import Laminar # ``start_span`` returns a span without attaching it as the current # OTel context; we'll restore it on every entry point via ``use_span``. self.span = Laminar.start_span(name) - if session_id or user_id: - # ``set_trace_session_id`` / ``set_trace_user_id`` require an - # active span; briefly enter the span context to apply them. + if session_id or user_id or metadata or tags: + # These trace/span helpers require an active span; briefly enter + # the span context to apply conversation-level observability data. with contextlib.suppress(Exception): with Laminar.use_span(self.span): if session_id: Laminar.set_trace_session_id(session_id) if user_id: Laminar.set_trace_user_id(user_id) + if metadata: + Laminar.set_trace_metadata(metadata) + if tags: + Laminar.set_span_tags(tags) self._ended = False def end(self) -> None: @@ -285,6 +291,8 @@ def start_root_span( name: str, session_id: str | None = None, user_id: str | None = None, + metadata: dict[str, Any] | None = None, + tags: list[str] | None = None, ) -> RootSpan | None: """Create a long-lived root span for an owning object. @@ -293,7 +301,13 @@ def start_root_span( if not should_enable_observability(): return None try: - return RootSpan(name, session_id=session_id, user_id=user_id) + return RootSpan( + name, + session_id=session_id, + user_id=user_id, + metadata=metadata, + tags=tags, + ) except Exception: logger.debug("Failed to create observability root span", exc_info=True) return None diff --git a/openhands-sdk/openhands/sdk/settings/model.py b/openhands-sdk/openhands/sdk/settings/model.py index 1a1fcb589b..86b953a653 100644 --- a/openhands-sdk/openhands/sdk/settings/model.py +++ b/openhands-sdk/openhands/sdk/settings/model.py @@ -33,6 +33,10 @@ from openhands.sdk.context.agent_context import AgentContext from openhands.sdk.conversation.request import SendMessageRequest +from openhands.sdk.conversation.types import ( + ConversationObservabilityMetadata, + ConversationObservabilityTags, +) from openhands.sdk.hooks import HookConfig from openhands.sdk.llm import LLM from openhands.sdk.logger import get_logger @@ -530,6 +534,16 @@ class ConversationSettings(BaseModel): exclude=True, description="Repository selected for the conversation.", ) + observability_metadata: ConversationObservabilityMetadata | None = Field( + default=None, + exclude=True, + description="Trace-level metadata for observability backends.", + ) + observability_tags: ConversationObservabilityTags | None = Field( + default=None, + exclude=True, + description="Tags for the conversation root observability span.", + ) # --- persisted fields --------------------------------------------------- max_iterations: int = Field( @@ -650,6 +664,10 @@ def _start_request_kwargs(self, **kwargs: Any) -> dict[str, Any]: payload.setdefault("plugins", self.plugins) if self.hook_config is not None: payload.setdefault("hook_config", self.hook_config) + if self.observability_metadata is not None: + payload.setdefault("observability_metadata", self.observability_metadata) + if self.observability_tags is not None: + payload.setdefault("observability_tags", self.observability_tags) # --- persisted defaults --------------------------------------------- payload.setdefault("confirmation_policy", self._build_confirmation_policy()) diff --git a/tests/agent_server/test_validation_error_sanitization.py b/tests/agent_server/test_validation_error_sanitization.py index fa5af7c9e2..38d577e343 100644 --- a/tests/agent_server/test_validation_error_sanitization.py +++ b/tests/agent_server/test_validation_error_sanitization.py @@ -165,6 +165,20 @@ def test_redacts_multiple_secret_patterns(self): assert inp["x_session_id"] == "" assert inp["name"] == "safe_value" + def test_stringifies_value_error_context(self): + """ValueError in ctx should not break JSONResponse rendering.""" + errors = [ + { + "type": "value_error", + "loc": ["body", "observability_metadata"], + "msg": "Value error, bad metadata", + "input": {"nested": {"bad": True}}, + "ctx": {"error": ValueError("bad metadata")}, + } + ] + result = _sanitize_validation_errors(errors) + assert result[0]["ctx"]["error"] == "bad metadata" + def test_empty_errors_list(self): """An empty error list should return an empty list.""" assert _sanitize_validation_errors([]) == [] diff --git a/tests/sdk/conversation/remote/test_remote_conversation.py b/tests/sdk/conversation/remote/test_remote_conversation.py index 4a4de21c81..761d573dd6 100644 --- a/tests/sdk/conversation/remote/test_remote_conversation.py +++ b/tests/sdk/conversation/remote/test_remote_conversation.py @@ -173,6 +173,32 @@ def test_remote_conversation_initialization_new_conversation(self, mock_ws_clien "to fetch initial events" ) + @patch( + "openhands.sdk.conversation.impl.remote_conversation.WebSocketCallbackClient" + ) + def test_remote_conversation_sends_observability_fields(self, mock_ws_client): + conversation_id = str(uuid.uuid4()) + mock_client_instance = self.setup_mock_client(conversation_id=conversation_id) + mock_ws_client.return_value = Mock() + + RemoteConversation( + agent=self.agent, + workspace=self.workspace, + observability_metadata={"repo": "OpenHands/software-agent-sdk"}, + observability_tags=["sdk", "remote"], + ) + + create_call = next( + call + for call in mock_client_instance.request.call_args_list + if call[0][0] == "POST" and call[0][1] == "/api/conversations" + ) + payload = create_call.kwargs["json"] + assert payload["observability_metadata"] == { + "repo": "OpenHands/software-agent-sdk" + } + assert payload["observability_tags"] == ["sdk", "remote"] + @patch( "openhands.sdk.conversation.impl.remote_conversation.WebSocketCallbackClient" ) diff --git a/tests/sdk/conversation/test_base_span_management.py b/tests/sdk/conversation/test_base_span_management.py index 933056dbd0..7cf66fd08f 100644 --- a/tests/sdk/conversation/test_base_span_management.py +++ b/tests/sdk/conversation/test_base_span_management.py @@ -7,6 +7,7 @@ from openhands.sdk.conversation.base import BaseConversation from openhands.sdk.conversation.conversation_stats import ConversationStats +from openhands.sdk.conversation.types import TraceMetadataValue from openhands.sdk.llm.llm import LLM from openhands.sdk.tool.schema import Action, Observation @@ -94,7 +95,11 @@ def test_base_conversation_span_management(): # Start span conversation._start_observability_span("test-session-id") mock_start_span.assert_called_once_with( - "conversation", session_id="test-session-id", user_id=None + "conversation", + session_id="test-session-id", + user_id=None, + metadata=None, + tags=None, ) assert conversation._span_ended is False assert conversation._observability_root_span is fake_root @@ -116,6 +121,34 @@ def test_base_conversation_span_management(): assert conversation._span_ended is True +def test_base_conversation_passes_observability_metadata(): + conversation = MockConversation() + + with ( + patch( + "openhands.sdk.conversation.base.should_enable_observability", + return_value=True, + ), + patch("openhands.sdk.conversation.base.start_root_span") as mock_start_span, + ): + metadata: dict[str, TraceMetadataValue] = { + "repo_name": "OpenHands/software-agent-sdk" + } + tags = ["repo:OpenHands/software-agent-sdk"] + + conversation._start_observability_span( + "test-session-id", metadata=metadata, tags=tags + ) + + mock_start_span.assert_called_once_with( + "conversation", + session_id="test-session-id", + user_id=None, + metadata=metadata, + tags=tags, + ) + + def test_base_conversation_span_management_disabled(): """Test that BaseConversation doesn't perform span operations when observability is disabled.""" # noqa: E501 diff --git a/tests/sdk/conversation/test_tags.py b/tests/sdk/conversation/test_tags.py index a38b379e3f..eba6ba8964 100644 --- a/tests/sdk/conversation/test_tags.py +++ b/tests/sdk/conversation/test_tags.py @@ -5,7 +5,10 @@ from openhands.sdk.conversation.types import ( TAG_VALUE_MAX_LENGTH, + ConversationObservabilityMetadata, + ConversationObservabilityTags, ConversationTags, + _validate_observability_metadata, _validate_tags, ) @@ -84,3 +87,60 @@ class TestModel(BaseModel): # Invalid key rejected with pytest.raises(ValidationError): TestModel(tags={"BAD": "value"}) + + +def test_validate_observability_metadata_valid(): + metadata = { + "repo_name": "OpenHands/software-agent-sdk", + "private": True, + "retry_count": 3, + "cost": 1.5, + "labels": ["repo", "cloud"], + "flags": [True, False], + "counts": [1, 2], + "scores": [0.1, 0.2], + } + + assert _validate_observability_metadata(metadata) == metadata + + +def test_validate_observability_metadata_rejects_non_dict(): + with pytest.raises(ValueError, match="must be a dictionary"): + _validate_observability_metadata([]) + + +def test_validate_observability_metadata_rejects_nested_values(): + with pytest.raises(ValueError, match="must be a scalar"): + _validate_observability_metadata({"repo": {"name": "openhands"}}) # type: ignore[dict-item] + + +def test_observability_metadata_in_pydantic_model(): + from pydantic import BaseModel + + class TestModel(BaseModel): + observability_metadata: ConversationObservabilityMetadata = {} + + m = TestModel(observability_metadata={"repo_name": "OpenHands/OpenHands"}) + assert m.observability_metadata == {"repo_name": "OpenHands/OpenHands"} + + m = TestModel.model_validate({"observability_metadata": None}) + assert m.observability_metadata == {} + + with pytest.raises(ValidationError): + TestModel.model_validate({"observability_metadata": {"nested": {"bad": True}}}) + + +def test_observability_tags_in_pydantic_model(): + from pydantic import BaseModel + + class TestModel(BaseModel): + observability_tags: ConversationObservabilityTags = [] + + m = TestModel(observability_tags=["repo", "cloud"]) + assert m.observability_tags == ["repo", "cloud"] + + m = TestModel.model_validate({"observability_tags": None}) + assert m.observability_tags == [] + + with pytest.raises(ValidationError): + TestModel.model_validate({"observability_tags": [1, 2]}) diff --git a/tests/sdk/observability/test_laminar.py b/tests/sdk/observability/test_laminar.py index 66f4130e43..68504f8307 100644 --- a/tests/sdk/observability/test_laminar.py +++ b/tests/sdk/observability/test_laminar.py @@ -495,6 +495,34 @@ def contextlib_compat(): return contextlib.contextmanager +def test_root_span_sets_trace_metadata_and_tags(): + from openhands.sdk.observability.laminar import RootSpan + + fake_span = MagicMock() + fake_context = MagicMock() + + with patch("lmnr.Laminar") as mock_laminar: + mock_laminar.start_span.return_value = fake_span + mock_laminar.use_span.return_value.__enter__.return_value = fake_context + + RootSpan( + "conversation", + session_id="session-1", + metadata={"repo_name": "OpenHands/software-agent-sdk"}, + tags=["repo:OpenHands/software-agent-sdk"], + ) + + mock_laminar.start_span.assert_called_once_with("conversation") + mock_laminar.use_span.assert_called_once_with(fake_span) + mock_laminar.set_trace_session_id.assert_called_once_with("session-1") + mock_laminar.set_trace_metadata.assert_called_once_with( + {"repo_name": "OpenHands/software-agent-sdk"} + ) + mock_laminar.set_span_tags.assert_called_once_with( + ["repo:OpenHands/software-agent-sdk"] + ) + + def test_deprecated_shims_emit_warnings(): """The legacy global-stack API must emit DeprecationWarning so external callers (none found in the org-wide audit, but still) are alerted before diff --git a/tests/sdk/test_settings.py b/tests/sdk/test_settings.py index ff27ca5a1b..68aab573dd 100644 --- a/tests/sdk/test_settings.py +++ b/tests/sdk/test_settings.py @@ -3,7 +3,7 @@ import pytest from fastmcp.mcp_config import MCPConfig -from pydantic import SecretStr +from pydantic import SecretStr, ValidationError from openhands.agent_server.models import StartConversationRequest from openhands.sdk import ( @@ -175,6 +175,20 @@ def test_conversation_settings_export_schema_groups_sections() -> None: assert verification_fields["security_analyzer"].depends_on == ["confirmation_mode"] +def test_conversation_settings_validates_observability_metadata() -> None: + settings = ConversationSettings(observability_metadata={"repo": "OpenHands/sdk"}) + assert settings.observability_metadata == {"repo": "OpenHands/sdk"} + + with pytest.raises(ValidationError): + ConversationSettings(observability_metadata={"": "missing-key"}) + + with pytest.raises(ValidationError): + ConversationSettings(observability_metadata=[]) # type: ignore[arg-type] + + with pytest.raises(ValidationError): + ConversationSettings(observability_tags=[1, 2]) # type: ignore[list-item] + + def test_conversation_settings_model_dump_roundtrip() -> None: settings = ConversationSettings( max_iterations=42,