diff --git a/openhands-agent-server/openhands/agent_server/conversation_router.py b/openhands-agent-server/openhands/agent_server/conversation_router.py index 73cea2985e..fbe0353647 100644 --- a/openhands-agent-server/openhands/agent_server/conversation_router.py +++ b/openhands-agent-server/openhands/agent_server/conversation_router.py @@ -394,6 +394,52 @@ async def switch_conversation_llm( return Success() +@conversation_router.post( + "/{conversation_id}/switch_acp_model", + responses={ + 400: {"description": "Agent is not ACP, or provider can't switch models"}, + 404: {"description": "Conversation not found"}, + 409: {"description": "ACP session not initialized yet"}, + 504: {"description": "ACP server did not answer the model switch in time"}, + }, +) +async def switch_conversation_acp_model( + conversation_id: UUID, + model: str = Body(..., embed=True), + conversation_service: ConversationService = Depends(get_conversation_service), +) -> Success: + """Switch the model of a running ACP conversation, mid-conversation. + + Issues a protocol-level ``session/set_model`` call to the ACP subprocess + so the new model applies to subsequent turns without losing context. Only + valid for ACP conversations whose provider supports runtime switching. + """ + event_service = await conversation_service.get_event_service(conversation_id) + if event_service is None: + raise HTTPException(status.HTTP_404_NOT_FOUND) + try: + await event_service.switch_acp_model(model) + except ValueError as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=str(e), + ) + except TimeoutError as e: + # The bounded session/set_model round-trip expired. The ACP server is + # wedged/slow rather than rejecting the request, so surface a 504 + # instead of an opaque 500. + raise HTTPException( + status_code=status.HTTP_504_GATEWAY_TIMEOUT, + detail=str(e), + ) + except RuntimeError as e: + raise HTTPException( + status_code=status.HTTP_409_CONFLICT, + detail=str(e), + ) + return Success() + + @conversation_router.patch( "/{conversation_id}", responses={404: {"description": "Item not found"}} ) diff --git a/openhands-agent-server/openhands/agent_server/event_service.py b/openhands-agent-server/openhands/agent_server/event_service.py index 0a5a501008..4ec523f031 100644 --- a/openhands-agent-server/openhands/agent_server/event_service.py +++ b/openhands-agent-server/openhands/agent_server/event_service.py @@ -911,6 +911,29 @@ async def set_security_analyzer( None, self._conversation.set_security_analyzer, security_analyzer ) + async def switch_acp_model(self, model: str) -> None: + """Switch the model on a running ACP conversation, mid-conversation. + + Runs the (blocking) protocol-level ``session/set_model`` round-trip in a + worker thread, then mirrors the new model into ``meta.json`` so the + switch survives an agent-server restart: ``start()`` rebuilds the agent + from ``self.stored.agent`` and ``ConversationState.create()`` copies + that over the persisted base_state.json on resume. Only ``acp_model`` + needs updating — ``model_post_init`` re-derives the sentinel + ``llm.model`` on reload. + """ + if self._conversation is None: + raise RuntimeError( + "Conversation is not active; it has not been started or has " + "been closed." + ) + loop = asyncio.get_running_loop() + await loop.run_in_executor(None, self._conversation.switch_acp_model, model) + self.stored = self.stored.model_copy( + update={"agent": self.stored.agent.model_copy(update={"acp_model": model})} + ) + await self.save_meta() + async def close(self): if self._lease_task is not None: self._lease_task.cancel() diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 5f9883954c..9511074c9d 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -208,12 +208,20 @@ async def _maybe_set_session_model( session_id: str, acp_model: str | None, ) -> None: - """Apply a protocol-level session model override when the server supports it. - - Uses :func:`~openhands.sdk.settings.acp_providers.detect_acp_provider_by_agent_name` - to check whether the server supports ``set_session_model``. - claude-agent-acp uses session ``_meta`` via - :func:`~openhands.sdk.settings.acp_providers.build_session_model_meta` instead. + """Apply the *initial* session model right after session creation. + + This is the session-creation path only, gated on + :attr:`~openhands.sdk.settings.acp_providers.ACPProviderInfo.supports_set_session_model`. + Providers that select their initial model via session ``_meta`` + (claude-agent-acp, ``supports_set_session_model=False``) already received + the model in ``new_session()``, so this is a no-op for them. Providers that + use the protocol call for initial selection (codex-acp, gemini-cli) get a + one-shot ``set_session_model`` call here. + + Runtime, mid-conversation switches go through + :meth:`ACPAgent.set_acp_model` instead, which always uses + ``set_session_model`` and is gated on the separate + ``supports_runtime_model_switch`` capability flag. """ if not acp_model: return @@ -222,6 +230,44 @@ async def _maybe_set_session_model( await conn.set_session_model(model_id=acp_model, session_id=session_id) +async def _reapply_session_model_on_resume( + conn: ClientSideConnection, + agent_name: str, + session_id: str, + acp_model: str | None, +) -> None: + """Reapply the persisted model to a *resumed* session. + + ``load_session()`` carries no model ``_meta``, so a session resumed after a + runtime switch (or with any persisted ``acp_model``) would otherwise run on + the ACP server's default. This issues ``set_session_model`` so the resumed + live session matches the serialized ``acp_model``. + + The gating mirrors :meth:`ACPAgent.set_acp_model` (attempt for custom/unknown + servers and known providers that support runtime switching; skip only known + providers that don't), deliberately differing from the initial-selection + gate: claude-agent-acp selects its initial model via ``_meta`` yet supports + ``set_session_model`` for later switches. A server that rejects the call is + tolerated (logged) — like the ``load_session`` fallback above — so resume + can't break; the session keeps the server default until the next switch. + """ + if not acp_model: + return + provider = detect_acp_provider_by_agent_name(agent_name) + if provider is not None and not provider.supports_runtime_model_switch: + return + try: + await conn.set_session_model(model_id=acp_model, session_id=session_id) + except ACPRequestError as e: + logger.warning( + "Could not reapply model %r on resumed session %s (%s); the live " + "session may run on the server default until the next switch", + acp_model, + session_id, + e, + ) + + def _extract_token_usage( response: Any, ) -> tuple[int, int, int, int, int]: @@ -1076,7 +1122,7 @@ def _start_acp_server(self, state: ConversationState) -> None: ) prior_session_id = None - async def _init() -> tuple[Any, Any, Any, str, str, str]: + async def _init() -> tuple[str, str, str]: # Spawn the subprocess directly so we can install a # filtering reader that skips non-JSON-RPC lines some # ACP servers (e.g. claude-code-acp v0.1.x) write to @@ -1106,6 +1152,17 @@ async def _init() -> tuple[Any, Any, Any, str, str, str]: filtered_reader, # read filtered output ) + # Track the subprocess/connection on self as soon as they exist, so + # that if a *later* init step fails (e.g. the resume model reapply + # times out or the server errors), init_state()'s _cleanup() can + # still tear them down instead of leaking the subprocess/connection. + # The "session initialized" gating keys off _session_id (assigned + # last, on full success), so an early _conn here does not make the + # agent look ready before _init completes. + self._process = process + self._conn = conn + self._filtered_reader = filtered_reader + # Initialize the protocol and discover server identity init_response = await conn.initialize(protocol_version=1) agent_name = "" @@ -1180,18 +1237,34 @@ async def _init() -> tuple[Any, Any, Any, str, str, str]: ) if session_id is None: - # Build _meta content for session options (e.g. model selection). - # Extra kwargs to new_session() become the _meta dict in the - # JSON-RPC request — do NOT wrap in _meta= (that double-nests). + # Fresh session. Build _meta content for session options (e.g. + # model selection). Extra kwargs to new_session() become the + # _meta dict in the JSON-RPC request — do NOT wrap in _meta= + # (that double-nests). session_meta = build_session_model_meta(agent_name, self.acp_model) response = await conn.new_session(cwd=working_dir, **session_meta) session_id = response.session_id - await _maybe_set_session_model( - conn, - agent_name, - session_id, - self.acp_model, - ) + # Initial-selection protocol call for providers that use it + # (codex-acp, gemini-cli); no-op for claude, which selected its + # model via the _meta above. + await _maybe_set_session_model( + conn, + agent_name, + session_id, + self.acp_model, + ) + else: + # Resumed session. load_session() does not carry model _meta, so + # reapply the persisted (possibly runtime-switched) acp_model via + # the runtime-switch capability — otherwise the resumed live + # session would run on the server default while serialized state + # claims the switched model. + await _reapply_session_model_on_resume( + conn, + agent_name, + session_id, + self.acp_model, + ) # Resolve the permission mode. Known providers each have their # own mode ID (bypassPermissions, full-access, yolo …). @@ -1205,17 +1278,14 @@ async def _init() -> tuple[Any, Any, Any, str, str, str]: logger.info("Setting ACP session mode: %s", mode_id) await conn.set_session_mode(mode_id=mode_id, session_id=session_id) - return conn, process, filtered_reader, session_id, agent_name, agent_version + return session_id, agent_name, agent_version - result = self._executor.run_async(_init) - ( - self._conn, - self._process, - self._filtered_reader, - self._session_id, - self._agent_name, - self._agent_version, - ) = result + # _conn / _process / _filtered_reader are assigned inside _init() (right + # after creation) so a mid-init failure can be cleaned up; only the + # success-only fields are returned here. + self._session_id, self._agent_name, self._agent_version = ( + self._executor.run_async(_init) + ) self._working_dir = working_dir def _reset_client_for_turn( @@ -1828,6 +1898,105 @@ async def _fork_and_prompt() -> str: with client._fork_lock: return self._executor.run_async(_fork_and_prompt) + def set_acp_model(self, model: str) -> None: + """Switch the model on the running ACP session (mid-conversation). + + Issues a protocol-level ``session/set_model`` call on the live + connection so the new model takes effect for subsequent turns in the + *same* session — no subprocess restart, no loss of conversation + context. Verified against claude-agent-acp and codex-acp. + + This is the low-level agent primitive; prefer + :meth:`LocalConversation.switch_acp_model` as the entry point. That + wrapper (a) holds the state lock so the switch cannot race a running + ``step()``, and (b) persists the new value by swapping in an agent + ``model_copy`` — ``acp_model`` is frozen, so this method updates only + the live session and the sentinel ``llm.model``/metrics, **not** + ``self.acp_model``. A direct caller therefore leaves ``acp_model`` + (which ``_record_usage`` reads for cost attribution) stale and the + switch unpersisted; go through ``switch_acp_model`` instead. + + Args: + model: Provider-specific model id to switch to (e.g. + ``"claude-haiku-4-5-20251001"`` or ``"gpt-5.4/low"``). + + Raises: + ValueError: If ``model`` is empty or whitespace-only, if the + detected provider does not support runtime model switching, or + if the ACP server rejects the ``session/set_model`` call (e.g. + method-not-found on a custom server, or an invalid model id). + RuntimeError: If the ACP session has not been initialized yet + (i.e. before the first ``run()``). + TimeoutError: If the server does not answer within + ``acp_prompt_timeout`` seconds. + + Note: + A timeout means the client stopped waiting, not that the switch was + rejected: the ``session/set_model`` request may already have been + written and could still be applied server-side. The connection and + session stay alive and the local sentinel model is intentionally + left unchanged, so a timed-out switch leaves the server-side model + indeterminate. The conservative choice (treat it as failed locally) + keeps cost/token accounting on the previously-known model and + self-heals on the next successful switch; the agent itself always + runs whatever model the live ACP session holds. + """ + if not model or not model.strip(): + raise ValueError("model must be a non-empty string") + if self._conn is None or self._session_id is None or self._executor is None: + raise RuntimeError( + "ACP session is not initialized; the model can only be switched " + "after the conversation has started (first run())." + ) + provider = detect_acp_provider_by_agent_name(self._agent_name) + if provider is not None and not provider.supports_runtime_model_switch: + raise ValueError( + f"ACP provider '{provider.key}' does not support runtime model " + "switching via set_session_model." + ) + # Bounded round-trip: this runs while LocalConversation.switch_acp_model + # holds the state lock, so a server that accepts the call but never + # answers must not wedge the lock indefinitely. On timeout / protocol + # error we propagate *before* mutating any local state, so the sentinel + # LLM is only updated once the live session has actually switched. + try: + self._executor.run_async( + self._conn.set_session_model( + model_id=model, session_id=self._session_id + ), + timeout=self.acp_prompt_timeout, + ) + except ACPRequestError as e: + # Server-internal failures (JSON-RPC -32603) are not the caller's + # fault, and the prompt path already treats them as retriable. Let + # them propagate (-> 5xx) instead of mislabeling them as a 400 + # client error. + if e.code in _RETRIABLE_SERVER_ERROR_CODES: + raise + # acp.exceptions.RequestError derives from Exception (not + # RuntimeError); surface a true client/protocol rejection (e.g. + # method-not-found, invalid model id) as a ValueError so callers — + # and the agent-server route — treat it as a 400-class client error + # rather than an opaque 500. + raise ValueError( + f"ACP server rejected set_session_model(model={model!r}): {e}" + ) from e + # Reflect the live model on the sentinel LLM + metrics so cost/token + # accounting and serialized state show the model actually in use + # (mirrors model_post_init). The ``acp_model`` field is frozen, so the + # authoritative current model is persisted by + # :meth:`LocalConversation.switch_acp_model` via an agent ``model_copy``. + self.llm.model = model + self.llm.metrics.model_name = model + if self.llm.metrics.accumulated_token_usage is not None: + self.llm.metrics.accumulated_token_usage.model = model + logger.info( + "Switched ACP session model to %s (provider=%s, session=%s)", + model, + provider.key if provider else "unknown", + self._session_id, + ) + def close(self) -> None: """Terminate the ACP subprocess and clean up resources.""" if self._closed: @@ -1864,6 +2033,25 @@ def _cleanup(self) -> None: logger.debug("Error closing executor: %s", e) self._executor = None + def release_runtime(self) -> None: + """Disarm this agent's finalizer after handing its live ACP runtime to a + shallow :meth:`~pydantic.BaseModel.model_copy`. + + The copy shares this agent's ``_conn`` / ``_executor`` / ``_process`` + references (``model_copy`` is shallow). Marking this now-stale instance + closed makes its ``__del__`` -> :meth:`close` a no-op, so dropping it + cannot tear down the runtime the copy now owns. + + The runtime references are intentionally left intact: an in-flight + :meth:`ask_agent` fork — which is thread-safe and may still hold this + pre-switch agent — keeps a valid connection until it finishes. Sole + ownership for teardown passes to the copy (the live ``self.agent`` + going forward), which is closed on conversation shutdown. + + See :meth:`LocalConversation.switch_acp_model`. + """ + self._closed = True + def __del__(self) -> None: try: self.close() diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index ab38d0d3d9..23296b1972 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -704,6 +704,59 @@ def switch_profile(self, profile_name: str) -> None: cached = loaded.model_copy(update={"usage_id": usage_id}) self.switch_llm(cached) + def switch_acp_model(self, model: str) -> None: + """Switch the model on a running ACP conversation (mid-conversation). + + Unlike :meth:`switch_llm`, which swaps OpenHands' own LLM object, this + issues a protocol-level ``session/set_model`` call to the ACP + subprocess so the new model applies to subsequent turns of the *same* + session, preserving conversation context. ``switch_llm`` would not + affect an ACP conversation, since the subprocess owns its own model. + + Args: + model: Provider-specific model id to switch to. + + Raises: + ValueError: If the conversation's agent is not an :class:`ACPAgent`, + or the provider does not support runtime model switching, or + the ACP server rejects the switch. + RuntimeError: If the ACP session is not yet initialized. + TimeoutError: If the ACP server does not respond within + ``acp_prompt_timeout`` seconds. + """ + if not isinstance(self.agent, ACPAgent): + raise ValueError( + "switch_acp_model is only supported for ACP conversations." + ) + with self._state: + # Perform the live protocol switch first; if it fails we leave the + # persisted state untouched. + self.agent.set_acp_model(model) + # Persist the switched model as the authoritative value. ``acp_model`` + # is frozen, so we replace the agent with a copy carrying the new + # value. This matters on two counts the in-place mutation missed: + # 1. A fresh object identity makes the autosave path actually + # write base_state.json (re-assigning the same object is a + # no-op because old == new). + # 2. model_post_init / _start_acp_server derive the sentinel model + # and the resumed session model from ``acp_model`` on reload, so + # it must hold the switched value, not the construction-time one. + # + # model_copy is shallow, so the copy shares the live ACP runtime + # (_conn/_executor/_process) with the old agent. Disarm the old + # agent's finalizer before dropping it: otherwise ACPAgent.__del__ + # -> close() on the discarded agent would tear down the session the + # copy now owns, leaving the next turn pointing at a dead connection. + old_agent = self.agent + new_agent = old_agent.model_copy(update={"acp_model": model}) + old_agent.release_runtime() + # ``self.agent`` is the live reference used by subsequent ``step()`` + # calls; ``self._state.agent`` is what the autosave path serializes + # to base_state.json. Update both so the running conversation and the + # persisted state agree on the switched model. + self.agent = new_agent + self._state.agent = new_agent + @observe(name="conversation.send_message") def send_message(self, message: str | Message, sender: str | None = None) -> None: """Send a message to the agent. diff --git a/openhands-sdk/openhands/sdk/settings/acp_providers.py b/openhands-sdk/openhands/sdk/settings/acp_providers.py index f29684d93d..c34e7d5b6a 100644 --- a/openhands-sdk/openhands/sdk/settings/acp_providers.py +++ b/openhands-sdk/openhands/sdk/settings/acp_providers.py @@ -11,8 +11,12 @@ - ``default_session_mode`` ACP mode ID that disables permission prompts - ``agent_name_patterns`` lowercase substrings in the runtime agent name; used by ``ACPAgent`` to auto-detect mode / protocol -- ``supports_set_session_model`` whether to use the ``set_session_model`` - protocol call (vs ``_meta``) for model selection +- ``supports_set_session_model`` whether the provider selects its *initial* + model via the ``set_session_model`` protocol + call (vs session ``_meta``) at session creation +- ``supports_runtime_model_switch`` whether the server supports the + ``session/set_model`` protocol call for + runtime, mid-conversation model switching - ``session_meta_key`` top-level ``_meta`` key for model selection (or ``None``) - ``available_models`` curated list of selectable models for the provider's model picker (``acp_model`` candidates) @@ -89,19 +93,34 @@ class ACPProviderInfo: """ supports_set_session_model: bool - """``True`` if this provider uses the ``set_session_model`` protocol call. + """``True`` if this provider selects its *initial* model via the + ``set_session_model`` protocol call (rather than session ``_meta``). - - ``False`` for claude-agent-acp, which uses session ``_meta`` instead. - - ``True`` for codex-acp and gemini-cli. + This governs the **session-creation** path only: + + - ``False`` for claude-agent-acp, which selects its initial model via + session ``_meta`` (see :attr:`session_meta_key`). + - ``True`` for codex-acp and gemini-cli, which get a one-shot + ``set_session_model`` call right after the session is created. + + This is **independent of** runtime switching capability — see + :attr:`supports_runtime_model_switch`. The original meaning of this flag + is preserved so external consumers that use it to pick the initial + selection path keep working. """ session_meta_key: str | None - """Top-level ``_meta`` key for model selection, or ``None``. + """Top-level ``_meta`` key for model selection *at session creation*. - When non-``None``, the provider selects its model via ACP session ``_meta`` - using the structure ``{session_meta_key: {"options": {"model": }}}``. - ``None`` means the provider uses the ``set_session_model`` protocol call - instead (see :attr:`supports_set_session_model`). + When non-``None``, the provider selects its **initial** model via ACP + session ``_meta`` using the structure + ``{session_meta_key: {"options": {"model": }}}`` passed to + ``new_session()``. When ``None``, the initial model is applied with a + one-shot ``set_session_model`` call right after the session is created + (gated on :attr:`supports_set_session_model`). + + This only governs the *initial* selection; runtime switches always use + ``set_session_model`` (gated on :attr:`supports_runtime_model_switch`). - ``"claudeCode"`` — claude-agent-acp - ``None`` — codex-acp, gemini-cli @@ -124,6 +143,25 @@ class ACPProviderInfo: the ACP server pick its own default. """ + supports_runtime_model_switch: bool = False + """``True`` if the server supports the ``session/set_model`` protocol call + for **runtime, mid-conversation model switching**. + + The call applies to the live session, so subsequent turns use the new + model without restarting the subprocess or losing context. All three + built-in providers support it (verified against claude-agent-acp, + codex-acp, and gemini-cli). + + Unlike :attr:`supports_set_session_model`, this is about switching the + model of an *already-running* session, not the initial selection. A + provider may select its initial model via ``_meta`` (claude-agent-acp) + yet still support ``set_session_model`` for later switches. + + Defaults to ``False`` so forward-compat providers — and any external + caller constructing this dataclass positionally — keep working without a + signature break; the built-in providers set it explicitly. + """ + # --------------------------------------------------------------------------- # Curated ``acp_model`` candidate lists for the built-in providers. @@ -204,7 +242,12 @@ class ACPProviderInfo: base_url_env_var="ANTHROPIC_BASE_URL", default_session_mode="bypassPermissions", agent_name_patterns=("claude-agent",), + # claude-agent-acp selects its *initial* model via session _meta + # (session_meta_key below), so the init path does NOT use + # set_session_model. It DOES, however, support session/set_model + # for mid-conversation switches. supports_set_session_model=False, + supports_runtime_model_switch=True, session_meta_key="claudeCode", available_models=_CLAUDE_MODELS, default_model="claude-opus-4-7", @@ -218,6 +261,7 @@ class ACPProviderInfo: default_session_mode="full-access", agent_name_patterns=("codex-acp",), supports_set_session_model=True, + supports_runtime_model_switch=True, session_meta_key=None, available_models=_CODEX_MODELS, default_model="gpt-5.5/medium", @@ -231,6 +275,7 @@ class ACPProviderInfo: default_session_mode="yolo", agent_name_patterns=("gemini-cli",), supports_set_session_model=True, + supports_runtime_model_switch=True, session_meta_key=None, available_models=_GEMINI_MODELS, # Match the Gemini CLI's own no-model-configured default diff --git a/tests/agent_server/test_conversation_router.py b/tests/agent_server/test_conversation_router.py index b9cdc12720..38cb74c269 100644 --- a/tests/agent_server/test_conversation_router.py +++ b/tests/agent_server/test_conversation_router.py @@ -1092,6 +1092,146 @@ def test_run_conversation_not_found( client.app.dependency_overrides.clear() +def test_switch_acp_model_success( + client, mock_conversation_service, mock_event_service, sample_conversation_id +): + """switch_acp_model endpoint forwards the model to the event service.""" + mock_conversation_service.get_event_service.return_value = mock_event_service + mock_event_service.switch_acp_model.return_value = None + + client.app.dependency_overrides[get_conversation_service] = ( + lambda: mock_conversation_service + ) + try: + response = client.post( + f"/api/conversations/{sample_conversation_id}/switch_acp_model", + json={"model": "haiku"}, + ) + assert response.status_code == 200 + assert response.json()["success"] is True + mock_event_service.switch_acp_model.assert_awaited_once_with("haiku") + finally: + client.app.dependency_overrides.clear() + + +def test_switch_acp_model_not_found( + client, mock_conversation_service, sample_conversation_id +): + """switch_acp_model returns 404 when the conversation is unknown.""" + mock_conversation_service.get_event_service.return_value = None + + client.app.dependency_overrides[get_conversation_service] = ( + lambda: mock_conversation_service + ) + try: + response = client.post( + f"/api/conversations/{sample_conversation_id}/switch_acp_model", + json={"model": "haiku"}, + ) + assert response.status_code == 404 + finally: + client.app.dependency_overrides.clear() + + +def test_switch_acp_model_non_acp_returns_400( + client, mock_conversation_service, mock_event_service, sample_conversation_id +): + """A ValueError (e.g. non-ACP agent / unsupported provider) maps to 400.""" + mock_conversation_service.get_event_service.return_value = mock_event_service + mock_event_service.switch_acp_model.side_effect = ValueError( + "switch_acp_model is only supported for ACP conversations." + ) + + client.app.dependency_overrides[get_conversation_service] = ( + lambda: mock_conversation_service + ) + try: + response = client.post( + f"/api/conversations/{sample_conversation_id}/switch_acp_model", + json={"model": "haiku"}, + ) + assert response.status_code == 400 + finally: + client.app.dependency_overrides.clear() + + +def test_switch_acp_model_uninitialized_returns_409( + client, mock_conversation_service, mock_event_service, sample_conversation_id +): + """A RuntimeError (session not initialized yet) maps to 409.""" + mock_conversation_service.get_event_service.return_value = mock_event_service + mock_event_service.switch_acp_model.side_effect = RuntimeError( + "ACP session is not initialized" + ) + + client.app.dependency_overrides[get_conversation_service] = ( + lambda: mock_conversation_service + ) + try: + response = client.post( + f"/api/conversations/{sample_conversation_id}/switch_acp_model", + json={"model": "haiku"}, + ) + assert response.status_code == 409 + finally: + client.app.dependency_overrides.clear() + + +def test_switch_acp_model_protocol_error_returns_400( + client, mock_conversation_service, mock_event_service, sample_conversation_id +): + """A rejected ACP ``session/set_model`` call maps to 400, not 500. + + ``ACPAgent.set_acp_model`` translates ``acp.exceptions.RequestError`` (e.g. + method-not-found on a custom server, or an invalid model id) into a + ValueError, so a protocol-level rejection surfaces as a 400 client error + rather than an opaque 500. + """ + mock_conversation_service.get_event_service.return_value = mock_event_service + mock_event_service.switch_acp_model.side_effect = ValueError( + "ACP server rejected set_session_model(model='bogus'): method not found" + ) + + client.app.dependency_overrides[get_conversation_service] = ( + lambda: mock_conversation_service + ) + try: + response = client.post( + f"/api/conversations/{sample_conversation_id}/switch_acp_model", + json={"model": "bogus"}, + ) + assert response.status_code == 400 + finally: + client.app.dependency_overrides.clear() + + +def test_switch_acp_model_timeout_returns_504( + client, mock_conversation_service, mock_event_service, sample_conversation_id +): + """A TimeoutError (wedged/slow ACP server) maps to 504, not 500. + + ``ACPAgent.set_acp_model`` bounds the ``session/set_model`` round-trip with + ``acp_prompt_timeout``; an expired call raises ``TimeoutError``, which the + route surfaces as a Gateway Timeout rather than an opaque 500. + """ + mock_conversation_service.get_event_service.return_value = mock_event_service + mock_event_service.switch_acp_model.side_effect = TimeoutError( + "ACP server did not answer set_session_model within 600s" + ) + + client.app.dependency_overrides[get_conversation_service] = ( + lambda: mock_conversation_service + ) + try: + response = client.post( + f"/api/conversations/{sample_conversation_id}/switch_acp_model", + json={"model": "haiku"}, + ) + assert response.status_code == 504 + finally: + client.app.dependency_overrides.clear() + + def test_run_conversation_already_running( client, mock_conversation_service, mock_event_service, sample_conversation_id ): diff --git a/tests/agent_server/test_event_service.py b/tests/agent_server/test_event_service.py index 2058052c10..7dc415bd8b 100644 --- a/tests/agent_server/test_event_service.py +++ b/tests/agent_server/test_event_service.py @@ -1308,6 +1308,48 @@ async def test_save_meta_preserves_updated_at(self, event_service, tmp_path): loaded = StoredConversation.model_validate_json(meta_file.read_text()) assert loaded.updated_at == original_updated_at + @pytest.mark.asyncio + async def test_switch_acp_model_persists_to_meta(self, tmp_path): + """switch_acp_model mirrors the new model into meta.json. + + start() rebuilds the runtime agent from meta.json (self.stored.agent), + and ConversationState.create() copies that agent over the persisted + base_state.json on resume. So the switched model must also be written + to meta.json, otherwise a restart silently reverts to the old model. + """ + from openhands.sdk.agent import ACPAgent + + stored = StoredConversation( + id=uuid4(), + agent=ACPAgent(acp_command=["echo", "test"], acp_model="old-model"), + workspace=LocalWorkspace(working_dir=str(tmp_path)), + confirmation_policy=NeverConfirm(), + initial_message=None, + metrics=None, + ) + service = EventService(stored=stored, conversations_dir=tmp_path) + conv_dir = tmp_path / stored.id.hex + conv_dir.mkdir(parents=True, exist_ok=True) + + # Stand in for a live conversation; the protocol-level switch is + # covered elsewhere — here we only assert the meta.json mirroring. + service._conversation = MagicMock() + + await service.switch_acp_model("new-model") + + # Live switch was delegated to the conversation... + service._conversation.switch_acp_model.assert_called_once_with("new-model") + # ...the in-memory stored agent was updated... + assert isinstance(service.stored.agent, ACPAgent) + assert service.stored.agent.acp_model == "new-model" + # ...and the new model was persisted to meta.json so it survives a + # restart. + loaded = StoredConversation.model_validate_json( + (conv_dir / "meta.json").read_text() + ) + assert isinstance(loaded.agent, ACPAgent) + assert loaded.agent.acp_model == "new-model" + class TestEventServiceStartWithRunningStatus: """Test cases for EventService.start handling of RUNNING execution status.""" diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index 7f0bd2f689..6d2557b83e 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -19,6 +19,7 @@ _image_url_to_acp_block, _maybe_set_session_model, _OpenHandsACPBridge, + _reapply_session_model_on_resume, _select_auth_method, _serialize_tool_content, ) @@ -3188,7 +3189,10 @@ async def test_codex_agent_uses_protocol_model_override(self): ) @pytest.mark.asyncio - async def test_non_codex_agent_skips_protocol_override(self): + async def test_meta_key_provider_skips_protocol_override_at_init(self): + # claude-agent-acp selects its *initial* model via session _meta, so the + # one-shot init set_session_model call is skipped (even though the + # provider now supports the protocol call for runtime switches). conn = AsyncMock() await _maybe_set_session_model( conn, @@ -3205,6 +3209,216 @@ async def test_missing_model_skips_protocol_override(self): conn.set_session_model.assert_not_called() +class TestReapplySessionModelOnResume: + """Resume reapplies the persisted model via the runtime-switch gate.""" + + @pytest.mark.asyncio + async def test_claude_reapplies_persisted_model_on_resume(self): + # claude selects its initial model via _meta (supports_set_session_model + # =False) but DOES support set_session_model for runtime switches. + # load_session() carries no _meta, so on resume the persisted model must + # be reapplied via the runtime-switch gate — _maybe_set_session_model + # would skip it. + conn = AsyncMock() + await _reapply_session_model_on_resume( + conn, "claude-agent-acp", "sess-1", "claude-haiku-4-5-20251001" + ) + conn.set_session_model.assert_awaited_once_with( + model_id="claude-haiku-4-5-20251001", session_id="sess-1" + ) + + @pytest.mark.asyncio + async def test_codex_reapplies_persisted_model_on_resume(self): + conn = AsyncMock() + await _reapply_session_model_on_resume( + conn, "codex-acp", "sess-1", "gpt-5.4/low" + ) + conn.set_session_model.assert_awaited_once_with( + model_id="gpt-5.4/low", session_id="sess-1" + ) + + @pytest.mark.asyncio + async def test_missing_model_skips_reapply(self): + conn = AsyncMock() + await _reapply_session_model_on_resume(conn, "claude-agent-acp", "sess-1", None) + conn.set_session_model.assert_not_called() + + @pytest.mark.asyncio + async def test_unknown_provider_attempts_reapply(self): + # provider=None (custom server) is allowed to attempt the switch by + # set_acp_model, and such switches are persisted as authoritative — so + # resume must mirror that and attempt the reapply too (otherwise the + # resumed session would silently revert to the server default). + conn = AsyncMock() + await _reapply_session_model_on_resume( + conn, "some-custom-acp", "sess-1", "whatever" + ) + conn.set_session_model.assert_awaited_once_with( + model_id="whatever", session_id="sess-1" + ) + + @pytest.mark.asyncio + async def test_known_unsupported_provider_skips_reapply(self): + from openhands.sdk.settings.acp_providers import ACPProviderInfo + + unsupported = ACPProviderInfo( + key="legacy", + display_name="Legacy", + default_command=("legacy",), + api_key_env_var=None, + base_url_env_var=None, + default_session_mode="default", + agent_name_patterns=("legacy",), + supports_set_session_model=False, + supports_runtime_model_switch=False, + session_meta_key=None, + ) + conn = AsyncMock() + with patch( + "openhands.sdk.agent.acp_agent.detect_acp_provider_by_agent_name", + return_value=unsupported, + ): + await _reapply_session_model_on_resume(conn, "legacy-acp", "sess-1", "x") + conn.set_session_model.assert_not_called() + + @pytest.mark.asyncio + async def test_client_rejection_is_swallowed_on_resume(self): + # A client/protocol rejection (method-not-found = server doesn't support + # the call, or invalid model id) must not break resume — mirrors the + # load_session fallback. The error is logged, not raised. + conn = AsyncMock() + conn.set_session_model.side_effect = ACPRequestError( + code=-32601, message="method not found" + ) + await _reapply_session_model_on_resume( + conn, "some-custom-acp", "sess-1", "whatever" + ) + conn.set_session_model.assert_awaited_once() + + @pytest.mark.asyncio + async def test_any_request_error_is_swallowed_on_resume(self): + # Any ACPRequestError (here a -32603 server error) is tolerated on + # resume — like the load_session fallback — so a flaky/stale server + # can't break session startup; the session keeps the server default. + conn = AsyncMock() + conn.set_session_model.side_effect = ACPRequestError( + code=-32603, message="internal error" + ) + await _reapply_session_model_on_resume( + conn, "codex-acp", "sess-1", "gpt-5.4/low" + ) + conn.set_session_model.assert_awaited_once() + + +class TestSetACPModel: + """Runtime (mid-conversation) model switching via set_session_model.""" + + @staticmethod + def _wire(agent: ACPAgent, agent_name: str) -> ACPAgent: + agent._conn = MagicMock() + agent._session_id = "sess-1" + agent._agent_name = agent_name + executor = MagicMock() + executor.run_async = MagicMock() + agent._executor = executor + return agent + + def test_switches_model_on_live_codex_session(self): + agent = self._wire(_make_agent(), "codex-acp") + agent.set_acp_model("gpt-5.4/low") + agent._conn.set_session_model.assert_called_once_with( + model_id="gpt-5.4/low", session_id="sess-1" + ) + agent._executor.run_async.assert_called_once() + # Sentinel LLM + metrics reflect the live model for cost/token tracking. + assert agent.llm.model == "gpt-5.4/low" + assert agent.llm.metrics.model_name == "gpt-5.4/low" + + def test_claude_provider_supports_runtime_switch(self): + agent = self._wire(_make_agent(), "claude-agent-acp") + agent.set_acp_model("claude-haiku-4-5-20251001") + agent._conn.set_session_model.assert_called_once_with( + model_id="claude-haiku-4-5-20251001", session_id="sess-1" + ) + + def test_unknown_provider_still_attempts_switch(self): + # A custom/unrecognised server (provider=None) is allowed to attempt + # the call; the ACP layer errors if it isn't actually supported. + agent = self._wire(_make_agent(), "some-custom-acp") + agent.set_acp_model("whatever") + agent._conn.set_session_model.assert_called_once() + + def test_rejects_empty_model(self): + agent = self._wire(_make_agent(), "codex-acp") + with pytest.raises(ValueError, match="non-empty"): + agent.set_acp_model(" ") + agent._conn.set_session_model.assert_not_called() + + def test_raises_before_session_initialized(self): + agent = _make_agent() # no _conn / _session_id / _executor + with pytest.raises(RuntimeError, match="not initialized"): + agent.set_acp_model("gpt-5.4") + + def test_raises_for_provider_without_protocol_support(self): + from openhands.sdk.settings.acp_providers import ACPProviderInfo + + unsupported = ACPProviderInfo( + key="legacy", + display_name="Legacy", + default_command=("legacy",), + api_key_env_var=None, + base_url_env_var=None, + default_session_mode="default", + agent_name_patterns=("legacy",), + supports_set_session_model=False, + supports_runtime_model_switch=False, + session_meta_key=None, + ) + agent = self._wire(_make_agent(), "legacy-acp") + with patch( + "openhands.sdk.agent.acp_agent.detect_acp_provider_by_agent_name", + return_value=unsupported, + ): + with pytest.raises(ValueError, match="does not support runtime"): + agent.set_acp_model("x") + agent._conn.set_session_model.assert_not_called() + + def test_translates_acp_request_error_to_value_error(self): + # A protocol-level rejection (e.g. method-not-found on a custom server, + # or an invalid model id) must surface as a ValueError — not leak as a + # raw acp.exceptions.RequestError — so the agent-server maps it to 400. + agent = self._wire(_make_agent(), "codex-acp") + agent._executor.run_async.side_effect = ACPRequestError( + code=-32601, message="method not found" + ) + with pytest.raises(ValueError, match="rejected set_session_model"): + agent.set_acp_model("bogus-model") + # The sentinel LLM must not be mutated when the switch fails. + assert agent.llm.model != "bogus-model" + + def test_propagates_server_internal_error(self): + # JSON-RPC -32603 is a server-internal failure, not a bad client + # request. It must propagate (as the raw ACPRequestError -> 5xx) rather + # than be mislabeled as a 400-class ValueError, mirroring the retriable + # handling on the prompt path. + agent = self._wire(_make_agent(), "codex-acp") + agent._executor.run_async.side_effect = ACPRequestError( + code=-32603, message="internal error" + ) + with pytest.raises(ACPRequestError): + agent.set_acp_model("some-model") + # The sentinel LLM must not be mutated when the switch fails. + assert agent.llm.model != "some-model" + + def test_passes_timeout_to_run_async(self): + # The protocol round-trip runs under the conversation state lock, so it + # must be bounded to avoid wedging the lock if the server never answers. + agent = self._wire(_make_agent(acp_prompt_timeout=42.0), "codex-acp") + agent.set_acp_model("gpt-5.4/low") + _, kwargs = agent._executor.run_async.call_args + assert kwargs["timeout"] == 42.0 + + # --------------------------------------------------------------------------- # acp_session_mode field # --------------------------------------------------------------------------- diff --git a/tests/sdk/conversation/test_switch_model.py b/tests/sdk/conversation/test_switch_model.py index bb64c1b530..ccd591ff81 100644 --- a/tests/sdk/conversation/test_switch_model.py +++ b/tests/sdk/conversation/test_switch_model.py @@ -1,11 +1,16 @@ +import json from pathlib import Path +from unittest.mock import MagicMock import pytest from pydantic import SecretStr from openhands.sdk import LLM, LocalConversation from openhands.sdk.agent import Agent +from openhands.sdk.agent.acp_agent import ACPAgent from openhands.sdk.context.condenser import LLMSummarizingCondenser +from openhands.sdk.conversation.persistence_const import BASE_STATE +from openhands.sdk.conversation.state import ConversationState from openhands.sdk.llm import llm_profile_store from openhands.sdk.llm.llm_profile_store import LLMProfileStore from openhands.sdk.testing import TestLLM @@ -43,6 +48,104 @@ def _make_conversation() -> LocalConversation: ) +def test_switch_acp_model_rejects_non_acp_agent(): + """switch_acp_model is only valid for ACP conversations.""" + conv = _make_conversation() # plain Agent, not ACPAgent + with pytest.raises(ValueError, match="only supported for ACP"): + conv.switch_acp_model("haiku") + + +def _make_acp_conversation(tmp_path) -> tuple[LocalConversation, ACPAgent]: + """A persisted ACP conversation with a faked-out live session. + + The fake ``_conn`` / ``_executor`` let ``set_acp_model`` issue its + protocol call without launching a real ACP subprocess. + """ + agent = ACPAgent(acp_command=["echo", "test"], acp_model="model-a") + agent._conn = MagicMock() + agent._session_id = "sess-1" + agent._agent_name = "codex-acp" + executor = MagicMock() + executor.run_async = MagicMock() + agent._executor = executor + conv = LocalConversation( + agent=agent, + workspace=tmp_path, + persistence_dir=str(tmp_path / "persist"), + ) + return conv, agent + + +def test_switch_acp_model_persists_authoritative_model(tmp_path): + """A runtime switch persists as the authoritative ``acp_model``. + + Regression for the review finding that re-assigning the same (mutated) + agent object was an autosave no-op, and that the frozen ``acp_model`` + field — which ``model_post_init`` / ``_start_acp_server`` read on + reload/resume — stayed at its construction-time value. + """ + conv, agent = _make_acp_conversation(tmp_path) + live_conn = agent._conn + + conv.switch_acp_model("model-b") + + # In-memory: agent + state agree on the new model, and the live connection + # survived the model_copy so the conversation can keep running. + switched = conv.agent + assert isinstance(switched, ACPAgent) + assert switched.acp_model == "model-b" + assert isinstance(conv.state.agent, ACPAgent) + assert conv.state.agent.acp_model == "model-b" + assert switched.llm.model == "model-b" + assert switched._conn is live_conn + assert switched._session_id == "sess-1" + + # On disk: base_state.json actually changed (not an autosave no-op), and the + # persisted agent reconstructs with the switched model as authoritative. + base_text = conv.state._fs.read(BASE_STATE) + reloaded = ConversationState.model_validate(json.loads(base_text)) + assert isinstance(reloaded.agent, ACPAgent) + assert reloaded.agent.acp_model == "model-b" + # model_post_init derives the sentinel LLM model from the persisted acp_model. + assert reloaded.agent.llm.model == "model-b" + + +def test_switch_acp_model_disarms_discarded_agent_finalizer(tmp_path): + """The pre-switch agent must not tear down the shared live session. + + Regression: ``switch_acp_model`` swaps in a shallow ``model_copy`` that + shares ``_conn`` / ``_executor`` / ``_process`` with the old agent. Without + disarming it first, ``ACPAgent.__del__`` -> ``close()`` on the discarded + agent closes the connection, kills the subprocess and shuts down the + executor — out from under the copy, breaking the next turn. + """ + conv, old_agent = _make_acp_conversation(tmp_path) + live_conn = old_agent._conn + live_executor = old_agent._executor + + conv.switch_acp_model("model-b") + + # The copy took over the live runtime... + switched = conv.agent + assert isinstance(switched, ACPAgent) + assert switched._conn is live_conn + assert switched._executor is live_executor + + # ...and the discarded agent's finalizer was disarmed (marked closed) + # WITHOUT clearing its runtime references — an in-flight ask_agent()/fork + # still holding the old agent keeps a valid connection. + assert old_agent._closed is True + assert old_agent._conn is live_conn + assert old_agent._executor is live_executor + + # Simulating GC (__del__ -> close()) on the disarmed old agent is a no-op: + # the copy's shared connection/executor are left intact. + live_executor.run_async.reset_mock() + old_agent.close() + live_executor.run_async.assert_not_called() + live_executor.close.assert_not_called() + + def test_switch_profile(profile_store): """switch_profile switches the agent's LLM.""" conv = _make_conversation() diff --git a/tests/sdk/settings/test_acp_providers.py b/tests/sdk/settings/test_acp_providers.py index afd1be564d..5c1c683165 100644 --- a/tests/sdk/settings/test_acp_providers.py +++ b/tests/sdk/settings/test_acp_providers.py @@ -34,7 +34,11 @@ def test_claude_code_metadata(self): assert info.base_url_env_var == "ANTHROPIC_BASE_URL" assert info.default_session_mode == "bypassPermissions" assert "claude-agent" in info.agent_name_patterns + # claude-agent-acp selects its *initial* model via _meta (session_meta_key), + # so it does NOT use set_session_model at session creation ... assert info.supports_set_session_model is False + # ... but it DOES support session/set_model for mid-conversation switches. + assert info.supports_runtime_model_switch is True assert info.session_meta_key == "claudeCode" assert info.default_model == "claude-opus-4-7" assert any(m.id == "claude-opus-4-7" for m in info.available_models) @@ -49,6 +53,7 @@ def test_codex_metadata(self): assert info.default_session_mode == "full-access" assert "codex-acp" in info.agent_name_patterns assert info.supports_set_session_model is True + assert info.supports_runtime_model_switch is True assert info.session_meta_key is None assert info.default_model == "gpt-5.5/medium" assert any(m.id == "gpt-5.5/medium" for m in info.available_models) @@ -63,6 +68,7 @@ def test_gemini_cli_metadata(self): assert info.default_session_mode == "yolo" assert "gemini-cli" in info.agent_name_patterns assert info.supports_set_session_model is True + assert info.supports_runtime_model_switch is True assert info.session_meta_key is None assert info.default_model == "auto-gemini-2.5" assert any(m.id == "auto-gemini-2.5" for m in info.available_models)