From 0a180f15ff0201904719766272cadce200a2db7a Mon Sep 17 00:00:00 2001 From: openhands Date: Sat, 23 May 2026 16:59:17 +0000 Subject: [PATCH 01/34] fix(sdk): assign condenser LLM usage id Co-authored-by: openhands --- openhands-sdk/openhands/sdk/settings/model.py | 5 ++++- tests/sdk/test_settings.py | 7 +++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/openhands-sdk/openhands/sdk/settings/model.py b/openhands-sdk/openhands/sdk/settings/model.py index 4e395b55a0..ca10c38bb6 100644 --- a/openhands-sdk/openhands/sdk/settings/model.py +++ b/openhands-sdk/openhands/sdk/settings/model.py @@ -889,7 +889,10 @@ def build_condenser(self, llm: LLM) -> LLMSummarizingCondenser | None: from openhands.sdk.context.condenser import LLMSummarizingCondenser - return LLMSummarizingCondenser(llm=llm, max_size=self.condenser.max_size) + condenser_llm = llm.model_copy(update={"usage_id": "condenser"}) + return LLMSummarizingCondenser( + llm=condenser_llm, max_size=self.condenser.max_size + ) def build_critic(self) -> CriticBase | None: """Create an :class:`APIBasedCritic` from these settings. diff --git a/tests/sdk/test_settings.py b/tests/sdk/test_settings.py index 8085138d1d..00f94512ba 100644 --- a/tests/sdk/test_settings.py +++ b/tests/sdk/test_settings.py @@ -473,12 +473,19 @@ def test_llm_create_agent_serializes_typed_mcp_config_compactly() -> None: def test_llm_create_agent_builds_condenser_when_enabled() -> None: + llm = LLM(model="test-model", usage_id="agent") settings = OpenHandsAgentSettings( + llm=llm, condenser=CondenserSettings(enabled=True, max_size=100), ) agent = settings.create_agent() + + assert agent.llm is llm assert isinstance(agent.condenser, LLMSummarizingCondenser) assert agent.condenser.max_size == 100 + assert agent.condenser.llm is not llm + assert agent.condenser.llm.model == llm.model + assert agent.condenser.llm.usage_id == "condenser" def test_llm_create_agent_no_condenser_when_disabled() -> None: From 43032a629660e7611e2cd62298de490853684723 Mon Sep 17 00:00:00 2001 From: openhands Date: Sun, 24 May 2026 00:54:54 +0000 Subject: [PATCH 02/34] fix(sdk): reset condenser LLM metrics Co-authored-by: openhands --- openhands-sdk/openhands/sdk/settings/model.py | 1 + tests/sdk/test_settings.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/openhands-sdk/openhands/sdk/settings/model.py b/openhands-sdk/openhands/sdk/settings/model.py index ca10c38bb6..1a1fcb589b 100644 --- a/openhands-sdk/openhands/sdk/settings/model.py +++ b/openhands-sdk/openhands/sdk/settings/model.py @@ -890,6 +890,7 @@ def build_condenser(self, llm: LLM) -> LLMSummarizingCondenser | None: from openhands.sdk.context.condenser import LLMSummarizingCondenser condenser_llm = llm.model_copy(update={"usage_id": "condenser"}) + condenser_llm.reset_metrics() return LLMSummarizingCondenser( llm=condenser_llm, max_size=self.condenser.max_size ) diff --git a/tests/sdk/test_settings.py b/tests/sdk/test_settings.py index 00f94512ba..ff27ca5a1b 100644 --- a/tests/sdk/test_settings.py +++ b/tests/sdk/test_settings.py @@ -474,6 +474,7 @@ def test_llm_create_agent_serializes_typed_mcp_config_compactly() -> None: def test_llm_create_agent_builds_condenser_when_enabled() -> None: llm = LLM(model="test-model", usage_id="agent") + agent_metrics = llm.metrics settings = OpenHandsAgentSettings( llm=llm, condenser=CondenserSettings(enabled=True, max_size=100), @@ -486,6 +487,7 @@ def test_llm_create_agent_builds_condenser_when_enabled() -> None: assert agent.condenser.llm is not llm assert agent.condenser.llm.model == llm.model assert agent.condenser.llm.usage_id == "condenser" + assert agent.condenser.llm.metrics is not agent_metrics def test_llm_create_agent_no_condenser_when_disabled() -> None: From ca04e8c98e53b8dcdf465d40ea7fb28628ade3d8 Mon Sep 17 00:00:00 2001 From: Graham Neubig <398875+neubig@users.noreply.github.com> Date: Sun, 24 May 2026 09:51:57 -0400 Subject: [PATCH 03/34] fix(acp): accept user messages during async turns --- .../openhands/sdk/agent/acp_agent.py | 31 ++++---- .../conversation/impl/local_conversation.py | 75 +++++++++++++++++-- tests/sdk/agent/test_acp_agent.py | 24 +++--- .../local/test_conversation_send_message.py | 73 ++++++++++++++++++ 4 files changed, 167 insertions(+), 36 deletions(-) diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 5f9883954c..88af0f8c03 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -1634,27 +1634,22 @@ async def astep( ``on_event(observation)``, ``state.execution_status`` — runs entirely on the caller's thread. - Why this matters: ``LocalConversation.arun`` holds the - conversation state's reentrant ``FIFOLock`` on its loop thread - across ``await self.agent.astep(...)``. The default + Why this matters: ``LocalConversation.arun`` deliberately does + not hold the conversation state's reentrant ``FIFOLock`` across + long ACP prompt awaits, so remote user messages can be persisted + while the subprocess is still working. The default ``AgentBase.astep`` would wrap sync ``step`` in - ``loop.run_in_executor(None, self.step, ...)``, moving every - post-prompt callback to a worker thread. Any ``with state:`` - inside that chain (today: ``stats_callback``; tomorrow: any - callback added to LLM telemetry or the event pipeline) then - blocks on a lock owned by the loop thread that is itself - ``await``-ing ``astep`` to return. Keeping post-prompt work on - the caller's thread sidesteps the whole class of cross-thread - state-lock deadlocks. See #3348 / #3350 for the full diagnosis. + ``loop.run_in_executor(None, self.step, ...)``, moving post-prompt + callbacks and state updates to a worker thread. Keeping this path + native-async leaves finalization on the caller's loop task, where + ``LocalConversation`` can serialize each emitted event with a + short state-lock acquire and avoid the cross-thread deadlocks + diagnosed in #3348 / #3350. Bridge ``session_update`` notifications continue to fire on the - portal thread (no marshalling here) — they reach the user's - ``on_event`` chain via the agent-server's - ``_emit_event_from_thread`` queue, which already handles the - thread hop. Real-time mid-turn delivery of those events is a - separate concern (the queue waits for ``arun()`` to release the - state lock between iterations); it is not part of the deadlock - this fix removes. + portal thread (no marshalling here). The ``on_event`` callback + supplied by ``LocalConversation.arun`` is responsible for taking + the state lock around each individual event. """ state = conversation.state diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index a6bb1ec60e..1a52eafc6d 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -34,6 +34,7 @@ ActionEvent, AgentErrorEvent, CondensationRequest, + Event, InterruptEvent, MessageEvent, ObservationEvent, @@ -765,6 +766,11 @@ def send_message(self, message: str | Message, sender: str | None = None) -> Non ) self._on_event(user_msg_event) + def _on_event_with_state_lock(self, event: Event) -> None: + """Emit an event while holding the conversation state lock.""" + with self._state: + self._on_event(event) + @observe(name="conversation.run") def run(self) -> None: """Runs the conversation until the agent finishes. @@ -950,6 +956,7 @@ async def arun(self) -> None: try: while True: logger.debug(f"Conversation arun iteration {iteration}") + acp_step_user_message_id: str | None = None with self._state: if self._state.execution_status in [ ConversationExecutionStatus.PAUSED, @@ -1000,12 +1007,70 @@ async def arun(self) -> None: ConversationExecutionStatus.RUNNING ) - await self.agent.astep( - self, - on_event=self._on_event, - on_token=self._on_token, - ) + if isinstance(self.agent, ACPAgent): + acp_step_user_message_id = self._state.last_user_message_id + else: + await self.agent.astep( + self, + on_event=self._on_event, + on_token=self._on_token, + ) + iteration += 1 + + if ( + self.state.execution_status + == ConversationExecutionStatus.WAITING_FOR_CONFIRMATION + ): + break + + if iteration >= self.max_iteration_per_run: + if ( + self._state.execution_status + == ConversationExecutionStatus.FINISHED + ): + break + error_msg = ( + f"Agent reached maximum iterations limit " + f"({self.max_iteration_per_run})." + ) + logger.error(error_msg) + self._state.execution_status = ( + ConversationExecutionStatus.ERROR + ) + self._on_event( + ConversationErrorEvent( + source="environment", + code="MaxIterationsReached", + detail=error_msg, + ) + ) + break + + continue + + # ACP prompt round-trips can run for minutes. Keep the state + # lock free while awaiting them so incoming user messages can + # be persisted immediately; event callbacks take the lock only + # for each individual mutation. + await self.agent.astep( + self, + on_event=self._on_event_with_state_lock, + on_token=self._on_token, + ) + with self._state: iteration += 1 + if ( + self._state.execution_status + == ConversationExecutionStatus.FINISHED + and self._state.last_user_message_id is not None + and self._state.last_user_message_id != acp_step_user_message_id + ): + logger.info( + "User message arrived during ACP step; continuing run" + ) + self._state.execution_status = ( + ConversationExecutionStatus.RUNNING + ) if ( self.state.execution_status diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index 7f0bd2f689..4d3028823e 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -1369,9 +1369,9 @@ def _fake_run_async(_coro, **_kwargs): class TestACPAgentAstep: """Native ``ACPAgent.astep`` must not fall back to ``AgentBase.astep`` (which wraps ``step`` in ``loop.run_in_executor``). Doing so would - move post-prompt callbacks onto an executor worker thread, - deadlocking against ``LocalConversation.arun`` which holds the - state's reentrant ``FIFOLock`` on the loop thread. See #3348. + move post-prompt callbacks and state updates onto an executor worker + thread, outside ``LocalConversation.arun``'s controlled event + serialization. See #3348. """ def _make_conversation_with_message(self, tmp_path, text="Hello"): @@ -1401,10 +1401,9 @@ def test_astep_overrides_default_agentbase_implementation(self): def test_astep_runs_post_prompt_callbacks_on_caller_thread(self, tmp_path): """Post-prompt ``on_event`` callbacks must fire on the caller - thread (same thread that holds ``state.lock`` in ``arun``). - ``FIFOLock`` is reentrant per-thread; if astep schedules ``step`` - on a worker thread (the buggy default), callbacks run cross-thread - and block on the lock owner forever — see #3348. + thread. If astep schedules ``step`` on a worker thread (the buggy + default), callbacks and final state updates run outside the async + run task's serialization model — see #3348. """ from openhands.sdk.utils.async_executor import AsyncExecutor @@ -1673,12 +1672,11 @@ async def _fake_prompt(prompt_blocks, session_id): def test_astep_does_not_deadlock_under_reentrant_state_lock(self, tmp_path): """End-to-end shape of the #3348 bug. - Mirrors ``LocalConversation.arun``: holds ``state.lock`` on the - loop thread across ``await astep(...)``, while a post-prompt - callback re-acquires it (same shape as ``stats_callback``'s - ``with state:``). With astep overridden, the callback runs on - the same thread as the lock owner — FIFOLock's reentrancy lets - it through. Without the override, this hangs. + Covers direct callers that hold ``state.lock`` on the loop thread + across ``await astep(...)`` while a post-prompt callback + re-acquires it. With astep overridden, the callback runs on the + same thread as the lock owner — FIFOLock's reentrancy lets it + through. Without the override, this hangs. """ from openhands.sdk.utils.async_executor import AsyncExecutor diff --git a/tests/sdk/conversation/local/test_conversation_send_message.py b/tests/sdk/conversation/local/test_conversation_send_message.py index b43c4c7e7b..ca010211b8 100644 --- a/tests/sdk/conversation/local/test_conversation_send_message.py +++ b/tests/sdk/conversation/local/test_conversation_send_message.py @@ -1,5 +1,7 @@ +import asyncio from unittest.mock import patch +import pytest from pydantic import SecretStr from openhands.sdk.agent.acp_agent import ACPAgent @@ -201,3 +203,74 @@ def _finish_immediately(self, conv, on_event, on_token=None): conversation.state.execution_status == ConversationExecutionStatus.FINISHED ) assert conversation.state.events[-1] == user_event + + +@pytest.mark.asyncio +async def test_acp_arun_accepts_user_message_while_step_is_in_flight(tmp_path): + """ACP user messages should be persisted while a long async turn is running.""" + + agent = ACPAgent(acp_command=["echo", "test"]) + conversation = LocalConversation( + agent=agent, + workspace=str(tmp_path), + max_iteration_per_run=4, + stuck_detection=False, + ) + conversation.send_message("initial request") + + first_step_started = asyncio.Event() + release_first_step = asyncio.Event() + second_step_seen = asyncio.Event() + prompts_seen: list[str] = [] + + def latest_user_text(conv: LocalConversation) -> str: + for event in reversed(list(conv.state.events)): + if isinstance(event, MessageEvent) and event.source == "user": + content = event.llm_message.content[0] + assert isinstance(content, TextContent) + return content.text + raise AssertionError("expected at least one user message") + + async def blocking_astep( + self, # noqa: ARG001 + conv: LocalConversation, + on_event: ConversationCallbackType, # noqa: ARG001 + on_token: ConversationTokenCallbackType | None = None, # noqa: ARG001 + ) -> None: + prompts_seen.append(latest_user_text(conv)) + if len(prompts_seen) == 1: + first_step_started.set() + await release_first_step.wait() + else: + second_step_seen.set() + conv.state.execution_status = ConversationExecutionStatus.FINISHED + + with ( + patch.object(ACPAgent, "init_state", autospec=True), + patch.object(ACPAgent, "astep", new=blocking_astep), + ): + run_task = asyncio.create_task(conversation.arun()) + send_done = asyncio.Event() + + async def send_intervening_message() -> None: + await asyncio.to_thread(conversation.send_message, "intervening request") + send_done.set() + + await asyncio.wait_for(first_step_started.wait(), timeout=1.0) + send_task = asyncio.create_task(send_intervening_message()) + + completed_promptly = True + try: + await asyncio.wait_for(send_done.wait(), timeout=0.2) + except TimeoutError: + completed_promptly = False + finally: + release_first_step.set() + await asyncio.wait_for(send_task, timeout=1.0) + await asyncio.wait_for(second_step_seen.wait(), timeout=1.0) + await asyncio.wait_for(run_task, timeout=1.0) + + assert completed_promptly, ( + "send_message() should not wait for the in-flight ACP prompt to finish" + ) + assert prompts_seen == ["initial request", "intervening request"] From f50ed7292b6b2baa3725653434b3785943fcf89e Mon Sep 17 00:00:00 2001 From: Graham Neubig <398875+neubig@users.noreply.github.com> Date: Sun, 24 May 2026 10:40:49 -0400 Subject: [PATCH 04/34] fix(agent-server): stream ACP text deltas --- .../openhands/agent_server/event_service.py | 13 ++++++---- tests/agent_server/test_event_streaming.py | 26 ++++++++++++++++++- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/event_service.py b/openhands-agent-server/openhands/agent_server/event_service.py index 70e329a1d4..f27f1d881c 100644 --- a/openhands-agent-server/openhands/agent_server/event_service.py +++ b/openhands-agent-server/openhands/agent_server/event_service.py @@ -611,11 +611,14 @@ async def start(self): self._pub_sub, loop=asyncio.get_running_loop() ) - # Only wire token streaming if at least one LLM has stream=True. - # The LLM silently ignores on_token when stream is off, but skipping - # the wiring lets us log the decision so operators can tell from a - # log line whether deltas will flow. - streaming_enabled = any(llm.stream for llm in agent.get_all_llms()) + from openhands.sdk.agent import ACPAgent + + # Only wire token streaming for agents that can actually emit token + # callbacks. SDK LLM agents need stream=True, while ACP agents emit + # AgentMessageChunk text through their bridge without exposing an LLM. + streaming_enabled = isinstance(agent, ACPAgent) or any( + llm.stream for llm in agent.get_all_llms() + ) logger.debug( "Token streaming: %s", "enabled" if streaming_enabled else "disabled (no LLM has stream=True)", diff --git a/tests/agent_server/test_event_streaming.py b/tests/agent_server/test_event_streaming.py index 5279d73a8c..71b2f73ec9 100644 --- a/tests/agent_server/test_event_streaming.py +++ b/tests/agent_server/test_event_streaming.py @@ -12,7 +12,7 @@ from openhands.agent_server.models import StoredConversation from openhands.agent_server.pub_sub import Subscriber from openhands.sdk import Event -from openhands.sdk.agent import Agent +from openhands.sdk.agent import ACPAgent, Agent from openhands.sdk.event import StreamingDeltaEvent from openhands.sdk.llm import LLM from openhands.sdk.workspace import LocalWorkspace @@ -218,6 +218,30 @@ async def test_token_callbacks_not_wired_when_stream_disabled(tmp_path): assert MockConv.call_args.kwargs["token_callbacks"] == [] +@pytest.mark.asyncio +async def test_acp_agents_wire_token_callback_without_llm_streaming(tmp_path): + """ACP AgentMessageChunk text should stream even though ACPAgent has no LLM.""" + service = EventService( + stored=StoredConversation( + id=uuid4(), + agent=ACPAgent(acp_command=["echo", "test"]), + workspace=LocalWorkspace(working_dir=str(tmp_path / "workspace")), + ), + conversations_dir=tmp_path / "conversations", + ) + (tmp_path / "workspace").mkdir(exist_ok=True) + + with _mock_local_conversation() as MockConv: + mock_conv = MagicMock() + mock_conv.state = MagicMock(execution_status="idle") + mock_conv._state = MagicMock() + mock_conv._on_event = MagicMock() + MockConv.return_value = mock_conv + + await service.start() + assert len(MockConv.call_args.kwargs["token_callbacks"]) == 1 + + @pytest.mark.asyncio async def test_multiple_chunks_produce_multiple_events(event_service, tmp_path): collector = _CollectorSubscriber() From 71f81487ff5a89764bfc783bfdbea0299ba30d31 Mon Sep 17 00:00:00 2001 From: Graham Neubig <398875+neubig@users.noreply.github.com> Date: Sun, 24 May 2026 11:02:30 -0400 Subject: [PATCH 05/34] fix(acp): interrupt running turn on new user message --- .../openhands/agent_server/event_service.py | 12 +++ .../openhands/sdk/agent/acp_agent.py | 20 +++++ tests/agent_server/test_event_service.py | 75 +++++++++++++++++++ tests/sdk/agent/test_acp_agent.py | 7 ++ 4 files changed, 114 insertions(+) diff --git a/openhands-agent-server/openhands/agent_server/event_service.py b/openhands-agent-server/openhands/agent_server/event_service.py index f27f1d881c..908309477c 100644 --- a/openhands-agent-server/openhands/agent_server/event_service.py +++ b/openhands-agent-server/openhands/agent_server/event_service.py @@ -18,6 +18,7 @@ ) from openhands.agent_server.pub_sub import PubSub, Subscriber from openhands.sdk import LLM, AgentBase, Event, Message, get_logger +from openhands.sdk.agent import ACPAgent from openhands.sdk.conversation.base import BaseConversation from openhands.sdk.conversation.impl.local_conversation import LocalConversation from openhands.sdk.conversation.response_utils import get_agent_final_response @@ -419,10 +420,21 @@ async def send_message(self, message: Message, run: bool = False): loop = asyncio.get_running_loop() await loop.run_in_executor(None, self._conversation.send_message, message) if run: + if await self._should_interrupt_running_acp_turn(): + await self.interrupt() # Already running or inactive — message was sent, skip run. with suppress(ValueError): await self.run() + async def _should_interrupt_running_acp_turn(self) -> bool: + if not self._conversation: + return False + if self._run_task is None or self._run_task.done(): + return False + if not isinstance(self._conversation.agent, ACPAgent): + return False + return await self._get_execution_status() == ConversationExecutionStatus.RUNNING + async def subscribe_to_events(self, subscriber: Subscriber[Event]) -> UUID: subscriber_id = self._pub_sub.subscribe(subscriber) diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 88af0f8c03..cc8ba6ec3c 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -17,6 +17,7 @@ from __future__ import annotations import asyncio +import inspect import json import os import threading @@ -1282,6 +1283,22 @@ def _cancel_inflight_tool_calls(self) -> None: exc_info=True, ) + def _request_session_cancel(self) -> None: + """Ask the ACP server to cancel the active session prompt.""" + if self._conn is None or self._executor is None or self._session_id is None: + return + session_id = self._session_id + + async def _cancel() -> None: + result = self._conn.cancel(session_id) + if inspect.isawaitable(result): + await result + + try: + self._executor.portal.start_task_soon(_cancel) + except Exception: + logger.warning("Failed to send ACP session cancel", exc_info=True) + def _build_acp_prompt( self, event: MessageEvent ) -> list[TextContentBlock | ImageContentBlock] | None: @@ -1607,6 +1624,7 @@ async def _prompt() -> PromptResponse | None: logger.info("ACP prompt returned in %.1fs", elapsed) self._finalize_successful_turn(response, elapsed, state, on_event) except TimeoutError: + self._request_session_cancel() self._emit_turn_timeout(time.monotonic() - t0, state, on_event) except Exception as e: self._emit_turn_error(e, state, on_event) @@ -1757,9 +1775,11 @@ async def astep( # ``ActionEvent``s, not ``ACPToolCallEvent``s). Cancel-emit on # the caller thread while callbacks are still wired, then re-raise # so ``arun()`` can transition to PAUSED. + self._request_session_cancel() self._cancel_inflight_tool_calls() raise except TimeoutError: + self._request_session_cancel() self._emit_turn_timeout(time.monotonic() - t0, state, on_event) except Exception as e: self._emit_turn_error(e, state, on_event) diff --git a/tests/agent_server/test_event_service.py b/tests/agent_server/test_event_service.py index 20ac59329b..c59c7761e4 100644 --- a/tests/agent_server/test_event_service.py +++ b/tests/agent_server/test_event_service.py @@ -23,7 +23,9 @@ ) from openhands.agent_server.pub_sub import Subscriber from openhands.sdk import LLM, Agent, Conversation, Message +from openhands.sdk.agent import ACPAgent from openhands.sdk.conversation.fifo_lock import FIFOLock +from openhands.sdk.conversation.impl.local_conversation import LocalConversation from openhands.sdk.conversation.state import ( ConversationExecutionStatus, ConversationState, @@ -825,6 +827,79 @@ async def test_send_message_with_run_true_agent_idle(self, event_service): # Verify run was called since agent was idle conversation.run.assert_called_once() + @pytest.mark.asyncio + async def test_send_message_with_run_true_interrupts_running_acp_turn( + self, event_service, tmp_path + ): + """A new user message should interrupt an in-flight ACP prompt.""" + agent = ACPAgent(acp_command=["echo", "test"]) + conversation = LocalConversation( + agent=agent, + workspace=str(tmp_path), + max_iteration_per_run=4, + stuck_detection=False, + ) + conversation.send_message("initial request") + event_service._conversation = conversation + event_service._publish_state_update = AsyncMock() + + first_step_started = asyncio.Event() + first_step_cancelled = asyncio.Event() + second_step_seen = asyncio.Event() + prompts_seen: list[str] = [] + + def latest_user_text(conv: LocalConversation) -> str: + for event in reversed(list(conv.state.events)): + if isinstance(event, MessageEvent) and event.source == "user": + content = event.llm_message.content[0] + assert isinstance(content, TextContent) + return content.text + raise AssertionError("expected at least one user message") + + async def blocking_astep( + self, # noqa: ARG001 + conv: LocalConversation, + on_event, # noqa: ARG001 + on_token=None, # noqa: ARG001 + ) -> None: + prompts_seen.append(latest_user_text(conv)) + if len(prompts_seen) == 1: + first_step_started.set() + try: + await asyncio.Event().wait() + except asyncio.CancelledError: + first_step_cancelled.set() + raise + + second_step_seen.set() + conv.state.execution_status = ConversationExecutionStatus.FINISHED + + with ( + patch.object(ACPAgent, "init_state", autospec=True), + patch.object(ACPAgent, "astep", new=blocking_astep), + ): + try: + await event_service.run() + await asyncio.wait_for(first_step_started.wait(), timeout=1.0) + + await event_service.send_message( + Message(role="user", content=[TextContent(text="intervening")]), + run=True, + ) + + await asyncio.wait_for(first_step_cancelled.wait(), timeout=1.0) + await asyncio.wait_for(second_step_seen.wait(), timeout=1.0) + finally: + if ( + event_service._run_task is not None + and not event_service._run_task.done() + ): + conversation.interrupt() + with suppress(asyncio.CancelledError, TimeoutError): + await asyncio.wait_for(event_service._run_task, timeout=1.0) + + assert prompts_seen == ["initial request", "intervening"] + @pytest.mark.asyncio async def test_send_message_with_run_true_logs_exception(self, event_service): """Test that exceptions from conversation.run() are caught and logged.""" diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index 4d3028823e..ed2aeba135 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -1542,6 +1542,7 @@ def test_astep_emits_failed_tool_calls_on_cancellation(self, tmp_path): async def _run_with_cancel() -> None: prompt_entered = asyncio.Event() + cancel_called = asyncio.Event() caller_loop = asyncio.get_running_loop() async def _fake_prompt(prompt_blocks, session_id): @@ -1568,7 +1569,12 @@ async def _fake_prompt(prompt_blocks, session_id): await asyncio.sleep(60) return None + async def _fake_cancel(session_id): + assert session_id == "test-session" + caller_loop.call_soon_threadsafe(cancel_called.set) + agent._conn.prompt = _fake_prompt + agent._conn.cancel = _fake_cancel agent._session_id = "test-session" task = asyncio.create_task( @@ -1578,6 +1584,7 @@ async def _fake_prompt(prompt_blocks, session_id): task.cancel() with pytest.raises(asyncio.CancelledError): await task + await asyncio.wait_for(cancel_called.wait(), timeout=5.0) try: agent._executor = executor From b9dc93b552de5b974d0a650786078027a2b69818 Mon Sep 17 00:00:00 2001 From: Graham Neubig <398875+neubig@users.noreply.github.com> Date: Sun, 24 May 2026 11:15:56 -0400 Subject: [PATCH 06/34] fix(agent-server): handle ACP string token deltas --- .../openhands/agent_server/event_service.py | 33 ++++++++++++------ tests/agent_server/test_event_streaming.py | 34 +++++++++++++++++++ 2 files changed, 56 insertions(+), 11 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/event_service.py b/openhands-agent-server/openhands/agent_server/event_service.py index 908309477c..2148ab601a 100644 --- a/openhands-agent-server/openhands/agent_server/event_service.py +++ b/openhands-agent-server/openhands/agent_server/event_service.py @@ -636,31 +636,42 @@ async def start(self): "enabled" if streaming_enabled else "disabled (no LLM has stream=True)", ) - def _token_streaming_callback(chunk: LLMStreamChunk) -> None: + def _publish_stream_delta( + content: str | None = None, + reasoning_content: str | None = None, + ) -> None: # Published directly to _pub_sub (not via _callback_wrapper) so # deltas reach subscribers but are NOT persisted to # ConversationState.events. See StreamingDeltaEvent docstring. if not self._main_loop or not self._main_loop.is_running(): return + # Use `is not None` rather than truthiness: some providers + # emit legitimate empty-string chunks at stream boundaries + # (e.g. after a tool call) that we still want to forward. + if content is None and reasoning_content is None: + return + event = StreamingDeltaEvent( + content=content, + reasoning_content=reasoning_content, + ) + with suppress(RuntimeError): + asyncio.run_coroutine_threadsafe(self._pub_sub(event), self._main_loop) + + def _token_streaming_callback(chunk: LLMStreamChunk | str) -> None: + if isinstance(chunk, str): + _publish_stream_delta(content=chunk) + return + for choice in chunk.choices or (): delta = choice.delta if delta is None: continue content = getattr(delta, "content", None) reasoning = getattr(delta, "reasoning_content", None) - # Use `is not None` rather than truthiness: some providers - # emit legitimate empty-string chunks at stream boundaries - # (e.g. after a tool call) that we still want to forward. - if content is None and reasoning is None: - continue - event = StreamingDeltaEvent( + _publish_stream_delta( content=content if isinstance(content, str) else None, reasoning_content=reasoning if isinstance(reasoning, str) else None, ) - with suppress(RuntimeError): - asyncio.run_coroutine_threadsafe( - self._pub_sub(event), self._main_loop - ) conversation = LocalConversation( agent=agent, diff --git a/tests/agent_server/test_event_streaming.py b/tests/agent_server/test_event_streaming.py index 71b2f73ec9..bfae85edc3 100644 --- a/tests/agent_server/test_event_streaming.py +++ b/tests/agent_server/test_event_streaming.py @@ -242,6 +242,40 @@ async def test_acp_agents_wire_token_callback_without_llm_streaming(tmp_path): assert len(MockConv.call_args.kwargs["token_callbacks"]) == 1 +@pytest.mark.asyncio +async def test_acp_string_token_callback_publishes_delta(tmp_path): + """ACPAgent invokes token callbacks with plain text chunks.""" + service = EventService( + stored=StoredConversation( + id=uuid4(), + agent=ACPAgent(acp_command=["echo", "test"]), + workspace=LocalWorkspace(working_dir=str(tmp_path / "workspace")), + ), + conversations_dir=tmp_path / "conversations", + ) + collector = _CollectorSubscriber() + service._pub_sub.subscribe(collector) + (tmp_path / "workspace").mkdir(exist_ok=True) + + with _mock_local_conversation() as MockConv: + mock_conv = MagicMock() + mock_conv.state = MagicMock(execution_status="idle") + mock_conv._state = MagicMock() + mock_conv._on_event = MagicMock() + MockConv.return_value = mock_conv + + await service.start() + callback = MockConv.call_args.kwargs["token_callbacks"][0] + + callback("ACP live text") + await asyncio.sleep(0.05) + + delta_events = [e for e in collector.events if isinstance(e, StreamingDeltaEvent)] + assert len(delta_events) == 1 + assert delta_events[0].content == "ACP live text" + assert delta_events[0].reasoning_content is None + + @pytest.mark.asyncio async def test_multiple_chunks_produce_multiple_events(event_service, tmp_path): collector = _CollectorSubscriber() From 6c8902646ed4fbed9a95d869b7e1942dd82accf7 Mon Sep 17 00:00:00 2001 From: openhands Date: Sun, 24 May 2026 18:20:05 +0000 Subject: [PATCH 07/34] fix: address ACP async turn races (#3376) Co-authored-by: openhands --- .../openhands/sdk/agent/acp_agent.py | 85 ++++++++++++++----- .../conversation/impl/local_conversation.py | 11 +++ tests/agent_server/test_event_service.py | 17 ++-- 3 files changed, 85 insertions(+), 28 deletions(-) diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index cc8ba6ec3c..65dd984517 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -94,6 +94,13 @@ # These errors can occur when the connection drops mid-conversation but the # session state is still valid on the server side. _ACP_PROMPT_MAX_RETRIES: int = int(os.environ.get("ACP_PROMPT_MAX_RETRIES", "3")) + +# After a timeout/cancellation, wait briefly for the ACP prompt task to react +# to session/cancel before rewiring callbacks for the next turn. +_ACP_CANCEL_DRAIN_TIMEOUT: float = float( + os.environ.get("ACP_CANCEL_DRAIN_TIMEOUT", "2.0") +) + _ACP_PROMPT_RETRY_DELAYS: tuple[float, ...] = (5.0, 15.0, 30.0) # seconds # Exception types that indicate transient connection issues worth retrying @@ -1283,6 +1290,39 @@ def _cancel_inflight_tool_calls(self) -> None: exc_info=True, ) + async def _arequest_session_cancel(self) -> None: + """Async variant of _request_session_cancel that waits for cancel send.""" + if self._conn is None or self._executor is None or self._session_id is None: + return + session_id = self._session_id + + async def _cancel() -> None: + result = self._conn.cancel(session_id) + if inspect.isawaitable(result): + await result + + try: + future = self._executor.portal.start_task_soon(_cancel) + await asyncio.wrap_future(future) + except Exception: + logger.warning("Failed to send ACP session cancel", exc_info=True) + + async def _drain_cancelled_prompt(self, future: Any | None) -> None: + """Let a cancelled/timed-out portal prompt quiesce before rewiring.""" + if future is None or future.done(): + return + try: + await asyncio.wait_for( + asyncio.shield(asyncio.wrap_future(future)), + timeout=_ACP_CANCEL_DRAIN_TIMEOUT, + ) + except asyncio.CancelledError: + if future.cancelled(): + return + raise + except Exception: + return + def _request_session_cancel(self) -> None: """Ask the ACP server to cancel the active session prompt.""" if self._conn is None or self._executor is None or self._session_id is None: @@ -1641,6 +1681,7 @@ async def astep( conversation: LocalConversation, on_event: ConversationCallbackType, on_token: ConversationTokenCallbackType | None = None, + prompt_message: MessageEvent | None = None, ) -> None: """Native-async variant of :meth:`step`. @@ -1672,11 +1713,14 @@ async def astep( state = conversation.state prompt_blocks: list[Any] | None = None - for event in reversed(list(state.events)): - if isinstance(event, MessageEvent) and event.source == "user": - prompt_blocks = self._build_acp_prompt(event) - if prompt_blocks: - break + if prompt_message is not None: + prompt_blocks = self._build_acp_prompt(prompt_message) + else: + for event in reversed(list(state.events)): + if isinstance(event, MessageEvent) and event.source == "user": + prompt_blocks = self._build_acp_prompt(event) + if prompt_blocks: + break if prompt_blocks is None: logger.warning("No user message found; finishing conversation") state.execution_status = ConversationExecutionStatus.FINISHED @@ -1685,6 +1729,7 @@ async def astep( self._reset_client_for_turn(on_token, on_event) t0 = time.monotonic() + prompt_future: Any | None = None try: logger.info( "Sending ACP prompt (timeout=%.0fs, blocks=%d, async)", @@ -1699,17 +1744,16 @@ async def astep( try: # Schedule the ACP prompt on the portal loop (where the # connection lives); await the future back on the caller - # loop. On timeout ``asyncio.wait_for`` cancels the - # caller-side asyncio future; the portal task may run to - # completion in the background (anyio starts it - # immediately on ``start_task_soon`` and - # ``concurrent.futures.Future.cancel()`` returns ``False`` - # for an already-running task), but - # ``_clear_turn_callbacks()`` in ``finally`` ensures any - # trailing ``session_update`` from that task is a no-op. - future = portal.start_task_soon(self._do_acp_prompt, prompt_blocks) + # loop. Shield the portal task from wait_for timeout so + # the timeout/cancellation handlers can send session/cancel + # and briefly drain the task before the next turn rewires + # callbacks. + prompt_future = portal.start_task_soon( + self._do_acp_prompt, + prompt_blocks, + ) response = await asyncio.wait_for( - asyncio.wrap_future(future), + asyncio.shield(asyncio.wrap_future(prompt_future)), timeout=self.acp_prompt_timeout, ) break @@ -1773,14 +1817,17 @@ async def astep( # before cancellation stays live in the event log forever # (``LocalConversation._emit_orphaned_action_errors`` only patches # ``ActionEvent``s, not ``ACPToolCallEvent``s). Cancel-emit on - # the caller thread while callbacks are still wired, then re-raise - # so ``arun()`` can transition to PAUSED. - self._request_session_cancel() + # the caller thread while callbacks are still wired, then wait + # briefly for the portal prompt to observe the session/cancel + # before allowing the next turn to rewire shared accumulators. + await self._arequest_session_cancel() self._cancel_inflight_tool_calls() + await self._drain_cancelled_prompt(prompt_future) raise except TimeoutError: - self._request_session_cancel() + await self._arequest_session_cancel() self._emit_turn_timeout(time.monotonic() - t0, state, on_event) + await self._drain_cancelled_prompt(prompt_future) except Exception as e: self._emit_turn_error(e, state, on_event) raise diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index 1a52eafc6d..738f172941 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -957,6 +957,7 @@ async def arun(self) -> None: while True: logger.debug(f"Conversation arun iteration {iteration}") acp_step_user_message_id: str | None = None + acp_step_user_message: MessageEvent | None = None with self._state: if self._state.execution_status in [ ConversationExecutionStatus.PAUSED, @@ -1009,6 +1010,15 @@ async def arun(self) -> None: if isinstance(self.agent, ACPAgent): acp_step_user_message_id = self._state.last_user_message_id + acp_step_user_message = next( + ( + event + for event in reversed(self._state.events) + if isinstance(event, MessageEvent) + and event.source == "user" + ), + None, + ) else: await self.agent.astep( self, @@ -1056,6 +1066,7 @@ async def arun(self) -> None: self, on_event=self._on_event_with_state_lock, on_token=self._on_token, + prompt_message=acp_step_user_message, ) with self._state: iteration += 1 diff --git a/tests/agent_server/test_event_service.py b/tests/agent_server/test_event_service.py index e583288110..c072827e96 100644 --- a/tests/agent_server/test_event_service.py +++ b/tests/agent_server/test_event_service.py @@ -848,21 +848,20 @@ async def test_send_message_with_run_true_interrupts_running_acp_turn( second_step_seen = asyncio.Event() prompts_seen: list[str] = [] - def latest_user_text(conv: LocalConversation) -> str: - for event in reversed(list(conv.state.events)): - if isinstance(event, MessageEvent) and event.source == "user": - content = event.llm_message.content[0] - assert isinstance(content, TextContent) - return content.text - raise AssertionError("expected at least one user message") + def user_text(event: MessageEvent | None) -> str: + assert event is not None + content = event.llm_message.content[0] + assert isinstance(content, TextContent) + return content.text async def blocking_astep( self, # noqa: ARG001 - conv: LocalConversation, + conv: LocalConversation, # noqa: ARG001 on_event, # noqa: ARG001 on_token=None, # noqa: ARG001 + prompt_message: MessageEvent | None = None, ) -> None: - prompts_seen.append(latest_user_text(conv)) + prompts_seen.append(user_text(prompt_message)) if len(prompts_seen) == 1: first_step_started.set() try: From 49583a2bb11ccd906197efb69aa0f451fd67e309 Mon Sep 17 00:00:00 2001 From: openhands Date: Sun, 24 May 2026 18:22:57 +0000 Subject: [PATCH 08/34] fix: satisfy ACP prompt future typing (#3376) Co-authored-by: openhands --- .../openhands/sdk/agent/acp_agent.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 65dd984517..4f5a6cdd80 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -24,6 +24,7 @@ import time import uuid from collections.abc import Generator +from concurrent.futures import Future from pathlib import Path from typing import TYPE_CHECKING, Any, Literal @@ -1307,7 +1308,10 @@ async def _cancel() -> None: except Exception: logger.warning("Failed to send ACP session cancel", exc_info=True) - async def _drain_cancelled_prompt(self, future: Any | None) -> None: + async def _drain_cancelled_prompt( + self, + future: Future[PromptResponse | None] | None, + ) -> None: """Let a cancelled/timed-out portal prompt quiesce before rewiring.""" if future is None or future.done(): return @@ -1729,7 +1733,7 @@ async def astep( self._reset_client_for_turn(on_token, on_event) t0 = time.monotonic() - prompt_future: Any | None = None + prompt_future: Future[PromptResponse | None] | None = None try: logger.info( "Sending ACP prompt (timeout=%.0fs, blocks=%d, async)", @@ -1748,12 +1752,15 @@ async def astep( # the timeout/cancellation handlers can send session/cancel # and briefly drain the task before the next turn rewires # callbacks. - prompt_future = portal.start_task_soon( - self._do_acp_prompt, - prompt_blocks, + current_prompt_future: Future[PromptResponse | None] = ( + portal.start_task_soon( + self._do_acp_prompt, + prompt_blocks, + ) ) + prompt_future = current_prompt_future response = await asyncio.wait_for( - asyncio.shield(asyncio.wrap_future(prompt_future)), + asyncio.shield(asyncio.wrap_future(current_prompt_future)), timeout=self.acp_prompt_timeout, ) break From 55b6c3801e421f17231cc03b067e5e4c1b519243 Mon Sep 17 00:00:00 2001 From: openhands Date: Sun, 24 May 2026 18:28:08 +0000 Subject: [PATCH 09/34] test: update ACP arun prompt snapshot test (#3376) Co-authored-by: openhands --- .../local/test_conversation_send_message.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/tests/sdk/conversation/local/test_conversation_send_message.py b/tests/sdk/conversation/local/test_conversation_send_message.py index ca010211b8..85f43dce97 100644 --- a/tests/sdk/conversation/local/test_conversation_send_message.py +++ b/tests/sdk/conversation/local/test_conversation_send_message.py @@ -223,21 +223,20 @@ async def test_acp_arun_accepts_user_message_while_step_is_in_flight(tmp_path): second_step_seen = asyncio.Event() prompts_seen: list[str] = [] - def latest_user_text(conv: LocalConversation) -> str: - for event in reversed(list(conv.state.events)): - if isinstance(event, MessageEvent) and event.source == "user": - content = event.llm_message.content[0] - assert isinstance(content, TextContent) - return content.text - raise AssertionError("expected at least one user message") + def user_text(event: MessageEvent | None) -> str: + assert event is not None + content = event.llm_message.content[0] + assert isinstance(content, TextContent) + return content.text async def blocking_astep( self, # noqa: ARG001 - conv: LocalConversation, + conv: LocalConversation, # noqa: ARG001 on_event: ConversationCallbackType, # noqa: ARG001 on_token: ConversationTokenCallbackType | None = None, # noqa: ARG001 + prompt_message: MessageEvent | None = None, ) -> None: - prompts_seen.append(latest_user_text(conv)) + prompts_seen.append(user_text(prompt_message)) if len(prompts_seen) == 1: first_step_started.set() await release_first_step.wait() From c9bbb68b8c9db8e069e84973acd64f8c00e4bde9 Mon Sep 17 00:00:00 2001 From: openhands Date: Sun, 24 May 2026 18:57:27 +0000 Subject: [PATCH 10/34] fix: close ACP async ordering gaps (#3376) Co-authored-by: openhands --- .../openhands/sdk/agent/acp_agent.py | 11 ++-- .../conversation/impl/local_conversation.py | 10 ++-- .../local/test_conversation_send_message.py | 58 +++++++++++++++++++ 3 files changed, 70 insertions(+), 9 deletions(-) diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 4f5a6cdd80..8f05418f5c 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -1814,7 +1814,8 @@ async def astep( elapsed = time.monotonic() - t0 logger.info("ACP prompt returned in %.1fs (async)", elapsed) - self._finalize_successful_turn(response, elapsed, state, on_event) + with state: + self._finalize_successful_turn(response, elapsed, state, on_event) except asyncio.CancelledError: # ``asyncio.CancelledError`` inherits from ``BaseException``, not # ``Exception`` — so it would otherwise bypass the generic handler @@ -1824,12 +1825,12 @@ async def astep( # before cancellation stays live in the event log forever # (``LocalConversation._emit_orphaned_action_errors`` only patches # ``ActionEvent``s, not ``ACPToolCallEvent``s). Cancel-emit on - # the caller thread while callbacks are still wired, then wait - # briefly for the portal prompt to observe the session/cancel - # before allowing the next turn to rewire shared accumulators. + # the caller thread after the portal prompt has observed + # session/cancel, so late cancelled-turn updates cannot overwrite + # the terminal synthetic failures. await self._arequest_session_cancel() - self._cancel_inflight_tool_calls() await self._drain_cancelled_prompt(prompt_future) + self._cancel_inflight_tool_calls() raise except TimeoutError: await self._arequest_session_cancel() diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index 738f172941..3598409a06 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -1070,11 +1070,13 @@ async def arun(self) -> None: ) with self._state: iteration += 1 - if ( - self._state.execution_status - == ConversationExecutionStatus.FINISHED - and self._state.last_user_message_id is not None + acp_user_message_changed = ( + self._state.last_user_message_id is not None and self._state.last_user_message_id != acp_step_user_message_id + ) + if acp_user_message_changed and self._state.execution_status in ( + ConversationExecutionStatus.FINISHED, + ConversationExecutionStatus.IDLE, ): logger.info( "User message arrived during ACP step; continuing run" diff --git a/tests/sdk/conversation/local/test_conversation_send_message.py b/tests/sdk/conversation/local/test_conversation_send_message.py index 85f43dce97..70177f26b7 100644 --- a/tests/sdk/conversation/local/test_conversation_send_message.py +++ b/tests/sdk/conversation/local/test_conversation_send_message.py @@ -273,3 +273,61 @@ async def send_intervening_message() -> None: "send_message() should not wait for the in-flight ACP prompt to finish" ) assert prompts_seen == ["initial request", "intervening request"] + + +@pytest.mark.asyncio +async def test_acp_arun_marks_queued_message_running_after_finish_gap(tmp_path): + """Queued ACP messages should resume RUNNING even if send sees FINISHED.""" + + agent = ACPAgent(acp_command=["echo", "test"]) + conversation = LocalConversation( + agent=agent, + workspace=str(tmp_path), + max_iteration_per_run=4, + stuck_detection=False, + ) + conversation.send_message("initial request") + + first_step_finished = asyncio.Event() + release_first_step = asyncio.Event() + second_step_seen = asyncio.Event() + second_step_statuses: list[ConversationExecutionStatus] = [] + prompts_seen: list[str] = [] + + def user_text(event: MessageEvent | None) -> str: + assert event is not None + content = event.llm_message.content[0] + assert isinstance(content, TextContent) + return content.text + + async def blocking_astep( + self, # noqa: ARG001 + conv: LocalConversation, + on_event: ConversationCallbackType, # noqa: ARG001 + on_token: ConversationTokenCallbackType | None = None, # noqa: ARG001 + prompt_message: MessageEvent | None = None, + ) -> None: + prompts_seen.append(user_text(prompt_message)) + if len(prompts_seen) == 1: + conv.state.execution_status = ConversationExecutionStatus.FINISHED + first_step_finished.set() + await release_first_step.wait() + else: + second_step_statuses.append(conv.state.execution_status) + conv.state.execution_status = ConversationExecutionStatus.FINISHED + second_step_seen.set() + + with ( + patch.object(ACPAgent, "init_state", autospec=True), + patch.object(ACPAgent, "astep", new=blocking_astep), + ): + run_task = asyncio.create_task(conversation.arun()) + await asyncio.wait_for(first_step_finished.wait(), timeout=1.0) + await asyncio.to_thread(conversation.send_message, "intervening request") + assert conversation.state.execution_status == ConversationExecutionStatus.IDLE + release_first_step.set() + await asyncio.wait_for(second_step_seen.wait(), timeout=1.0) + await asyncio.wait_for(run_task, timeout=1.0) + + assert prompts_seen == ["initial request", "intervening request"] + assert second_step_statuses == [ConversationExecutionStatus.RUNNING] From ce68215eeabd0bc9c44180213d13dd28985ffa42 Mon Sep 17 00:00:00 2001 From: openhands Date: Sun, 24 May 2026 19:37:12 +0000 Subject: [PATCH 11/34] fix: address ACP cancellation edge cases (#3376) Co-authored-by: openhands --- .../openhands/sdk/agent/acp_agent.py | 39 ++++++++++++--- .../conversation/impl/local_conversation.py | 9 ++++ tests/sdk/agent/test_acp_agent.py | 7 ++- .../local/test_conversation_send_message.py | 49 +++++++++++++++++++ 4 files changed, 96 insertions(+), 8 deletions(-) diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 8f05418f5c..92cd8572d8 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -1311,21 +1311,44 @@ async def _cancel() -> None: async def _drain_cancelled_prompt( self, future: Future[PromptResponse | None] | None, - ) -> None: + ) -> bool: """Let a cancelled/timed-out portal prompt quiesce before rewiring.""" if future is None or future.done(): - return + return True try: await asyncio.wait_for( asyncio.shield(asyncio.wrap_future(future)), timeout=_ACP_CANCEL_DRAIN_TIMEOUT, ) + return True except asyncio.CancelledError: if future.cancelled(): - return + return True raise + except TimeoutError: + logger.warning( + "Timed out waiting for cancelled ACP prompt to drain; " + "the ACP session will be restarted before the next turn" + ) + return False except Exception: - return + return future.done() + + def _restart_session_after_drain_timeout( + self, + state: ConversationState, + on_event: ConversationCallbackType, + ) -> None: + """Restart ACP after a prompt failed to quiesce post-cancel.""" + logger.warning("Restarting ACP session after cancelled prompt drain timeout") + self._cleanup() + self._initialized = False + state.agent_state = { + key: value + for key, value in state.agent_state.items() + if key not in {"acp_session_id", "acp_session_cwd"} + } + self.init_state(state, on_event=on_event) def _request_session_cancel(self) -> None: """Ask the ACP server to cancel the active session prompt.""" @@ -1829,13 +1852,17 @@ async def astep( # session/cancel, so late cancelled-turn updates cannot overwrite # the terminal synthetic failures. await self._arequest_session_cancel() - await self._drain_cancelled_prompt(prompt_future) + drained = await self._drain_cancelled_prompt(prompt_future) self._cancel_inflight_tool_calls() + if not drained: + self._restart_session_after_drain_timeout(state, on_event) raise except TimeoutError: await self._arequest_session_cancel() + drained = await self._drain_cancelled_prompt(prompt_future) self._emit_turn_timeout(time.monotonic() - t0, state, on_event) - await self._drain_cancelled_prompt(prompt_future) + if not drained: + self._restart_session_after_drain_timeout(state, on_event) except Exception as e: self._emit_turn_error(e, state, on_event) raise diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index 3598409a06..4944c2fda4 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -1078,6 +1078,15 @@ async def arun(self) -> None: ConversationExecutionStatus.FINISHED, ConversationExecutionStatus.IDLE, ): + if iteration >= self.max_iteration_per_run: + logger.info( + "User message arrived during final ACP iteration; " + "leaving conversation idle for a follow-up run" + ) + self._state.execution_status = ( + ConversationExecutionStatus.IDLE + ) + break logger.info( "User message arrived during ACP step; continuing run" ) diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index ed2aeba135..d468134236 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -1543,6 +1543,7 @@ def test_astep_emits_failed_tool_calls_on_cancellation(self, tmp_path): async def _run_with_cancel() -> None: prompt_entered = asyncio.Event() cancel_called = asyncio.Event() + prompt_released = threading.Event() caller_loop = asyncio.get_running_loop() async def _fake_prompt(prompt_blocks, session_id): @@ -1565,13 +1566,15 @@ async def _fake_prompt(prompt_blocks, session_id): # Signal caller loop that we're holding inside the prompt # so the cancel races deterministically. caller_loop.call_soon_threadsafe(prompt_entered.set) - # Block long enough for the cancel to land. - await asyncio.sleep(60) + # Block until cancellation releases the prompt, exercising + # prompt quiescing without leaving a long-running portal task. + await asyncio.to_thread(prompt_released.wait) return None async def _fake_cancel(session_id): assert session_id == "test-session" caller_loop.call_soon_threadsafe(cancel_called.set) + prompt_released.set() agent._conn.prompt = _fake_prompt agent._conn.cancel = _fake_cancel diff --git a/tests/sdk/conversation/local/test_conversation_send_message.py b/tests/sdk/conversation/local/test_conversation_send_message.py index 70177f26b7..eef1326dfa 100644 --- a/tests/sdk/conversation/local/test_conversation_send_message.py +++ b/tests/sdk/conversation/local/test_conversation_send_message.py @@ -331,3 +331,52 @@ async def blocking_astep( assert prompts_seen == ["initial request", "intervening request"] assert second_step_statuses == [ConversationExecutionStatus.RUNNING] + + +@pytest.mark.asyncio +async def test_acp_arun_leaves_queued_message_idle_at_iteration_cap(tmp_path): + """A queued ACP message at the run cap should wait for another run.""" + + agent = ACPAgent(acp_command=["echo", "test"]) + conversation = LocalConversation( + agent=agent, + workspace=str(tmp_path), + max_iteration_per_run=1, + stuck_detection=False, + ) + conversation.send_message("initial request") + + step_finished = asyncio.Event() + release_step = asyncio.Event() + prompts_seen: list[str] = [] + + def user_text(event: MessageEvent | None) -> str: + assert event is not None + content = event.llm_message.content[0] + assert isinstance(content, TextContent) + return content.text + + async def blocking_astep( + self, # noqa: ARG001 + conv: LocalConversation, + on_event: ConversationCallbackType, # noqa: ARG001 + on_token: ConversationTokenCallbackType | None = None, # noqa: ARG001 + prompt_message: MessageEvent | None = None, + ) -> None: + prompts_seen.append(user_text(prompt_message)) + conv.state.execution_status = ConversationExecutionStatus.FINISHED + step_finished.set() + await release_step.wait() + + with ( + patch.object(ACPAgent, "init_state", autospec=True), + patch.object(ACPAgent, "astep", new=blocking_astep), + ): + run_task = asyncio.create_task(conversation.arun()) + await asyncio.wait_for(step_finished.wait(), timeout=1.0) + await asyncio.to_thread(conversation.send_message, "intervening request") + release_step.set() + await asyncio.wait_for(run_task, timeout=1.0) + + assert prompts_seen == ["initial request"] + assert conversation.state.execution_status == ConversationExecutionStatus.IDLE From f581a0115bf2b3ea8838acc3351046976127b1b7 Mon Sep 17 00:00:00 2001 From: openhands Date: Sun, 24 May 2026 20:02:22 +0000 Subject: [PATCH 12/34] fix: address ACP review edge cases (#3376) Co-authored-by: openhands --- .../openhands/agent_server/event_service.py | 2 +- .../openhands/sdk/agent/acp_agent.py | 16 +++- .../conversation/impl/local_conversation.py | 48 ++++++++-- .../local/test_conversation_send_message.py | 89 +++++++++++++++++++ 4 files changed, 142 insertions(+), 13 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/event_service.py b/openhands-agent-server/openhands/agent_server/event_service.py index 356efc6ca2..0880ffa940 100644 --- a/openhands-agent-server/openhands/agent_server/event_service.py +++ b/openhands-agent-server/openhands/agent_server/event_service.py @@ -901,7 +901,7 @@ async def interrupt(self): # state update (PAUSED + InterruptEvent) cleanly. if self._run_task is not None and not self._run_task.done(): with suppress(Exception): - await asyncio.wait_for(self._run_task, timeout=5.0) + await asyncio.wait_for(asyncio.shield(self._run_task), timeout=5.0) # Only clear _run_task if it actually finished; if # wait_for timed out the task may still be running and # clearing prematurely would allow a second run() to diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 92cd8572d8..0e38425847 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -768,6 +768,7 @@ def model_post_init(self, __context: object) -> None: # "installed" — already in subprocess history; skip further injection _suffix_install_state: str = PrivateAttr(default="unused") _installed_suffix: str | None = PrivateAttr(default=None) + _restart_session_on_next_turn: bool = PrivateAttr(default=False) # -- Helpers ----------------------------------------------------------- @@ -1304,7 +1305,12 @@ async def _cancel() -> None: try: future = self._executor.portal.start_task_soon(_cancel) - await asyncio.wrap_future(future) + await asyncio.wait_for( + asyncio.shield(asyncio.wrap_future(future)), + timeout=_ACP_CANCEL_DRAIN_TIMEOUT, + ) + except TimeoutError: + logger.warning("Timed out sending ACP session cancel; continuing cleanup") except Exception: logger.warning("Failed to send ACP session cancel", exc_info=True) @@ -1346,8 +1352,9 @@ def _restart_session_after_drain_timeout( state.agent_state = { key: value for key, value in state.agent_state.items() - if key not in {"acp_session_id", "acp_session_cwd"} + if key not in {"acp_session_id", "acp_session_cwd", "acp_suffix_installed"} } + self._restart_session_on_next_turn = False self.init_state(state, on_event=on_event) def _request_session_cancel(self) -> None: @@ -1739,6 +1746,9 @@ async def astep( """ state = conversation.state + if self._restart_session_on_next_turn: + self._restart_session_after_drain_timeout(state, on_event) + prompt_blocks: list[Any] | None = None if prompt_message is not None: prompt_blocks = self._build_acp_prompt(prompt_message) @@ -1855,7 +1865,7 @@ async def astep( drained = await self._drain_cancelled_prompt(prompt_future) self._cancel_inflight_tool_calls() if not drained: - self._restart_session_after_drain_timeout(state, on_event) + self._restart_session_on_next_turn = True raise except TimeoutError: await self._arequest_session_cancel() diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index 4944c2fda4..affc5af050 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -953,6 +953,7 @@ async def arun(self) -> None: self._state.execution_status = ConversationExecutionStatus.RUNNING iteration = 0 + last_acp_prompt_user_message_id: str | None = None try: while True: logger.debug(f"Conversation arun iteration {iteration}") @@ -1009,15 +1010,35 @@ async def arun(self) -> None: ) if isinstance(self.agent, ACPAgent): - acp_step_user_message_id = self._state.last_user_message_id - acp_step_user_message = next( - ( - event - for event in reversed(self._state.events) - if isinstance(event, MessageEvent) - and event.source == "user" - ), - None, + user_messages = [ + event + for event in self._state.events + if isinstance(event, MessageEvent) + and event.source == "user" + ] + if last_acp_prompt_user_message_id is None: + acp_step_user_message = ( + user_messages[-1] if user_messages else None + ) + else: + last_prompt_index = next( + ( + index + for index, event in enumerate(user_messages) + if event.id == last_acp_prompt_user_message_id + ), + None, + ) + acp_step_user_message = ( + user_messages[last_prompt_index + 1] + if last_prompt_index is not None + and last_prompt_index + 1 < len(user_messages) + else None + ) + acp_step_user_message_id = ( + acp_step_user_message.id + if acp_step_user_message is not None + else None ) else: await self.agent.astep( @@ -1070,6 +1091,15 @@ async def arun(self) -> None: ) with self._state: iteration += 1 + if acp_step_user_message_id is not None: + last_acp_prompt_user_message_id = acp_step_user_message_id + + if self._state.execution_status in ( + ConversationExecutionStatus.ERROR, + ConversationExecutionStatus.STUCK, + ): + break + acp_user_message_changed = ( self._state.last_user_message_id is not None and self._state.last_user_message_id != acp_step_user_message_id diff --git a/tests/sdk/conversation/local/test_conversation_send_message.py b/tests/sdk/conversation/local/test_conversation_send_message.py index eef1326dfa..8d66071648 100644 --- a/tests/sdk/conversation/local/test_conversation_send_message.py +++ b/tests/sdk/conversation/local/test_conversation_send_message.py @@ -333,6 +333,95 @@ async def blocking_astep( assert second_step_statuses == [ConversationExecutionStatus.RUNNING] +@pytest.mark.asyncio +async def test_acp_arun_processes_multiple_queued_messages_fifo(tmp_path): + """ACP arun should not skip earlier messages queued during a prompt.""" + + agent = ACPAgent(acp_command=["echo", "test"]) + conversation = LocalConversation( + agent=agent, + workspace=str(tmp_path), + max_iteration_per_run=4, + stuck_detection=False, + ) + conversation.send_message("initial request") + + first_step_finished = asyncio.Event() + release_first_step = asyncio.Event() + all_queued_steps_seen = asyncio.Event() + prompts_seen: list[str] = [] + + def user_text(event: MessageEvent | None) -> str: + assert event is not None + content = event.llm_message.content[0] + assert isinstance(content, TextContent) + return content.text + + async def blocking_astep( + self, # noqa: ARG001 + conv: LocalConversation, + on_event: ConversationCallbackType, # noqa: ARG001 + on_token: ConversationTokenCallbackType | None = None, # noqa: ARG001 + prompt_message: MessageEvent | None = None, + ) -> None: + prompts_seen.append(user_text(prompt_message)) + conv.state.execution_status = ConversationExecutionStatus.FINISHED + if len(prompts_seen) == 1: + first_step_finished.set() + await release_first_step.wait() + elif prompts_seen[-2:] == ["queued one", "queued two"]: + all_queued_steps_seen.set() + + with ( + patch.object(ACPAgent, "init_state", autospec=True), + patch.object(ACPAgent, "astep", new=blocking_astep), + ): + run_task = asyncio.create_task(conversation.arun()) + await asyncio.wait_for(first_step_finished.wait(), timeout=1.0) + await asyncio.to_thread(conversation.send_message, "queued one") + await asyncio.to_thread(conversation.send_message, "queued two") + release_first_step.set() + await asyncio.wait_for(all_queued_steps_seen.wait(), timeout=1.0) + await asyncio.wait_for(run_task, timeout=1.0) + + assert prompts_seen == ["initial request", "queued one", "queued two"] + + +@pytest.mark.asyncio +async def test_acp_arun_stops_after_agent_sets_error(tmp_path): + """ACP timeout/error statuses should not be replaced by max-iteration errors.""" + + agent = ACPAgent(acp_command=["echo", "test"]) + conversation = LocalConversation( + agent=agent, + workspace=str(tmp_path), + max_iteration_per_run=3, + stuck_detection=False, + ) + conversation.send_message("initial request") + prompts_seen: list[str] = [] + + async def failing_astep( + self, # noqa: ARG001 + conv: LocalConversation, + on_event: ConversationCallbackType, # noqa: ARG001 + on_token: ConversationTokenCallbackType | None = None, # noqa: ARG001 + prompt_message: MessageEvent | None = None, + ) -> None: + assert prompt_message is not None + prompts_seen.append("prompt") + conv.state.execution_status = ConversationExecutionStatus.ERROR + + with ( + patch.object(ACPAgent, "init_state", autospec=True), + patch.object(ACPAgent, "astep", new=failing_astep), + ): + await asyncio.wait_for(conversation.arun(), timeout=1.0) + + assert prompts_seen == ["prompt"] + assert conversation.state.execution_status == ConversationExecutionStatus.ERROR + + @pytest.mark.asyncio async def test_acp_arun_leaves_queued_message_idle_at_iteration_cap(tmp_path): """A queued ACP message at the run cap should wait for another run.""" From ffeb8816e7ca31da6701787778c1f00854b68c05 Mon Sep 17 00:00:00 2001 From: openhands Date: Sun, 24 May 2026 20:21:06 +0000 Subject: [PATCH 13/34] fix: close ACP rerun race windows (#3376) Co-authored-by: openhands --- .../openhands/agent_server/event_service.py | 30 ++++++----- .../openhands/sdk/agent/acp_agent.py | 10 ++-- .../conversation/impl/local_conversation.py | 7 ++- .../local/test_conversation_send_message.py | 54 +++++++++++++++++++ 4 files changed, 83 insertions(+), 18 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/event_service.py b/openhands-agent-server/openhands/agent_server/event_service.py index 0880ffa940..776e44981d 100644 --- a/openhands-agent-server/openhands/agent_server/event_service.py +++ b/openhands-agent-server/openhands/agent_server/event_service.py @@ -838,21 +838,25 @@ async def _run_and_publish(): # wrapping up. A send_message(run=True) that arrived during # the wait_for_pending() tail above had its run() rejected as # "conversation_already_running" and suppressed, setting - # _rerun_requested. Honor it only while the conversation is - # still IDLE — i.e. that message is genuinely pending. If the - # run loop was still alive it already absorbed the message - # (LocalConversation.run() keeps looping on FINISHED) and we - # are FINISHED here, so the IDLE guard avoids a redundant run. - # A deliberate run=False append, or an IDLE reached via - # another path, never sets the flag. + # _rerun_requested. Honor it while the conversation is IDLE + # (pending input) or ACP-interrupted PAUSED (the old task + # finished its interrupt before the replacement run could + # start). If the run loop was still alive it already absorbed + # the message and we are FINISHED here, so the guard avoids a + # redundant run. A deliberate run=False append, or an IDLE + # reached via another path, never sets the flag. if self._rerun_requested: - self._rerun_requested = False - if ( - await self._get_execution_status() - == ConversationExecutionStatus.IDLE - ): - with suppress(ValueError): + status = await self._get_execution_status() + should_restart = status == ConversationExecutionStatus.IDLE or ( + status == ConversationExecutionStatus.PAUSED + and isinstance(conversation.agent, ACPAgent) + ) + if should_restart: + self._rerun_requested = False + try: await self.run() + except ValueError: + self._rerun_requested = True # Create task but don't await it - runs in background self._run_task = asyncio.create_task(_run_and_publish()) diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 0e38425847..d36520befc 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -1870,11 +1870,13 @@ async def astep( except TimeoutError: await self._arequest_session_cancel() drained = await self._drain_cancelled_prompt(prompt_future) - self._emit_turn_timeout(time.monotonic() - t0, state, on_event) - if not drained: - self._restart_session_after_drain_timeout(state, on_event) + with state: + self._emit_turn_timeout(time.monotonic() - t0, state, on_event) + if not drained: + self._restart_session_after_drain_timeout(state, on_event) except Exception as e: - self._emit_turn_error(e, state, on_event) + with state: + self._emit_turn_error(e, state, on_event) raise finally: self._clear_turn_callbacks() diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index affc5af050..4823a731db 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -953,7 +953,9 @@ async def arun(self) -> None: self._state.execution_status = ConversationExecutionStatus.RUNNING iteration = 0 - last_acp_prompt_user_message_id: str | None = None + last_acp_prompt_user_message_id = self._state.agent_state.get( + "acp_last_prompt_user_message_id" + ) try: while True: logger.debug(f"Conversation arun iteration {iteration}") @@ -1093,6 +1095,9 @@ async def arun(self) -> None: iteration += 1 if acp_step_user_message_id is not None: last_acp_prompt_user_message_id = acp_step_user_message_id + self._state.agent_state["acp_last_prompt_user_message_id"] = ( + acp_step_user_message_id + ) if self._state.execution_status in ( ConversationExecutionStatus.ERROR, diff --git a/tests/sdk/conversation/local/test_conversation_send_message.py b/tests/sdk/conversation/local/test_conversation_send_message.py index 8d66071648..80cd4ac6a6 100644 --- a/tests/sdk/conversation/local/test_conversation_send_message.py +++ b/tests/sdk/conversation/local/test_conversation_send_message.py @@ -387,6 +387,60 @@ async def blocking_astep( assert prompts_seen == ["initial request", "queued one", "queued two"] +@pytest.mark.asyncio +async def test_acp_arun_resumes_queued_messages_fifo_after_iteration_cap(tmp_path): + """Queued ACP messages should remain FIFO across follow-up runs.""" + + agent = ACPAgent(acp_command=["echo", "test"]) + conversation = LocalConversation( + agent=agent, + workspace=str(tmp_path), + max_iteration_per_run=1, + stuck_detection=False, + ) + conversation.send_message("initial request") + + first_step_finished = asyncio.Event() + release_first_step = asyncio.Event() + prompts_seen: list[str] = [] + + def user_text(event: MessageEvent | None) -> str: + assert event is not None + content = event.llm_message.content[0] + assert isinstance(content, TextContent) + return content.text + + async def blocking_astep( + self, # noqa: ARG001 + conv: LocalConversation, + on_event: ConversationCallbackType, # noqa: ARG001 + on_token: ConversationTokenCallbackType | None = None, # noqa: ARG001 + prompt_message: MessageEvent | None = None, + ) -> None: + prompts_seen.append(user_text(prompt_message)) + conv.state.execution_status = ConversationExecutionStatus.FINISHED + if len(prompts_seen) == 1: + first_step_finished.set() + await release_first_step.wait() + + with ( + patch.object(ACPAgent, "init_state", autospec=True), + patch.object(ACPAgent, "astep", new=blocking_astep), + ): + first_run = asyncio.create_task(conversation.arun()) + await asyncio.wait_for(first_step_finished.wait(), timeout=1.0) + await asyncio.to_thread(conversation.send_message, "queued one") + await asyncio.to_thread(conversation.send_message, "queued two") + release_first_step.set() + await asyncio.wait_for(first_run, timeout=1.0) + + assert conversation.state.execution_status == ConversationExecutionStatus.IDLE + await asyncio.wait_for(conversation.arun(), timeout=1.0) + await asyncio.wait_for(conversation.arun(), timeout=1.0) + + assert prompts_seen == ["initial request", "queued one", "queued two"] + + @pytest.mark.asyncio async def test_acp_arun_stops_after_agent_sets_error(tmp_path): """ACP timeout/error statuses should not be replaced by max-iteration errors.""" From 1c390afa4f108ada147c4dae657b4c416924bb2b Mon Sep 17 00:00:00 2001 From: Graham Neubig <398875+neubig@users.noreply.github.com> Date: Sun, 24 May 2026 16:38:42 -0400 Subject: [PATCH 14/34] fix(acp): reassign agent state for prompt tracking --- .../sdk/conversation/impl/local_conversation.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index 4823a731db..8f339ae206 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -1095,9 +1095,12 @@ async def arun(self) -> None: iteration += 1 if acp_step_user_message_id is not None: last_acp_prompt_user_message_id = acp_step_user_message_id - self._state.agent_state["acp_last_prompt_user_message_id"] = ( - acp_step_user_message_id - ) + self._state.agent_state = { + **self._state.agent_state, + "acp_last_prompt_user_message_id": ( + acp_step_user_message_id + ), + } if self._state.execution_status in ( ConversationExecutionStatus.ERROR, From c3749ffa83e04f8f64ff102aa5529ef5c65e591c Mon Sep 17 00:00:00 2001 From: Graham Neubig <398875+neubig@users.noreply.github.com> Date: Sun, 24 May 2026 21:48:49 -0400 Subject: [PATCH 15/34] fix(acp): resume session after cancel drain timeout Co-authored-by: openhands --- .../openhands/sdk/agent/acp_agent.py | 40 ++++++++++--- tests/sdk/agent/test_acp_agent.py | 57 +++++++++++++++++++ 2 files changed, 88 insertions(+), 9 deletions(-) diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index d36520befc..665faf2d13 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -769,6 +769,7 @@ def model_post_init(self, __context: object) -> None: _suffix_install_state: str = PrivateAttr(default="unused") _installed_suffix: str | None = PrivateAttr(default=None) _restart_session_on_next_turn: bool = PrivateAttr(default=False) + _resumed_existing_session: bool = PrivateAttr(default=False) # -- Helpers ----------------------------------------------------------- @@ -932,6 +933,10 @@ def init_state( # will re-inject the suffix on the first turn after upgrade, which # is benign — the suffix is additive LLM-context guidance. suffix_already_installed = bool(state.agent_state.get("acp_suffix_installed")) + # Tests that patch out _start_acp_server rely on this best-effort + # initial value. The real start path overwrites it with whether + # load_session actually succeeded. + self._resumed_existing_session = bool(state.agent_state.get("acp_session_id")) try: self._start_acp_server(state) @@ -952,17 +957,22 @@ def init_state( # in a different working directory would at best silently miss the # prior session and at worst load a different session that happens to # exist at the new cwd. - state.agent_state = { + updated_agent_state = { **state.agent_state, "acp_agent_name": self._agent_name, "acp_agent_version": self._agent_version, "acp_session_id": self._session_id, "acp_session_cwd": self._working_dir, } + if not self._resumed_existing_session: + updated_agent_state.pop("acp_suffix_installed", None) + state.agent_state = updated_agent_state if self._installed_suffix: self._suffix_install_state = ( - "installed" if suffix_already_installed else "pending_first_prompt" + "installed" + if suffix_already_installed and self._resumed_existing_session + else "pending_first_prompt" ) # Emit a placeholder system prompt so the visualizer shows a section @@ -1086,7 +1096,9 @@ def _start_acp_server(self, state: ConversationState) -> None: ) prior_session_id = None - async def _init() -> tuple[Any, Any, Any, str, str, str]: + self._resumed_existing_session = False + + async def _init() -> tuple[Any, Any, Any, str, str, str, bool]: # Spawn the subprocess directly so we can install a # filtering reader that skips non-JSON-RPC lines some # ACP servers (e.g. claude-code-acp v0.1.x) write to @@ -1169,6 +1181,7 @@ async def _init() -> tuple[Any, Any, Any, str, str, str]: # subprocess crash) propagate — there is no working connection to # fall back on, and the outer init_state handler cleans up. session_id: str | None = None + resumed_existing_session = False if prior_session_id is not None: try: await conn.load_session( @@ -1177,6 +1190,7 @@ async def _init() -> tuple[Any, Any, Any, str, str, str]: mcp_servers=[], ) session_id = prior_session_id + resumed_existing_session = True logger.info( "Resumed ACP session: %s (cwd=%s)", session_id, @@ -1215,7 +1229,15 @@ async def _init() -> tuple[Any, Any, Any, str, str, str]: logger.info("Setting ACP session mode: %s", mode_id) await conn.set_session_mode(mode_id=mode_id, session_id=session_id) - return conn, process, filtered_reader, session_id, agent_name, agent_version + return ( + conn, + process, + filtered_reader, + session_id, + agent_name, + agent_version, + resumed_existing_session, + ) result = self._executor.run_async(_init) ( @@ -1225,6 +1247,7 @@ async def _init() -> tuple[Any, Any, Any, str, str, str]: self._session_id, self._agent_name, self._agent_version, + self._resumed_existing_session, ) = result self._working_dir = working_dir @@ -1349,11 +1372,10 @@ def _restart_session_after_drain_timeout( logger.warning("Restarting ACP session after cancelled prompt drain timeout") self._cleanup() self._initialized = False - state.agent_state = { - key: value - for key, value in state.agent_state.items() - if key not in {"acp_session_id", "acp_session_cwd", "acp_suffix_installed"} - } + # A local drain timeout means the cancelled prompt did not quiesce + # within our short grace window; it does not prove the ACP server lost + # its persisted session. Preserve the session id so the restarted + # subprocess can load_session() and retain conversation memory. self._restart_session_on_next_turn = False self.init_state(state, on_event=on_event) diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index d468134236..f4dcdccb2c 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -3766,6 +3766,34 @@ def test_resume_reads_session_id_from_agent_state(self, tmp_path): conn.new_session.assert_not_awaited() assert agent._session_id == "stored-sess" + def test_cancel_drain_restart_preserves_session_id_for_resume(self, tmp_path): + """A cancelled-prompt drain timeout restarts the subprocess, but should + still load the persisted ACP session so the server keeps conversation + memory. + """ + agent = _make_agent( + agent_context=AgentContext(system_message_suffix="Team rules.") + ) + state = _make_state(tmp_path) + state.agent_state = { + **state.agent_state, + "acp_session_id": "stored-sess", + "acp_session_cwd": str(tmp_path), + "acp_suffix_installed": True, + } + conn = self._make_conn() + + with self._transport_patches(conn): + agent._restart_session_after_drain_timeout(state, on_event=lambda _: None) + + conn.load_session.assert_awaited_once() + conn.new_session.assert_not_awaited() + assert agent._session_id == "stored-sess" + assert state.agent_state["acp_session_id"] == "stored-sess" + assert state.agent_state["acp_session_cwd"] == str(tmp_path) + assert state.agent_state["acp_suffix_installed"] is True + assert agent._suffix_install_state == "installed" + def test_load_session_failure_falls_back_to_new_session(self, tmp_path): """ACPRequestError on load_session → new_session is called.""" agent = _make_agent() @@ -3878,6 +3906,35 @@ def test_fallback_replacement_id_lands_in_agent_state(self, tmp_path): assert state.agent_state["acp_session_id"] == "replacement-sess" assert state.agent_state["acp_session_cwd"] == str(tmp_path) + def test_fallback_replacement_clears_suffix_marker(self, tmp_path): + """If load_session fails, the replacement session has not seen any + suffix yet, even if the stale session had persisted the marker. + """ + agent = _make_agent( + agent_context=AgentContext(system_message_suffix="Team rules.") + ) + state = _make_state(tmp_path) + state.agent_state = { + **state.agent_state, + "acp_session_id": "stale-sess", + "acp_session_cwd": str(tmp_path), + "acp_suffix_installed": True, + } + conn = self._make_conn( + new_session_id="replacement-sess", + load_exc=ACPRequestError(-32602, "unknown session"), + ) + + with self._transport_patches(conn): + agent.init_state(state, on_event=lambda _: None) + + conn.load_session.assert_awaited_once() + conn.new_session.assert_awaited_once() + assert state.agent_state["acp_session_id"] == "replacement-sess" + assert state.agent_state["acp_session_cwd"] == str(tmp_path) + assert state.agent_state.get("acp_suffix_installed") is not True + assert agent._suffix_install_state == "pending_first_prompt" + def test_resume_path_still_applies_session_mode_and_model(self, tmp_path): """load_session must be followed by the same set_session_model and set_session_mode calls as new_session, so a resumed session honours From 6246d1ead1f4677ededd3ef97305f59c479dd5bf Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Mon, 25 May 2026 12:51:43 -0400 Subject: [PATCH 16/34] Address ACP async review races --- .../openhands/agent_server/event_service.py | 9 +- .../openhands/sdk/agent/acp_agent.py | 74 ++++++++++++---- .../conversation/impl/local_conversation.py | 9 +- tests/sdk/agent/test_acp_agent.py | 23 +++-- .../local/test_conversation_send_message.py | 86 +++++++++++++++++-- 5 files changed, 168 insertions(+), 33 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/event_service.py b/openhands-agent-server/openhands/agent_server/event_service.py index 776e44981d..726c88c4e1 100644 --- a/openhands-agent-server/openhands/agent_server/event_service.py +++ b/openhands-agent-server/openhands/agent_server/event_service.py @@ -846,17 +846,20 @@ async def _run_and_publish(): # redundant run. A deliberate run=False append, or an IDLE # reached via another path, never sets the flag. if self._rerun_requested: + self._rerun_requested = False status = await self._get_execution_status() should_restart = status == ConversationExecutionStatus.IDLE or ( status == ConversationExecutionStatus.PAUSED and isinstance(conversation.agent, ACPAgent) ) if should_restart: - self._rerun_requested = False try: await self.run() - except ValueError: - self._rerun_requested = True + except ValueError as e: + if str(e) == "conversation_already_running": + self._rerun_requested = True + else: + raise # Create task but don't await it - runs in background self._run_task = asyncio.create_task(_run_and_publish()) diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 665faf2d13..1d9e285ee7 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -26,7 +26,7 @@ from collections.abc import Generator from concurrent.futures import Future from pathlib import Path -from typing import TYPE_CHECKING, Any, Literal +from typing import TYPE_CHECKING, Any, Literal, NamedTuple from acp.client.connection import ClientSideConnection from acp.exceptions import RequestError as ACPRequestError @@ -155,6 +155,13 @@ _TERMINAL_TOOL_CALL_STATUSES: frozenset[str] = frozenset({"completed", "failed"}) +class _PromptDrainResult(NamedTuple): + drained: bool + completed: bool + response: PromptResponse | None + error: BaseException | None + + # Stable identifier stamped onto the sentinel LLM so downstream code # (e.g. title_utils) can detect "this LLM cannot be called" without # relying on the model name — which we overwrite with the real model @@ -1340,28 +1347,54 @@ async def _cancel() -> None: async def _drain_cancelled_prompt( self, future: Future[PromptResponse | None] | None, - ) -> bool: + ) -> _PromptDrainResult: """Let a cancelled/timed-out portal prompt quiesce before rewiring.""" - if future is None or future.done(): - return True + if future is None: + return _PromptDrainResult( + drained=True, completed=False, response=None, error=None + ) + if future.cancelled(): + return _PromptDrainResult( + drained=True, completed=False, response=None, error=None + ) + if future.done(): + try: + return _PromptDrainResult( + drained=True, + completed=True, + response=future.result(), + error=None, + ) + except BaseException as exc: + return _PromptDrainResult( + drained=True, completed=True, response=None, error=exc + ) try: - await asyncio.wait_for( + response = await asyncio.wait_for( asyncio.shield(asyncio.wrap_future(future)), timeout=_ACP_CANCEL_DRAIN_TIMEOUT, ) - return True + return _PromptDrainResult( + drained=True, completed=True, response=response, error=None + ) except asyncio.CancelledError: if future.cancelled(): - return True + return _PromptDrainResult( + drained=True, completed=False, response=None, error=None + ) raise except TimeoutError: logger.warning( "Timed out waiting for cancelled ACP prompt to drain; " "the ACP session will be restarted before the next turn" ) - return False - except Exception: - return future.done() + return _PromptDrainResult( + drained=False, completed=False, response=None, error=None + ) + except BaseException as exc: + return _PromptDrainResult( + drained=future.done(), completed=True, response=None, error=exc + ) def _restart_session_after_drain_timeout( self, @@ -1370,6 +1403,7 @@ def _restart_session_after_drain_timeout( ) -> None: """Restart ACP after a prompt failed to quiesce post-cancel.""" logger.warning("Restarting ACP session after cancelled prompt drain timeout") + self._clear_turn_callbacks() self._cleanup() self._initialized = False # A local drain timeout means the cancelled prompt did not quiesce @@ -1884,18 +1918,26 @@ async def astep( # session/cancel, so late cancelled-turn updates cannot overwrite # the terminal synthetic failures. await self._arequest_session_cancel() - drained = await self._drain_cancelled_prompt(prompt_future) + drain_result = await self._drain_cancelled_prompt(prompt_future) self._cancel_inflight_tool_calls() - if not drained: + if not drain_result.drained or drain_result.completed: self._restart_session_on_next_turn = True raise except TimeoutError: await self._arequest_session_cancel() - drained = await self._drain_cancelled_prompt(prompt_future) + drain_result = await self._drain_cancelled_prompt(prompt_future) with state: - self._emit_turn_timeout(time.monotonic() - t0, state, on_event) - if not drained: - self._restart_session_after_drain_timeout(state, on_event) + elapsed = time.monotonic() - t0 + if drain_result.completed and drain_result.error is None: + self._finalize_successful_turn( + drain_result.response, elapsed, state, on_event + ) + elif drain_result.completed and drain_result.error is not None: + self._emit_turn_error(drain_result.error, state, on_event) + self._restart_session_on_next_turn = True + else: + self._emit_turn_timeout(elapsed, state, on_event) + self._restart_session_on_next_turn = True except Exception as e: with state: self._emit_turn_error(e, state, on_event) diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index 8f339ae206..6766fee930 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -1020,7 +1020,7 @@ async def arun(self) -> None: ] if last_acp_prompt_user_message_id is None: acp_step_user_message = ( - user_messages[-1] if user_messages else None + user_messages[0] if user_messages else None ) else: last_prompt_index = next( @@ -1085,6 +1085,13 @@ async def arun(self) -> None: # lock free while awaiting them so incoming user messages can # be persisted immediately; event callbacks take the lock only # for each individual mutation. + if acp_step_user_message is None: + with self._state: + self._state.execution_status = ( + ConversationExecutionStatus.FINISHED + ) + break + await self.agent.astep( self, on_event=self._on_event_with_state_lock, diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index f4dcdccb2c..b314bf3c1a 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -1585,9 +1585,12 @@ async def _fake_cancel(session_id): ) await asyncio.wait_for(prompt_entered.wait(), timeout=5.0) task.cancel() - with pytest.raises(asyncio.CancelledError): - await task - await asyncio.wait_for(cancel_called.wait(), timeout=5.0) + try: + with pytest.raises(asyncio.CancelledError): + await task + await asyncio.wait_for(cancel_called.wait(), timeout=5.0) + finally: + prompt_released.set() try: agent._executor = executor @@ -1639,14 +1642,19 @@ def test_astep_cancellation_does_not_mark_suffix_installed(self, tmp_path): async def _run_with_cancel() -> None: prompt_entered = asyncio.Event() + prompt_released = threading.Event() caller_loop = asyncio.get_running_loop() async def _fake_prompt(prompt_blocks, session_id): caller_loop.call_soon_threadsafe(prompt_entered.set) - await asyncio.sleep(60) + await asyncio.to_thread(prompt_released.wait) return None + async def _fake_cancel(session_id): + prompt_released.set() + agent._conn.prompt = _fake_prompt + agent._conn.cancel = _fake_cancel agent._session_id = "test-session" task = asyncio.create_task( @@ -1654,8 +1662,11 @@ async def _fake_prompt(prompt_blocks, session_id): ) await asyncio.wait_for(prompt_entered.wait(), timeout=5.0) task.cancel() - with pytest.raises(asyncio.CancelledError): - await task + try: + with pytest.raises(asyncio.CancelledError): + await task + finally: + prompt_released.set() try: agent._executor = executor diff --git a/tests/sdk/conversation/local/test_conversation_send_message.py b/tests/sdk/conversation/local/test_conversation_send_message.py index 80cd4ac6a6..a4649a18a4 100644 --- a/tests/sdk/conversation/local/test_conversation_send_message.py +++ b/tests/sdk/conversation/local/test_conversation_send_message.py @@ -258,20 +258,14 @@ async def send_intervening_message() -> None: await asyncio.wait_for(first_step_started.wait(), timeout=1.0) send_task = asyncio.create_task(send_intervening_message()) - completed_promptly = True try: - await asyncio.wait_for(send_done.wait(), timeout=0.2) - except TimeoutError: - completed_promptly = False + await asyncio.wait_for(send_done.wait(), timeout=5.0) finally: release_first_step.set() await asyncio.wait_for(send_task, timeout=1.0) await asyncio.wait_for(second_step_seen.wait(), timeout=1.0) await asyncio.wait_for(run_task, timeout=1.0) - assert completed_promptly, ( - "send_message() should not wait for the in-flight ACP prompt to finish" - ) assert prompts_seen == ["initial request", "intervening request"] @@ -387,6 +381,84 @@ async def blocking_astep( assert prompts_seen == ["initial request", "queued one", "queued two"] +@pytest.mark.asyncio +async def test_acp_arun_processes_initial_queued_messages_fifo(tmp_path): + """ACP arun should process pre-run queued messages from oldest to newest.""" + + agent = ACPAgent(acp_command=["echo", "test"]) + conversation = LocalConversation( + agent=agent, + workspace=str(tmp_path), + max_iteration_per_run=3, + stuck_detection=False, + ) + conversation.send_message("queued one") + conversation.send_message("queued two") + + prompts_seen: list[str] = [] + + def user_text(event: MessageEvent | None) -> str: + assert event is not None + content = event.llm_message.content[0] + assert isinstance(content, TextContent) + return content.text + + async def blocking_astep( + self, # noqa: ARG001 + conv: LocalConversation, + on_event: ConversationCallbackType, # noqa: ARG001 + on_token: ConversationTokenCallbackType | None = None, # noqa: ARG001 + prompt_message: MessageEvent | None = None, + ) -> None: + prompts_seen.append(user_text(prompt_message)) + conv.state.execution_status = ConversationExecutionStatus.FINISHED + + with ( + patch.object(ACPAgent, "init_state", autospec=True), + patch.object(ACPAgent, "astep", new=blocking_astep), + ): + await asyncio.wait_for(conversation.arun(), timeout=1.0) + + assert prompts_seen == ["queued one", "queued two"] + + +@pytest.mark.asyncio +async def test_acp_arun_does_not_reprompt_when_cursor_is_current(tmp_path): + """ACP arun should finish when there is no queued user message.""" + + agent = ACPAgent(acp_command=["echo", "test"]) + conversation = LocalConversation( + agent=agent, + workspace=str(tmp_path), + max_iteration_per_run=3, + stuck_detection=False, + ) + conversation.send_message("already processed") + conversation.state.agent_state = { + "acp_last_prompt_user_message_id": conversation.state.last_user_message_id + } + + prompts_seen: list[MessageEvent | None] = [] + + async def blocking_astep( + self, # noqa: ARG001 + conv: LocalConversation, # noqa: ARG001 + on_event: ConversationCallbackType, # noqa: ARG001 + on_token: ConversationTokenCallbackType | None = None, # noqa: ARG001 + prompt_message: MessageEvent | None = None, + ) -> None: + prompts_seen.append(prompt_message) + + with ( + patch.object(ACPAgent, "init_state", autospec=True), + patch.object(ACPAgent, "astep", new=blocking_astep), + ): + await asyncio.wait_for(conversation.arun(), timeout=1.0) + + assert prompts_seen == [] + assert conversation.state.execution_status == ConversationExecutionStatus.FINISHED + + @pytest.mark.asyncio async def test_acp_arun_resumes_queued_messages_fifo_after_iteration_cap(tmp_path): """Queued ACP messages should remain FIFO across follow-up runs.""" From 682fad933666951726a1b37ca29b4c327ee84559 Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Mon, 25 May 2026 12:55:46 -0400 Subject: [PATCH 17/34] Clarify ACP queued-message cleanup fixes --- .../openhands/agent_server/event_service.py | 5 +++-- .../openhands/sdk/conversation/impl/local_conversation.py | 5 +++-- tests/sdk/agent/test_acp_agent.py | 3 ++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/event_service.py b/openhands-agent-server/openhands/agent_server/event_service.py index 726c88c4e1..5edede7703 100644 --- a/openhands-agent-server/openhands/agent_server/event_service.py +++ b/openhands-agent-server/openhands/agent_server/event_service.py @@ -845,8 +845,9 @@ async def _run_and_publish(): # the message and we are FINISHED here, so the guard avoids a # redundant run. A deliberate run=False append, or an IDLE # reached via another path, never sets the flag. - if self._rerun_requested: - self._rerun_requested = False + rerun_requested = self._rerun_requested + self._rerun_requested = False + if rerun_requested: status = await self._get_execution_status() should_restart = status == ConversationExecutionStatus.IDLE or ( status == ConversationExecutionStatus.PAUSED diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index 6766fee930..ae9789c4cc 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -1115,9 +1115,10 @@ async def arun(self) -> None: ): break + last_user_message_id = self._state.last_user_message_id acp_user_message_changed = ( - self._state.last_user_message_id is not None - and self._state.last_user_message_id != acp_step_user_message_id + last_user_message_id is not None + and last_user_message_id != last_acp_prompt_user_message_id ) if acp_user_message_changed and self._state.execution_status in ( ConversationExecutionStatus.FINISHED, diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index b314bf3c1a..5e3d2e36f4 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -1568,7 +1568,8 @@ async def _fake_prompt(prompt_blocks, session_id): caller_loop.call_soon_threadsafe(prompt_entered.set) # Block until cancellation releases the prompt, exercising # prompt quiescing without leaving a long-running portal task. - await asyncio.to_thread(prompt_released.wait) + released = await asyncio.to_thread(prompt_released.wait, 10.0) + assert released return None async def _fake_cancel(session_id): From 741399e4588ae8b6c2ef88903a1e83bbcc11175d Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Mon, 25 May 2026 13:02:36 -0400 Subject: [PATCH 18/34] Fix ACP resume cursor after cancellation --- openhands-sdk/openhands/sdk/agent/acp_agent.py | 2 ++ .../sdk/conversation/impl/local_conversation.py | 16 +++++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 1d9e285ee7..625906ae2a 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -1647,6 +1647,8 @@ def _clear_turn_callbacks(self) -> None: FIFOLock held by anyone — without unwiring, a stale ``on_event`` there would race with other threads mutating ``state.events``). """ + if self._client is None: + return self._client.on_event = None self._client.on_token = None self._client.on_activity = None diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index ae9789c4cc..05eb0cd7e8 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -1092,14 +1092,7 @@ async def arun(self) -> None: ) break - await self.agent.astep( - self, - on_event=self._on_event_with_state_lock, - on_token=self._on_token, - prompt_message=acp_step_user_message, - ) with self._state: - iteration += 1 if acp_step_user_message_id is not None: last_acp_prompt_user_message_id = acp_step_user_message_id self._state.agent_state = { @@ -1109,6 +1102,15 @@ async def arun(self) -> None: ), } + await self.agent.astep( + self, + on_event=self._on_event_with_state_lock, + on_token=self._on_token, + prompt_message=acp_step_user_message, + ) + with self._state: + iteration += 1 + if self._state.execution_status in ( ConversationExecutionStatus.ERROR, ConversationExecutionStatus.STUCK, From 89a68ff996e88cdbdbf2eb1a0eb5f0369f9b6db7 Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Mon, 25 May 2026 13:20:11 -0400 Subject: [PATCH 19/34] Treat ACP prompt timeout as idle timeout --- .../openhands/sdk/agent/acp_agent.py | 72 ++++++++++++++--- tests/sdk/agent/test_acp_agent.py | 81 +++++++++++++++++++ 2 files changed, 143 insertions(+), 10 deletions(-) diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 625906ae2a..f9ee355262 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -421,6 +421,10 @@ def __init__(self) -> None: # ACPAgent.step() to keep the agent-server's idle timer alive. self.on_activity: Any = None # Callable[[], None] | None self._last_activity_signal: float = float("-inf") + # Monotonic counter bumped for every non-fork session_update. ACPAgent + # uses this to distinguish an idle prompt from a long-running tool call + # such as `git` that is still sending protocol activity. + self._activity_version: int = 0 # Telemetry state from UsageUpdate (persists across turns) self._last_cost: float = 0.0 # last cumulative cost seen self._last_cost_by_session: dict[str, float] = {} @@ -447,6 +451,21 @@ def reset(self) -> None: # Note: telemetry state (_last_cost, _context_window, _last_activity_signal, # etc.) is intentionally NOT cleared — it accumulates across turns. + @property + def activity_version(self) -> int: + """Version counter for session_update activity observed this process.""" + return self._activity_version + + def has_inflight_tool_calls(self) -> bool: + """Return whether any live ACP tool call is not terminal yet.""" + return any( + tc.get("status") not in _TERMINAL_TOOL_CALL_STATUSES + for tc in self.accumulated_tool_calls + ) + + def _note_session_activity(self) -> None: + self._activity_version += 1 + def prepare_usage_sync(self, session_id: str) -> asyncio.Event: """Prepare per-turn UsageUpdate synchronization for a session.""" event = asyncio.Event() @@ -480,6 +499,8 @@ async def session_update( self._fork_accumulated_text.append(update.content.text) return + self._note_session_activity() + if isinstance(update, AgentMessageChunk): if isinstance(update.content, TextContentBlock): text = update.content.text @@ -1508,6 +1529,45 @@ async def _do_acp_prompt(self, prompt_blocks: list[Any]) -> PromptResponse | Non ) return response + async def _await_prompt_response_with_idle_timeout( + self, + prompt_future: Future[PromptResponse | None], + ) -> PromptResponse | None: + """Await a prompt response, extending the timeout during tool activity. + + ``acp_prompt_timeout`` is an idle timeout for ACP turns, not a hard wall + clock for legitimate long-running tools. Commands such as `git` can run + longer than the default prompt timeout while still being active inside + the ACP subprocess. If the timeout fires while we have seen new + session_update activity or while any tool call remains non-terminal, + keep waiting; otherwise treat it as a wedged ACP prompt. + """ + wrapped = asyncio.wrap_future(prompt_future) + last_activity_version = self._client.activity_version + while True: + try: + return await asyncio.wait_for( + asyncio.shield(wrapped), + timeout=self.acp_prompt_timeout, + ) + except TimeoutError as exc: + activity_version = self._client.activity_version + has_inflight_tool = self._client.has_inflight_tool_calls() + if activity_version != last_activity_version or has_inflight_tool: + last_activity_version = activity_version + logger.info( + "ACP prompt exceeded %.0fs but still has %s; " + "extending timeout", + self.acp_prompt_timeout, + "an in-flight tool call" + if has_inflight_tool + else "session activity", + ) + continue + raise TimeoutError( + f"ACP prompt timed out after {self.acp_prompt_timeout:.0f}s" + ) from exc + def _finalize_successful_turn( self, response: PromptResponse | None, @@ -1850,18 +1910,10 @@ async def astep( ) ) prompt_future = current_prompt_future - response = await asyncio.wait_for( - asyncio.shield(asyncio.wrap_future(current_prompt_future)), - timeout=self.acp_prompt_timeout, + response = await self._await_prompt_response_with_idle_timeout( + current_prompt_future ) break - except TimeoutError as exc: - # ``asyncio.TimeoutError`` is ``TimeoutError`` on 3.11+. - # Re-raise as a clean TimeoutError so the outer handler - # branches the same way as the sync path. - raise TimeoutError( - f"ACP prompt timed out after {self.acp_prompt_timeout:.0f}s" - ) from exc except _RETRIABLE_CONNECTION_ERRORS as e: if attempt < max_retries: delay = _ACP_PROMPT_RETRY_DELAYS[ diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index 5e3d2e36f4..9c27c7e740 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -11,6 +11,12 @@ import pytest from acp.exceptions import RequestError as ACPRequestError +from acp.schema import ( + AgentMessageChunk, + TextContentBlock, + ToolCallProgress, + ToolCallStart, +) from openhands.sdk.agent.acp_agent import ( ACPAgent, @@ -1514,6 +1520,81 @@ def _message_text(ev: MessageEvent) -> str: ) assert conversation.state.execution_status == ConversationExecutionStatus.ERROR + def test_astep_extends_timeout_while_tool_call_is_inflight(self, tmp_path): + """A long-running ACP tool call should not trip prompt timeout. + + Commands like `git` can legitimately run longer than + ``acp_prompt_timeout`` while the ACP subprocess still has an active + tool call. The timeout should guard idle/wedged prompts, not active + tools. + """ + from openhands.sdk.utils.async_executor import AsyncExecutor + + agent = _make_agent(acp_prompt_timeout=0.05) + conversation = self._make_conversation_with_message(tmp_path) + emitted: list = [] + + mock_client = _OpenHandsACPBridge() + mock_client.get_turn_usage_update = MagicMock(return_value=object()) + agent._client = mock_client + agent._conn = MagicMock() + agent._session_id = "test-session" + + async def _fake_prompt(prompt_blocks, session_id): # noqa: ARG001 + await mock_client.session_update( + session_id, + ToolCallStart( + session_update="tool_call", + tool_call_id="git-1", + title="git status", + kind="execute", + status="in_progress", + ), + ) + await asyncio.sleep(0.08) + await mock_client.session_update( + session_id, + ToolCallProgress( + session_update="tool_call_update", + tool_call_id="git-1", + status="completed", + raw_output="ok", + ), + ) + await mock_client.session_update( + session_id, + AgentMessageChunk( + session_update="agent_message_chunk", + content=TextContentBlock(type="text", text="done"), + ), + ) + return None + + agent._conn.prompt = _fake_prompt + + executor = AsyncExecutor() + try: + agent._executor = executor + asyncio.run(agent.astep(conversation, on_event=emitted.append)) + finally: + executor.close() + + assert ( + conversation.state.execution_status == ConversationExecutionStatus.FINISHED + ) + assert any( + isinstance(e, ACPToolCallEvent) + and e.tool_call_id == "git-1" + and e.status == "completed" + for e in emitted + ) + assert any( + isinstance(e, ActionEvent) + and isinstance(e.action, FinishAction) + and e.action.message == "done" + for e in emitted + ) + def test_astep_emits_failed_tool_calls_on_cancellation(self, tmp_path): """``asyncio.CancelledError`` during astep must close in-flight ``ACPToolCallEvent``s as ``failed`` and re-raise. From 81b4713bef94f2260b80f3c72c2a480bf7350da8 Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Mon, 25 May 2026 13:43:21 -0400 Subject: [PATCH 20/34] Restore hard ACP prompt timeout --- .../openhands/sdk/agent/acp_agent.py | 76 ++++--------- tests/sdk/agent/test_acp_agent.py | 107 +++++++++--------- 2 files changed, 71 insertions(+), 112 deletions(-) diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index f9ee355262..0ee22489a2 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -421,10 +421,6 @@ def __init__(self) -> None: # ACPAgent.step() to keep the agent-server's idle timer alive. self.on_activity: Any = None # Callable[[], None] | None self._last_activity_signal: float = float("-inf") - # Monotonic counter bumped for every non-fork session_update. ACPAgent - # uses this to distinguish an idle prompt from a long-running tool call - # such as `git` that is still sending protocol activity. - self._activity_version: int = 0 # Telemetry state from UsageUpdate (persists across turns) self._last_cost: float = 0.0 # last cumulative cost seen self._last_cost_by_session: dict[str, float] = {} @@ -451,21 +447,6 @@ def reset(self) -> None: # Note: telemetry state (_last_cost, _context_window, _last_activity_signal, # etc.) is intentionally NOT cleared — it accumulates across turns. - @property - def activity_version(self) -> int: - """Version counter for session_update activity observed this process.""" - return self._activity_version - - def has_inflight_tool_calls(self) -> bool: - """Return whether any live ACP tool call is not terminal yet.""" - return any( - tc.get("status") not in _TERMINAL_TOOL_CALL_STATUSES - for tc in self.accumulated_tool_calls - ) - - def _note_session_activity(self) -> None: - self._activity_version += 1 - def prepare_usage_sync(self, session_id: str) -> asyncio.Event: """Prepare per-turn UsageUpdate synchronization for a session.""" event = asyncio.Event() @@ -499,8 +480,6 @@ async def session_update( self._fork_accumulated_text.append(update.content.text) return - self._note_session_activity() - if isinstance(update, AgentMessageChunk): if isinstance(update.content, TextContentBlock): text = update.content.text @@ -1529,44 +1508,27 @@ async def _do_acp_prompt(self, prompt_blocks: list[Any]) -> PromptResponse | Non ) return response - async def _await_prompt_response_with_idle_timeout( + async def _await_prompt_response_with_timeout( self, prompt_future: Future[PromptResponse | None], ) -> PromptResponse | None: - """Await a prompt response, extending the timeout during tool activity. - - ``acp_prompt_timeout`` is an idle timeout for ACP turns, not a hard wall - clock for legitimate long-running tools. Commands such as `git` can run - longer than the default prompt timeout while still being active inside - the ACP subprocess. If the timeout fires while we have seen new - session_update activity or while any tool call remains non-terminal, - keep waiting; otherwise treat it as a wedged ACP prompt. + """Await an ACP prompt with a hard turn deadline. + + The terminal tool reports hard command timeouts back to the agent + instead of waiting forever for active commands. ACP prompts follow the + same rule: activity heartbeats keep the server alive, but they do not + extend this prompt deadline. The timeout handler sends ``session/cancel`` + and closes any in-flight tool cards. """ - wrapped = asyncio.wrap_future(prompt_future) - last_activity_version = self._client.activity_version - while True: - try: - return await asyncio.wait_for( - asyncio.shield(wrapped), - timeout=self.acp_prompt_timeout, - ) - except TimeoutError as exc: - activity_version = self._client.activity_version - has_inflight_tool = self._client.has_inflight_tool_calls() - if activity_version != last_activity_version or has_inflight_tool: - last_activity_version = activity_version - logger.info( - "ACP prompt exceeded %.0fs but still has %s; " - "extending timeout", - self.acp_prompt_timeout, - "an in-flight tool call" - if has_inflight_tool - else "session activity", - ) - continue - raise TimeoutError( - f"ACP prompt timed out after {self.acp_prompt_timeout:.0f}s" - ) from exc + try: + return await asyncio.wait_for( + asyncio.shield(asyncio.wrap_future(prompt_future)), + timeout=self.acp_prompt_timeout, + ) + except TimeoutError as exc: + raise TimeoutError( + f"ACP prompt timed out after {self.acp_prompt_timeout:.0f}s" + ) from exc def _finalize_successful_turn( self, @@ -1910,10 +1872,12 @@ async def astep( ) ) prompt_future = current_prompt_future - response = await self._await_prompt_response_with_idle_timeout( + response = await self._await_prompt_response_with_timeout( current_prompt_future ) break + except TimeoutError: + raise except _RETRIABLE_CONNECTION_ERRORS as e: if attempt < max_retries: delay = _ACP_PROMPT_RETRY_DELAYS[ diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index 9c27c7e740..140529d814 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -11,12 +11,6 @@ import pytest from acp.exceptions import RequestError as ACPRequestError -from acp.schema import ( - AgentMessageChunk, - TextContentBlock, - ToolCallProgress, - ToolCallStart, -) from openhands.sdk.agent.acp_agent import ( ACPAgent, @@ -1520,19 +1514,20 @@ def _message_text(ev: MessageEvent) -> str: ) assert conversation.state.execution_status == ConversationExecutionStatus.ERROR - def test_astep_extends_timeout_while_tool_call_is_inflight(self, tmp_path): - """A long-running ACP tool call should not trip prompt timeout. + def test_astep_times_out_while_tool_call_is_inflight(self, tmp_path): + """A hard ACP prompt timeout still fires during an active tool call. - Commands like `git` can legitimately run longer than - ``acp_prompt_timeout`` while the ACP subprocess still has an active - tool call. The timeout should guard idle/wedged prompts, not active - tools. + Mirroring OpenHands command handling, active output/heartbeats keep the + runtime alive but do not let a never-ending command suppress the hard + turn deadline. The timeout path must cancel the ACP session and close + any streamed tool cards as failed. """ - from openhands.sdk.utils.async_executor import AsyncExecutor + from concurrent.futures import Future - agent = _make_agent(acp_prompt_timeout=0.05) + agent = _make_agent(acp_prompt_timeout=0.02) conversation = self._make_conversation_with_message(tmp_path) emitted: list = [] + cancel_called = threading.Event() mock_client = _OpenHandsACPBridge() mock_client.get_turn_usage_update = MagicMock(return_value=object()) @@ -1540,58 +1535,58 @@ def test_astep_extends_timeout_while_tool_call_is_inflight(self, tmp_path): agent._conn = MagicMock() agent._session_id = "test-session" - async def _fake_prompt(prompt_blocks, session_id): # noqa: ARG001 - await mock_client.session_update( - session_id, - ToolCallStart( - session_update="tool_call", - tool_call_id="git-1", - title="git status", - kind="execute", - status="in_progress", - ), - ) - await asyncio.sleep(0.08) - await mock_client.session_update( - session_id, - ToolCallProgress( - session_update="tool_call_update", - tool_call_id="git-1", - status="completed", - raw_output="ok", - ), - ) - await mock_client.session_update( - session_id, - AgentMessageChunk( - session_update="agent_message_chunk", - content=TextContentBlock(type="text", text="done"), - ), - ) - return None + class _FakePortal: + def __init__(self) -> None: + self.prompt_future: Future = Future() - agent._conn.prompt = _fake_prompt + def start_task_soon(self, fn, *args): # noqa: ANN001, ANN202 + if args: + entry = { + "tool_call_id": "git-1", + "title": "git status", + "tool_kind": "execute", + "status": "in_progress", + "raw_input": None, + "raw_output": None, + "content": None, + } + mock_client.accumulated_tool_calls.append(entry) + mock_client._emit_tool_call_event(entry) + return self.prompt_future - executor = AsyncExecutor() - try: - agent._executor = executor + cancel_called.set() + cancel_future: Future = Future() + cancel_future.set_result(None) + return cancel_future + + mock_executor = MagicMock() + mock_executor.portal = _FakePortal() + agent._executor = mock_executor + + with patch("openhands.sdk.agent.acp_agent._ACP_CANCEL_DRAIN_TIMEOUT", 0.01): asyncio.run(agent.astep(conversation, on_event=emitted.append)) - finally: - executor.close() - assert ( - conversation.state.execution_status == ConversationExecutionStatus.FINISHED - ) + assert cancel_called.is_set() + assert conversation.state.execution_status == ConversationExecutionStatus.ERROR assert any( isinstance(e, ACPToolCallEvent) and e.tool_call_id == "git-1" - and e.status == "completed" + and e.status == "failed" + and e.is_error for e in emitted ) + + def _message_text(ev: MessageEvent) -> str: + first = ev.llm_message.content[0] + return first.text if isinstance(first, TextContent) else "" + assert any( - isinstance(e, ActionEvent) - and isinstance(e.action, FinishAction) - and e.action.message == "done" + isinstance(e, MessageEvent) + and "ACP prompt timed out after" in _message_text(e) + for e in emitted + ) + assert not any( + isinstance(e, ActionEvent) and isinstance(e.action, FinishAction) for e in emitted ) From 70e44e288016be4b20f09f8f31c15a8cb7790b9b Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Mon, 25 May 2026 13:53:36 -0400 Subject: [PATCH 21/34] Fix ACP interrupt cursor races --- .../openhands/agent_server/event_service.py | 120 +++++++++++++++--- .../openhands/sdk/agent/acp_agent.py | 15 ++- .../conversation/impl/local_conversation.py | 50 +++++++- tests/agent_server/test_event_service.py | 69 +++++++++- tests/sdk/agent/test_acp_agent.py | 98 +++++++++++++- .../local/test_conversation_send_message.py | 49 ++++++- 6 files changed, 368 insertions(+), 33 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/event_service.py b/openhands-agent-server/openhands/agent_server/event_service.py index 5edede7703..bd2bc53c1b 100644 --- a/openhands-agent-server/openhands/agent_server/event_service.py +++ b/openhands-agent-server/openhands/agent_server/event_service.py @@ -20,7 +20,11 @@ from openhands.sdk import LLM, AgentBase, Event, Message, get_logger from openhands.sdk.agent import ACPAgent from openhands.sdk.conversation.base import BaseConversation -from openhands.sdk.conversation.impl.local_conversation import LocalConversation +from openhands.sdk.conversation.impl.local_conversation import ( + ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID, + ACP_SUPERSEDE_INFLIGHT_PROMPT, + LocalConversation, +) from openhands.sdk.conversation.response_utils import get_agent_final_response from openhands.sdk.conversation.secret_registry import SecretValue from openhands.sdk.conversation.state import ( @@ -72,6 +76,10 @@ class EventService: # Set when a send_message(run=True) is rejected because a run is still # wrapping up; consumed by _run_and_publish to re-run the stranded message. _rerun_requested: bool = field(default=False, init=False) + # Set only for the internal ACP interrupt/restart path triggered by a new + # send_message(run=True). Explicit user pause/interrupt clears it so user + # stop intent wins over an earlier automatic restart request. + _acp_internal_rerun_requested: bool = field(default=False, init=False) _run_lock: asyncio.Lock = field(default_factory=asyncio.Lock, init=False) _callback_wrapper: AsyncCallbackWrapper | None = field(default=None, init=False) _lease: ConversationLease | None = field(default=None, init=False) @@ -423,10 +431,18 @@ async def send_message(self, message: Message, run: bool = False): loop = asyncio.get_running_loop() await loop.run_in_executor(None, self._conversation.send_message, message) if run: - if await self._should_interrupt_running_acp_turn(): - await self.interrupt() + should_interrupt_acp, active_acp_prompt_has_latest_message = ( + await self._running_acp_turn_state() + ) + interrupted_acp = False + if should_interrupt_acp: + await self._mark_acp_inflight_prompt_superseded() + self._acp_internal_rerun_requested = True + interrupted_acp = True + await self.interrupt(internal_acp_rerun=True) try: await self.run() + self._acp_internal_rerun_requested = False except ValueError as e: # run() refused. If a run is still wrapping up (its # wait_for_pending tail), the message we just appended won't be @@ -436,17 +452,64 @@ async def send_message(self, message: Message, run: bool = False): # is what keeps a deliberate run=False append, or an IDLE reached # via another path, from triggering an unwanted run. # "inactive_service" is terminal and must not re-arm. - if str(e) == "conversation_already_running": + if ( + str(e) == "conversation_already_running" + and not active_acp_prompt_has_latest_message + ): self._rerun_requested = True + if interrupted_acp: + self._acp_internal_rerun_requested = True - async def _should_interrupt_running_acp_turn(self) -> bool: + def _running_acp_turn_state_sync(self) -> tuple[bool, bool]: + """Return whether an active ACP run should be interrupted. + + The tuple is ``(should_interrupt, active_prompt_has_latest_message)``. + If the running ACP prompt has already advanced to the newly appended + user message, interrupting it would cancel the replacement prompt and + strand that message behind the persisted cursor. + """ if not self._conversation: - return False + return (False, False) if self._run_task is None or self._run_task.done(): - return False + return (False, False) if not isinstance(self._conversation.agent, ACPAgent): - return False - return await self._get_execution_status() == ConversationExecutionStatus.RUNNING + return (False, False) + with self._conversation._state as state: + if state.execution_status != ConversationExecutionStatus.RUNNING: + return (False, False) + inflight_prompt_user_message_id = state.agent_state.get( + ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID + ) + last_user_message_id = state.last_user_message_id + if ( + inflight_prompt_user_message_id is None + or last_user_message_id is None + ): + return (False, False) + active_prompt_has_latest_message = ( + inflight_prompt_user_message_id == last_user_message_id + ) + return ( + not active_prompt_has_latest_message, + active_prompt_has_latest_message, + ) + + async def _running_acp_turn_state(self) -> tuple[bool, bool]: + loop = asyncio.get_running_loop() + return await loop.run_in_executor(None, self._running_acp_turn_state_sync) + + def _mark_acp_inflight_prompt_superseded_sync(self) -> None: + if not self._conversation: + return + with self._conversation._state as state: + state.agent_state = { + **state.agent_state, + ACP_SUPERSEDE_INFLIGHT_PROMPT: True, + } + + async def _mark_acp_inflight_prompt_superseded(self) -> None: + loop = asyncio.get_running_loop() + await loop.run_in_executor(None, self._mark_acp_inflight_prompt_superseded_sync) async def subscribe_to_events(self, subscriber: Subscriber[Event]) -> UUID: subscriber_id = self._pub_sub.subscribe(subscriber) @@ -839,19 +902,30 @@ async def _run_and_publish(): # the wait_for_pending() tail above had its run() rejected as # "conversation_already_running" and suppressed, setting # _rerun_requested. Honor it while the conversation is IDLE - # (pending input) or ACP-interrupted PAUSED (the old task - # finished its interrupt before the replacement run could - # start). If the run loop was still alive it already absorbed - # the message and we are FINISHED here, so the guard avoids a - # redundant run. A deliberate run=False append, or an IDLE - # reached via another path, never sets the flag. + # (pending input) or internally ACP-interrupted PAUSED (the + # old task finished its interrupt before the replacement run + # could start). Explicit user pause/interrupt clears the + # internal ACP flag, so user stop intent wins over an older + # automatic restart request. If the run loop was still alive + # it already absorbed the message and we are FINISHED here, + # so the guard avoids a redundant run. A deliberate + # run=False append, or an IDLE reached via another path, + # never sets the flag. rerun_requested = self._rerun_requested + acp_internal_rerun_requested = ( + self._acp_internal_rerun_requested + ) self._rerun_requested = False + self._acp_internal_rerun_requested = False if rerun_requested: status = await self._get_execution_status() - should_restart = status == ConversationExecutionStatus.IDLE or ( - status == ConversationExecutionStatus.PAUSED - and isinstance(conversation.agent, ACPAgent) + should_restart = ( + status == ConversationExecutionStatus.IDLE + or ( + acp_internal_rerun_requested + and status == ConversationExecutionStatus.PAUSED + and isinstance(conversation.agent, ACPAgent) + ) ) if should_restart: try: @@ -859,6 +933,9 @@ async def _run_and_publish(): except ValueError as e: if str(e) == "conversation_already_running": self._rerun_requested = True + self._acp_internal_rerun_requested = ( + acp_internal_rerun_requested + ) else: raise @@ -891,12 +968,14 @@ async def reject_pending_actions(self, reason: str): async def pause(self): if self._conversation: + self._rerun_requested = False + self._acp_internal_rerun_requested = False loop = asyncio.get_running_loop() await loop.run_in_executor(None, self._conversation.pause) # Publish state update after pause to ensure stats are updated await self._publish_state_update() - async def interrupt(self): + async def interrupt(self, *, internal_acp_rerun: bool = False): """Immediately cancel an in-flight async LLM call. Delegates to :meth:`LocalConversation.interrupt` which cancels the @@ -904,6 +983,9 @@ async def interrupt(self): back to :meth:`pause`. """ if self._conversation: + if not internal_acp_rerun: + self._rerun_requested = False + self._acp_internal_rerun_requested = False self._conversation.interrupt() # Wait for the run task to finish so we can publish the final # state update (PAUSED + InterruptEvent) cleanly. diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 0ee22489a2..470693c21c 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -1937,8 +1937,19 @@ async def astep( # the terminal synthetic failures. await self._arequest_session_cancel() drain_result = await self._drain_cancelled_prompt(prompt_future) - self._cancel_inflight_tool_calls() - if not drain_result.drained or drain_result.completed: + with state: + elapsed = time.monotonic() - t0 + if drain_result.completed and drain_result.error is None: + self._finalize_successful_turn( + drain_result.response, elapsed, state, on_event + ) + return + if drain_result.completed and drain_result.error is not None: + self._emit_turn_error(drain_result.error, state, on_event) + self._restart_session_on_next_turn = True + return + self._cancel_inflight_tool_calls() + if not drain_result.drained: self._restart_session_on_next_turn = True raise except TimeoutError: diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index 05eb0cd7e8..850a510071 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -72,6 +72,10 @@ logger = get_logger(__name__) +ACP_LAST_PROMPT_USER_MESSAGE_ID = "acp_last_prompt_user_message_id" +ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID = "acp_inflight_prompt_user_message_id" +ACP_SUPERSEDE_INFLIGHT_PROMPT = "acp_supersede_inflight_prompt" + class LocalConversation(BaseConversation): agent: AgentBase @@ -954,7 +958,7 @@ async def arun(self) -> None: iteration = 0 last_acp_prompt_user_message_id = self._state.agent_state.get( - "acp_last_prompt_user_message_id" + ACP_LAST_PROMPT_USER_MESSAGE_ID ) try: while True: @@ -1094,10 +1098,9 @@ async def arun(self) -> None: with self._state: if acp_step_user_message_id is not None: - last_acp_prompt_user_message_id = acp_step_user_message_id self._state.agent_state = { **self._state.agent_state, - "acp_last_prompt_user_message_id": ( + ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID: ( acp_step_user_message_id ), } @@ -1110,6 +1113,27 @@ async def arun(self) -> None: ) with self._state: iteration += 1 + agent_state = dict(self._state.agent_state) + if ( + agent_state.get(ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID) + == acp_step_user_message_id + ): + agent_state.pop(ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID, None) + agent_state.pop(ACP_SUPERSEDE_INFLIGHT_PROMPT, None) + if ( + acp_step_user_message_id is not None + and self._state.execution_status + not in ( + ConversationExecutionStatus.ERROR, + ConversationExecutionStatus.STUCK, + ConversationExecutionStatus.PAUSED, + ) + ): + last_acp_prompt_user_message_id = acp_step_user_message_id + agent_state[ACP_LAST_PROMPT_USER_MESSAGE_ID] = ( + acp_step_user_message_id + ) + self._state.agent_state = agent_state if self._state.execution_status in ( ConversationExecutionStatus.ERROR, @@ -1177,6 +1201,22 @@ async def arun(self) -> None: # PAUSED so the conversation can be resumed later. logger.info("arun() interrupted via task cancellation") with self._state: + agent_state = dict(self._state.agent_state) + inflight_prompt_user_message_id = agent_state.pop( + ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID, None + ) + superseded_by_new_message = bool( + agent_state.pop(ACP_SUPERSEDE_INFLIGHT_PROMPT, False) + ) + if ( + superseded_by_new_message + and inflight_prompt_user_message_id is not None + ): + agent_state[ACP_LAST_PROMPT_USER_MESSAGE_ID] = ( + inflight_prompt_user_message_id + ) + self._state.agent_state = agent_state + # Emit synthetic error observations for any ActionEvents # that were in-flight when the interrupt landed. Without # these the LLM history would contain tool-call requests @@ -1188,6 +1228,10 @@ async def arun(self) -> None: self._on_event(InterruptEvent()) except Exception as e: with self._state: + agent_state = dict(self._state.agent_state) + agent_state.pop(ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID, None) + agent_state.pop(ACP_SUPERSEDE_INFLIGHT_PROMPT, None) + self._state.agent_state = agent_state self._state.execution_status = ConversationExecutionStatus.ERROR self._on_event( ConversationErrorEvent( diff --git a/tests/agent_server/test_event_service.py b/tests/agent_server/test_event_service.py index c072827e96..69678296fe 100644 --- a/tests/agent_server/test_event_service.py +++ b/tests/agent_server/test_event_service.py @@ -25,7 +25,10 @@ from openhands.sdk import LLM, Agent, Conversation, Message from openhands.sdk.agent import ACPAgent from openhands.sdk.conversation.fifo_lock import FIFOLock -from openhands.sdk.conversation.impl.local_conversation import LocalConversation +from openhands.sdk.conversation.impl.local_conversation import ( + ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID, + LocalConversation, +) from openhands.sdk.conversation.state import ( ConversationExecutionStatus, ConversationState, @@ -899,6 +902,70 @@ async def blocking_astep( assert prompts_seen == ["initial request", "intervening"] + @pytest.mark.asyncio + async def test_send_message_with_run_true_does_not_interrupt_current_acp_prompt( + self, event_service, tmp_path + ): + """Do not cancel the ACP prompt if it already advanced to the new message.""" + agent = ACPAgent(acp_command=["echo", "test"]) + conversation = LocalConversation( + agent=agent, + workspace=str(tmp_path), + max_iteration_per_run=4, + stuck_detection=False, + ) + conversation.send_message("initial request") + conversation.state.execution_status = ConversationExecutionStatus.RUNNING + event_service._conversation = conversation + event_service._publish_state_update = AsyncMock() + + release_run = asyncio.Event() + event_service._run_task = asyncio.create_task(release_run.wait()) + original_send_message = conversation.send_message + + def send_and_mark_active_prompt(message): + original_send_message(message) + conversation.state.execution_status = ConversationExecutionStatus.RUNNING + conversation.state.agent_state = { + **conversation.state.agent_state, + ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID: ( + conversation.state.last_user_message_id + ), + } + + conversation.send_message = send_and_mark_active_prompt # type: ignore[method-assign] + conversation.interrupt = MagicMock() # type: ignore[method-assign] + + try: + await event_service.send_message( + Message(role="user", content=[TextContent(text="intervening")]), + run=True, + ) + finally: + release_run.set() + await event_service._run_task + event_service._run_task = None + + conversation.interrupt.assert_not_called() + assert event_service._rerun_requested is False + + @pytest.mark.asyncio + async def test_explicit_interrupt_clears_internal_acp_rerun_request( + self, event_service + ): + """A later explicit stop should win over an earlier internal ACP rerun.""" + conversation = MagicMock() + event_service._conversation = conversation + event_service._publish_state_update = AsyncMock() + event_service._rerun_requested = True + event_service._acp_internal_rerun_requested = True + + await event_service.interrupt() + + conversation.interrupt.assert_called_once() + assert event_service._rerun_requested is False + assert event_service._acp_internal_rerun_requested is False + @pytest.mark.asyncio async def test_send_message_with_run_true_logs_exception(self, event_service): """Test that exceptions from conversation.run() are caught and logged.""" diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index 140529d814..4c053afe1f 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -1642,8 +1642,9 @@ async def _fake_prompt(prompt_blocks, session_id): # Signal caller loop that we're holding inside the prompt # so the cancel races deterministically. caller_loop.call_soon_threadsafe(prompt_entered.set) - # Block until cancellation releases the prompt, exercising - # prompt quiescing without leaving a long-running portal task. + # Block beyond the cancel-drain timeout so this test exercises + # the non-quiesced cancellation path that must synthesize + # failed ACP tool-call events. released = await asyncio.to_thread(prompt_released.wait, 10.0) assert released return None @@ -1651,7 +1652,6 @@ async def _fake_prompt(prompt_blocks, session_id): async def _fake_cancel(session_id): assert session_id == "test-session" caller_loop.call_soon_threadsafe(cancel_called.set) - prompt_released.set() agent._conn.prompt = _fake_prompt agent._conn.cancel = _fake_cancel @@ -1664,7 +1664,11 @@ async def _fake_cancel(session_id): task.cancel() try: with pytest.raises(asyncio.CancelledError): - await task + with patch( + "openhands.sdk.agent.acp_agent._ACP_CANCEL_DRAIN_TIMEOUT", + 0.01, + ): + await task await asyncio.wait_for(cancel_called.wait(), timeout=5.0) finally: prompt_released.set() @@ -1688,6 +1692,81 @@ async def _fake_cancel(session_id): ) assert failed_tool_events[0].is_error is True + def test_astep_finalizes_prompt_that_completes_during_cancellation(self, tmp_path): + """If a cancelled ACP prompt drains successfully, keep the completed turn. + + The ACP server may finish the prompt while ``session/cancel`` is being + delivered. In that case the remote session has accepted the assistant + turn, so OpenHands must finalize the same turn locally instead of + discarding the response and later resuming from diverged session history. + """ + from acp.schema import AgentMessageChunk, TextContentBlock + + from openhands.sdk.utils.async_executor import AsyncExecutor + + agent = _make_agent() + conversation = self._make_conversation_with_message(tmp_path) + emitted: list = [] + + mock_client = _OpenHandsACPBridge() + mock_client.get_turn_usage_update = MagicMock(return_value=object()) + agent._client = mock_client + agent._conn = MagicMock() + + executor = AsyncExecutor() + + async def _run_with_cancel() -> None: + prompt_entered = asyncio.Event() + cancel_called = asyncio.Event() + prompt_released = threading.Event() + caller_loop = asyncio.get_running_loop() + + async def _fake_prompt(prompt_blocks, session_id): # noqa: ARG001 + caller_loop.call_soon_threadsafe(prompt_entered.set) + released = await asyncio.to_thread(prompt_released.wait, 10.0) + assert released + await mock_client.session_update( + session_id, + AgentMessageChunk( + session_update="agent_message_chunk", + content=TextContentBlock(type="text", text="done"), + ), + ) + return None + + async def _fake_cancel(session_id): + assert session_id == "test-session" + caller_loop.call_soon_threadsafe(cancel_called.set) + prompt_released.set() + + agent._conn.prompt = _fake_prompt + agent._conn.cancel = _fake_cancel + agent._session_id = "test-session" + + task = asyncio.create_task( + agent.astep(conversation, on_event=emitted.append) + ) + await asyncio.wait_for(prompt_entered.wait(), timeout=5.0) + task.cancel() + await task + await asyncio.wait_for(cancel_called.wait(), timeout=5.0) + + try: + agent._executor = executor + asyncio.run(_run_with_cancel()) + finally: + executor.close() + + assert ( + conversation.state.execution_status == ConversationExecutionStatus.FINISHED + ) + assert any( + isinstance(e, ActionEvent) + and isinstance(e.action, FinishAction) + and e.action.message == "done" + for e in emitted + ) + def test_astep_cancellation_does_not_mark_suffix_installed(self, tmp_path): """Cancellation before a turn completes must leave ``_suffix_install_state`` as ``pending_first_prompt``. @@ -1724,11 +1803,12 @@ async def _run_with_cancel() -> None: async def _fake_prompt(prompt_blocks, session_id): caller_loop.call_soon_threadsafe(prompt_entered.set) - await asyncio.to_thread(prompt_released.wait) + released = await asyncio.to_thread(prompt_released.wait, 10.0) + assert released return None async def _fake_cancel(session_id): - prompt_released.set() + assert session_id == "test-session" agent._conn.prompt = _fake_prompt agent._conn.cancel = _fake_cancel @@ -1741,7 +1821,11 @@ async def _fake_cancel(session_id): task.cancel() try: with pytest.raises(asyncio.CancelledError): - await task + with patch( + "openhands.sdk.agent.acp_agent._ACP_CANCEL_DRAIN_TIMEOUT", + 0.01, + ): + await task finally: prompt_released.set() diff --git a/tests/sdk/conversation/local/test_conversation_send_message.py b/tests/sdk/conversation/local/test_conversation_send_message.py index a4649a18a4..c19a7780f8 100644 --- a/tests/sdk/conversation/local/test_conversation_send_message.py +++ b/tests/sdk/conversation/local/test_conversation_send_message.py @@ -7,6 +7,10 @@ from openhands.sdk.agent.acp_agent import ACPAgent from openhands.sdk.agent.base import AgentBase from openhands.sdk.conversation import Conversation, LocalConversation +from openhands.sdk.conversation.impl.local_conversation import ( + ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID, + ACP_LAST_PROMPT_USER_MESSAGE_ID, +) from openhands.sdk.conversation.state import ( ConversationExecutionStatus, ConversationState, @@ -435,7 +439,7 @@ async def test_acp_arun_does_not_reprompt_when_cursor_is_current(tmp_path): ) conversation.send_message("already processed") conversation.state.agent_state = { - "acp_last_prompt_user_message_id": conversation.state.last_user_message_id + ACP_LAST_PROMPT_USER_MESSAGE_ID: conversation.state.last_user_message_id } prompts_seen: list[MessageEvent | None] = [] @@ -459,6 +463,49 @@ async def blocking_astep( assert conversation.state.execution_status == ConversationExecutionStatus.FINISHED +@pytest.mark.asyncio +async def test_acp_arun_does_not_commit_cursor_on_explicit_interrupt(tmp_path): + """Explicit interruption should leave the in-flight ACP prompt retryable.""" + + agent = ACPAgent(acp_command=["echo", "test"]) + conversation = LocalConversation( + agent=agent, + workspace=str(tmp_path), + max_iteration_per_run=3, + stuck_detection=False, + ) + conversation.send_message("cancel me") + first_message_id = conversation.state.last_user_message_id + + prompt_started = asyncio.Event() + + async def blocking_astep( + self, # noqa: ARG001 + conv: LocalConversation, # noqa: ARG001 + on_event: ConversationCallbackType, # noqa: ARG001 + on_token: ConversationTokenCallbackType | None = None, # noqa: ARG001 + prompt_message: MessageEvent | None = None, # noqa: ARG001 + ) -> None: + prompt_started.set() + await asyncio.Event().wait() + + with ( + patch.object(ACPAgent, "init_state", autospec=True), + patch.object(ACPAgent, "astep", new=blocking_astep), + ): + task = asyncio.create_task(conversation.arun()) + await asyncio.wait_for(prompt_started.wait(), timeout=1.0) + conversation.interrupt() + await asyncio.wait_for(task, timeout=1.0) + + assert conversation.state.execution_status == ConversationExecutionStatus.PAUSED + assert ( + conversation.state.agent_state.get(ACP_LAST_PROMPT_USER_MESSAGE_ID) + != first_message_id + ) + assert ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID not in conversation.state.agent_state + + @pytest.mark.asyncio async def test_acp_arun_resumes_queued_messages_fifo_after_iteration_cap(tmp_path): """Queued ACP messages should remain FIFO across follow-up runs.""" From ff3b45bc6f0943a7fe46ac71e1ca33b66dea4d41 Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Mon, 25 May 2026 13:55:43 -0400 Subject: [PATCH 22/34] Format EventService ACP rerun logic --- .../openhands/agent_server/event_service.py | 27 +++++++------------ 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/event_service.py b/openhands-agent-server/openhands/agent_server/event_service.py index bd2bc53c1b..c59964c06d 100644 --- a/openhands-agent-server/openhands/agent_server/event_service.py +++ b/openhands-agent-server/openhands/agent_server/event_service.py @@ -431,9 +431,10 @@ async def send_message(self, message: Message, run: bool = False): loop = asyncio.get_running_loop() await loop.run_in_executor(None, self._conversation.send_message, message) if run: - should_interrupt_acp, active_acp_prompt_has_latest_message = ( - await self._running_acp_turn_state() - ) + ( + should_interrupt_acp, + active_acp_prompt_has_latest_message, + ) = await self._running_acp_turn_state() interrupted_acp = False if should_interrupt_acp: await self._mark_acp_inflight_prompt_superseded() @@ -481,10 +482,7 @@ def _running_acp_turn_state_sync(self) -> tuple[bool, bool]: ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID ) last_user_message_id = state.last_user_message_id - if ( - inflight_prompt_user_message_id is None - or last_user_message_id is None - ): + if inflight_prompt_user_message_id is None or last_user_message_id is None: return (False, False) active_prompt_has_latest_message = ( inflight_prompt_user_message_id == last_user_message_id @@ -912,20 +910,15 @@ async def _run_and_publish(): # run=False append, or an IDLE reached via another path, # never sets the flag. rerun_requested = self._rerun_requested - acp_internal_rerun_requested = ( - self._acp_internal_rerun_requested - ) + acp_internal_rerun_requested = self._acp_internal_rerun_requested self._rerun_requested = False self._acp_internal_rerun_requested = False if rerun_requested: status = await self._get_execution_status() - should_restart = ( - status == ConversationExecutionStatus.IDLE - or ( - acp_internal_rerun_requested - and status == ConversationExecutionStatus.PAUSED - and isinstance(conversation.agent, ACPAgent) - ) + should_restart = status == ConversationExecutionStatus.IDLE or ( + acp_internal_rerun_requested + and status == ConversationExecutionStatus.PAUSED + and isinstance(conversation.agent, ACPAgent) ) if should_restart: try: From af1ac3f962aed96bb54ede76c635fa72e661b982 Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Mon, 25 May 2026 14:00:40 -0400 Subject: [PATCH 23/34] Use reassignment-safe ACP cursor state updates --- .../conversation/impl/local_conversation.py | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index 850a510071..cfb9e57e3d 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -1113,13 +1113,15 @@ async def arun(self) -> None: ) with self._state: iteration += 1 - agent_state = dict(self._state.agent_state) + updated_agent_state = dict(self._state.agent_state) if ( - agent_state.get(ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID) + updated_agent_state.get(ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID) == acp_step_user_message_id ): - agent_state.pop(ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID, None) - agent_state.pop(ACP_SUPERSEDE_INFLIGHT_PROMPT, None) + updated_agent_state.pop( + ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID, None + ) + updated_agent_state.pop(ACP_SUPERSEDE_INFLIGHT_PROMPT, None) if ( acp_step_user_message_id is not None and self._state.execution_status @@ -1130,10 +1132,10 @@ async def arun(self) -> None: ) ): last_acp_prompt_user_message_id = acp_step_user_message_id - agent_state[ACP_LAST_PROMPT_USER_MESSAGE_ID] = ( + updated_agent_state[ACP_LAST_PROMPT_USER_MESSAGE_ID] = ( acp_step_user_message_id ) - self._state.agent_state = agent_state + self._state.agent_state = updated_agent_state if self._state.execution_status in ( ConversationExecutionStatus.ERROR, @@ -1201,21 +1203,21 @@ async def arun(self) -> None: # PAUSED so the conversation can be resumed later. logger.info("arun() interrupted via task cancellation") with self._state: - agent_state = dict(self._state.agent_state) - inflight_prompt_user_message_id = agent_state.pop( + updated_agent_state = dict(self._state.agent_state) + inflight_prompt_user_message_id = updated_agent_state.pop( ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID, None ) superseded_by_new_message = bool( - agent_state.pop(ACP_SUPERSEDE_INFLIGHT_PROMPT, False) + updated_agent_state.pop(ACP_SUPERSEDE_INFLIGHT_PROMPT, False) ) if ( superseded_by_new_message and inflight_prompt_user_message_id is not None ): - agent_state[ACP_LAST_PROMPT_USER_MESSAGE_ID] = ( + updated_agent_state[ACP_LAST_PROMPT_USER_MESSAGE_ID] = ( inflight_prompt_user_message_id ) - self._state.agent_state = agent_state + self._state.agent_state = updated_agent_state # Emit synthetic error observations for any ActionEvents # that were in-flight when the interrupt landed. Without @@ -1228,10 +1230,10 @@ async def arun(self) -> None: self._on_event(InterruptEvent()) except Exception as e: with self._state: - agent_state = dict(self._state.agent_state) - agent_state.pop(ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID, None) - agent_state.pop(ACP_SUPERSEDE_INFLIGHT_PROMPT, None) - self._state.agent_state = agent_state + updated_agent_state = dict(self._state.agent_state) + updated_agent_state.pop(ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID, None) + updated_agent_state.pop(ACP_SUPERSEDE_INFLIGHT_PROMPT, None) + self._state.agent_state = updated_agent_state self._state.execution_status = ConversationExecutionStatus.ERROR self._on_event( ConversationErrorEvent( From c988567337de340b5f55ce0c3443aff7327ca171 Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Mon, 25 May 2026 15:12:27 -0400 Subject: [PATCH 24/34] Fix remaining ACP async race reviews --- .../openhands/agent_server/event_service.py | 35 ++++------ .../openhands/sdk/agent/acp_agent.py | 4 +- .../conversation/impl/local_conversation.py | 22 +++++++ tests/agent_server/test_event_service.py | 40 +++++++++++ tests/sdk/agent/test_acp_agent.py | 7 +- .../local/test_conversation_send_message.py | 66 +++++++++++++++++++ 6 files changed, 148 insertions(+), 26 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/event_service.py b/openhands-agent-server/openhands/agent_server/event_service.py index c59964c06d..8dfbbbfbc9 100644 --- a/openhands-agent-server/openhands/agent_server/event_service.py +++ b/openhands-agent-server/openhands/agent_server/event_service.py @@ -432,12 +432,11 @@ async def send_message(self, message: Message, run: bool = False): await loop.run_in_executor(None, self._conversation.send_message, message) if run: ( - should_interrupt_acp, + did_mark_acp_prompt_superseded, active_acp_prompt_has_latest_message, - ) = await self._running_acp_turn_state() + ) = await self._mark_running_acp_prompt_superseded() interrupted_acp = False - if should_interrupt_acp: - await self._mark_acp_inflight_prompt_superseded() + if did_mark_acp_prompt_superseded: self._acp_internal_rerun_requested = True interrupted_acp = True await self.interrupt(internal_acp_rerun=True) @@ -461,10 +460,10 @@ async def send_message(self, message: Message, run: bool = False): if interrupted_acp: self._acp_internal_rerun_requested = True - def _running_acp_turn_state_sync(self) -> tuple[bool, bool]: - """Return whether an active ACP run should be interrupted. + def _mark_running_acp_prompt_superseded_sync(self) -> tuple[bool, bool]: + """Mark the currently running ACP prompt superseded if needed. - The tuple is ``(should_interrupt, active_prompt_has_latest_message)``. + The tuple is ``(did_mark_superseded, active_prompt_has_latest_message)``. If the running ACP prompt has already advanced to the newly appended user message, interrupting it would cancel the replacement prompt and strand that message behind the persisted cursor. @@ -487,27 +486,19 @@ def _running_acp_turn_state_sync(self) -> tuple[bool, bool]: active_prompt_has_latest_message = ( inflight_prompt_user_message_id == last_user_message_id ) - return ( - not active_prompt_has_latest_message, - active_prompt_has_latest_message, - ) - - async def _running_acp_turn_state(self) -> tuple[bool, bool]: - loop = asyncio.get_running_loop() - return await loop.run_in_executor(None, self._running_acp_turn_state_sync) - - def _mark_acp_inflight_prompt_superseded_sync(self) -> None: - if not self._conversation: - return - with self._conversation._state as state: + if active_prompt_has_latest_message: + return (False, True) state.agent_state = { **state.agent_state, ACP_SUPERSEDE_INFLIGHT_PROMPT: True, } + return (True, False) - async def _mark_acp_inflight_prompt_superseded(self) -> None: + async def _mark_running_acp_prompt_superseded(self) -> tuple[bool, bool]: loop = asyncio.get_running_loop() - await loop.run_in_executor(None, self._mark_acp_inflight_prompt_superseded_sync) + return await loop.run_in_executor( + None, self._mark_running_acp_prompt_superseded_sync + ) async def subscribe_to_events(self, subscriber: Subscriber[Event]) -> UUID: subscriber_id = self._pub_sub.subscribe(subscriber) diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 470693c21c..0b11a625f1 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -1943,11 +1943,11 @@ async def astep( self._finalize_successful_turn( drain_result.response, elapsed, state, on_event ) - return + raise if drain_result.completed and drain_result.error is not None: self._emit_turn_error(drain_result.error, state, on_event) self._restart_session_on_next_turn = True - return + raise self._cancel_inflight_tool_calls() if not drain_result.drained: self._restart_session_on_next_turn = True diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index cfb9e57e3d..a7340d1ad9 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -1091,6 +1091,28 @@ async def arun(self) -> None: # for each individual mutation. if acp_step_user_message is None: with self._state: + last_user_message_id = self._state.last_user_message_id + acp_user_message_changed = ( + last_user_message_id is not None + and last_user_message_id != last_acp_prompt_user_message_id + ) + if acp_user_message_changed: + if iteration >= self.max_iteration_per_run: + logger.info( + "User message arrived before ACP finish; " + "leaving conversation idle for a follow-up run" + ) + self._state.execution_status = ( + ConversationExecutionStatus.IDLE + ) + break + logger.info( + "User message arrived before ACP finish; continuing run" + ) + self._state.execution_status = ( + ConversationExecutionStatus.RUNNING + ) + continue self._state.execution_status = ( ConversationExecutionStatus.FINISHED ) diff --git a/tests/agent_server/test_event_service.py b/tests/agent_server/test_event_service.py index 69678296fe..e4e88655ad 100644 --- a/tests/agent_server/test_event_service.py +++ b/tests/agent_server/test_event_service.py @@ -27,6 +27,7 @@ from openhands.sdk.conversation.fifo_lock import FIFOLock from openhands.sdk.conversation.impl.local_conversation import ( ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID, + ACP_SUPERSEDE_INFLIGHT_PROMPT, LocalConversation, ) from openhands.sdk.conversation.state import ( @@ -949,6 +950,45 @@ def send_and_mark_active_prompt(message): conversation.interrupt.assert_not_called() assert event_service._rerun_requested is False + @pytest.mark.asyncio + async def test_acp_supersede_mark_rechecks_current_prompt( + self, event_service, tmp_path + ): + """Do not attach the supersede marker to a replacement ACP prompt.""" + agent = ACPAgent(acp_command=["echo", "test"]) + conversation = LocalConversation( + agent=agent, + workspace=str(tmp_path), + max_iteration_per_run=4, + stuck_detection=False, + ) + conversation.send_message("initial request") + conversation.send_message("replacement request") + latest_user_message_id = conversation.state.last_user_message_id + assert latest_user_message_id is not None + conversation.state.execution_status = ConversationExecutionStatus.RUNNING + conversation.state.agent_state = { + **conversation.state.agent_state, + ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID: latest_user_message_id, + } + event_service._conversation = conversation + release_run = asyncio.Event() + event_service._run_task = asyncio.create_task(release_run.wait()) + + try: + ( + marked, + active_prompt_has_latest, + ) = await event_service._mark_running_acp_prompt_superseded() + finally: + release_run.set() + await event_service._run_task + event_service._run_task = None + + assert marked is False + assert active_prompt_has_latest is True + assert ACP_SUPERSEDE_INFLIGHT_PROMPT not in conversation.state.agent_state + @pytest.mark.asyncio async def test_explicit_interrupt_clears_internal_acp_rerun_request( self, event_service diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index 4c053afe1f..7369d93257 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -1692,13 +1692,15 @@ async def _fake_cancel(session_id): ) assert failed_tool_events[0].is_error is True - def test_astep_finalizes_prompt_that_completes_during_cancellation(self, tmp_path): + def test_astep_finalizes_and_reraises_completed_cancelled_prompt(self, tmp_path): """If a cancelled ACP prompt drains successfully, keep the completed turn. The ACP server may finish the prompt while ``session/cancel`` is being delivered. In that case the remote session has accepted the assistant turn, so OpenHands must finalize the same turn locally instead of discarding the response and later resuming from diverged session history. + The original cancellation still propagates so explicit user stop intent + wins at the conversation layer. """ from acp.schema import AgentMessageChunk, TextContentBlock @@ -1748,7 +1750,8 @@ async def _fake_cancel(session_id): ) await asyncio.wait_for(prompt_entered.wait(), timeout=5.0) task.cancel() - await task + with pytest.raises(asyncio.CancelledError): + await task await asyncio.wait_for(cancel_called.wait(), timeout=5.0) try: diff --git a/tests/sdk/conversation/local/test_conversation_send_message.py b/tests/sdk/conversation/local/test_conversation_send_message.py index c19a7780f8..f12c9dd9f9 100644 --- a/tests/sdk/conversation/local/test_conversation_send_message.py +++ b/tests/sdk/conversation/local/test_conversation_send_message.py @@ -463,6 +463,72 @@ async def blocking_astep( assert conversation.state.execution_status == ConversationExecutionStatus.FINISHED +@pytest.mark.asyncio +async def test_acp_arun_rechecks_messages_before_finishing(tmp_path): + """A user message appended in the finish gap should be sent in the same run.""" + + agent = ACPAgent(acp_command=["echo", "test"]) + conversation = LocalConversation( + agent=agent, + workspace=str(tmp_path), + max_iteration_per_run=3, + stuck_detection=False, + ) + conversation.send_message("already processed") + conversation.state.agent_state = { + ACP_LAST_PROMPT_USER_MESSAGE_ID: conversation.state.last_user_message_id + } + conversation._agent_ready = True + + prompts_seen: list[str] = [] + + def user_text(event: MessageEvent | None) -> str: + assert event is not None + content = event.llm_message.content[0] + assert isinstance(content, TextContent) + return content.text + + async def record_astep( + self, # noqa: ARG001 + conv: LocalConversation, + on_event: ConversationCallbackType, # noqa: ARG001 + on_token: ConversationTokenCallbackType | None = None, # noqa: ARG001 + prompt_message: MessageEvent | None = None, + ) -> None: + prompts_seen.append(user_text(prompt_message)) + conv.state.execution_status = ConversationExecutionStatus.FINISHED + + original_exit = ConversationState.__exit__ + exit_count = 0 + injected = False + + def inject_after_empty_selection( + state: ConversationState, exc_type, exc_val, exc_tb + ) -> None: + nonlocal exit_count, injected + original_exit(state, exc_type, exc_val, exc_tb) + if state is conversation.state: + exit_count += 1 + if exit_count == 2 and not injected: + injected = True + conversation.send_message("arrived in finish gap") + + with ( + patch.object(ConversationState, "__exit__", new=inject_after_empty_selection), + patch.object(ACPAgent, "init_state", autospec=True), + patch.object(ACPAgent, "astep", new=record_astep), + ): + await asyncio.wait_for(conversation.arun(), timeout=1.0) + + assert injected is True + assert prompts_seen == ["arrived in finish gap"] + assert conversation.state.execution_status == ConversationExecutionStatus.FINISHED + assert ( + conversation.state.agent_state.get(ACP_LAST_PROMPT_USER_MESSAGE_ID) + == conversation.state.last_user_message_id + ) + + @pytest.mark.asyncio async def test_acp_arun_does_not_commit_cursor_on_explicit_interrupt(tmp_path): """Explicit interruption should leave the in-flight ACP prompt retryable.""" From 9d7d2bee678e64146880749ce822cc4259103e26 Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 26 May 2026 01:15:45 +0000 Subject: [PATCH 25/34] fix: address ACP async review feedback Co-authored-by: openhands --- .../openhands/agent_server/event_service.py | 9 ++ .../openhands/sdk/agent/acp_agent.py | 4 +- .../conversation/impl/local_conversation.py | 93 +++++++++++++----- tests/agent_server/test_event_service.py | 27 ++++++ tests/sdk/agent/test_acp_agent.py | 78 +++++++++++++++ .../local/test_conversation_send_message.py | 97 ++++++++++++++++++- 6 files changed, 281 insertions(+), 27 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/event_service.py b/openhands-agent-server/openhands/agent_server/event_service.py index 8dfbbbfbc9..82428cd766 100644 --- a/openhands-agent-server/openhands/agent_server/event_service.py +++ b/openhands-agent-server/openhands/agent_server/event_service.py @@ -80,6 +80,10 @@ class EventService: # send_message(run=True). Explicit user pause/interrupt clears it so user # stop intent wins over an earlier automatic restart request. _acp_internal_rerun_requested: bool = field(default=False, init=False) + # Incremented for explicit user pause/interrupt requests. Internal ACP + # supersede restarts compare this generation after their interrupt drains + # so a later Stop/Pause cannot be overwritten by an automatic restart. + _explicit_interrupt_generation: int = field(default=0, init=False) _run_lock: asyncio.Lock = field(default_factory=asyncio.Lock, init=False) _callback_wrapper: AsyncCallbackWrapper | None = field(default=None, init=False) _lease: ConversationLease | None = field(default=None, init=False) @@ -431,6 +435,7 @@ async def send_message(self, message: Message, run: bool = False): loop = asyncio.get_running_loop() await loop.run_in_executor(None, self._conversation.send_message, message) if run: + explicit_interrupt_generation = self._explicit_interrupt_generation ( did_mark_acp_prompt_superseded, active_acp_prompt_has_latest_message, @@ -440,6 +445,8 @@ async def send_message(self, message: Message, run: bool = False): self._acp_internal_rerun_requested = True interrupted_acp = True await self.interrupt(internal_acp_rerun=True) + if self._explicit_interrupt_generation != explicit_interrupt_generation: + return try: await self.run() self._acp_internal_rerun_requested = False @@ -952,6 +959,7 @@ async def reject_pending_actions(self, reason: str): async def pause(self): if self._conversation: + self._explicit_interrupt_generation += 1 self._rerun_requested = False self._acp_internal_rerun_requested = False loop = asyncio.get_running_loop() @@ -968,6 +976,7 @@ async def interrupt(self, *, internal_acp_rerun: bool = False): """ if self._conversation: if not internal_acp_rerun: + self._explicit_interrupt_generation += 1 self._rerun_requested = False self._acp_internal_rerun_requested = False self._conversation.interrupt() diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 0b11a625f1..2af85eb957 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -1410,8 +1410,8 @@ def _restart_session_after_drain_timeout( # within our short grace window; it does not prove the ACP server lost # its persisted session. Preserve the session id so the restarted # subprocess can load_session() and retain conversation memory. - self._restart_session_on_next_turn = False self.init_state(state, on_event=on_event) + self._restart_session_on_next_turn = False def _request_session_cancel(self) -> None: """Ask the ACP server to cancel the active session prompt.""" @@ -1945,7 +1945,7 @@ async def astep( ) raise if drain_result.completed and drain_result.error is not None: - self._emit_turn_error(drain_result.error, state, on_event) + self._cancel_inflight_tool_calls() self._restart_session_on_next_turn = True raise self._cancel_inflight_tool_calls() diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index a7340d1ad9..fd449acd1b 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -5,6 +5,7 @@ import uuid from collections.abc import Mapping from pathlib import Path +from typing import TypeGuard from openhands.sdk.agent.acp_agent import ACPAgent from openhands.sdk.agent.base import AgentBase @@ -44,7 +45,7 @@ from openhands.sdk.event.conversation_error import ConversationErrorEvent from openhands.sdk.hooks import HookConfig, HookEventProcessor, create_hook_callback from openhands.sdk.io import LocalFileStore -from openhands.sdk.llm import LLM, Message, TextContent +from openhands.sdk.llm import LLM, Message, TextContent, content_to_str from openhands.sdk.llm.llm_profile_store import LLMProfileStore from openhands.sdk.llm.llm_registry import LLMRegistry from openhands.sdk.logger import get_logger @@ -75,6 +76,20 @@ ACP_LAST_PROMPT_USER_MESSAGE_ID = "acp_last_prompt_user_message_id" ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID = "acp_inflight_prompt_user_message_id" ACP_SUPERSEDE_INFLIGHT_PROMPT = "acp_supersede_inflight_prompt" +ACP_STOP_HOOK_FEEDBACK_PREFIX = "[Stop hook feedback]" + + +def _is_acp_prompt_message(event: Event) -> TypeGuard[MessageEvent]: + if not isinstance(event, MessageEvent): + return False + if event.source == "user": + return True + if event.source != "environment" or event.llm_message.role != "user": + return False + return any( + part.startswith(ACP_STOP_HOOK_FEEDBACK_PREFIX) + for part in content_to_str(event.llm_message.content) + ) class LocalConversation(BaseConversation): @@ -827,7 +842,9 @@ def run(self) -> None: if not should_stop: logger.info("Stop hook denied agent stopping") if feedback: - prefixed = f"[Stop hook feedback] {feedback}" + prefixed = ( + f"{ACP_STOP_HOOK_FEEDBACK_PREFIX} {feedback}" + ) feedback_msg = MessageEvent( source="environment", llm_message=Message( @@ -983,7 +1000,9 @@ async def arun(self) -> None: if not should_stop: logger.info("Stop hook denied agent stopping") if feedback: - prefixed = f"[Stop hook feedback] {feedback}" + prefixed = ( + f"{ACP_STOP_HOOK_FEEDBACK_PREFIX} {feedback}" + ) feedback_msg = MessageEvent( source="environment", llm_message=Message( @@ -1016,31 +1035,41 @@ async def arun(self) -> None: ) if isinstance(self.agent, ACPAgent): - user_messages = [ + acp_prompt_messages = [ event for event in self._state.events - if isinstance(event, MessageEvent) - and event.source == "user" + if _is_acp_prompt_message(event) ] if last_acp_prompt_user_message_id is None: acp_step_user_message = ( - user_messages[0] if user_messages else None + acp_prompt_messages[0] if acp_prompt_messages else None ) else: last_prompt_index = next( ( index - for index, event in enumerate(user_messages) + for index, event in enumerate(acp_prompt_messages) if event.id == last_acp_prompt_user_message_id ), None, ) - acp_step_user_message = ( - user_messages[last_prompt_index + 1] - if last_prompt_index is not None - and last_prompt_index + 1 < len(user_messages) - else None - ) + if last_prompt_index is None: + logger.info( + "ACP prompt cursor %s no longer exists; " + "restarting from first available prompt", + last_acp_prompt_user_message_id, + ) + acp_step_user_message = ( + acp_prompt_messages[0] + if acp_prompt_messages + else None + ) + else: + acp_step_user_message = ( + acp_prompt_messages[last_prompt_index + 1] + if last_prompt_index + 1 < len(acp_prompt_messages) + else None + ) acp_step_user_message_id = ( acp_step_user_message.id if acp_step_user_message is not None @@ -1091,12 +1120,20 @@ async def arun(self) -> None: # for each individual mutation. if acp_step_user_message is None: with self._state: - last_user_message_id = self._state.last_user_message_id - acp_user_message_changed = ( - last_user_message_id is not None - and last_user_message_id != last_acp_prompt_user_message_id + acp_prompt_messages = [ + event + for event in self._state.events + if _is_acp_prompt_message(event) + ] + latest_acp_prompt_message_id = ( + acp_prompt_messages[-1].id if acp_prompt_messages else None ) - if acp_user_message_changed: + acp_prompt_message_changed = ( + latest_acp_prompt_message_id is not None + and latest_acp_prompt_message_id + != last_acp_prompt_user_message_id + ) + if acp_prompt_message_changed: if iteration >= self.max_iteration_per_run: logger.info( "User message arrived before ACP finish; " @@ -1165,12 +1202,20 @@ async def arun(self) -> None: ): break - last_user_message_id = self._state.last_user_message_id - acp_user_message_changed = ( - last_user_message_id is not None - and last_user_message_id != last_acp_prompt_user_message_id + acp_prompt_messages = [ + event + for event in self._state.events + if _is_acp_prompt_message(event) + ] + latest_acp_prompt_message_id = ( + acp_prompt_messages[-1].id if acp_prompt_messages else None + ) + acp_prompt_message_changed = ( + latest_acp_prompt_message_id is not None + and latest_acp_prompt_message_id + != last_acp_prompt_user_message_id ) - if acp_user_message_changed and self._state.execution_status in ( + if acp_prompt_message_changed and self._state.execution_status in ( ConversationExecutionStatus.FINISHED, ConversationExecutionStatus.IDLE, ): diff --git a/tests/agent_server/test_event_service.py b/tests/agent_server/test_event_service.py index e4e88655ad..f06a1d056e 100644 --- a/tests/agent_server/test_event_service.py +++ b/tests/agent_server/test_event_service.py @@ -1006,6 +1006,33 @@ async def test_explicit_interrupt_clears_internal_acp_rerun_request( assert event_service._rerun_requested is False assert event_service._acp_internal_rerun_requested is False + @pytest.mark.asyncio + async def test_internal_acp_rerun_does_not_override_explicit_interrupt( + self, event_service + ): + """Explicit Stop/Pause should win while an internal ACP interrupt drains.""" + conversation = MagicMock() + conversation.send_message = MagicMock() + event_service._conversation = conversation + event_service._mark_running_acp_prompt_superseded = AsyncMock( + return_value=(True, False) + ) + event_service.run = AsyncMock() + + async def interrupt_and_simulate_user_stop(internal_acp_rerun=False): + assert internal_acp_rerun is True + event_service._explicit_interrupt_generation += 1 + event_service._rerun_requested = False + event_service._acp_internal_rerun_requested = False + + event_service.interrupt = interrupt_and_simulate_user_stop + + await event_service.send_message(Message(role="user", content=[]), run=True) + + event_service.run.assert_not_awaited() + assert event_service._rerun_requested is False + assert event_service._acp_internal_rerun_requested is False + @pytest.mark.asyncio async def test_send_message_with_run_true_logs_exception(self, event_service): """Test that exceptions from conversation.run() are caught and logged.""" diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index 7369d93257..5ae72258bf 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -34,6 +34,7 @@ MessageEvent, SystemPromptEvent, ) +from openhands.sdk.event.conversation_error import ConversationErrorEvent from openhands.sdk.llm import ImageContent, Message, TextContent from openhands.sdk.skills import KeywordTrigger, Skill from openhands.sdk.tool.builtins.finish import FinishAction @@ -1770,6 +1771,69 @@ async def _fake_cancel(session_id): for e in emitted ) + def test_astep_cancelled_prompt_error_pauses_without_turn_error(self, tmp_path): + """Explicit cancellation should not emit stale prompt errors.""" + from openhands.sdk.utils.async_executor import AsyncExecutor + + agent = _make_agent() + conversation = self._make_conversation_with_message(tmp_path) + emitted: list = [] + + mock_client = _OpenHandsACPBridge() + mock_client.get_turn_usage_update = MagicMock(return_value=object()) + agent._client = mock_client + agent._conn = MagicMock() + + executor = AsyncExecutor() + + async def _run_with_cancel() -> None: + prompt_entered = asyncio.Event() + cancel_called = asyncio.Event() + prompt_released = threading.Event() + caller_loop = asyncio.get_running_loop() + + async def _fake_prompt(prompt_blocks, session_id): # noqa: ARG001 + caller_loop.call_soon_threadsafe(prompt_entered.set) + released = await asyncio.to_thread(prompt_released.wait, 10.0) + assert released + raise RuntimeError("late prompt failure") + + async def _fake_cancel(session_id): + assert session_id == "test-session" + caller_loop.call_soon_threadsafe(cancel_called.set) + prompt_released.set() + + agent._conn.prompt = _fake_prompt + agent._conn.cancel = _fake_cancel + agent._session_id = "test-session" + + task = asyncio.create_task( + agent.astep(conversation, on_event=emitted.append) + ) + await asyncio.wait_for(prompt_entered.wait(), timeout=5.0) + task.cancel() + with pytest.raises(asyncio.CancelledError): + await task + await asyncio.wait_for(cancel_called.wait(), timeout=5.0) + + try: + agent._executor = executor + asyncio.run(_run_with_cancel()) + finally: + executor.close() + + assert not any( + isinstance(e, MessageEvent) + and e.source == "agent" + and any( + isinstance(c, TextContent) and c.text.startswith("ACP error:") + for c in e.llm_message.content + ) + for e in emitted + ) + assert not any(isinstance(e, ConversationErrorEvent) for e in emitted) + assert agent._restart_session_on_next_turn is True + def test_astep_cancellation_does_not_mark_suffix_installed(self, tmp_path): """Cancellation before a turn completes must leave ``_suffix_install_state`` as ``pending_first_prompt``. @@ -3904,6 +3968,20 @@ def test_first_launch_calls_new_session(self, tmp_path): conn.load_session.assert_not_awaited() assert agent._session_id == "fresh-sess" + def test_cancel_drain_restart_keeps_retry_flag_when_init_fails(self, tmp_path): + """A failed replacement session should leave the deferred restart armed.""" + agent = _make_agent() + state = _make_state(tmp_path) + agent._restart_session_on_next_turn = True + + with patch.object(ACPAgent, "init_state", side_effect=RuntimeError("boom")): + with pytest.raises(RuntimeError, match="boom"): + agent._restart_session_after_drain_timeout( + state, on_event=lambda _: None + ) + + assert agent._restart_session_on_next_turn is True + def test_init_state_writes_session_id_into_agent_state(self, tmp_path): """init_state lands the session id in state.agent_state so ConversationState's base_state.json persistence carries it forward. diff --git a/tests/sdk/conversation/local/test_conversation_send_message.py b/tests/sdk/conversation/local/test_conversation_send_message.py index f12c9dd9f9..588a2b2cc9 100644 --- a/tests/sdk/conversation/local/test_conversation_send_message.py +++ b/tests/sdk/conversation/local/test_conversation_send_message.py @@ -1,5 +1,5 @@ import asyncio -from unittest.mock import patch +from unittest.mock import MagicMock, patch import pytest from pydantic import SecretStr @@ -463,6 +463,101 @@ async def blocking_astep( assert conversation.state.execution_status == ConversationExecutionStatus.FINISHED +@pytest.mark.asyncio +async def test_acp_arun_recovers_when_persisted_cursor_is_missing(tmp_path): + """A stale persisted ACP cursor should not tight-loop the run.""" + + agent = ACPAgent(acp_command=["echo", "test"]) + conversation = LocalConversation( + agent=agent, + workspace=str(tmp_path), + max_iteration_per_run=3, + stuck_detection=False, + ) + conversation.send_message("surviving message") + surviving_message_id = conversation.state.last_user_message_id + conversation.state.agent_state = {ACP_LAST_PROMPT_USER_MESSAGE_ID: "missing-id"} + + prompts_seen: list[str] = [] + + def user_text(event: MessageEvent | None) -> str: + assert event is not None + content = event.llm_message.content[0] + assert isinstance(content, TextContent) + return content.text + + async def record_astep( + self, # noqa: ARG001 + conv: LocalConversation, + on_event: ConversationCallbackType, # noqa: ARG001 + on_token: ConversationTokenCallbackType | None = None, # noqa: ARG001 + prompt_message: MessageEvent | None = None, + ) -> None: + prompts_seen.append(user_text(prompt_message)) + conv.state.execution_status = ConversationExecutionStatus.FINISHED + + with ( + patch.object(ACPAgent, "init_state", autospec=True), + patch.object(ACPAgent, "astep", new=record_astep), + ): + await asyncio.wait_for(conversation.arun(), timeout=1.0) + + assert prompts_seen == ["surviving message"] + assert conversation.state.execution_status == ConversationExecutionStatus.FINISHED + assert ( + conversation.state.agent_state.get(ACP_LAST_PROMPT_USER_MESSAGE_ID) + == surviving_message_id + ) + + +@pytest.mark.asyncio +async def test_acp_arun_sends_stop_hook_feedback_to_acp(tmp_path): + """ACP stop-hook feedback should be queued as the next prompt.""" + + agent = ACPAgent(acp_command=["echo", "test"]) + conversation = LocalConversation( + agent=agent, + workspace=str(tmp_path), + max_iteration_per_run=3, + stuck_detection=False, + ) + conversation.send_message("initial request") + + hook = MagicMock() + hook.run_stop.side_effect = [(False, "please continue"), (True, None)] + conversation._hook_processor = hook + prompts_seen: list[str] = [] + + def user_text(event: MessageEvent | None) -> str: + assert event is not None + content = event.llm_message.content[0] + assert isinstance(content, TextContent) + return content.text + + async def finish_astep( + self, # noqa: ARG001 + conv: LocalConversation, + on_event: ConversationCallbackType, # noqa: ARG001 + on_token: ConversationTokenCallbackType | None = None, # noqa: ARG001 + prompt_message: MessageEvent | None = None, + ) -> None: + prompts_seen.append(user_text(prompt_message)) + conv.state.execution_status = ConversationExecutionStatus.FINISHED + + with ( + patch.object(ACPAgent, "init_state", autospec=True), + patch.object(ACPAgent, "astep", new=finish_astep), + ): + await asyncio.wait_for(conversation.arun(), timeout=1.0) + + assert hook.run_stop.call_count == 2 + assert prompts_seen == [ + "initial request", + "[Stop hook feedback] please continue", + ] + assert conversation.state.execution_status == ConversationExecutionStatus.FINISHED + + @pytest.mark.asyncio async def test_acp_arun_rechecks_messages_before_finishing(tmp_path): """A user message appended in the finish gap should be sent in the same run.""" From 1deacf2809885496295328f97b85fa2fa270d15d Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 26 May 2026 01:55:42 +0000 Subject: [PATCH 26/34] fix: handle ACP double cancellation Co-authored-by: openhands --- .../openhands/sdk/agent/acp_agent.py | 13 +++- tests/sdk/agent/test_acp_agent.py | 61 +++++++++++++++++++ 2 files changed, 73 insertions(+), 1 deletion(-) diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 2af85eb957..0b44459106 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -1921,6 +1921,10 @@ async def astep( elapsed = time.monotonic() - t0 logger.info("ACP prompt returned in %.1fs (async)", elapsed) + # ``on_event`` may be LocalConversation._on_event_with_state_lock, + # which re-acquires this same FIFOLock. This is safe because astep() + # finalization runs on the event-loop thread and FIFOLock is + # reentrant for the owning thread. with state: self._finalize_successful_turn(response, elapsed, state, on_event) except asyncio.CancelledError: @@ -1936,7 +1940,14 @@ async def astep( # session/cancel, so late cancelled-turn updates cannot overwrite # the terminal synthetic failures. await self._arequest_session_cancel() - drain_result = await self._drain_cancelled_prompt(prompt_future) + try: + drain_result = await self._drain_cancelled_prompt(prompt_future) + except asyncio.CancelledError: + with state: + self._cancel_inflight_tool_calls() + if prompt_future is not None and not prompt_future.done(): + self._restart_session_on_next_turn = True + raise with state: elapsed = time.monotonic() - t0 if drain_result.completed and drain_result.error is None: diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index 5ae72258bf..62626ae721 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -1834,6 +1834,67 @@ async def _fake_cancel(session_id): assert not any(isinstance(e, ConversationErrorEvent) for e in emitted) assert agent._restart_session_on_next_turn is True + def test_astep_double_cancel_during_drain_restarts_next_turn(self, tmp_path): + """A second cancellation during drain should quarantine the live prompt.""" + from openhands.sdk.utils.async_executor import AsyncExecutor + + agent = _make_agent() + conversation = self._make_conversation_with_message(tmp_path) + + mock_client = _OpenHandsACPBridge() + mock_client.get_turn_usage_update = MagicMock(return_value=object()) + agent._client = mock_client + agent._conn = MagicMock() + + executor = AsyncExecutor() + + async def _run_with_double_cancel() -> None: + prompt_entered = asyncio.Event() + prompt_released = threading.Event() + caller_loop = asyncio.get_running_loop() + + async def _fake_prompt(prompt_blocks, session_id): # noqa: ARG001 + caller_loop.call_soon_threadsafe(prompt_entered.set) + released = await asyncio.to_thread(prompt_released.wait, 10.0) + assert released + return None + + async def _fake_cancel(session_id): + assert session_id == "test-session" + + async def _raise_during_drain(self, future): # noqa: ARG001 + assert future is not None + assert not future.done() + raise asyncio.CancelledError + + agent._conn.prompt = _fake_prompt + agent._conn.cancel = _fake_cancel + agent._session_id = "test-session" + + with patch.object( + ACPAgent, + "_drain_cancelled_prompt", + new=_raise_during_drain, + ): + task = asyncio.create_task( + agent.astep(conversation, on_event=lambda _: None) + ) + await asyncio.wait_for(prompt_entered.wait(), timeout=5.0) + task.cancel() + try: + with pytest.raises(asyncio.CancelledError): + await task + finally: + prompt_released.set() + + try: + agent._executor = executor + asyncio.run(_run_with_double_cancel()) + finally: + executor.close() + + assert agent._restart_session_on_next_turn is True + def test_astep_cancellation_does_not_mark_suffix_installed(self, tmp_path): """Cancellation before a turn completes must leave ``_suffix_install_state`` as ``pending_first_prompt``. From 91d11a86bcee4ecb2f550fecaa3ef8b74df7e0aa Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 26 May 2026 02:27:06 +0000 Subject: [PATCH 27/34] fix: close ACP cancellation restart races Co-authored-by: openhands --- .../openhands/agent_server/event_service.py | 10 +++- .../openhands/sdk/agent/acp_agent.py | 2 +- .../conversation/impl/local_conversation.py | 8 ++- tests/agent_server/test_event_service.py | 42 ++++++++++++++ tests/sdk/agent/test_acp_agent.py | 55 +++++++++++++++++++ .../local/test_conversation_send_message.py | 46 ++++++++++++++++ 6 files changed, 158 insertions(+), 5 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/event_service.py b/openhands-agent-server/openhands/agent_server/event_service.py index 82428cd766..372735c64e 100644 --- a/openhands-agent-server/openhands/agent_server/event_service.py +++ b/openhands-agent-server/openhands/agent_server/event_service.py @@ -477,6 +477,9 @@ def _mark_running_acp_prompt_superseded_sync(self) -> tuple[bool, bool]: """ if not self._conversation: return (False, False) + # asyncio.Task.done() reads a simple internal flag; reading it from a + # thread-pool executor is safe under CPython's GIL even though it + # technically crosses asyncio's single-threaded boundary. if self._run_task is None or self._run_task.done(): return (False, False) if not isinstance(self._conversation.agent, ACPAgent): @@ -909,12 +912,17 @@ async def _run_and_publish(): # never sets the flag. rerun_requested = self._rerun_requested acp_internal_rerun_requested = self._acp_internal_rerun_requested + rerun_generation = self._explicit_interrupt_generation self._rerun_requested = False self._acp_internal_rerun_requested = False if rerun_requested: status = await self._get_execution_status() - should_restart = status == ConversationExecutionStatus.IDLE or ( + acp_internal_rerun_still_valid = ( acp_internal_rerun_requested + and self._explicit_interrupt_generation == rerun_generation + ) + should_restart = status == ConversationExecutionStatus.IDLE or ( + acp_internal_rerun_still_valid and status == ConversationExecutionStatus.PAUSED and isinstance(conversation.agent, ACPAgent) ) diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 0b44459106..1f9d0accb5 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -1939,8 +1939,8 @@ async def astep( # the caller thread after the portal prompt has observed # session/cancel, so late cancelled-turn updates cannot overwrite # the terminal synthetic failures. - await self._arequest_session_cancel() try: + await self._arequest_session_cancel() drain_result = await self._drain_cancelled_prompt(prompt_future) except asyncio.CancelledError: with state: diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index fd449acd1b..892cb4acaa 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -1277,10 +1277,12 @@ async def arun(self) -> None: superseded_by_new_message = bool( updated_agent_state.pop(ACP_SUPERSEDE_INFLIGHT_PROMPT, False) ) + completed_cancelled_prompt = ( + self._state.execution_status == ConversationExecutionStatus.FINISHED + ) if ( - superseded_by_new_message - and inflight_prompt_user_message_id is not None - ): + superseded_by_new_message or completed_cancelled_prompt + ) and inflight_prompt_user_message_id is not None: updated_agent_state[ACP_LAST_PROMPT_USER_MESSAGE_ID] = ( inflight_prompt_user_message_id ) diff --git a/tests/agent_server/test_event_service.py b/tests/agent_server/test_event_service.py index f06a1d056e..6b6e411c76 100644 --- a/tests/agent_server/test_event_service.py +++ b/tests/agent_server/test_event_service.py @@ -1033,6 +1033,48 @@ async def interrupt_and_simulate_user_stop(internal_acp_rerun=False): assert event_service._rerun_requested is False assert event_service._acp_internal_rerun_requested is False + @pytest.mark.asyncio + async def test_internal_acp_rerun_rechecks_explicit_interrupt_before_restart( + self, event_service, tmp_path + ): + """Explicit Stop/Pause should win during final restart status checks.""" + agent = ACPAgent(acp_command=["echo", "test"]) + conversation = LocalConversation( + agent=agent, + workspace=str(tmp_path), + max_iteration_per_run=3, + stuck_detection=False, + ) + mock_arun = AsyncMock() + event_service._conversation = conversation + event_service._publish_state_update = AsyncMock() + event_service._rerun_requested = True + event_service._acp_internal_rerun_requested = True + + status_calls = 0 + + async def status_with_late_explicit_interrupt(): + nonlocal status_calls + status_calls += 1 + if status_calls == 1: + return ConversationExecutionStatus.IDLE + event_service._explicit_interrupt_generation += 1 + event_service._rerun_requested = False + event_service._acp_internal_rerun_requested = False + return ConversationExecutionStatus.PAUSED + + event_service._get_execution_status = status_with_late_explicit_interrupt + + with patch.object(conversation, "arun", mock_arun): + await event_service.run() + assert event_service._run_task is not None + await asyncio.wait_for(event_service._run_task, timeout=1.0) + + mock_arun.assert_awaited_once() + assert status_calls == 2 + assert event_service._rerun_requested is False + assert event_service._acp_internal_rerun_requested is False + @pytest.mark.asyncio async def test_send_message_with_run_true_logs_exception(self, event_service): """Test that exceptions from conversation.run() are caught and logged.""" diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index 62626ae721..8a2deb9103 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -1895,6 +1895,61 @@ async def _raise_during_drain(self, future): # noqa: ARG001 assert agent._restart_session_on_next_turn is True + def test_astep_double_cancel_during_cancel_send_restarts_next_turn(self, tmp_path): + """A second cancellation during session/cancel should quarantine prompt.""" + from openhands.sdk.utils.async_executor import AsyncExecutor + + agent = _make_agent() + conversation = self._make_conversation_with_message(tmp_path) + + mock_client = _OpenHandsACPBridge() + mock_client.get_turn_usage_update = MagicMock(return_value=object()) + agent._client = mock_client + agent._conn = MagicMock() + + executor = AsyncExecutor() + + async def _run_with_cancelled_cancel_send() -> None: + prompt_entered = asyncio.Event() + prompt_released = threading.Event() + caller_loop = asyncio.get_running_loop() + + async def _fake_prompt(prompt_blocks, session_id): # noqa: ARG001 + caller_loop.call_soon_threadsafe(prompt_entered.set) + released = await asyncio.to_thread(prompt_released.wait, 10.0) + assert released + return None + + async def _raise_during_cancel_send(self): # noqa: ARG001 + raise asyncio.CancelledError + + agent._conn.prompt = _fake_prompt + agent._session_id = "test-session" + + with patch.object( + ACPAgent, + "_arequest_session_cancel", + new=_raise_during_cancel_send, + ): + task = asyncio.create_task( + agent.astep(conversation, on_event=lambda _: None) + ) + await asyncio.wait_for(prompt_entered.wait(), timeout=5.0) + task.cancel() + try: + with pytest.raises(asyncio.CancelledError): + await task + finally: + prompt_released.set() + + try: + agent._executor = executor + asyncio.run(_run_with_cancelled_cancel_send()) + finally: + executor.close() + + assert agent._restart_session_on_next_turn is True + def test_astep_cancellation_does_not_mark_suffix_installed(self, tmp_path): """Cancellation before a turn completes must leave ``_suffix_install_state`` as ``pending_first_prompt``. diff --git a/tests/sdk/conversation/local/test_conversation_send_message.py b/tests/sdk/conversation/local/test_conversation_send_message.py index 588a2b2cc9..ec124f2da8 100644 --- a/tests/sdk/conversation/local/test_conversation_send_message.py +++ b/tests/sdk/conversation/local/test_conversation_send_message.py @@ -667,6 +667,52 @@ async def blocking_astep( assert ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID not in conversation.state.agent_state +@pytest.mark.asyncio +async def test_acp_arun_commits_cursor_when_cancelled_prompt_completed(tmp_path): + """Completed ACP prompts should not be replayed after cancellation.""" + + agent = ACPAgent(acp_command=["echo", "test"]) + conversation = LocalConversation( + agent=agent, + workspace=str(tmp_path), + max_iteration_per_run=3, + stuck_detection=False, + ) + conversation.send_message("complete during cancel") + first_message_id = conversation.state.last_user_message_id + prompts_seen: list[str] = [] + + async def finishing_cancelled_astep( + self, # noqa: ARG001 + conv: LocalConversation, + on_event: ConversationCallbackType, # noqa: ARG001 + on_token: ConversationTokenCallbackType | None = None, # noqa: ARG001 + prompt_message: MessageEvent | None = None, + ) -> None: + assert prompt_message is not None + content = prompt_message.llm_message.content[0] + assert isinstance(content, TextContent) + prompts_seen.append(content.text) + conv.state.execution_status = ConversationExecutionStatus.FINISHED + raise asyncio.CancelledError + + with ( + patch.object(ACPAgent, "init_state", autospec=True), + patch.object(ACPAgent, "astep", new=finishing_cancelled_astep), + ): + await asyncio.wait_for(conversation.arun(), timeout=1.0) + assert conversation.state.execution_status == ConversationExecutionStatus.PAUSED + await asyncio.wait_for(conversation.arun(), timeout=1.0) + + assert conversation.state.execution_status == ConversationExecutionStatus.FINISHED + assert ( + conversation.state.agent_state.get(ACP_LAST_PROMPT_USER_MESSAGE_ID) + == first_message_id + ) + assert ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID not in conversation.state.agent_state + assert prompts_seen == ["complete during cancel"] + + @pytest.mark.asyncio async def test_acp_arun_resumes_queued_messages_fifo_after_iteration_cap(tmp_path): """Queued ACP messages should remain FIFO across follow-up runs.""" From eb7bed3586132957e219c2583f7499fb27f59f4b Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 26 May 2026 02:54:58 +0000 Subject: [PATCH 28/34] fix: harden ACP cleanup interrupts Co-authored-by: openhands --- .../openhands/agent_server/event_service.py | 21 +++++++--- .../openhands/sdk/agent/acp_agent.py | 41 ++++++++++++++++--- tests/agent_server/test_event_service.py | 37 +++++++++++++++++ tests/sdk/agent/test_acp_agent.py | 29 +++++++++++++ 4 files changed, 118 insertions(+), 10 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/event_service.py b/openhands-agent-server/openhands/agent_server/event_service.py index 372735c64e..675dfd3118 100644 --- a/openhands-agent-server/openhands/agent_server/event_service.py +++ b/openhands-agent-server/openhands/agent_server/event_service.py @@ -448,7 +448,11 @@ async def send_message(self, message: Message, run: bool = False): if self._explicit_interrupt_generation != explicit_interrupt_generation: return try: - await self.run() + await self.run( + acp_internal_rerun_generation=explicit_interrupt_generation + if interrupted_acp + else None + ) self._acp_internal_rerun_requested = False except ValueError as e: # run() refused. If a run is still wrapping up (its @@ -698,8 +702,6 @@ async def start(self): self._pub_sub, loop=asyncio.get_running_loop() ) - from openhands.sdk.agent import ACPAgent - # Only wire token streaming for agents that can actually emit token # callbacks. SDK LLM agents need stream=True, while ACP agents emit # AgentMessageChunk text through their bridge without exposing an LLM. @@ -821,7 +823,7 @@ def _token_streaming_callback(chunk: LLMStreamChunk | str) -> None: # Publish initial state update await self._publish_state_update() - async def run(self): + async def run(self, acp_internal_rerun_generation: int | None = None): """Run the conversation asynchronously in the background. This method starts the conversation run in a background task and returns @@ -844,6 +846,11 @@ async def run(self): == ConversationExecutionStatus.RUNNING ): raise ValueError("conversation_already_running") + if ( + acp_internal_rerun_generation is not None + and self._explicit_interrupt_generation != acp_internal_rerun_generation + ): + return # Check if there's already a running task if self._run_task is not None and not self._run_task.done(): @@ -928,7 +935,11 @@ async def _run_and_publish(): ) if should_restart: try: - await self.run() + await self.run( + acp_internal_rerun_generation=rerun_generation + if acp_internal_rerun_still_valid + else None + ) except ValueError as e: if str(e) == "conversation_already_running": self._rerun_requested = True diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 1f9d0accb5..7f3cb6508f 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -1663,6 +1663,28 @@ def _emit_turn_error( ) state.execution_status = ConversationExecutionStatus.ERROR + def _handle_cancelled_cleanup_interruption( + self, + prompt_future: Future[PromptResponse | None] | None, + elapsed: float, + state: ConversationState, + on_event: ConversationCallbackType, + ) -> None: + """Repair state when cancellation interrupts cancel/drain cleanup.""" + if prompt_future is not None and prompt_future.done(): + try: + response = prompt_future.result() + except BaseException: + self._cancel_inflight_tool_calls() + self._restart_session_on_next_turn = True + else: + self._finalize_successful_turn(response, elapsed, state, on_event) + return + + self._cancel_inflight_tool_calls() + if prompt_future is not None: + self._restart_session_on_next_turn = True + def _clear_turn_callbacks(self) -> None: """Unwire per-turn bridge callbacks so trailing ``session_update`` between turns is a no-op (fires on the portal thread with no @@ -1944,9 +1966,10 @@ async def astep( drain_result = await self._drain_cancelled_prompt(prompt_future) except asyncio.CancelledError: with state: - self._cancel_inflight_tool_calls() - if prompt_future is not None and not prompt_future.done(): - self._restart_session_on_next_turn = True + elapsed = time.monotonic() - t0 + self._handle_cancelled_cleanup_interruption( + prompt_future, elapsed, state, on_event + ) raise with state: elapsed = time.monotonic() - t0 @@ -1964,8 +1987,16 @@ async def astep( self._restart_session_on_next_turn = True raise except TimeoutError: - await self._arequest_session_cancel() - drain_result = await self._drain_cancelled_prompt(prompt_future) + try: + await self._arequest_session_cancel() + drain_result = await self._drain_cancelled_prompt(prompt_future) + except asyncio.CancelledError: + with state: + elapsed = time.monotonic() - t0 + self._handle_cancelled_cleanup_interruption( + prompt_future, elapsed, state, on_event + ) + raise with state: elapsed = time.monotonic() - t0 if drain_result.completed and drain_result.error is None: diff --git a/tests/agent_server/test_event_service.py b/tests/agent_server/test_event_service.py index 6b6e411c76..407778504e 100644 --- a/tests/agent_server/test_event_service.py +++ b/tests/agent_server/test_event_service.py @@ -1033,6 +1033,43 @@ async def interrupt_and_simulate_user_stop(internal_acp_rerun=False): assert event_service._rerun_requested is False assert event_service._acp_internal_rerun_requested is False + @pytest.mark.asyncio + async def test_internal_acp_send_message_restart_rechecks_generation_in_run( + self, event_service, tmp_path + ): + """A late explicit Stop/Pause should prevent direct ACP restart.""" + agent = ACPAgent(acp_command=["echo", "test"]) + conversation = LocalConversation( + agent=agent, + workspace=str(tmp_path), + max_iteration_per_run=3, + stuck_detection=False, + ) + mock_arun = AsyncMock() + event_service._conversation = conversation + event_service._publish_state_update = AsyncMock() + event_service._mark_running_acp_prompt_superseded = AsyncMock( + return_value=(True, False) + ) + event_service.interrupt = AsyncMock() + + async def status_with_late_explicit_interrupt(): + event_service._explicit_interrupt_generation += 1 + event_service._rerun_requested = False + event_service._acp_internal_rerun_requested = False + return ConversationExecutionStatus.PAUSED + + event_service._get_execution_status = status_with_late_explicit_interrupt + + with patch.object(conversation, "arun", mock_arun): + await event_service.send_message(Message(role="user", content=[]), run=True) + + event_service.interrupt.assert_awaited_once_with(internal_acp_rerun=True) + mock_arun.assert_not_awaited() + assert event_service._run_task is None + assert event_service._rerun_requested is False + assert event_service._acp_internal_rerun_requested is False + @pytest.mark.asyncio async def test_internal_acp_rerun_rechecks_explicit_interrupt_before_restart( self, event_service, tmp_path diff --git a/tests/sdk/agent/test_acp_agent.py b/tests/sdk/agent/test_acp_agent.py index 8a2deb9103..6fb29a0e9e 100644 --- a/tests/sdk/agent/test_acp_agent.py +++ b/tests/sdk/agent/test_acp_agent.py @@ -6,11 +6,13 @@ import json import threading import uuid +from concurrent.futures import Future from typing import Any from unittest.mock import AsyncMock, MagicMock, patch import pytest from acp.exceptions import RequestError as ACPRequestError +from acp.schema import PromptResponse from openhands.sdk.agent.acp_agent import ( ACPAgent, @@ -1950,6 +1952,33 @@ async def _raise_during_cancel_send(self): # noqa: ARG001 assert agent._restart_session_on_next_turn is True + def test_cleanup_interruption_finalizes_completed_prompt(self, tmp_path): + """A completed prompt should be finalized if cleanup is cancelled.""" + agent = _make_agent() + conversation = self._make_conversation_with_message(tmp_path) + mock_client = _OpenHandsACPBridge() + mock_client.get_turn_usage_update = MagicMock(return_value=object()) + agent._client = mock_client + agent._session_id = "test-session" + + prompt_future: Future[PromptResponse | None] = Future() + prompt_future.set_result(None) + emitted = [] + + with conversation.state as state: + agent._handle_cancelled_cleanup_interruption( + prompt_future, + 0.1, + state, + emitted.append, + ) + + assert ( + conversation.state.execution_status == ConversationExecutionStatus.FINISHED + ) + assert agent._restart_session_on_next_turn is False + assert any(isinstance(event, ActionEvent) for event in emitted) + def test_astep_cancellation_does_not_mark_suffix_installed(self, tmp_path): """Cancellation before a turn completes must leave ``_suffix_install_state`` as ``pending_first_prompt``. From 80dc346e32f0fb9a2676a5d105dcf2e944314fa2 Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 26 May 2026 03:28:52 +0000 Subject: [PATCH 29/34] fix: guard ACP restart ordering Co-authored-by: openhands --- .../openhands/agent_server/event_service.py | 16 +++++--- .../openhands/sdk/agent/acp_agent.py | 39 +++++++++++++------ .../conversation/impl/local_conversation.py | 26 +++++++++++-- 3 files changed, 61 insertions(+), 20 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/event_service.py b/openhands-agent-server/openhands/agent_server/event_service.py index 675dfd3118..62dc39e605 100644 --- a/openhands-agent-server/openhands/agent_server/event_service.py +++ b/openhands-agent-server/openhands/agent_server/event_service.py @@ -84,6 +84,7 @@ class EventService: # supersede restarts compare this generation after their interrupt drains # so a later Stop/Pause cannot be overwritten by an automatic restart. _explicit_interrupt_generation: int = field(default=0, init=False) + _closing: bool = field(default=False, init=False) _run_lock: asyncio.Lock = field(default_factory=asyncio.Lock, init=False) _callback_wrapper: AsyncCallbackWrapper | None = field(default=None, init=False) _lease: ConversationLease | None = field(default=None, init=False) @@ -432,10 +433,10 @@ async def batch_get_events(self, event_ids: list[str]) -> list[Event | None]: async def send_message(self, message: Message, run: bool = False): if not self._conversation: raise ValueError("inactive_service") + explicit_interrupt_generation = self._explicit_interrupt_generation loop = asyncio.get_running_loop() await loop.run_in_executor(None, self._conversation.send_message, message) if run: - explicit_interrupt_generation = self._explicit_interrupt_generation ( did_mark_acp_prompt_superseded, active_acp_prompt_has_latest_message, @@ -481,10 +482,7 @@ def _mark_running_acp_prompt_superseded_sync(self) -> tuple[bool, bool]: """ if not self._conversation: return (False, False) - # asyncio.Task.done() reads a simple internal flag; reading it from a - # thread-pool executor is safe under CPython's GIL even though it - # technically crosses asyncio's single-threaded boundary. - if self._run_task is None or self._run_task.done(): + if self._run_task is None: return (False, False) if not isinstance(self._conversation.agent, ACPAgent): return (False, False) @@ -836,7 +834,7 @@ async def run(self, acp_internal_rerun_generation: int | None = None): Raises: ValueError: If the service is inactive or conversation is already running. """ - if not self._conversation: + if not self._conversation or self._closing: raise ValueError("inactive_service") # Use lock to make check-and-set atomic, preventing race conditions @@ -846,6 +844,8 @@ async def run(self, acp_internal_rerun_generation: int | None = None): == ConversationExecutionStatus.RUNNING ): raise ValueError("conversation_already_running") + if self._closing: + raise ValueError("inactive_service") if ( acp_internal_rerun_generation is not None and self._explicit_interrupt_generation != acp_internal_rerun_generation @@ -1040,6 +1040,10 @@ async def set_security_analyzer( ) async def close(self): + self._closing = True + self._explicit_interrupt_generation += 1 + self._rerun_requested = False + self._acp_internal_rerun_requested = False if self._lease_task is not None: self._lease_task.cancel() with suppress(asyncio.CancelledError): diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index 7f3cb6508f..aa3a228384 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -1290,12 +1290,13 @@ def _cancel_inflight_tool_calls(self) -> None: spinning forever. This method closes those cards before we wipe the in-memory accumulator on retry / turn abort. - Uses the bridge's ``on_event`` directly (the same callback driving - live emissions); call this *before* ``_reset_client_for_turn`` so - the callback is still wired up. No-op if ``on_event`` was never - set (e.g. during tests exercising the bridge in isolation). + Captures the bridge's ``on_event`` callback, then unwires the bridge + before emitting synthetic terminal events so trailing updates from the + abandoned portal prompt cannot land after these failures. No-op if + ``on_event`` was never set (e.g. tests exercising the bridge alone). """ on_event = self._client.on_event + self._clear_turn_callbacks() if on_event is None: return for tc in self._client.accumulated_tool_calls: @@ -1530,6 +1531,10 @@ async def _await_prompt_response_with_timeout( f"ACP prompt timed out after {self.acp_prompt_timeout:.0f}s" ) from exc + @staticmethod + def _prompt_response_was_cancelled(response: PromptResponse | None) -> bool: + return response is not None and response.stop_reason == "cancelled" + def _finalize_successful_turn( self, response: PromptResponse | None, @@ -1678,7 +1683,11 @@ def _handle_cancelled_cleanup_interruption( self._cancel_inflight_tool_calls() self._restart_session_on_next_turn = True else: - self._finalize_successful_turn(response, elapsed, state, on_event) + if self._prompt_response_was_cancelled(response): + self._cancel_inflight_tool_calls() + self._restart_session_on_next_turn = True + else: + self._finalize_successful_turn(response, elapsed, state, on_event) return self._cancel_inflight_tool_calls() @@ -1974,9 +1983,13 @@ async def astep( with state: elapsed = time.monotonic() - t0 if drain_result.completed and drain_result.error is None: - self._finalize_successful_turn( - drain_result.response, elapsed, state, on_event - ) + if self._prompt_response_was_cancelled(drain_result.response): + self._cancel_inflight_tool_calls() + self._restart_session_on_next_turn = True + else: + self._finalize_successful_turn( + drain_result.response, elapsed, state, on_event + ) raise if drain_result.completed and drain_result.error is not None: self._cancel_inflight_tool_calls() @@ -2000,9 +2013,13 @@ async def astep( with state: elapsed = time.monotonic() - t0 if drain_result.completed and drain_result.error is None: - self._finalize_successful_turn( - drain_result.response, elapsed, state, on_event - ) + if self._prompt_response_was_cancelled(drain_result.response): + self._emit_turn_timeout(elapsed, state, on_event) + self._restart_session_on_next_turn = True + else: + self._finalize_successful_turn( + drain_result.response, elapsed, state, on_event + ) elif drain_result.completed and drain_result.error is not None: self._emit_turn_error(drain_result.error, state, on_event) self._restart_session_on_next_turn = True diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index 892cb4acaa..129b0d4bc5 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -965,6 +965,21 @@ async def arun(self) -> None: self._cancel_token = CancellationToken() with self._state: + if isinstance(self.agent, ACPAgent) and self._state.execution_status in ( + ConversationExecutionStatus.FINISHED, + ConversationExecutionStatus.IDLE, + ): + updated_agent_state = dict(self._state.agent_state) + inflight_prompt_user_message_id = updated_agent_state.get( + ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID + ) + if inflight_prompt_user_message_id is not None: + updated_agent_state[ACP_LAST_PROMPT_USER_MESSAGE_ID] = ( + inflight_prompt_user_message_id + ) + updated_agent_state.pop(ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID, None) + self._state.agent_state = updated_agent_state + if self._state.execution_status in [ ConversationExecutionStatus.IDLE, ConversationExecutionStatus.PAUSED, @@ -972,11 +987,11 @@ async def arun(self) -> None: ConversationExecutionStatus.STUCK, ]: self._state.execution_status = ConversationExecutionStatus.RUNNING + last_acp_prompt_user_message_id = self._state.agent_state.get( + ACP_LAST_PROMPT_USER_MESSAGE_ID + ) iteration = 0 - last_acp_prompt_user_message_id = self._state.agent_state.get( - ACP_LAST_PROMPT_USER_MESSAGE_ID - ) try: while True: logger.debug(f"Conversation arun iteration {iteration}") @@ -1156,6 +1171,11 @@ async def arun(self) -> None: break with self._state: + if self._state.execution_status in ( + ConversationExecutionStatus.PAUSED, + ConversationExecutionStatus.STUCK, + ): + break if acp_step_user_message_id is not None: self._state.agent_state = { **self._state.agent_state, From 24ee53f6a4dbd0404141d3925e408db0faf7a86e Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 26 May 2026 03:55:12 +0000 Subject: [PATCH 30/34] fix: address ACP review nits Co-authored-by: openhands --- .../openhands/agent_server/event_service.py | 2 +- openhands-sdk/openhands/sdk/agent/acp_agent.py | 2 +- .../openhands/sdk/conversation/impl/local_conversation.py | 4 ++++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/event_service.py b/openhands-agent-server/openhands/agent_server/event_service.py index 62dc39e605..eb57f14fab 100644 --- a/openhands-agent-server/openhands/agent_server/event_service.py +++ b/openhands-agent-server/openhands/agent_server/event_service.py @@ -729,7 +729,7 @@ def _publish_stream_delta( content=content, reasoning_content=reasoning_content, ) - with suppress(RuntimeError): + with suppress(RuntimeError): # main loop already closed during teardown asyncio.run_coroutine_threadsafe(self._pub_sub(event), self._main_loop) def _token_streaming_callback(chunk: LLMStreamChunk | str) -> None: diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index aa3a228384..d456d051de 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -1381,7 +1381,7 @@ async def _drain_cancelled_prompt( except asyncio.CancelledError: if future.cancelled(): return _PromptDrainResult( - drained=True, completed=False, response=None, error=None + drained=False, completed=False, response=None, error=None ) raise except TimeoutError: diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index 129b0d4bc5..943d5c9cb6 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -1050,6 +1050,10 @@ async def arun(self) -> None: ) if isinstance(self.agent, ACPAgent): + # Re-scan prompt messages under the lock each time we need + # the latest tail; the list is usually tiny, and correctness + # is more important than caching stale prompt snapshots. + acp_prompt_messages = [ event for event in self._state.events From 8b59878f0fc0fb6799ed5003e38ee5ec67b5cddf Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 26 May 2026 04:31:13 +0000 Subject: [PATCH 31/34] fix: preserve explicit ACP pauses Co-authored-by: openhands --- .../openhands/agent_server/event_service.py | 4 ++-- .../sdk/conversation/impl/local_conversation.py | 11 +++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/event_service.py b/openhands-agent-server/openhands/agent_server/event_service.py index eb57f14fab..5124a4c0ad 100644 --- a/openhands-agent-server/openhands/agent_server/event_service.py +++ b/openhands-agent-server/openhands/agent_server/event_service.py @@ -437,6 +437,8 @@ async def send_message(self, message: Message, run: bool = False): loop = asyncio.get_running_loop() await loop.run_in_executor(None, self._conversation.send_message, message) if run: + if self._explicit_interrupt_generation != explicit_interrupt_generation: + return ( did_mark_acp_prompt_superseded, active_acp_prompt_has_latest_message, @@ -451,8 +453,6 @@ async def send_message(self, message: Message, run: bool = False): try: await self.run( acp_internal_rerun_generation=explicit_interrupt_generation - if interrupted_acp - else None ) self._acp_internal_rerun_requested = False except ValueError as e: diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index 943d5c9cb6..68e92c6fed 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -1174,12 +1174,14 @@ async def arun(self) -> None: ) break + acp_step_start_event_count = 0 with self._state: if self._state.execution_status in ( ConversationExecutionStatus.PAUSED, ConversationExecutionStatus.STUCK, ): break + acp_step_start_event_count = len(self._state.events) if acp_step_user_message_id is not None: self._state.agent_state = { **self._state.agent_state, @@ -1196,6 +1198,10 @@ async def arun(self) -> None: ) with self._state: iteration += 1 + pause_requested_during_acp_step = any( + isinstance(event, PauseEvent) + for event in self._state.events[acp_step_start_event_count:] + ) updated_agent_state = dict(self._state.agent_state) if ( updated_agent_state.get(ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID) @@ -1225,6 +1231,11 @@ async def arun(self) -> None: ConversationExecutionStatus.STUCK, ): break + if pause_requested_during_acp_step: + self._state.execution_status = ( + ConversationExecutionStatus.PAUSED + ) + break acp_prompt_messages = [ event From 8fb9d70af5e58eb85460c95640a149109a860c0d Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 26 May 2026 04:58:20 +0000 Subject: [PATCH 32/34] fix: quarantine ACP cancel timeouts Co-authored-by: openhands --- openhands-sdk/openhands/sdk/agent/acp_agent.py | 12 +++++++++++- .../sdk/conversation/impl/local_conversation.py | 6 ++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/openhands-sdk/openhands/sdk/agent/acp_agent.py b/openhands-sdk/openhands/sdk/agent/acp_agent.py index d456d051de..1391d4ac1d 100644 --- a/openhands-sdk/openhands/sdk/agent/acp_agent.py +++ b/openhands-sdk/openhands/sdk/agent/acp_agent.py @@ -1341,7 +1341,10 @@ async def _cancel() -> None: timeout=_ACP_CANCEL_DRAIN_TIMEOUT, ) except TimeoutError: - logger.warning("Timed out sending ACP session cancel; continuing cleanup") + logger.warning( + "Timed out sending ACP session cancel; restarting ACP session" + ) + self._restart_session_on_next_turn = True except Exception: logger.warning("Failed to send ACP session cancel", exc_info=True) @@ -1722,6 +1725,11 @@ def step( """ state = conversation.state + if self._restart_session_on_next_turn: + # If restart initialization fails, let the conversation transition + # to ERROR rather than reusing an ambiguous ACP session. + self._restart_session_after_drain_timeout(state, on_event) + # Conversation implementations already attach per-turn AgentContext # extensions to MessageEvent.extended_content; MessageEvent.to_llm_message() # merges those extensions with the user text. @@ -1858,6 +1866,8 @@ async def astep( state = conversation.state if self._restart_session_on_next_turn: + # If restart initialization fails, let the conversation transition + # to ERROR rather than reusing an ambiguous ACP session. self._restart_session_after_drain_timeout(state, on_event) prompt_blocks: list[Any] | None = None diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index 68e92c6fed..ce59e91623 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -968,7 +968,13 @@ async def arun(self) -> None: if isinstance(self.agent, ACPAgent) and self._state.execution_status in ( ConversationExecutionStatus.FINISHED, ConversationExecutionStatus.IDLE, + ConversationExecutionStatus.ERROR, ): + # If the process crashed after ACP finish events were persisted + # but before the cursor commit, startup may normalize RUNNING to + # ERROR. Promote the in-flight cursor so resume does not replay + # a prompt the ACP session already accepted. + updated_agent_state = dict(self._state.agent_state) inflight_prompt_user_message_id = updated_agent_state.get( ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID From 0418ccfc1d5c056f479de603ec0a7264e4d7408a Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 26 May 2026 05:25:04 +0000 Subject: [PATCH 33/34] fix: guard ACP queued reruns Co-authored-by: openhands --- .../openhands/agent_server/event_service.py | 20 +++++++++++++------ .../conversation/impl/local_conversation.py | 6 ------ 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/openhands-agent-server/openhands/agent_server/event_service.py b/openhands-agent-server/openhands/agent_server/event_service.py index 5124a4c0ad..3d5f6beb6b 100644 --- a/openhands-agent-server/openhands/agent_server/event_service.py +++ b/openhands-agent-server/openhands/agent_server/event_service.py @@ -924,14 +924,20 @@ async def _run_and_publish(): self._acp_internal_rerun_requested = False if rerun_requested: status = await self._get_execution_status() + rerun_generation_still_valid = ( + self._explicit_interrupt_generation == rerun_generation + ) acp_internal_rerun_still_valid = ( acp_internal_rerun_requested - and self._explicit_interrupt_generation == rerun_generation + and rerun_generation_still_valid ) - should_restart = status == ConversationExecutionStatus.IDLE or ( - acp_internal_rerun_still_valid - and status == ConversationExecutionStatus.PAUSED - and isinstance(conversation.agent, ACPAgent) + should_restart = rerun_generation_still_valid and ( + status == ConversationExecutionStatus.IDLE + or ( + acp_internal_rerun_still_valid + and status == ConversationExecutionStatus.PAUSED + and isinstance(conversation.agent, ACPAgent) + ) ) if should_restart: try: @@ -1000,7 +1006,9 @@ async def interrupt(self, *, internal_acp_rerun: bool = False): self._acp_internal_rerun_requested = False self._conversation.interrupt() # Wait for the run task to finish so we can publish the final - # state update (PAUSED + InterruptEvent) cleanly. + # state update (PAUSED + InterruptEvent) cleanly. The shield keeps + # the 5s timeout from force-cancelling a cleanup that still needs + # to drain its ACP prompt/cancel handshake. if self._run_task is not None and not self._run_task.done(): with suppress(Exception): await asyncio.wait_for(asyncio.shield(self._run_task), timeout=5.0) diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index ce59e91623..68e92c6fed 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -968,13 +968,7 @@ async def arun(self) -> None: if isinstance(self.agent, ACPAgent) and self._state.execution_status in ( ConversationExecutionStatus.FINISHED, ConversationExecutionStatus.IDLE, - ConversationExecutionStatus.ERROR, ): - # If the process crashed after ACP finish events were persisted - # but before the cursor commit, startup may normalize RUNNING to - # ERROR. Promote the in-flight cursor so resume does not replay - # a prompt the ACP session already accepted. - updated_agent_state = dict(self._state.agent_state) inflight_prompt_user_message_id = updated_agent_state.get( ACP_INFLIGHT_PROMPT_USER_MESSAGE_ID From da40f3689e477bf5f1bc31f9228ab4b5b966f459 Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 26 May 2026 05:25:45 +0000 Subject: [PATCH 34/34] fix: track async runs before initialization Co-authored-by: openhands --- .../openhands/sdk/conversation/impl/local_conversation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index 68e92c6fed..501650ebb8 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -960,9 +960,9 @@ async def arun(self) -> None: observation is patched with a synthetic ``AgentErrorEvent`` so the LLM conversation history stays consistent. """ - self._ensure_agent_ready() self._arun_task = asyncio.current_task() self._cancel_token = CancellationToken() + self._ensure_agent_ready() with self._state: if isinstance(self.agent, ACPAgent) and self._state.execution_status in (