diff --git a/miles/rollout/session/sessions.py b/miles/rollout/session/sessions.py index 0c285bf8bb..b61aaaa2a6 100644 --- a/miles/rollout/session/sessions.py +++ b/miles/rollout/session/sessions.py @@ -265,6 +265,14 @@ async def chat_completions(request: Request, session_id: str): session.append_record(record) # --- lock released here --- + # Signal abort to the agent via a header so it can stop immediately, + # independent of how the caller's LLM client normalizes finish_reason. + # (litellm remaps "abort" → "stop" or "tool_calls" in some versions, + # masking the signal; reading raw response headers is the only + # reliable cross-version detection path.) + if choice.get("finish_reason") == "abort": + result["headers"]["x-sglang-aborted"] = "1" + return backend.build_proxy_response(result) finally: _inflight_chat["count"] -= 1