From c29d31bb4ab6828ad42923900dd4c65e270cf428 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Fri, 12 Jun 2026 19:09:43 +0000
Subject: [PATCH] Deduplicate the agent/TTS websocket connect path into
 diagnostics

Both aai_cli/tts/session.py and aai_cli/agent/session.py carried a
near-identical _open_ws: connect with a Bearer Authorization header and
map any failure through diagnostics.classify_error. A fix to either
error path had to be made twice. Replace both with a single
diagnostics.open_authorized_ws helper (diagnostics already owns the
shared classification and is imported by both sessions; ws.py can't
host it without an import cycle).

Strengthen the TTS connect tests to pin the per-caller details the
shared helper now carries: max_size=None on the TTS socket and the
active environment's streaming-TTS host in the rejected-handshake
suggestion.

https://claude.ai/code/session_016aDo6v4hCVhFZgqyruXgdr
---
 aai_cli/agent/session.py         | 22 +++++++---------------
 aai_cli/streaming/diagnostics.py | 24 ++++++++++++++++++++++++
 aai_cli/tts/session.py           | 29 ++++++++---------------------
 tests/test_tts_session.py        | 10 ++++++++--
 4 files changed, 47 insertions(+), 38 deletions(-)

diff --git a/aai_cli/agent/session.py b/aai_cli/agent/session.py
index a8c54faa..88ea78b4 100644
--- a/aai_cli/agent/session.py
+++ b/aai_cli/agent/session.py
@@ -205,20 +205,6 @@ def _send_audio_loop(ws: _WebSocket, session: VoiceAgentSession, mic: _IO) -> No
             return
 
 
-def _open_ws(connect: _Connect, api_key: str) -> _WebSocket:
-    """Open the Voice Agent socket, mapping a connect failure to a clean CLIError.
-
-    A rejected handshake (HTTP 401/403) gets the shared actionable suggestion
-    (whoami / environment / network); anything else keeps the wsutil mapping.
-    """
-    try:
-        return connect(ws_url(), additional_headers={"Authorization": f"Bearer {api_key}"})
-    except Exception as exc:
-        raise diagnostics.classify_error(
-            exc, "Could not connect to the voice agent", host=environments.active().agents_host
-        ) from exc
-
-
 def _session_update_message(config: AgentRunConfig) -> str:
     """The initial session.update payload as a JSON string: persona, greeting, voice."""
     return json.dumps(
@@ -270,7 +256,13 @@ def run_session(
         ready_event=ready_event,
     )
 
-    ws = _open_ws(connect, api_key)
+    ws = diagnostics.open_authorized_ws(
+        connect,
+        api_key,
+        ws_url(),
+        message="Could not connect to the voice agent",
+        host=environments.active().agents_host,
+    )
 
     # The mic opens lazily on first iteration, inside the capture thread; a failure
     # there (no device, sounddevice missing) must reach the user instead of vanishing
diff --git a/aai_cli/streaming/diagnostics.py b/aai_cli/streaming/diagnostics.py
index a5a70975..aeed2541 100644
--- a/aai_cli/streaming/diagnostics.py
+++ b/aai_cli/streaming/diagnostics.py
@@ -10,6 +10,7 @@
 from __future__ import annotations
 
 import logging
+from collections.abc import Callable
 
 from aai_cli import ws as wsutil
 from aai_cli.errors import APIError, CLIError, NotAuthenticated
@@ -77,3 +78,26 @@ def classify_error(error: object, message: str, *, host: str) -> CLIError:
     if rejected is not None:
         return rejected
     return wsutil.auth_or_api_error(error, message)
+
+
+def open_authorized_ws[T](
+    connect: Callable[..., T],
+    api_key: str,
+    url: str,
+    *,
+    message: str,
+    host: str,
+    **connect_kwargs: object,
+) -> T:
+    """Open a Bearer-authorized WebSocket, mapping a connect failure via ``classify_error``.
+
+    The one connect path for the raw-websocket sessions (agent, speak), so a
+    rejected handshake (HTTP 401/403) carries the same actionable suggestion in
+    both and everything else keeps the shared classification.
+    """
+    try:
+        return connect(
+            url, additional_headers={"Authorization": f"Bearer {api_key}"}, **connect_kwargs
+        )
+    except Exception as exc:
+        raise classify_error(exc, message, host=host) from exc
diff --git a/aai_cli/tts/session.py b/aai_cli/tts/session.py
index 9aaf6e35..7349ba2d 100644
--- a/aai_cli/tts/session.py
+++ b/aai_cli/tts/session.py
@@ -133,26 +133,6 @@ def _default_connect(
     return connect(url, additional_headers=additional_headers, max_size=max_size)
 
 
-def _open_ws(connect: _Connect, api_key: str, url: str) -> _WebSocket:
-    """Open the TTS socket, mapping a connect failure to a clean CLIError.
-
-    A rejected handshake (HTTP 401/403) gets the shared actionable suggestion
-    (whoami / environment / network); anything else keeps the wsutil mapping.
-    """
-    try:
-        return connect(
-            url,
-            additional_headers={"Authorization": f"Bearer {api_key}"},
-            max_size=None,
-        )
-    except Exception as exc:
-        raise diagnostics.classify_error(
-            exc,
-            "Could not connect to the TTS service",
-            host=environments.active().streaming_tts_host,
-        ) from exc
-
-
 def _run_protocol(
     ws: _WebSocket, config: SpeakConfig, on_warning: Callable[[str], None] | None
 ) -> SpeakResult:
@@ -202,7 +182,14 @@ def synthesize(
     if connect is None:
         connect = _default_connect
 
-    ws = _open_ws(connect, api_key, ws_url(config.query_params()))
+    ws = diagnostics.open_authorized_ws(
+        connect,
+        api_key,
+        ws_url(config.query_params()),
+        message="Could not connect to the TTS service",
+        host=environments.active().streaming_tts_host,
+        max_size=None,  # no frame cap: a synthesis's Audio frames can exceed the 1 MiB default
+    )
     try:
         return _run_protocol(ws, config, on_warning)
     except (CLIError, KeyboardInterrupt, BrokenPipeError):
diff --git a/tests/test_tts_session.py b/tests/test_tts_session.py
index 123e4439..f8549bbb 100644
--- a/tests/test_tts_session.py
+++ b/tests/test_tts_session.py
@@ -139,6 +139,8 @@ def test_synthesize_drives_the_full_protocol():
     assert result.audio_duration_seconds == pytest.approx(6 / 2 / 24000)
     # Auth header carries the key as a Bearer token.
     assert captured["kwargs"]["additional_headers"]["Authorization"] == "Bearer k"
+    # The frame-size cap is lifted: Audio frames can exceed websockets' 1 MiB default.
+    assert captured["kwargs"]["max_size"] is None
     assert ws.closed is True
 
 
@@ -280,6 +282,8 @@ def _connect(*_a, **_k):
 
 
 def test_synthesize_maps_forbidden_connect_to_api_error():
+    _use_env("sandbox000")
+
     class Resp:
         status_code = 403
 
@@ -292,10 +296,11 @@ def _connect(*_a, **_k):
     with pytest.raises(APIError) as exc:
         session.synthesize("k", session.SpeakConfig(text="hi"), connect=_connect)
     assert "Could not connect to the TTS service" in exc.value.message
-    # The rejected handshake carries the actionable next steps.
+    # The rejected handshake carries the actionable next steps, env host included.
     assert exc.value.suggestion is not None
     assert "assembly whoami" in exc.value.suggestion
     assert "--sandbox" in exc.value.suggestion
+    assert "streaming-tts.sandbox000" in exc.value.suggestion
 
 
 def test_synthesize_handshake_401_is_not_authenticated_with_suggestion():
@@ -340,7 +345,8 @@ def test_synthesize_maps_unexpected_protocol_error_to_api_error():
 def test_synthesize_without_connect_uses_real_client_and_fails_cleanly():
     # No `connect` provided: synthesize imports websockets' real sync client and
     # attempts a connection. pytest-socket blocks socket creation, so this must
-    # surface as a clean CLIError (mapped in _open_ws), never a raw socket error.
+    # surface as a clean CLIError (mapped in diagnostics.open_authorized_ws),
+    # never a raw socket error.
     _use_env("sandbox000")
     with pytest.raises(CLIError):
         session.synthesize("k", session.SpeakConfig(text="hi"))