From 927ac05a9b54709f29ebbb7b67a0565719b371ca Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 10 Jun 2026 04:52:15 +0000
Subject: [PATCH 01/11] Fix QA findings: non-interactive auth, input
 validation, error hygiene

Auth/login:
- Auto-login now only runs in interactive sessions; headless/CI runs get
  a clean not_authenticated error (exit 4) instead of a 120s browser wait
- Explicit empty --api-key is a usage error instead of a browser flow
- Loopback callback server binds before the browser opens
- Browser flow prints a waiting hint with the --api-key alternative
- Login timeout is typed not_authenticated; whoami exits 4 on rejected key

Transcribe/transcripts/llm/account:
- An explicit source plus --sample is now a usage error
- A directory passed as the audio source fails fast before credentials
- list-transcripts requests no longer carry a bogus model_config param
  (assemblyai 0.64.4 + pydantic 2.13.4 serialization workaround)
- validate_key network errors compact to one line (no httpx Request repr)
- transcribe --show-code honors -o (srt/utterances/json/id/status)
- New client-side validation: --limit >= 1, --audio-start >= 0,
  --language-code vs --language-detection conflict, --speakers-expected
  requires speaker labels, unknown PII policies list valid values
- yt-dlp errors no longer print twice (quiet logger)
- llm --follow with empty piped stdin is a usage error, not silent exit 0
- usage validates --start/--end dates before session resolution

https://claude.ai/code/session_01Uv7cEgJi2LgknkvfHP52g7
---
 aai_cli/auth/flow.py            |  20 ++--
 aai_cli/auth/loopback.py        |  64 +++++++++----
 aai_cli/client.py               |  64 ++++++++++---
 aai_cli/code_gen/transcribe.py  |  32 ++++++-
 aai_cli/commands/account.py     |   4 +-
 aai_cli/commands/llm.py         |  23 +++--
 aai_cli/commands/login.py       |  24 ++++-
 aai_cli/commands/transcribe.py  |  45 ++++++++-
 aai_cli/commands/transcripts.py |   2 +-
 aai_cli/context.py              |  17 +++-
 aai_cli/youtube.py              |   9 ++
 tests/test_account_command.py   |  16 ++++
 tests/test_auth_flow.py         | 156 +++++++++++++++++++++++---------
 tests/test_auth_loopback.py     |  28 ++++++
 tests/test_client.py            |  55 +++++++++++
 tests/test_code_gen.py          |  63 +++++++++++++
 tests/test_context.py           | 125 ++++++++++++++++++++++++-
 tests/test_keys.py              |   1 +
 tests/test_llm_command.py       |  39 ++++++++
 tests/test_login.py             |  66 +++++++++++++-
 tests/test_sessions_command.py  |   1 +
 tests/test_source_validation.py |  51 +++++++++++
 tests/test_transcribe.py        | 115 +++++++++++++++++++++++
 tests/test_transcripts.py       |  12 +++
 tests/test_youtube.py           |  41 +++++++++
 25 files changed, 970 insertions(+), 103 deletions(-)

diff --git a/aai_cli/auth/flow.py b/aai_cli/auth/flow.py
index c8a086fe..62243eda 100644
--- a/aai_cli/auth/flow.py
+++ b/aai_cli/auth/flow.py
@@ -8,7 +8,7 @@
 
 from aai_cli import output
 from aai_cli.auth import ams, discovery, endpoints, loopback
-from aai_cli.errors import APIError
+from aai_cli.errors import APIError, NotAuthenticated
 
 
 @dataclass
@@ -97,8 +97,8 @@ def _open_browser(url: str) -> None:
         )
 
 
-def _capture() -> loopback.CallbackResult:
-    return loopback.capture_callback()
+def _start_capture() -> loopback.CallbackCapture:
+    return loopback.start_capture()
 
 
 def _reusable_cli_key(token: _Token) -> str | None:
@@ -137,13 +137,21 @@ def find_or_create_cli_key(account_id: int, session_jwt: str) -> str:
 
 def run_login_flow() -> LoginResult:
     """Drive the full browser + AMS login and return a LoginResult."""
+    # Bind the loopback callback server *before* opening the browser: if the port is
+    # taken, fail cleanly now instead of stranding the user mid-OAuth in a flow that
+    # can never call back.
+    capture = _start_capture()
     _open_browser(discovery.build_start_url())
-    result = _capture()
+    output.error_console.print(
+        "[aai.muted]Waiting up to 2 minutes for you to finish signing in…[/aai.muted]\n"
+        "[aai.muted]No browser here? Run 'aai login --api-key <KEY>' instead.[/aai.muted]"
+    )
+    result = capture.wait()
 
     if result.error == "timeout":
-        raise APIError(
+        raise NotAuthenticated(
             "Login timed out waiting for the browser.",
-            suggestion="Run 'aai login' again.",
+            suggestion="Run 'aai login' again, or use 'aai login --api-key <KEY>'.",
         )
     if result.token_type != "discovery_oauth" or not result.token:  # noqa: S105
         raise APIError(
diff --git a/aai_cli/auth/loopback.py b/aai_cli/auth/loopback.py
index b27b77e7..b00b8cac 100644
--- a/aai_cli/auth/loopback.py
+++ b/aai_cli/auth/loopback.py
@@ -30,15 +30,47 @@ class CallbackResult:
     error: str | None = None
 
 
-def capture_callback(
-    timeout: float = 120.0,  # pragma: no mutate (default window; tests pass explicit timeouts)
-) -> CallbackResult:
-    """Bind the fixed loopback port, capture one OAuth callback, return its token.
+@dataclass
+class CallbackCapture:
+    """A loopback callback server that is already bound and serving.
 
-    Only a callback to the registered path that carries a `token` is accepted; any
-    other request (a different path, or no token) gets a 4xx and the server keeps
-    waiting, so a stray request can't end the capture early. Returns a
-    CallbackResult; `error="timeout"` if no matching callback arrives in time.
+    Splitting the bind (`start_capture`) from the blocking wait lets the login flow
+    fail on a taken port *before* it sends the user's browser into the OAuth flow.
+    `wait()` blocks for one matching callback and always shuts the server down.
+    """
+
+    result: CallbackResult
+    done: threading.Event
+    server: HTTPServer
+    thread: threading.Thread
+
+    def wait(
+        self,
+        timeout: float = 120.0,  # pragma: no mutate (default window; tests pass explicit timeouts)
+    ) -> CallbackResult:
+        """Block for one OAuth callback (or the timeout), then shut the server down.
+
+        Returns the CallbackResult; `error="timeout"` if no matching callback
+        arrived in time.
+        """
+        try:
+            if not self.done.wait(timeout):
+                self.result.error = "timeout"
+        finally:
+            self.server.shutdown()  # stop serve_forever()
+            self.thread.join(timeout=5)  # pragma: no mutate (cleanup grace period only)
+            self.server.server_close()  # close the listening socket (shutdown() leaves it open)
+        return self.result
+
+
+def start_capture() -> CallbackCapture:
+    """Bind the fixed loopback port and start serving; the returned capture's
+    ``wait()`` collects one OAuth callback.
+
+    Raises a clean APIError when the bind fails (port taken) so callers can abort
+    before opening the browser. Only a callback to the registered path that carries
+    a `token` is accepted; any other request (a different path, or no token) gets a
+    4xx and the server keeps waiting, so a stray request can't end the capture early.
     """
     result = CallbackResult()
     done = threading.Event()
@@ -81,11 +113,11 @@ def log_message(self, format: str, *args: object) -> None:  # silence stderr log
         ) from exc
     thread = threading.Thread(target=server.serve_forever, daemon=True)
     thread.start()
-    try:
-        if not done.wait(timeout):
-            result.error = "timeout"
-    finally:
-        server.shutdown()  # stop serve_forever()
-        thread.join(timeout=5)
-        server.server_close()  # close the listening socket (shutdown() leaves it open)
-    return result
+    return CallbackCapture(result=result, done=done, server=server, thread=thread)
+
+
+def capture_callback(
+    timeout: float = 120.0,  # pragma: no mutate (default window; tests pass explicit timeouts)
+) -> CallbackResult:
+    """Bind the port, capture one OAuth callback, and shut down (one-shot helper)."""
+    return start_capture().wait(timeout)
diff --git a/aai_cli/client.py b/aai_cli/client.py
index 7e59d4ea..fe327147 100644
--- a/aai_cli/client.py
+++ b/aai_cli/client.py
@@ -49,19 +49,36 @@ def resolve_audio_source(source: str | None, *, sample: bool, check_local: bool
     don't have yet is legitimate.
     """
     if sample:
+        if source:
+            # Never silently prefer one over the other: the user asked for both.
+            raise UsageError(
+                "An audio source and --sample cannot be combined.",
+                suggestion="Pass the file/URL or --sample, not both.",
+            )
         return SAMPLE_AUDIO_URL
     if not source:
         raise UsageError(
             "Provide an audio path or URL.",
             suggestion="Or pass --sample to use the hosted demo file.",
         )
-    if check_local and not source.startswith(("http://", "https://")) and not Path(source).exists():
-        raise CLIError(
-            f"File not found: {source}",
-            error_type="file_not_found",
-            exit_code=2,
-            suggestion="Check the path. For remote audio, pass an http(s):// URL.",
-        )
+    if check_local and not source.startswith(("http://", "https://")):
+        path = Path(source)
+        if not path.exists():
+            raise CLIError(
+                f"File not found: {source}",
+                error_type="file_not_found",
+                exit_code=2,
+                suggestion="Check the path. For remote audio, pass an http(s):// URL.",
+            )
+        if not path.is_file():
+            # A directory (or socket/FIFO) would otherwise fall through to credential
+            # resolution and fail much later as an opaque upload error.
+            raise CLIError(
+                f"Not a file: {source}",
+                error_type="not_a_file",
+                exit_code=2,
+                suggestion="Pass an audio file, not a directory.",
+            )
     return source
 
 
@@ -90,17 +107,42 @@ def _sdk_errors(message: str) -> Generator[None]:
         raise APIError(f"{message}: {exc}") from exc
 
 
+def _list_transcript_params(limit: int) -> aai.ListTranscriptParameters:
+    """List-transcripts params that serialize without the spurious ``model_config`` key.
+
+    assemblyai==0.64.4 under pydantic==2.13.4: the SDK's pydantic-v1-shim request model
+    picks up the v2-style ``model_config`` class attribute as a regular field, so the
+    ``.dict(exclude_none=True)`` the SDK puts on the query string ships a junk
+    ``?model_config=...`` param on every request. Null the bogus field out so
+    ``exclude_none`` drops it from the wire.
+    """
+    params = aai.ListTranscriptParameters(limit=limit)
+    object.__setattr__(params, "model_config", None)
+    return params
+
+
+# httpx-backed SDK errors embed a multi-line repr ("…\nReason: …\nRequest: <Request(…)>").
+_REQUEST_REPR_RE = re.compile(r"Request: <[^>]*>")
+
+
+def _compact_reason(exc: object) -> str:
+    """``str(exc)`` as a single clean line: drop the trailing ``Request: <…>`` repr and
+    collapse all whitespace/newlines, keeping the informative reason text."""
+    text = _REQUEST_REPR_RE.sub("", str(exc))
+    return re.sub(r"\s+", " ", text).strip()
+
+
 def validate_key(api_key: str) -> bool:
     """True if the key authenticates, False on an auth failure. Raises APIError otherwise."""
     _configure(api_key)
     try:
-        aai.Transcriber().list_transcripts(aai.ListTranscriptParameters(limit=1))
+        aai.Transcriber().list_transcripts(_list_transcript_params(1))
     except aai.types.AssemblyAIError as exc:
         if is_auth_failure(exc):
             return False
-        raise APIError(f"Could not validate key: {exc}") from exc
+        raise APIError(f"Could not validate key: {_compact_reason(exc)}") from exc
     except Exception as exc:
-        raise APIError(f"Network error contacting AssemblyAI: {exc}") from exc
+        raise APIError(f"Network error contacting AssemblyAI: {_compact_reason(exc)}") from exc
     return True
 
 
@@ -114,7 +156,7 @@ def _item_to_dict(item: Any) -> dict[str, Any]:
 def list_transcripts(api_key: str, *, limit: int = 10) -> list[dict[str, object]]:
     _configure(api_key)
     with _sdk_errors("Could not list transcripts"):
-        resp = aai.Transcriber().list_transcripts(aai.ListTranscriptParameters(limit=limit))
+        resp = aai.Transcriber().list_transcripts(_list_transcript_params(limit))
     return [_item_to_dict(item) for item in resp.transcripts]
 
 
diff --git a/aai_cli/code_gen/transcribe.py b/aai_cli/code_gen/transcribe.py
index a35a8723..5527e8c4 100644
--- a/aai_cli/code_gen/transcribe.py
+++ b/aai_cli/code_gen/transcribe.py
@@ -5,12 +5,27 @@
 from aai_cli import environments, llm
 from aai_cli.code_gen import serialize, snippets
 
+# ``-o/--output`` choice -> printed-result code, mirroring the run path's
+# ``client._FIELD_RENDERERS`` semantics: plain fields, the speaker-labeled
+# utterances loop, the SRT export endpoint, and the raw ``json_response`` payload.
+_OUTPUT_SNIPPETS: dict[str, str] = {
+    "text": "print(transcript.text)",
+    "id": "print(transcript.id)",
+    "status": "print(transcript.status.value)",
+    "utterances": (
+        'for utt in transcript.utterances or []:\n    print(f"Speaker {utt.speaker}: {utt.text}")'
+    ),
+    "srt": "print(transcript.export_subtitles_srt())",
+    "json": "print(json.dumps(transcript.json_response, default=str))",
+}
+
 
 def render(
     merged: dict[str, object],
     source: str,
     *,
     llm_gateway: dict[str, object] | None = None,
+    output: str | None = None,
 ) -> str:
     """Generate a runnable transcribe script reproducing this CLI invocation.
 
@@ -18,7 +33,13 @@ def render(
     script transforms the transcript through AssemblyAI's LLM Gateway and prints that
     result instead of the analysis sections — mirroring how `--llm-gateway-prompt`
     replaces the normal output.
+
+    When `output` (a ``-o/--output`` field name) is given, the script prints that one
+    field instead — and, as in the real command, it takes precedence over the LLM chain
+    and the analysis sections.
     """
+    if output is not None:
+        llm_gateway = None  # `-o` returns before the chain runs in the real command
     if merged:
         kwargs = "\n".join(serialize.config_kwarg_lines(merged, indent=4))
         config_block = f"config = aai.TranscriptionConfig(\n{kwargs}\n)"
@@ -31,8 +52,12 @@ def render(
     if llm_gateway:
         imports.append("from openai import OpenAI")
 
+    stdlib_imports = ["import os"]
+    if output == "json":
+        stdlib_imports.insert(0, "import json")
+
     parts = [
-        "import os",
+        *stdlib_imports,
         "",
         *imports,
         "",
@@ -59,7 +84,10 @@ def render(
         "",
     ]
 
-    if llm_gateway:
+    if output is not None:
+        # Unknown names fall back to the plain text, like select_transcript_field does.
+        parts.append(_OUTPUT_SNIPPETS.get(output, _OUTPUT_SNIPPETS["text"]))
+    elif llm_gateway:
         parts += _llm_gateway_block(llm_gateway)
     else:
         parts.append(snippets.result_handling(merged))
diff --git a/aai_cli/commands/account.py b/aai_cli/commands/account.py
index 445929eb..c507fe3a 100644
--- a/aai_cli/commands/account.py
+++ b/aai_cli/commands/account.py
@@ -156,10 +156,12 @@ def usage(
     """Show usage over a date range (defaults to the last 30 days)."""
 
     def body(state: AppState, json_mode: bool) -> None:
-        _, jwt = resolve_session(state)
+        # Parse/validate the date flags before any session resolution or network
+        # work, so a bad --start/--end is a fast usage error even when not logged in.
         today = datetime.now(UTC).date()
         start_date = _utc_day_start(start or (today - timedelta(days=30)).isoformat())
         end_date = _utc_day_start(end or today.isoformat())
+        _, jwt = resolve_session(state)
         data = ams.get_usage(jwt, start_date, end_date, window)
 
         def render(d: dict[str, object]) -> object:
diff --git a/aai_cli/commands/llm.py b/aai_cli/commands/llm.py
index 69848246..b971d660 100644
--- a/aai_cli/commands/llm.py
+++ b/aai_cli/commands/llm.py
@@ -1,7 +1,5 @@
 from __future__ import annotations
 
-from contextlib import suppress
-
 import typer
 from rich.markup import escape
 
@@ -14,6 +12,11 @@
 
 app = typer.Typer()
 
+_FOLLOW_STDIN_MESSAGE = (
+    "--follow needs transcript text piped on stdin, e.g. "
+    '`aai stream -o text | aai llm -f "summarize action items as I talk"`.'
+)
+
 
 def _validate_follow_args(
     prompt: str | None, output_field: str | None, transcript_id: str | None
@@ -35,10 +38,7 @@ def _validate_follow_args(
             "combined with --transcript-id."
         )
     if not stdio.stdin_is_piped():
-        raise UsageError(
-            "--follow needs transcript text piped on stdin, e.g. "
-            '`aai stream -o text | aai llm -f "summarize action items as I talk"`.'
-        )
+        raise UsageError(_FOLLOW_STDIN_MESSAGE)
     return prompt
 
 
@@ -116,13 +116,20 @@ def ask(transcript_text: str) -> str:
             )
             return gateway.content_of(response)
 
+        transcript: list[str] = []
+        interrupted = False
         with FollowRenderer(json_mode=json_mode) as render:
-            transcript: list[str] = []
             # Ctrl-C is the normal "stop watching" signal -> exit cleanly (code 0).
-            with suppress(KeyboardInterrupt):
+            try:
                 for turn in stdio.iter_piped_stdin_lines():
                     transcript.append(turn)
                     render(ask("\n".join(transcript)), len(transcript))
+            except KeyboardInterrupt:
+                interrupted = True
+        if not transcript and not interrupted:
+            # An empty pipe (`aai llm -f "…" </dev/null`) would otherwise exit 0
+            # silently, having asked nothing.
+            raise UsageError(_FOLLOW_STDIN_MESSAGE)
 
     def body(state: AppState, json_mode: bool) -> None:
         if not prompt:
diff --git a/aai_cli/commands/login.py b/aai_cli/commands/login.py
index f7954de2..0db75ad0 100644
--- a/aai_cli/commands/login.py
+++ b/aai_cli/commands/login.py
@@ -6,7 +6,7 @@
 
 from aai_cli import client, config, environments, help_panels, options, output
 from aai_cli.context import AppState, persist_browser_login, resolve_profile, run_command
-from aai_cli.errors import APIError
+from aai_cli.errors import APIError, UsageError
 from aai_cli.help_text import examples_epilog
 
 app = typer.Typer()
@@ -31,7 +31,20 @@ def login(
     def body(state: AppState, json_mode: bool) -> None:
         profile = resolve_profile(state)
         env = environments.active().name
-        if api_key:
+        if api_key is None:
+            persist_browser_login(profile, env)
+        elif not api_key.strip():
+            # An explicitly-passed empty/whitespace key (e.g. --api-key "$UNSET_VAR")
+            # must fail loudly, not silently fall into the browser flow as if the
+            # flag had never been passed.
+            raise UsageError(
+                "--api-key was given an empty value.",
+                suggestion=(
+                    "Pass a real key: aai login --api-key <KEY> "
+                    "(check that the shell variable you expanded is set)."
+                ),
+            )
+        else:
             # Non-interactive escape hatch for CI/automation: no AMS session is
             # obtained, so account self-service commands won't work for this profile.
             if not client.validate_key(api_key):
@@ -45,8 +58,6 @@ def body(state: AppState, json_mode: bool) -> None:
             # api-key-only, so account self-service must report it needs a browser
             # login rather than silently reusing the old (possibly different) identity.
             config.clear_session(profile)
-        else:
-            persist_browser_login(profile, env)
         output.emit(
             {"authenticated": True, "profile": profile, "env": env},
             lambda _d: (
@@ -138,5 +149,10 @@ def render(_d: dict[str, object]) -> Table:
             "session": session_label,
         }
         output.emit(data, render, json_mode=json_mode)
+        if not reachable:
+            # A rejected key must fail the command (exit 4, the auth code used by
+            # NotAuthenticated) so CI can use whoami as a preflight check; the
+            # rendered status above still lands on stdout in both modes.
+            raise typer.Exit(code=4)
 
     run_command(ctx, body, json=json_out)
diff --git a/aai_cli/commands/transcribe.py b/aai_cli/commands/transcribe.py
index 5b845947..cc81d0ad 100644
--- a/aai_cli/commands/transcribe.py
+++ b/aai_cli/commands/transcribe.py
@@ -19,12 +19,45 @@
     transcribe_exec,
     transcribe_render,
 )
+
+# The package attribute `code_gen.transcribe` is the wrapper function, so the module's
+# render() (which also takes the -o output field) is imported from the submodule itself.
+from aai_cli.code_gen.transcribe import render as render_transcribe_code
 from aai_cli.context import AppState, run_command
 from aai_cli.errors import UsageError
 from aai_cli.help_text import examples_epilog
 
 app = typer.Typer()
 
+# The PII policy strings the SDK accepts, validated client-side so a typo'd
+# --redact-pii-policy fails before any upload — mirroring how an unknown --config
+# key is rejected with the valid field list.
+_PII_POLICY_VALUES = frozenset(policy.value for policy in aai.PIIRedactionPolicy)
+
+
+def _validate_pii_policies(policies: list[str] | None) -> None:
+    unknown = [p for p in policies or [] if p not in _PII_POLICY_VALUES]
+    if unknown:
+        valid = ", ".join(sorted(_PII_POLICY_VALUES))
+        raise UsageError(f"Unknown PII policy(s) {unknown}. Valid policies: {valid}.")
+
+
+def _validate_language_flags(language_code: str | None, language_detection: bool | None) -> None:
+    if language_code and language_detection:
+        raise UsageError(
+            "--language-code and --language-detection can't be combined.",
+            suggestion="Force a language or auto-detect it, not both.",
+        )
+
+
+def _validate_speakers_expected(merged: dict[str, object]) -> None:
+    # Checked on the merged dict so `--config speaker_labels=true` also counts.
+    if merged.get("speakers_expected") and not merged.get("speaker_labels"):
+        raise UsageError(
+            "--speakers-expected only applies when diarization is enabled.",
+            suggestion="Add --speaker-labels.",
+        )
+
 
 @app.command(
     rich_help_panel=help_panels.TRANSCRIPTION,
@@ -238,6 +271,7 @@ def transcribe(
         None,
         "--audio-start",
         help="Start offset in ms.",
+        min=0,
         rich_help_panel=help_panels.OPT_CUSTOMIZATION,
     ),
     audio_end: int | None = typer.Option(
@@ -332,6 +366,9 @@ def transcribe(
     """
 
     def body(state: AppState, json_mode: bool) -> None:
+        _validate_language_flags(language_code, language_detection)
+        pii_policies = config_builder.split_csv(redact_pii_policy)
+        _validate_pii_policies(pii_policies)
         flags: dict[str, object] = {
             "speech_model": config_builder.enum_value(speech_model),
             "language_code": language_code,
@@ -346,7 +383,7 @@ def body(state: AppState, json_mode: bool) -> None:
             "speakers_expected": speakers_expected,
             "multichannel": multichannel,
             "redact_pii": redact_pii,
-            "redact_pii_policies": config_builder.split_csv(redact_pii_policy),
+            "redact_pii_policies": pii_policies,
             "redact_pii_sub": config_builder.enum_value(redact_pii_sub),
             "redact_pii_audio": redact_pii_audio,
             "filter_profanity": filter_profanity,
@@ -387,6 +424,8 @@ def body(state: AppState, json_mode: bool) -> None:
             flags=flags, overrides=config_kv, config_file=config_file
         )
 
+        _validate_speakers_expected(merged)
+
         if show_code:
             # Print-only: build the equivalent script and exit without transcribing or
             # authenticating (raw stdout, so `--show-code > script.py` runs). No
@@ -397,7 +436,9 @@ def body(state: AppState, json_mode: bool) -> None:
                 else "your-audio-file.mp3"
             )
             gateway = code_gen.gateway_options(list(llm_prompt or []), model, max_tokens)
-            output.print_code(code_gen.transcribe(merged, audio, llm_gateway=gateway))
+            output.print_code(
+                render_transcribe_code(merged, audio, llm_gateway=gateway, output=output_field)
+            )
             return
 
         tc = config_builder.construct_transcription_config(merged)
diff --git a/aai_cli/commands/transcripts.py b/aai_cli/commands/transcripts.py
index a753652f..b33fe37c 100644
--- a/aai_cli/commands/transcripts.py
+++ b/aai_cli/commands/transcripts.py
@@ -73,7 +73,7 @@ def body(state: AppState, json_mode: bool) -> None:
 )
 def list_(
     ctx: typer.Context,
-    limit: int = typer.Option(10, "--limit", help="How many transcripts to show."),
+    limit: int = typer.Option(10, "--limit", help="How many transcripts to show.", min=1),
     json_out: bool = options.json_option(),
 ) -> None:
     """List recent transcripts."""
diff --git a/aai_cli/context.py b/aai_cli/context.py
index 5dba568a..c46c764a 100644
--- a/aai_cli/context.py
+++ b/aai_cli/context.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import os
+import sys
 from collections.abc import Callable
 from dataclasses import dataclass
 
@@ -127,10 +128,22 @@ def _rerun_after_login_error() -> CLIError:
     )
 
 
+def _interactive_session() -> bool:
+    """True only when a human can complete a browser login: stdin and stderr are both
+    real TTYs and no agent/CI context is detected (`output.is_agentic`)."""
+    return sys.stdin.isatty() and sys.stderr.isatty() and not output.is_agentic()
+
+
 def _should_auto_login(ctx: typer.Context, err: NotAuthenticated) -> bool:
     command_name = ctx.command.name if ctx.command else None
     if command_name in {"login", "logout"}:
         return False
+    # CI/pipelines/agents have no human to finish a browser sign-in; starting one
+    # would bind a loopback port and block for up to two minutes. Surface the
+    # original NotAuthenticated (with its 'aai login' / ASSEMBLYAI_API_KEY
+    # suggestion) instead.
+    if not _interactive_session():
+        return False
     # An invalid ASSEMBLYAI_API_KEY would still take precedence after browser login,
     # so retrying cannot fix that case.
     return not (os.environ.get(config.ENV_API_KEY) and err.message == REJECTED_KEY_MESSAGE)
@@ -153,7 +166,9 @@ def run_command(
             output.emit_error(err, json_mode=json_mode)
             raise typer.Exit(code=err.exit_code) from None
         try:
-            if not state.quiet:
+            # Suppressed in json_mode too: --json stderr must stay machine-readable,
+            # never mix human prose into it.
+            if not state.quiet and not json_mode:
                 output.error_console.print(
                     "[aai.muted]Not signed in; starting browser login.[/aai.muted]"
                 )
diff --git a/aai_cli/youtube.py b/aai_cli/youtube.py
index 86af557a..9c109941 100644
--- a/aai_cli/youtube.py
+++ b/aai_cli/youtube.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import logging
 import re
 from pathlib import Path
 
@@ -11,6 +12,13 @@
     re.IGNORECASE,
 )
 
+# yt-dlp's default logger prints its own "ERROR: …" line straight to stderr before the
+# CLI can raise its one clean error, duplicating the message. Route yt-dlp's output to
+# a swallow-everything logger (NullHandler, no propagation) instead.
+_YTDLP_LOGGER = logging.getLogger("aai_cli.youtube.yt_dlp")
+_YTDLP_LOGGER.addHandler(logging.NullHandler())
+_YTDLP_LOGGER.propagate = False
+
 
 def is_youtube_url(source: str | None) -> bool:
     """True if `source` looks like a YouTube watch/share URL."""
@@ -41,6 +49,7 @@ def download_audio(url: str, dest_dir: Path) -> Path:
         "quiet": True,
         "no_warnings": True,
         "noprogress": True,
+        "logger": _YTDLP_LOGGER,
     }
     try:
         # yt-dlp types `params` as a private `_Params` TypedDict, but a plain options
diff --git a/tests/test_account_command.py b/tests/test_account_command.py
index 636afa5b..788d3514 100644
--- a/tests/test_account_command.py
+++ b/tests/test_account_command.py
@@ -38,6 +38,7 @@ def test_balance_formats_dollars(monkeypatch, mocker):
 
 
 def test_balance_without_session_runs_login(monkeypatch, mocker):
+    monkeypatch.setattr("aai_cli.context._interactive_session", lambda: True)
     monkeypatch.setattr("aai_cli.context.run_login_flow", _login_result)
     get_balance = mocker.patch(
         "aai_cli.commands.account.ams.get_balance",
@@ -302,6 +303,21 @@ def test_usage_rejects_invalid_date(mocker):
     get_usage.assert_not_called()
 
 
+def test_usage_invalid_date_fails_before_session_resolution(monkeypatch, mocker):
+    # Not logged in + a bad --start/--end: date validation must run before
+    # resolve_session, so the user gets a fast exit-2 usage error, not a login flow.
+    def _no_login():
+        raise AssertionError("login flow must not start for an invalid date")
+
+    monkeypatch.setattr("aai_cli.context._interactive_session", lambda: True)
+    monkeypatch.setattr("aai_cli.context.run_login_flow", _no_login)
+    get_usage = mocker.patch("aai_cli.commands.account.ams.get_usage", autospec=True)
+    result = runner.invoke(app, ["usage", "--end", "not-a-date"])
+    assert result.exit_code == 2
+    assert "Invalid date 'not-a-date'" in result.output
+    get_usage.assert_not_called()
+
+
 def test_limits_renders_services(monkeypatch, mocker):
     _auth()
     _human(monkeypatch)
diff --git a/tests/test_auth_flow.py b/tests/test_auth_flow.py
index b2d73e58..873f8f30 100644
--- a/tests/test_auth_flow.py
+++ b/tests/test_auth_flow.py
@@ -1,7 +1,24 @@
 import pytest
 
 from aai_cli.auth import flow, loopback
-from aai_cli.errors import APIError
+from aai_cli.errors import APIError, NotAuthenticated
+
+
+class _FakeCapture:
+    """Stands in for an already-bound loopback server: wait() returns a canned result."""
+
+    def __init__(self, result, log=None):
+        self._result = result
+        self._log = log
+
+    def wait(self, timeout=120.0):
+        if self._log is not None:
+            self._log.append("wait")
+        return self._result
+
+
+def _fake_start_capture(monkeypatch, result):
+    monkeypatch.setattr(flow, "_start_capture", lambda: _FakeCapture(result))
 
 
 def test_find_or_create_reuses_existing_cli_key(monkeypatch):
@@ -72,10 +89,10 @@ def test_find_or_create_raises_when_no_projects(monkeypatch):
     assert "no project" in exc.value.message
 
 
-def test_capture_delegates_to_loopback(monkeypatch):
-    sentinel = loopback.CallbackResult(token="tok", token_type="discovery_oauth")
-    monkeypatch.setattr(flow.loopback, "capture_callback", lambda: sentinel)
-    assert flow._capture() is sentinel
+def test_start_capture_delegates_to_loopback(monkeypatch):
+    sentinel = object()
+    monkeypatch.setattr(flow.loopback, "start_capture", lambda: sentinel)
+    assert flow._start_capture() is sentinel
 
 
 def test_run_login_flow_opens_the_discovery_start_url(monkeypatch):
@@ -83,10 +100,8 @@ def test_run_login_flow_opens_the_discovery_start_url(monkeypatch):
     seen = {}
     monkeypatch.setattr(flow.discovery, "build_start_url", lambda: "start-url")
     monkeypatch.setattr(flow, "_open_browser", lambda url: seen.setdefault("url", url))
-    monkeypatch.setattr(
-        flow,
-        "_capture",
-        lambda: loopback.CallbackResult(token="tok", token_type="discovery_oauth"),
+    _fake_start_capture(
+        monkeypatch, loopback.CallbackResult(token="tok", token_type="discovery_oauth")
     )
     monkeypatch.setattr(
         flow.ams,
@@ -109,10 +124,8 @@ def test_run_login_flow_opens_the_discovery_start_url(monkeypatch):
 
 def test_run_login_flow_rejects_wrong_token_type(monkeypatch):
     monkeypatch.setattr(flow, "_open_browser", lambda url: None)
-    monkeypatch.setattr(
-        flow,
-        "_capture",
-        lambda: loopback.CallbackResult(token="tok", token_type="something_else"),
+    _fake_start_capture(
+        monkeypatch, loopback.CallbackResult(token="tok", token_type="something_else")
     )
     with pytest.raises(APIError) as exc:
         flow.run_login_flow()
@@ -122,10 +135,8 @@ def test_run_login_flow_rejects_wrong_token_type(monkeypatch):
 def test_run_login_flow_happy_path(monkeypatch):
     opened = {}
     monkeypatch.setattr(flow, "_open_browser", lambda url: opened.setdefault("url", url))
-    monkeypatch.setattr(
-        flow,
-        "_capture",
-        lambda: loopback.CallbackResult(token="tok", token_type="discovery_oauth"),
+    _fake_start_capture(
+        monkeypatch, loopback.CallbackResult(token="tok", token_type="discovery_oauth")
     )
     monkeypatch.setattr(
         flow.ams,
@@ -147,13 +158,15 @@ def test_run_login_flow_happy_path(monkeypatch):
     assert opened["url"].startswith("https://")
 
 
-def test_run_login_flow_timeout_raises(monkeypatch):
+def test_run_login_flow_timeout_raises_auth_typed_error(monkeypatch):
     monkeypatch.setattr(flow, "_open_browser", lambda url: None)
-    monkeypatch.setattr(flow, "_capture", lambda: loopback.CallbackResult(error="timeout"))
-    with pytest.raises(APIError) as exc:
+    _fake_start_capture(monkeypatch, loopback.CallbackResult(error="timeout"))
+    with pytest.raises(NotAuthenticated) as exc:
         flow.run_login_flow()
     assert exc.value.message == "Login timed out waiting for the browser."
-    assert exc.value.suggestion == "Run 'aai login' again."
+    assert exc.value.error_type == "not_authenticated"  # auth-typed, not api_error
+    assert exc.value.exit_code == 4
+    assert exc.value.suggestion == "Run 'aai login' again, or use 'aai login --api-key <KEY>'."
 
 
 def test_find_or_create_reuses_token_with_token_name_field(monkeypatch):
@@ -188,10 +201,8 @@ def test_run_login_flow_uses_exchange_account(monkeypatch):
     # The signed-in account comes from exchange()'s response; the flow must not make a
     # second round-trip to fetch it.
     monkeypatch.setattr(flow, "_open_browser", lambda url: None)
-    monkeypatch.setattr(
-        flow,
-        "_capture",
-        lambda: loopback.CallbackResult(token="tok", token_type="discovery_oauth"),
+    _fake_start_capture(
+        monkeypatch, loopback.CallbackResult(token="tok", token_type="discovery_oauth")
     )
     monkeypatch.setattr(
         flow.ams,
@@ -219,10 +230,8 @@ def fake_find(acct, jwt):
 
 def test_run_login_flow_multi_org_notes_selection(monkeypatch, capsys):
     monkeypatch.setattr(flow, "_open_browser", lambda url: None)
-    monkeypatch.setattr(
-        flow,
-        "_capture",
-        lambda: loopback.CallbackResult(token="tok", token_type="discovery_oauth"),
+    _fake_start_capture(
+        monkeypatch, loopback.CallbackResult(token="tok", token_type="discovery_oauth")
     )
     monkeypatch.setattr(
         flow.ams,
@@ -259,10 +268,8 @@ def test_open_browser_prints_fallback_to_stderr(monkeypatch, capsys):
 
 def test_run_login_flow_missing_session_token_raises_api_error(monkeypatch):
     monkeypatch.setattr(flow, "_open_browser", lambda url: None)
-    monkeypatch.setattr(
-        flow,
-        "_capture",
-        lambda: loopback.CallbackResult(token="tok", token_type="discovery_oauth"),
+    _fake_start_capture(
+        monkeypatch, loopback.CallbackResult(token="tok", token_type="discovery_oauth")
     )
     monkeypatch.setattr(
         flow.ams,
@@ -275,10 +282,8 @@ def test_run_login_flow_missing_session_token_raises_api_error(monkeypatch):
 
 def test_run_login_flow_org_missing_id_raises_api_error(monkeypatch):
     monkeypatch.setattr(flow, "_open_browser", lambda url: None)
-    monkeypatch.setattr(
-        flow,
-        "_capture",
-        lambda: loopback.CallbackResult(token="tok", token_type="discovery_oauth"),
+    _fake_start_capture(
+        monkeypatch, loopback.CallbackResult(token="tok", token_type="discovery_oauth")
     )
     monkeypatch.setattr(
         flow.ams,
@@ -294,10 +299,8 @@ def test_run_login_flow_org_missing_id_raises_api_error(monkeypatch):
 
 def test_run_login_flow_zero_orgs_raises(monkeypatch):
     monkeypatch.setattr(flow, "_open_browser", lambda url: None)
-    monkeypatch.setattr(
-        flow,
-        "_capture",
-        lambda: loopback.CallbackResult(token="tok", token_type="discovery_oauth"),
+    _fake_start_capture(
+        monkeypatch, loopback.CallbackResult(token="tok", token_type="discovery_oauth")
     )
     monkeypatch.setattr(
         flow.ams,
@@ -314,10 +317,9 @@ def test_run_login_flow_zero_orgs_raises(monkeypatch):
 
 def test_run_login_flow_returns_session_material(monkeypatch):
     monkeypatch.setattr(flow, "_open_browser", lambda url: None)
-    monkeypatch.setattr(
-        flow,
-        "_capture",
-        lambda: loopback.CallbackResult(token="tok", token_type="discovery_oauth", error=None),
+    _fake_start_capture(
+        monkeypatch,
+        loopback.CallbackResult(token="tok", token_type="discovery_oauth", error=None),
     )
     monkeypatch.setattr(
         flow.ams,
@@ -343,3 +345,67 @@ def test_run_login_flow_returns_session_material(monkeypatch):
     assert result.session_jwt == "jwt_1"
     assert result.session_token == "tok_1"
     assert result.account_id == 99
+
+
+def _stub_ams_happy_path(monkeypatch):
+    monkeypatch.setattr(
+        flow.ams,
+        "discover",
+        lambda token: {
+            "organizations": [{"organization_id": "org_1"}],
+            "intermediate_session_token": "ist",
+        },
+    )
+    monkeypatch.setattr(
+        flow.ams,
+        "exchange",
+        lambda ist, org: {"account": {"id": 9}, "session_jwt": "jwt", "session_token": "t"},
+    )
+    monkeypatch.setattr(flow, "find_or_create_cli_key", lambda acct, jwt: "sk_final")
+
+
+def test_run_login_flow_binds_loopback_before_opening_browser(monkeypatch):
+    # The callback server must be bound before the browser launches: a taken port
+    # has to fail the flow before the user is mid-OAuth. wait() only happens after.
+    order = []
+
+    def fake_start():
+        order.append("bind")
+        return _FakeCapture(
+            loopback.CallbackResult(token="tok", token_type="discovery_oauth"), log=order
+        )
+
+    monkeypatch.setattr(flow, "_start_capture", fake_start)
+    monkeypatch.setattr(flow, "_open_browser", lambda url: order.append("browser"))
+    _stub_ams_happy_path(monkeypatch)
+
+    assert flow.run_login_flow().api_key == "sk_final"
+    assert order == ["bind", "browser", "wait"]
+
+
+def test_run_login_flow_bind_failure_never_opens_browser(monkeypatch):
+    def fail_start():
+        raise APIError("Could not start the login callback server on 127.0.0.1:8123.")
+
+    monkeypatch.setattr(flow, "_start_capture", fail_start)
+    opened = []
+    monkeypatch.setattr(flow, "_open_browser", lambda url: opened.append(url))
+
+    with pytest.raises(APIError, match="callback server"):
+        flow.run_login_flow()
+    assert opened == []  # the user is never sent into an OAuth flow that already failed
+
+
+def test_run_login_flow_prints_waiting_hint(monkeypatch, capsys):
+    # Headless/slow logins must not sit in 120s of silence: the flow says it is
+    # waiting and names the non-browser alternative.
+    monkeypatch.setattr(flow, "_open_browser", lambda url: None)
+    _fake_start_capture(
+        monkeypatch, loopback.CallbackResult(token="tok", token_type="discovery_oauth")
+    )
+    _stub_ams_happy_path(monkeypatch)
+
+    assert flow.run_login_flow().api_key == "sk_final"
+    err = capsys.readouterr().err
+    assert "Waiting up to 2 minutes" in err
+    assert "aai login --api-key" in err
diff --git a/tests/test_auth_loopback.py b/tests/test_auth_loopback.py
index a5817caf..6e85c347 100644
--- a/tests/test_auth_loopback.py
+++ b/tests/test_auth_loopback.py
@@ -157,3 +157,31 @@ def test_capture_raises_clean_error_when_port_unavailable(monkeypatch):
             loopback.capture_callback(timeout=1.0)
     finally:
         busy.close()
+
+
+def test_start_capture_raises_clean_error_when_port_unavailable(monkeypatch):
+    # The bind failure surfaces from start_capture() itself — i.e. before any
+    # caller would open a browser — not from the later wait().
+    busy = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    busy.bind((endpoints.LOOPBACK_HOST, 0))
+    busy.listen(1)
+    port = busy.getsockname()[1]
+    monkeypatch.setenv("AAI_AUTH_PORT", str(port))
+    try:
+        with pytest.raises(APIError, match="callback server"):
+            loopback.start_capture()
+    finally:
+        busy.close()
+
+
+def test_start_capture_is_serving_before_wait_is_called():
+    # start_capture() returns with the server already bound and answering — the
+    # whole point of splitting bind from wait. The callback can land before wait().
+    capture = loopback.start_capture()
+    assert capture.thread.daemon is True  # never blocks interpreter shutdown
+    status = _hit("/callback?stytch_token_type=discovery_oauth&token=tok_pre")
+    assert status == 200  # answered while no one is waiting yet
+    result = capture.wait(timeout=5.0)
+    assert result.token == "tok_pre"
+    assert result.token_type == "discovery_oauth"
+    assert result.error is None
diff --git a/tests/test_client.py b/tests/test_client.py
index 13d90311..434701f2 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -26,6 +26,61 @@ def test_validate_key_true_on_success(mocker):
     assert params.limit == 1
 
 
+def test_list_transcript_params_serialize_without_model_config():
+    # assemblyai==0.64.4 + pydantic==2.13.4: the SDK's own ListTranscriptParameters
+    # leaks a spurious `model_config` field into .dict(exclude_none=True) — exactly
+    # what the SDK serializes onto the query string. The helper must drop it.
+    params = client._list_transcript_params(3)
+    assert params.dict(exclude_none=True) == {"limit": 3}
+
+
+def test_validate_key_probe_serializes_without_model_config(mocker):
+    T = mocker.patch.object(client.aai, "Transcriber", autospec=True)
+    T.return_value.list_transcripts.return_value = mocker.MagicMock()
+    assert client.validate_key("sk_good") is True
+    params = T.return_value.list_transcripts.call_args.args[0]
+    # No junk `model_config` query param on the wire (and still a one-row probe).
+    assert params.dict(exclude_none=True) == {"limit": 1}
+
+
+def test_list_transcripts_params_serialize_without_model_config(mocker):
+    resp = mocker.MagicMock()
+    resp.transcripts = []
+    T = mocker.patch.object(client.aai, "Transcriber", autospec=True)
+    T.return_value.list_transcripts.return_value = resp
+    assert client.list_transcripts("sk", limit=7) == []
+    params = T.return_value.list_transcripts.call_args.args[0]
+    assert params.dict(exclude_none=True) == {"limit": 7}
+
+
+def test_validate_key_sdk_error_message_is_one_clean_line(mocker):
+    # httpx-backed SDK failures embed a multi-line repr; the CLI error must keep the
+    # reason but collapse it to one line and drop the `Request: <…>` tail.
+    raw = (
+        "failed to retrieve transcripts: \n"
+        "Reason: [Errno -3] Temporary failure in name resolution\n"
+        "Request: <Request('GET', 'https://api.assemblyai.com/v2/transcript?limit=1')>"
+    )
+    T = mocker.patch.object(client.aai, "Transcriber", autospec=True)
+    T.return_value.list_transcripts.side_effect = aai.types.AssemblyAIError(raw)
+    with pytest.raises(APIError) as exc:
+        client.validate_key("sk")
+    assert exc.value.message == (
+        "Could not validate key: failed to retrieve transcripts: "
+        "Reason: [Errno -3] Temporary failure in name resolution"
+    )
+
+
+def test_validate_key_network_error_message_is_one_clean_line(mocker):
+    T = mocker.patch.object(client.aai, "Transcriber", autospec=True)
+    T.return_value.list_transcripts.side_effect = ConnectionError(
+        "connection refused\nRequest: <Request('GET', 'https://api.assemblyai.com/x')>"
+    )
+    with pytest.raises(APIError) as exc:
+        client.validate_key("sk")
+    assert exc.value.message == "Network error contacting AssemblyAI: connection refused"
+
+
 def test_validate_key_false_on_auth_error(mocker):
     T = mocker.patch.object(client.aai, "Transcriber", autospec=True)
     T.return_value.list_transcripts.side_effect = aai.types.AssemblyAIError(
diff --git a/tests/test_code_gen.py b/tests/test_code_gen.py
index 270e8065..94b0a229 100644
--- a/tests/test_code_gen.py
+++ b/tests/test_code_gen.py
@@ -2,10 +2,12 @@
 
 from typing import ClassVar
 
+import pytest
 from hypothesis import given, settings
 from hypothesis import strategies as st
 
 from aai_cli.code_gen import serialize
+from aai_cli.code_gen.transcribe import render as render_transcribe_code
 
 settings.register_profile("codegen", max_examples=150)
 settings.load_profile("codegen")
@@ -300,6 +302,67 @@ def test_fuzz_result_handling_always_execs(merged):
     exec(compile(body, "<snippets>", "exec"), {"transcript": _Stub(), "getattr": getattr})  # noqa: S102
 
 
+@pytest.mark.parametrize(
+    ("field", "fragment"),
+    [
+        ("text", "print(transcript.text)"),
+        ("id", "print(transcript.id)"),
+        ("status", "print(transcript.status.value)"),
+        ("utterances", 'print(f"Speaker {utt.speaker}: {utt.text}")'),
+        ("srt", "print(transcript.export_subtitles_srt())"),
+        ("json", "print(json.dumps(transcript.json_response, default=str))"),
+    ],
+)
+def test_transcribe_render_output_field_generates_matching_code(field, fragment):
+    # Each -o choice maps to result code faithful to client._FIELD_RENDERERS.
+    code = render_transcribe_code({}, "audio.mp3", output=field)
+    _compiles(code)
+    assert fragment in code
+
+
+def test_transcribe_render_output_json_imports_json_only_when_needed():
+    assert "import json" in render_transcribe_code({}, "audio.mp3", output="json")
+    assert "import json" not in render_transcribe_code({}, "audio.mp3", output="srt")
+    assert "import json" not in render_transcribe_code({}, "audio.mp3")
+
+
+def test_transcribe_render_output_replaces_analysis_result_handling():
+    # -o overrides the analysis sections, exactly like the real command's output path.
+    code = render_transcribe_code({"speaker_labels": True}, "audio.mp3", output="srt")
+    _compiles(code)
+    assert "print(transcript.export_subtitles_srt())" in code
+    assert "transcript.utterances" not in code
+
+
+def test_transcribe_render_output_takes_precedence_over_llm_gateway():
+    # The real command returns the -o field before the LLM chain runs; the generated
+    # script mirrors that and stays free of an unused OpenAI import.
+    code = render_transcribe_code(
+        {},
+        "audio.mp3",
+        llm_gateway={"prompts": ["summarize"], "model": "m", "max_tokens": 5},
+        output="srt",
+    )
+    _compiles(code)
+    assert "print(transcript.export_subtitles_srt())" in code
+    assert "from openai import OpenAI" not in code
+
+
+def test_transcribe_render_unknown_output_falls_back_to_text():
+    # Mirrors select_transcript_field's fallback for unrecognized field names.
+    code = render_transcribe_code({}, "audio.mp3", output="bogus")
+    _compiles(code)
+    assert "print(transcript.text)" in code
+
+
+@given(
+    merged=merged_strategy(config_builder.TRANSCRIBE_COERCE),
+    field=st.sampled_from(["text", "id", "status", "utterances", "srt", "json"]),
+)
+def test_fuzz_transcribe_output_fields_always_compile(merged, field):
+    _compiles(render_transcribe_code(merged, "audio.mp3", output=field))
+
+
 def test_transcribe_show_code_includes_llm_gateway_transform():
     code = code_gen.transcribe(
         {"speaker_labels": True},
diff --git a/tests/test_context.py b/tests/test_context.py
index 84d39564..c13be304 100644
--- a/tests/test_context.py
+++ b/tests/test_context.py
@@ -1,9 +1,13 @@
+import json
+import sys
+
+import pytest
 import typer
 from typer.testing import CliRunner
 
 from aai_cli import config, environments
 from aai_cli.auth.flow import LoginResult
-from aai_cli.context import AppState, env_override_warning, run_command
+from aai_cli.context import AppState, _interactive_session, env_override_warning, run_command
 from aai_cli.errors import APIError, NotAuthenticated, auth_failure
 
 runner = CliRunner()
@@ -23,6 +27,94 @@ def go(ctx: typer.Context):
     return app
 
 
+def _force_interactive(monkeypatch):
+    """Pretend a human is at the terminal (CliRunner/pytest streams are never TTYs)."""
+    monkeypatch.setattr("aai_cli.context._interactive_session", lambda: True)
+
+
+class _TtyProbe:
+    def __init__(self, tty):
+        self._tty = tty
+
+    def isatty(self):
+        return self._tty
+
+
+@pytest.mark.parametrize(
+    ("stdin_tty", "stderr_tty", "agentic", "expected"),
+    [
+        (True, True, False, True),  # a real terminal session
+        (False, True, False, False),  # stdin piped/redirected (CI input)
+        (True, False, False, False),  # stderr redirected (logged pipeline)
+        (True, True, True, False),  # agent/CI env detected despite TTYs
+    ],
+)
+def test_interactive_session_requires_both_ttys_and_no_agent(
+    monkeypatch, stdin_tty, stderr_tty, agentic, expected
+):
+    monkeypatch.setattr(sys, "stdin", _TtyProbe(stdin_tty))
+    monkeypatch.setattr(sys, "stderr", _TtyProbe(stderr_tty))
+    monkeypatch.setattr("aai_cli.output.is_agentic", lambda: agentic)
+    assert _interactive_session() is expected
+
+
+def test_run_command_skips_auto_login_when_session_not_interactive(monkeypatch):
+    # CliRunner/pytest streams are not TTYs, so this is a genuine non-interactive
+    # session: no browser login may start (it would bind a port and block 120s),
+    # and the ORIGINAL NotAuthenticated must surface with its actionable suggestion.
+    monkeypatch.setattr(
+        "aai_cli.context.run_login_flow",
+        lambda: (_ for _ in ()).throw(AssertionError("non-interactive must not auto-login")),
+    )
+
+    def body(state, json_mode):
+        raise NotAuthenticated()
+
+    result = runner.invoke(_make_app(body), ["go"])
+    assert result.exit_code == 4
+    assert "starting browser login" not in result.output
+    assert "You're not signed in." in result.output
+    assert "aai login" in result.output
+    assert "ASSEMBLYAI_API_KEY" in result.output
+
+
+def test_run_command_not_interactive_json_keeps_clean_error_shape(monkeypatch):
+    monkeypatch.setattr(
+        "aai_cli.context.run_login_flow",
+        lambda: (_ for _ in ()).throw(AssertionError("non-interactive must not auto-login")),
+    )
+
+    def body(state, json_mode):
+        raise NotAuthenticated()
+
+    result = runner.invoke(_make_app(body, json=True), ["go"])
+    assert result.exit_code == 4
+    payload = json.loads(result.output)  # the only output line is machine-readable
+    assert payload["error"]["type"] == "not_authenticated"
+    assert "aai login" in payload["error"]["suggestion"]
+    assert "ASSEMBLYAI_API_KEY" in payload["error"]["suggestion"]
+
+
+def test_run_command_auto_login_notice_suppressed_in_json_mode(monkeypatch):
+    # Even when auto-login runs, --json stderr must stay machine-readable: the
+    # human "starting browser login" prose is suppressed and only the JSON error
+    # shape is emitted.
+    _force_interactive(monkeypatch)
+    monkeypatch.setattr(
+        "aai_cli.context.run_login_flow",
+        lambda: LoginResult(api_key="sk_auto", session_jwt="j", session_token="t", account_id=1),
+    )
+
+    def body(state, json_mode):
+        raise NotAuthenticated()
+
+    result = runner.invoke(_make_app(body, json=True), ["go"])
+    assert result.exit_code == 4
+    assert "starting browser login" not in result.output
+    payload = json.loads(result.output)
+    assert payload["error"]["type"] == "login_required"
+
+
 def test_run_command_maps_cli_error_to_exit_code():
     def body(state, json_mode):
         raise NotAuthenticated()
@@ -32,6 +124,7 @@ def body(state, json_mode):
 
 
 def test_run_command_auto_logs_in_and_asks_for_rerun(monkeypatch):
+    _force_interactive(monkeypatch)
     monkeypatch.setattr(
         "aai_cli.context.run_login_flow",
         lambda: LoginResult(
@@ -59,6 +152,7 @@ def body(state, json_mode):
 
 
 def test_run_command_auto_login_persistence_failure_is_clean(monkeypatch):
+    _force_interactive(monkeypatch)
     monkeypatch.setattr(
         "aai_cli.context.run_login_flow",
         lambda: LoginResult(
@@ -83,8 +177,10 @@ def body(state, json_mode):
 
 
 def test_run_command_auto_login_failure_is_clean(monkeypatch):
+    _force_interactive(monkeypatch)
+
     def fail_login():
-        raise APIError("Login timed out waiting for the browser.")
+        raise APIError("Login failed: the server returned an unexpected response.")
 
     monkeypatch.setattr("aai_cli.context.run_login_flow", fail_login)
 
@@ -93,10 +189,31 @@ def body(state, json_mode):
 
     result = runner.invoke(_make_app(body), ["go"])
     assert result.exit_code == 1
-    assert "Login timed out" in result.output
+    assert "Login failed" in result.output
+
+
+def test_run_command_auto_login_timeout_maps_to_auth_error(monkeypatch):
+    # The loopback timeout is an auth failure (not_authenticated, exit 4), not a
+    # generic api_error.
+    _force_interactive(monkeypatch)
+
+    def fail_login():
+        raise NotAuthenticated("Login timed out waiting for the browser.")
+
+    monkeypatch.setattr("aai_cli.context.run_login_flow", fail_login)
+
+    def body(state, json_mode):
+        raise NotAuthenticated()
+
+    result = runner.invoke(_make_app(body, json=True), ["go"])
+    assert result.exit_code == 4
+    payload = json.loads(result.output)
+    assert payload["error"]["type"] == "not_authenticated"
+    assert "Login timed out" in payload["error"]["message"]
 
 
 def test_run_command_skips_auto_login_for_rejected_env_key(monkeypatch):
+    _force_interactive(monkeypatch)
     monkeypatch.setenv(config.ENV_API_KEY, "sk_bad")
     monkeypatch.setattr(
         "aai_cli.context.run_login_flow",
@@ -111,6 +228,7 @@ def body(state, json_mode):
 
 
 def test_run_command_never_auto_logs_in_login_command(monkeypatch):
+    _force_interactive(monkeypatch)
     monkeypatch.setattr(
         "aai_cli.context.run_login_flow",
         lambda: (_ for _ in ()).throw(AssertionError("login command must not auto-login")),
@@ -240,6 +358,7 @@ def test_run_command_auto_logs_in_when_env_key_set_but_error_is_not_a_rejection(
     # ENV key present but the failure is a generic NotAuthenticated (not a key
     # rejection): a browser login can still fix it, so we DO auto-login. This pins
     # the `and` in _should_auto_login — an `or` would wrongly skip the retry here.
+    _force_interactive(monkeypatch)
     monkeypatch.setenv(config.ENV_API_KEY, "sk_env")
     ran = {"login": 0}
 
diff --git a/tests/test_keys.py b/tests/test_keys.py
index 2fb306f9..fead920a 100644
--- a/tests/test_keys.py
+++ b/tests/test_keys.py
@@ -85,6 +85,7 @@ def test_keys_create_rejects_default_project_without_int_id(mocker):
 
 
 def test_keys_list_without_session_runs_login(monkeypatch, mocker):
+    monkeypatch.setattr("aai_cli.context._interactive_session", lambda: True)
     monkeypatch.setattr("aai_cli.context.run_login_flow", _login_result)
     list_projects = mocker.patch(
         "aai_cli.commands.keys.ams.list_projects", autospec=True, return_value=[]
diff --git a/tests/test_llm_command.py b/tests/test_llm_command.py
index e7044e19..9d4b1567 100644
--- a/tests/test_llm_command.py
+++ b/tests/test_llm_command.py
@@ -140,6 +140,7 @@ def test_llm_missing_prompt_exits_2(monkeypatch):
 
 
 def test_llm_unauthenticated_runs_login(monkeypatch):
+    monkeypatch.setattr("aai_cli.context._interactive_session", lambda: True)
     monkeypatch.setattr("aai_cli.context.run_login_flow", _login_result)
 
     def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None):
@@ -283,6 +284,44 @@ def test_llm_follow_requires_piped_stdin(monkeypatch):
     assert "stdin" in result.output.lower()
 
 
+def test_llm_follow_empty_stdin_exits_2(monkeypatch):
+    # `aai llm -f "…" </dev/null` must not exit 0 silently: an empty pipe means the
+    # prompt never ran, which is a usage error, not a success.
+    _auth()
+    calls = []
+
+    def fake_complete(api_key, *, model, messages, max_tokens, transcript_id=None):
+        calls.append(messages)
+        return _payload("ok")
+
+    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", fake_complete)
+    result = runner.invoke(app, ["llm", "summarize", "--follow", "--json"], input="")
+    assert result.exit_code == 2
+    assert "--follow needs transcript text piped on stdin" in result.output
+    assert calls == []  # no API call was made
+
+
+def test_llm_follow_interrupt_before_first_turn_still_exits_0(monkeypatch):
+    # Ctrl-C before any turn arrives is the normal "stop watching" signal, not the
+    # empty-stdin usage error.
+    _auth()
+
+    class _InterruptIter:
+        def __iter__(self):
+            return self
+
+        def __next__(self):
+            raise KeyboardInterrupt
+
+    monkeypatch.setattr(
+        "aai_cli.commands.llm.stdio.iter_piped_stdin_lines", lambda: _InterruptIter()
+    )
+    monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", lambda *a, **k: _payload())
+    result = runner.invoke(app, ["llm", "summarize", "--follow", "--json"], input="")
+    assert result.exit_code == 0
+    assert "--follow needs transcript text piped on stdin" not in result.output
+
+
 def test_llm_follow_stops_cleanly_on_interrupt(monkeypatch):
     _auth()
     calls = []
diff --git a/tests/test_login.py b/tests/test_login.py
index 2a86c68b..e9fe3bff 100644
--- a/tests/test_login.py
+++ b/tests/test_login.py
@@ -59,6 +59,7 @@ def test_whoami_human_render_shows_detail_rows(monkeypatch, mocker):
 
 
 def test_whoami_unauthenticated_runs_login(monkeypatch, mocker):
+    monkeypatch.setattr("aai_cli.context._interactive_session", lambda: True)
     monkeypatch.setattr("aai_cli.context.run_login_flow", _fake_login_result)
     validate = mocker.patch(
         "aai_cli.commands.login.client.validate_key", autospec=True, return_value=True
@@ -124,7 +125,7 @@ def test_login_oauth_flow_failure_exits_nonzero(monkeypatch):
     from aai_cli.errors import APIError
 
     def boom():
-        raise APIError("Login timed out waiting for the browser.")
+        raise APIError("Login failed: the server returned an unexpected response.")
 
     monkeypatch.setattr("aai_cli.context.run_login_flow", boom)
     result = runner.invoke(app, ["login"])
@@ -132,6 +133,52 @@ def boom():
     assert config.get_api_key("default") is None
 
 
+def test_login_timeout_is_auth_typed_with_exit_4(monkeypatch):
+    # The loopback timeout surfaces as not_authenticated/exit 4, not api_error/1.
+    from aai_cli.errors import NotAuthenticated
+
+    def timed_out():
+        raise NotAuthenticated("Login timed out waiting for the browser.")
+
+    monkeypatch.setattr("aai_cli.context.run_login_flow", timed_out)
+    result = runner.invoke(app, ["login", "--json"])
+    assert result.exit_code == 4
+    payload = json.loads(result.output)
+    assert payload["error"]["type"] == "not_authenticated"
+    assert config.get_api_key("default") is None
+
+
+def test_login_empty_api_key_flag_is_usage_error(monkeypatch):
+    # `--api-key "$UNSET_VAR"` (an explicit empty value) must not silently fall
+    # into the browser flow.
+    monkeypatch.setattr(
+        "aai_cli.context.run_login_flow",
+        lambda: (_ for _ in ()).throw(AssertionError("empty --api-key must not start a browser")),
+    )
+    result = runner.invoke(app, ["login", "--api-key", ""])
+    assert result.exit_code == 2
+    assert "empty" in result.output
+    assert config.get_api_key("default") is None
+
+
+def test_login_whitespace_api_key_flag_is_usage_error(monkeypatch):
+    monkeypatch.setattr(
+        "aai_cli.context.run_login_flow",
+        lambda: (_ for _ in ()).throw(AssertionError("blank --api-key must not start a browser")),
+    )
+    result = runner.invoke(app, ["login", "--api-key", "   "])
+    assert result.exit_code == 2
+    assert config.get_api_key("default") is None
+
+
+def test_login_empty_api_key_flag_json_error_shape():
+    result = runner.invoke(app, ["login", "--api-key", "", "--json"])
+    assert result.exit_code == 2
+    payload = json.loads(result.output)
+    assert payload["error"]["type"] == "usage_error"
+    assert "--api-key" in payload["error"]["message"]
+
+
 def test_login_api_key_flag_still_bypasses_oauth(monkeypatch, mocker):
     monkeypatch.setattr(
         "aai_cli.context.run_login_flow",
@@ -281,16 +328,29 @@ def test_whoami_renders_human_table_reachable(mocker):
 
 def test_whoami_renders_human_table_rejected_key(mocker):
     # The non-JSON render path also covers the "key rejected" branch and the
-    # account/session "none" fallbacks (the em-dash placeholder).
+    # account/session "none" fallbacks (the em-dash placeholder). A rejected key
+    # is a failed preflight: the status still renders, but the exit code is 4.
     config.set_api_key("default", "sk_1234567890")
     mocker.patch("aai_cli.output.resolve_json", autospec=True, return_value=False)
     mocker.patch("aai_cli.commands.login.client.validate_key", autospec=True, return_value=False)
     result = runner.invoke(app, ["whoami"])
-    assert result.exit_code == 0
+    assert result.exit_code == 4
     assert "key rejected" in result.output
     assert "none" in result.output
 
 
+def test_whoami_rejected_key_exits_4_with_json_status_on_stdout(mocker):
+    # CI preflight contract: a rejected key keeps the rendered status (stdout, clean
+    # JSON) but signals failure via the auth exit code 4.
+    config.set_api_key("default", "sk_1234567890")
+    mocker.patch("aai_cli.commands.login.client.validate_key", autospec=True, return_value=False)
+    result = runner.invoke(app, ["whoami", "--json"])
+    assert result.exit_code == 4
+    data = json.loads(result.output)
+    assert data["reachable"] is False
+    assert data["profile"] == "default"
+
+
 def test_whoami_honors_env_api_key(monkeypatch, mocker):
     # A CI box authenticated only via ASSEMBLYAI_API_KEY (no keyring entry) must be
     # able to use whoami as a preflight check.
diff --git a/tests/test_sessions_command.py b/tests/test_sessions_command.py
index 93bf853f..5bfe3708 100644
--- a/tests/test_sessions_command.py
+++ b/tests/test_sessions_command.py
@@ -107,6 +107,7 @@ def test_sessions_get_renders_detail(monkeypatch, mocker):
 
 
 def test_sessions_without_session_runs_login(monkeypatch, mocker):
+    monkeypatch.setattr("aai_cli.context._interactive_session", lambda: True)
     monkeypatch.setattr("aai_cli.context.run_login_flow", _login_result)
     list_ = mocker.patch(
         "aai_cli.commands.sessions.ams.list_streaming", autospec=True, return_value={"data": []}
diff --git a/tests/test_source_validation.py b/tests/test_source_validation.py
index 83016826..5d979473 100644
--- a/tests/test_source_validation.py
+++ b/tests/test_source_validation.py
@@ -26,6 +26,57 @@ def test_resolve_audio_source_sample_explicit_and_missing(tmp_path):
     assert "--sample" in (exc.value.suggestion or "")
 
 
+def test_resolve_audio_source_rejects_explicit_source_plus_sample(tmp_path):
+    # Both an explicit source and --sample is a contradiction: neither may silently win.
+    clip = tmp_path / "clip.mp3"
+    clip.write_bytes(b"fake")
+    with pytest.raises(UsageError) as exc:
+        client.resolve_audio_source(str(clip), sample=True)
+    assert exc.value.exit_code == 2
+    assert exc.value.message == "An audio source and --sample cannot be combined."
+    assert exc.value.suggestion == "Pass the file/URL or --sample, not both."
+
+
+def test_resolve_audio_source_source_plus_sample_rejected_even_without_checks():
+    # The conflict fires before any existence check, including --show-code paths.
+    with pytest.raises(UsageError) as exc:
+        client.resolve_audio_source("missing.mp3", sample=True, check_local=False)
+    assert exc.value.message == "An audio source and --sample cannot be combined."
+
+
+def test_transcribe_source_plus_sample_exits_2(mocker, tmp_path):
+    # No key configured: the conflict must fail before credential resolution.
+    tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True)
+    clip = tmp_path / "clip.mp3"
+    clip.write_bytes(b"fake")
+    result = runner.invoke(app, ["transcribe", str(clip), "--sample"])
+    assert result.exit_code == 2
+    assert "An audio source and --sample cannot be combined." in result.output
+    assert "starting browser login" not in result.output
+    tx.assert_not_called()
+
+
+def test_resolve_audio_source_rejects_directory(tmp_path):
+    # Path(...).exists() is true for a directory; it must still be rejected up front.
+    with pytest.raises(CLIError) as exc:
+        client.resolve_audio_source(str(tmp_path), sample=False)
+    assert exc.value.error_type == "not_a_file"
+    assert exc.value.exit_code == 2
+    assert exc.value.message == f"Not a file: {tmp_path}"
+    assert exc.value.suggestion == "Pass an audio file, not a directory."
+
+
+def test_transcribe_directory_source_fails_before_credentials(mocker, tmp_path):
+    # No key configured: a directory must read as "not a file", never trigger a login
+    # (or an upload attempt).
+    tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True)
+    result = runner.invoke(app, ["transcribe", str(tmp_path)])
+    assert result.exit_code == 2
+    assert f"Not a file: {tmp_path}" in result.output
+    assert "starting browser login" not in result.output
+    tx.assert_not_called()
+
+
 def test_resolve_audio_source_missing_local_file_fails_cleanly():
     with pytest.raises(CLIError) as exc:
         client.resolve_audio_source("no-such-clip.mp3", sample=False)
diff --git a/tests/test_transcribe.py b/tests/test_transcribe.py
index 86cb80ff..bdbf6c8f 100644
--- a/tests/test_transcribe.py
+++ b/tests/test_transcribe.py
@@ -89,6 +89,7 @@ def test_transcribe_json_output(mocker):
 
 
 def test_transcribe_unauthenticated_runs_login_then_transcribes(monkeypatch, mocker):
+    monkeypatch.setattr("aai_cli.context._interactive_session", lambda: True)
     monkeypatch.setattr("aai_cli.context.run_login_flow", _login_result)
     tx = mocker.patch(
         "aai_cli.commands.transcribe.client.transcribe",
@@ -478,6 +479,120 @@ def _boom(*a, **k):
     assert '"transcript_id": transcript.id' in result.output
 
 
+def test_transcribe_show_code_output_srt_generates_export(monkeypatch):
+    # -o srt must be reflected in the generated code (not silently dropped).
+    def _boom(*a, **k):
+        raise AssertionError("must not transcribe")
+
+    monkeypatch.setattr("aai_cli.commands.transcribe.client.transcribe", _boom)
+    result = runner.invoke(app, ["transcribe", "--sample", "-o", "srt", "--show-code"])
+    assert result.exit_code == 0
+    compile(result.output, "<generated>", "exec")  # the emitted script is runnable
+    assert "print(transcript.export_subtitles_srt())" in result.output
+    assert "print(transcript.text)" not in result.output
+
+
+def test_transcribe_show_code_output_utterances_generates_loop(monkeypatch):
+    def _boom(*a, **k):
+        raise AssertionError("must not transcribe")
+
+    monkeypatch.setattr("aai_cli.commands.transcribe.client.transcribe", _boom)
+    result = runner.invoke(app, ["transcribe", "--sample", "-o", "utterances", "--show-code"])
+    assert result.exit_code == 0
+    compile(result.output, "<generated>", "exec")
+    assert 'print(f"Speaker {utt.speaker}: {utt.text}")' in result.output
+
+
+def test_transcribe_negative_audio_start_exits_2(mocker):
+    _auth()
+    tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True)
+    result = runner.invoke(app, ["transcribe", "audio.mp3", "--audio-start", "-100"])
+    assert result.exit_code == 2
+    tx.assert_not_called()
+
+
+def test_transcribe_language_code_with_detection_exits_2(mocker):
+    _auth()
+    tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True)
+    result = runner.invoke(
+        app,
+        ["transcribe", "audio.mp3", "--language-code", "en_us", "--language-detection"],
+    )
+    assert result.exit_code == 2
+    assert "--language-code and --language-detection can't be combined." in result.output
+    tx.assert_not_called()
+
+
+def test_transcribe_language_flags_alone_are_accepted(mocker):
+    # Only the combination is contradictory; each flag works on its own.
+    _auth()
+    tx = mocker.patch(
+        "aai_cli.commands.transcribe.client.transcribe",
+        autospec=True,
+        return_value=_fake_transcript(mocker),
+    )
+    result = runner.invoke(app, ["transcribe", "audio.mp3", "--language-code", "en_us"])
+    assert result.exit_code == 0
+    assert tx.call_args.kwargs["config"].language_code == "en_us"
+    result = runner.invoke(app, ["transcribe", "audio.mp3", "--language-detection"])
+    assert result.exit_code == 0
+    assert tx.call_args.kwargs["config"].language_detection is True
+
+
+def test_transcribe_speakers_expected_without_labels_exits_2(mocker):
+    _auth()
+    tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True)
+    result = runner.invoke(app, ["transcribe", "audio.mp3", "--speakers-expected", "2"])
+    assert result.exit_code == 2
+    assert "--speakers-expected only applies when diarization is enabled." in result.output
+    assert "Add --speaker-labels." in result.output
+    tx.assert_not_called()
+
+
+def test_transcribe_speakers_expected_with_labels_is_accepted(mocker):
+    _auth()
+    tx = mocker.patch(
+        "aai_cli.commands.transcribe.client.transcribe",
+        autospec=True,
+        return_value=_fake_transcript(mocker),
+    )
+    result = runner.invoke(
+        app, ["transcribe", "audio.mp3", "--speaker-labels", "--speakers-expected", "2"]
+    )
+    assert result.exit_code == 0
+    assert tx.call_args.kwargs["config"].speakers_expected == 2
+
+
+def test_transcribe_speakers_expected_with_config_speaker_labels_is_accepted(mocker):
+    # Diarization enabled through the --config escape hatch counts too: the check
+    # runs on the merged config, not just the curated flag.
+    _auth()
+    tx = mocker.patch(
+        "aai_cli.commands.transcribe.client.transcribe",
+        autospec=True,
+        return_value=_fake_transcript(mocker),
+    )
+    result = runner.invoke(
+        app,
+        ["transcribe", "audio.mp3", "--config", "speaker_labels=true", "--speakers-expected", "2"],
+    )
+    assert result.exit_code == 0
+    assert tx.call_args.kwargs["config"].speakers_expected == 2
+
+
+def test_transcribe_unknown_pii_policy_exits_2_and_lists_valid(mocker):
+    _auth()
+    tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True)
+    result = runner.invoke(
+        app,
+        ["transcribe", "audio.mp3", "--redact-pii", "--redact-pii-policy", "not_a_policy"],
+    )
+    assert result.exit_code == 2
+    assert "Unknown PII policy(s) ['not_a_policy']" in result.output
+    assert "person_name" in result.output  # the valid values are listed
+    tx.assert_not_called()
+
+
 def test_transcribe_renders_summary_human(monkeypatch, mocker):
     _auth()
     monkeypatch.setattr("aai_cli.output.resolve_json", lambda *, explicit: False)
diff --git a/tests/test_transcripts.py b/tests/test_transcripts.py
index ebfaaf13..802aa5ad 100644
--- a/tests/test_transcripts.py
+++ b/tests/test_transcripts.py
@@ -92,6 +92,7 @@ def test_list_renders_rows(mocker):
 
 
 def test_list_unauthenticated_runs_login(monkeypatch, mocker):
+    monkeypatch.setattr("aai_cli.context._interactive_session", lambda: True)
     monkeypatch.setattr("aai_cli.context.run_login_flow", _login_result)
     rows = [{"id": "t1", "status": "completed"}]
     list_ = mocker.patch(
@@ -104,6 +105,17 @@ def test_list_unauthenticated_runs_login(monkeypatch, mocker):
     assert "Run the same command again" in result.output
 
 
+def test_list_limit_must_be_at_least_one(mocker):
+    # min=1 on --limit: 0 and negatives are rejected client-side, before any request.
+    config.set_api_key("default", "sk_live")
+    list_ = mocker.patch("aai_cli.commands.transcripts.client.list_transcripts", autospec=True)
+    for bad in ("0", "-3"):
+        result = runner.invoke(app, ["transcripts", "list", "--limit", bad])
+        assert result.exit_code == 2
+        assert "limit" in result.output.lower()
+    list_.assert_not_called()
+
+
 def test_list_human_mode_renders_table(monkeypatch, mocker):
     config.set_api_key("default", "sk_live")
     monkeypatch.setattr("aai_cli.output.resolve_json", lambda *, explicit: False)
diff --git a/tests/test_youtube.py b/tests/test_youtube.py
index f9625a9d..37a10ba8 100644
--- a/tests/test_youtube.py
+++ b/tests/test_youtube.py
@@ -56,6 +56,47 @@ def prepare_filename(self, info):
     assert captured["download"] is True
 
 
+def test_download_audio_routes_ytdlp_output_to_silent_logger(tmp_path, monkeypatch, capsys):
+    # yt-dlp's default logger writes its own "ERROR: …" line to stderr before the CLI's
+    # clean error, duplicating the message; the passed logger must swallow everything.
+    import logging
+
+    captured = {}
+
+    class FakeYDL:
+        def __init__(self, opts):
+            captured["opts"] = opts
+
+        def __enter__(self):
+            return self
+
+        def __exit__(self, *exc):
+            return False
+
+        def extract_info(self, url, download):
+            (tmp_path / "x.m4a").write_bytes(b"audio")
+            return {"id": "x", "ext": "m4a"}
+
+        def prepare_filename(self, info):
+            return str(tmp_path / "x.m4a")
+
+    _fake_ytdlp(monkeypatch, FakeYDL)
+    youtube.download_audio("https://youtu.be/x", tmp_path)
+    logger = captured["opts"]["logger"]
+    # Structurally quiet: no propagation to root, only swallow-everything handlers.
+    assert logger.name == "aai_cli.youtube.yt_dlp"
+    assert logger.propagate is False
+    assert logger.handlers
+    assert all(isinstance(h, logging.NullHandler) for h in logger.handlers)
+    # Behaviorally quiet: even an ERROR record produces no console output.
+    logger.error("ERROR: [youtube] nope: Video unavailable")
+    logger.warning("WARNING: noisy")
+    logger.debug("[debug] noise")
+    out = capsys.readouterr()
+    assert out.err == ""
+    assert out.out == ""
+
+
 def test_download_audio_falls_back_to_landed_file(tmp_path, monkeypatch):
     landed = tmp_path / "actual.webm"
 

From 047455b6dc3b040942b5b54c5fa325af14a2ca15 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 10 Jun 2026 04:52:48 +0000
Subject: [PATCH 02/11] Fix QA findings in stream/agent: show-code fidelity,
 error hygiene

- stream --show-code generates code for the actual source: files/URLs
  decode through ffmpeg to PCM at the requested rate (mirroring the run
  path), stdin reads sys.stdin.buffer, mic stays mic; the hardcoded
  16 kHz override is gone and --sample-rate/--config sample_rate win
- agent <file> --show-code warns on stderr that the snippet is mic-based
- stream <youtube-url> --show-code is a clean usage error
- websockets logger silenced so reader-thread EOFs never dump tracebacks
- agent maps a handshake 403 to api_error like stream; 401/policy-close
  still read as a rejected key
- --json with -o text is rejected on stream and agent
- the agent headphones notice routes to stderr in non-JSON modes
- mic-open failures name the default microphone and suggest fixes

https://claude.ai/code/session_01Uv7cEgJi2LgknkvfHP52g7
---
 aai_cli/agent/render.py       |  12 ++--
 aai_cli/agent/session.py      |  41 +++++++++++-
 aai_cli/code_gen/__init__.py  |   8 ++-
 aai_cli/code_gen/stream.py    | 123 +++++++++++++++++++++++++++++-----
 aai_cli/commands/agent.py     |  15 ++++-
 aai_cli/commands/stream.py    |  38 +++++++++--
 aai_cli/microphone.py         |  12 +++-
 aai_cli/streaming/session.py  |  16 ++++-
 tests/test_agent_command.py   |  56 ++++++++++++++++
 tests/test_agent_render.py    |  19 +++++-
 tests/test_agent_session.py   |  89 ++++++++++++++++++++++++
 tests/test_code_gen_stream.py | 117 ++++++++++++++++++++++++++++++++
 tests/test_microphone.py      |  20 ++++++
 tests/test_stream_command.py  |  99 +++++++++++++++++++++++++++
 14 files changed, 628 insertions(+), 37 deletions(-)
 create mode 100644 tests/test_code_gen_stream.py

diff --git a/aai_cli/agent/render.py b/aai_cli/agent/render.py
index e10c4351..5c62cf7d 100644
--- a/aai_cli/agent/render.py
+++ b/aai_cli/agent/render.py
@@ -40,13 +40,15 @@ def connected(self) -> None:
             self._line(Text("Connected — start talking. (Ctrl-C to stop)", style="aai.muted"))
 
     def notice(self, text: str) -> None:
-        """Print a human-facing notice (suppressed in JSON; to stderr in text mode)."""
+        """Print a human-facing notice: suppressed in JSON, to stderr otherwise.
+
+        Stderr in *every* non-JSON mode (not just ``-o text``): the default human
+        mode is also piped sometimes (``aai agent | head``), and a notice on stdout
+        would be consumed as transcript data there.
+        """
         if self.json_mode:
             return
-        if self.text_mode:
-            self._status(text.rstrip("\n"))
-        else:
-            self._line(text.rstrip("\n"))
+        self._status(text.rstrip("\n"))
 
     # --- user --------------------------------------------------------------
     def user_partial(self, text: str) -> None:
diff --git a/aai_cli/agent/session.py b/aai_cli/agent/session.py
index 77e52674..f5375430 100644
--- a/aai_cli/agent/session.py
+++ b/aai_cli/agent/session.py
@@ -3,6 +3,7 @@
 import base64
 import contextlib
 import json
+import logging
 import threading
 from collections.abc import Callable
 from dataclasses import dataclass
@@ -31,6 +32,9 @@ def ws_url() -> str:
 # session.error codes that mean the connection is unauthorized -> exit 2.
 _AUTH_ERROR_CODES = {"UNAUTHORIZED", "FORBIDDEN"}
 
+# A pre-upgrade HTTP 403 on the WebSocket handshake (see _is_rejected_key).
+_HTTP_FORBIDDEN = 403
+
 # The websocket connection, the `connect` factory, and the renderer/player/mic I/O
 # objects come from libraries/modules with no usable type stubs. Alias that untyped
 # boundary here so each role is named in signatures and `Any` stays in one place.
@@ -189,10 +193,44 @@ def _send_audio_loop(ws: _WebSocket, session: VoiceAgentSession, mic: _IO) -> No
             return
 
 
+# The sync websockets client logs through these; both are silenced for the session
+# (the parent covers any future child logger, the client logger is the one that fires).
+_WEBSOCKETS_LOGGERS = ("websockets", "websockets.client")
+
+
+def _silence_websockets_logging() -> None:
+    """Keep websockets' internal logging off the user's stderr for the session.
+
+    The sync client's background reader thread logs unhandled teardown errors (e.g.
+    ``EOFError: stream ended``) as "unexpected internal error" + traceback through the
+    ``websockets.client`` logger, which would land on stderr right next to our clean
+    CLIError. Those internals are never user-actionable from the CLI, so raise the
+    loggers above every level they emit at. Idempotent: re-setting the level is a no-op.
+    """
+    for name in _WEBSOCKETS_LOGGERS:
+        logging.getLogger(name).setLevel(logging.CRITICAL)
+
+
+def _is_rejected_key(exc: Exception) -> bool:
+    """Is this connect/session failure auth-shaped (the key itself was rejected)?
+
+    Mirrors how `stream` classifies handshake failures: a plain HTTP 403 on the
+    WebSocket upgrade stays an API error there ("Streaming error: WebSocket handshake
+    rejected (HTTP 403)"), so it must not become "Your API key was rejected" here —
+    403 also covers non-credential blocks (WAF, region, plan). Only 401, the Voice
+    Agent's 1008 policy-violation close, or an explicitly auth-worded message
+    (`is_auth_failure`'s text hints) count as a rejected key.
+    """
+    status = getattr(getattr(exc, "response", None), "status_code", None)
+    if status == _HTTP_FORBIDDEN:
+        return False
+    return is_auth_failure(exc)
+
+
 def _auth_or_api_error(exc: Exception, message: str) -> CLIError:
     """Map a connect/session exception to the right CLIError: a rejected key becomes
     auth_failure(), anything else becomes APIError(f"{message}: {exc}")."""
-    if is_auth_failure(exc):
+    if _is_rejected_key(exc):
         return auth_failure()
     return APIError(f"{message}: {exc}")
 
@@ -243,6 +281,7 @@ def run_session(
     the agent's first reply to the spoken input and the capture thread waits for
     session.ready before streaming the source.
     """
+    _silence_websockets_logging()
     if connect is None:
         from websockets.sync.client import connect
 
diff --git a/aai_cli/code_gen/__init__.py b/aai_cli/code_gen/__init__.py
index 5ed4cbaa..7758414d 100644
--- a/aai_cli/code_gen/__init__.py
+++ b/aai_cli/code_gen/__init__.py
@@ -42,11 +42,15 @@ def stream(
     merged: dict[str, object],
     *,
     llm: dict[str, object] | None = None,
+    source: str | None = None,
 ) -> str:
     """Generate runnable Python that reproduces this streaming invocation.
 
-    With `llm` (a dict of ``prompts``/``model``/``max_tokens``/``interval``), the script
+    ``source`` mirrors the CLI argument: ``None`` streams the microphone, ``"-"``
+    reads raw PCM16 from stdin, and anything else is a file path/URL decoded through
+    ffmpeg — so the generated script reads the same input the real run would. With
+    `llm` (a dict of ``prompts``/``model``/``max_tokens``/``interval``), the script
     refreshes a prompt-chain over the growing transcript every ``interval`` seconds (0 =
     every turn) — the live sibling of `transcribe --llm` — mirroring how `stream --llm` runs.
     """
-    return _stream.render(merged, llm=llm)
+    return _stream.render(merged, llm=llm, source=source)
diff --git a/aai_cli/code_gen/stream.py b/aai_cli/code_gen/stream.py
index e02bd0fd..91b036aa 100644
--- a/aai_cli/code_gen/stream.py
+++ b/aai_cli/code_gen/stream.py
@@ -15,7 +15,7 @@
     "TurnEvent",
 ]
 
-_PREAMBLE = """import os
+_PREAMBLE = """{stdlib_imports}
 
 import assemblyai as aai
 from assemblyai.streaming.v3 import (
@@ -39,8 +39,7 @@ def on_turn(client: StreamingClient, event: TurnEvent) -> None:
 client.on(StreamingEvents.Turn, on_turn)
 """
 
-_LLM_PREAMBLE = """import os
-import time
+_LLM_PREAMBLE = """{stdlib_imports}
 
 import assemblyai as aai
 from assemblyai.streaming.v3 import (
@@ -108,9 +107,9 @@ def on_turn(client: StreamingClient, event: TurnEvent) -> None:
 """
 
 _FOOTER = """
-print("Listening… press Ctrl-C to stop.")
+{setup}print({banner})
 try:
-    client.stream(aai.extras.MicrophoneStream(sample_rate={rate}))
+    client.stream({stream_expr})
 finally:
     client.disconnect(terminate=True)
 """
@@ -118,14 +117,56 @@ def on_turn(client: StreamingClient, event: TurnEvent) -> None:
 # Same as _FOOTER, but flushes a closing summary (incl. on Ctrl-C) so the turns since the
 # last interval tick are reflected before disconnecting.
 _LLM_FOOTER = """
-print("Listening… press Ctrl-C to stop.")
+{setup}print({banner})
 try:
-    client.stream(aai.extras.MicrophoneStream(sample_rate={rate}))
+    client.stream({stream_expr})
 finally:
     summarize(final=True)
     client.disconnect(terminate=True)
 """
 
+# Source-specific audio plumbing. The v3 client accepts any iterable of PCM16 byte
+# chunks, so the non-mic variants define a small generator and stream that instead of
+# aai.extras.MicrophoneStream. Both mirror what the CLI itself runs: StdinSource reads
+# raw PCM16 off stdin, and FileSource decodes any file/URL through ffmpeg.
+_STDIN_SETUP = """
+# Raw PCM16 mono at {rate} Hz piped on stdin, e.g.:
+#   ffmpeg -i input.mp4 -f s16le -acodec pcm_s16le -ac 1 -ar {rate} - | python script.py
+def stdin_chunks():
+    chunk_bytes = {rate} * 2 // 10  # ~100 ms of 16-bit mono PCM
+    while True:
+        data = sys.stdin.buffer.read(chunk_bytes)
+        if not data:
+            return
+        yield data
+
+
+"""
+
+_FILE_SETUP = """
+# Decode the source (any local file or http(s) URL ffmpeg can read) to PCM16 mono at
+# {rate} Hz and pace it at ~real time — the same pipeline `aai stream <file>` runs.
+def file_chunks():
+    chunk_bytes = {rate} * 2 // 10  # ~100 ms of 16-bit mono PCM
+    ffmpeg = subprocess.Popen(
+        ["ffmpeg", "-nostdin", "-loglevel", "error", "-i", {source},
+         "-f", "s16le", "-acodec", "pcm_s16le", "-ac", "1", "-ar", "{rate}", "-"],
+        stdout=subprocess.PIPE,
+    )
+    try:
+        while True:
+            data = ffmpeg.stdout.read(chunk_bytes)
+            if not data:
+                return
+            yield data
+            time.sleep(len(data) / ({rate} * 2))  # ~real-time pacing
+    finally:
+        ffmpeg.terminate()
+        ffmpeg.wait()
+
+
+"""
+
 
 def _imports_block(merged: dict[str, object]) -> str:
     """Sorted streaming-class import lines; SpeechModel only when a model kwarg is emitted."""
@@ -135,7 +176,7 @@ def _imports_block(merged: dict[str, object]) -> str:
     return "\n".join(f"    {name}," for name in sorted(names))
 
 
-def _build_preamble(imports: str, llm: dict[str, object] | None) -> str:
+def _build_preamble(imports: str, llm: dict[str, object] | None, stdlib_imports: str) -> str:
     """Pick and fill the plain vs. LLM-Gateway preamble for the given imports.
 
     Hosts come from the active environment, so a sandbox run generates a script
@@ -145,6 +186,7 @@ def _build_preamble(imports: str, llm: dict[str, object] | None) -> str:
     if llm:
         prompts = "\n".join(f"    {p!r}," for p in cast("list[str]", llm["prompts"]))
         return _LLM_PREAMBLE.format(
+            stdlib_imports=stdlib_imports,
             imports=imports,
             api_host=env.streaming_host,
             base_url=env.llm_gateway_base,
@@ -153,7 +195,9 @@ def _build_preamble(imports: str, llm: dict[str, object] | None) -> str:
             max_tokens=llm["max_tokens"],
             interval=llm.get("interval", 0.0),
         )
-    return _PREAMBLE.format(imports=imports, api_host=env.streaming_host)
+    return _PREAMBLE.format(
+        stdlib_imports=stdlib_imports, imports=imports, api_host=env.streaming_host
+    )
 
 
 def _build_connect(merged: dict[str, object]) -> str:
@@ -165,16 +209,61 @@ def _build_connect(merged: dict[str, object]) -> str:
     return f"client.connect(\n    StreamingParameters(\n{kwargs}\n    )\n)"
 
 
-def render(merged: dict[str, object], *, llm: dict[str, object] | None = None) -> str:
-    """Generate a runnable microphone-streaming script with the given params.
+def _source_parts(source: str | None, rate: object) -> tuple[set[str], str, str, str]:
+    """The (stdlib imports, setup block, banner text, stream expression) for a source.
 
-    With `llm`, the script transforms the live transcript through the LLM Gateway,
-    refreshing a prompt chain on every finalized turn (the live sibling of
-    `transcribe --llm`).
+    ``source`` mirrors the CLI argument: ``None`` is the microphone, ``"-"`` is raw
+    PCM16 on stdin, anything else is a file path or URL decoded through ffmpeg.
+    """
+    if source == "-":
+        return (
+            {"sys"},
+            _STDIN_SETUP.format(rate=rate),
+            f"Reading raw PCM16 mono audio at {rate} Hz from stdin…",
+            "stdin_chunks()",
+        )
+    if source is not None:
+        return (
+            {"subprocess", "time"},
+            _FILE_SETUP.format(rate=rate, source=repr(source)),
+            f"Streaming {source}…",
+            "file_chunks()",
+        )
+    return (
+        set(),
+        "",
+        "Listening… press Ctrl-C to stop.",
+        (f"aai.extras.MicrophoneStream(sample_rate={rate})"),
+    )
+
+
+def render(
+    merged: dict[str, object],
+    *,
+    llm: dict[str, object] | None = None,
+    source: str | None = None,
+) -> str:
+    """Generate a runnable streaming script with the given params.
+
+    ``source`` selects the audio input the script reads, mirroring the CLI run path:
+    ``None`` captures the microphone, ``"-"`` reads raw PCM16 from stdin, and anything
+    else is a file path or URL decoded to PCM through ffmpeg (the same pipeline a real
+    `aai stream <file>` run uses). With `llm`, the script transforms the live
+    transcript through the LLM Gateway, refreshing a prompt chain on every finalized
+    turn (the live sibling of `transcribe --llm`).
     """
-    preamble = _build_preamble(_imports_block(merged), llm)
-    # Mic capture rate must match StreamingParameters.sample_rate, else audio is corrupt.
+    # Capture/decode rate must match StreamingParameters.sample_rate, else audio is corrupt.
     rate = merged.get("sample_rate", 16000)
+    source_stdlib, setup, banner, stream_expr = _source_parts(source, rate)
+    stdlib = {"os"} | source_stdlib | ({"time"} if llm else set())
+    stdlib_imports = "\n".join(f"import {name}" for name in sorted(stdlib))
+    preamble = _build_preamble(_imports_block(merged), llm, stdlib_imports)
     connect = _build_connect(merged)
     footer = _LLM_FOOTER if llm else _FOOTER
-    return preamble + "\n" + connect + "\n" + footer.format(rate=rate)
+    return (
+        preamble
+        + "\n"
+        + connect
+        + "\n"
+        + footer.format(setup=setup, banner=repr(banner), stream_expr=stream_expr)
+    )
diff --git a/aai_cli/commands/agent.py b/aai_cli/commands/agent.py
index e6240f97..a7cd7d3e 100644
--- a/aai_cli/commands/agent.py
+++ b/aai_cli/commands/agent.py
@@ -19,6 +19,7 @@
 from aai_cli.context import AppState, run_command
 from aai_cli.errors import CLIError, UsageError
 from aai_cli.help_text import examples_epilog
+from aai_cli.streaming.session import validate_output_flags
 from aai_cli.streaming.sources import FileSource
 
 app = typer.Typer()
@@ -56,7 +57,8 @@ def _open_audio(
     # One full-duplex stream for mic + speaker: macOS rejects two separate
     # streams on a device, which silently kills capture.
     duplex = DuplexAudio(target_rate=SAMPLE_RATE, device=device)
-    # notice() self-suppresses in JSON mode and routes to stderr in text mode.
+    # notice() self-suppresses in JSON mode and routes to stderr otherwise, so a
+    # piped `aai agent | …` never reads this advisory as transcript data.
     renderer.notice(
         "Use headphones — the mic stays open while the agent speaks, "
         "so speakers would let it hear itself.\n"
@@ -131,6 +133,7 @@ def agent(
         raise typer.Exit(code=0)
 
     def body(state: AppState, json_mode: bool) -> None:
+        validate_output_flags(json_mode=json_mode, output_field=output_field)
         text_mode, json_mode = output.stream_output_modes(output_field, json_mode=json_mode)
         if voice not in VOICES:
             raise UsageError(
@@ -142,6 +145,16 @@ def body(state: AppState, json_mode: bool) -> None:
         if show_code:
             # Print-only: emit the equivalent agent script from the flags and exit
             # without authenticating or opening audio. Raw stdout for `> script.py`.
+            if source or sample:
+                # A faithful file-driven agent script would need the CLI's whole
+                # ffmpeg-decode + ready-gate + exit-after-reply machinery, which is
+                # impractical to inline; the snippet is microphone-driven, so say so
+                # on stderr instead of silently dropping the source. stderr keeps
+                # `--show-code > script.py` byte-clean.
+                output.error_console.print(
+                    "[aai.warn]Note:[/aai.warn] the generated script uses the microphone; "
+                    "it does not stream the audio source you passed."
+                )
             output.print_code(code_gen.agent(voice, system_prompt_text, greeting))
             return
 
diff --git a/aai_cli/commands/stream.py b/aai_cli/commands/stream.py
index e413ca7d..5d4c397c 100644
--- a/aai_cli/commands/stream.py
+++ b/aai_cli/commands/stream.py
@@ -25,7 +25,12 @@
 from aai_cli.microphone import MicrophoneSource
 from aai_cli.streaming.macos import MacSystemAudioSource
 from aai_cli.streaming.render import StreamRenderer
-from aai_cli.streaming.session import SourceOptions, StreamSession, validate_sources
+from aai_cli.streaming.session import (
+    SourceOptions,
+    StreamSession,
+    validate_output_flags,
+    validate_sources,
+)
 from aai_cli.streaming.sources import TARGET_RATE, FileSource, StdinSource
 
 app = typer.Typer()
@@ -343,6 +348,7 @@ def stream(
     """
 
     def body(state: AppState, json_mode: bool) -> None:
+        validate_output_flags(json_mode=json_mode, output_field=output_field)
         text_mode, json_mode = output.stream_output_modes(output_field, json_mode=json_mode)
         opts = SourceOptions(
             source=source,
@@ -380,20 +386,40 @@ def body(state: AppState, json_mode: bool) -> None:
         base_flags.update(config_builder.auth_header_flags(webhook_auth_header))
 
         if show_code:
-            # Print-only: emit the canonical microphone-streaming script (16 kHz) from
-            # the flags and exit without opening audio or authenticating. Raw stdout so
-            # `--show-code > script.py` yields a runnable file.
+            # Print-only: emit a script faithful to the requested source — mic
+            # (default), stdin (-), or a file/URL — and exit without opening audio or
+            # authenticating. Raw stdout so `--show-code > script.py` is runnable.
+            # The same source validation as a real run, so e.g. a file + --sample-rate
+            # conflict errors here too instead of silently generating mic code.
+            validate_sources(opts, has_llm=bool(llm_prompt), text_mode=text_mode)
             if opts.from_system_audio:
                 raise UsageError("--show-code does not support macOS system audio capture yet.")
+            if opts.source and youtube.is_youtube_url(opts.source):
+                raise UsageError(
+                    "--show-code does not support YouTube sources yet.",
+                    suggestion="Download the audio first (e.g. yt-dlp) and pass the local file.",
+                )
+            code_source: str | None = None
+            if opts.from_stdin:
+                code_source = "-"
+            elif opts.from_file:
+                # check_local=False: generating code for a file you don't have yet is fine.
+                code_source = client.resolve_audio_source(
+                    opts.source, sample=opts.sample, check_local=False
+                )
             merged = config_builder.merge_streaming_params(
-                flags=base_flags | {"sample_rate": TARGET_RATE},
+                # sample_rate precedence: --sample-rate (None is dropped by the merge)
+                # beats --config/--config-file, which beat the 16 kHz default below —
+                # so an explicit `--config sample_rate=…` is honored, not overridden.
+                flags=base_flags | {"sample_rate": opts.sample_rate},
                 overrides=config_kv,
                 config_file=config_file,
             )
+            merged.setdefault("sample_rate", TARGET_RATE)
             gateway = code_gen.gateway_options(
                 list(llm_prompt or []), model, max_tokens, interval=llm_interval
             )
-            output.print_code(code_gen.stream(merged, llm=gateway))
+            output.print_code(code_gen.stream(merged, llm=gateway, source=code_source))
             return
 
         # Validate the requested sources (including that a local file exists) before
diff --git a/aai_cli/microphone.py b/aai_cli/microphone.py
index 1ea500da..ba45ba9e 100644
--- a/aai_cli/microphone.py
+++ b/aai_cli/microphone.py
@@ -153,10 +153,20 @@ def __iter__(self) -> Iterator[bytes]:
         except ImportError as exc:
             raise audio_missing_error() from exc
         except Exception as exc:
+            # "device None" reads like a bug; name the default mic in plain words.
+            target = (
+                "the default microphone"
+                if self.device is None
+                else f"microphone device {self.device}"
+            )
             raise CLIError(
-                f"Could not open the microphone (device {self.device}): {exc}",
+                f"Could not open {target}: {exc}",
                 error_type="mic_error",
                 exit_code=1,
+                suggestion=(
+                    "Check your OS microphone permissions for this terminal, or pick "
+                    "another input with --device (list devices: python -m sounddevice)."
+                ),
             ) from exc
         if self._on_open is not None:
             self._on_open()  # the device is open and recording now
diff --git a/aai_cli/streaming/session.py b/aai_cli/streaming/session.py
index bc6b2c4e..1ef8aa04 100644
--- a/aai_cli/streaming/session.py
+++ b/aai_cli/streaming/session.py
@@ -9,7 +9,7 @@
 
 import typer
 
-from aai_cli import client, config_builder, llm, output
+from aai_cli import choices, client, config_builder, llm, output
 from aai_cli.errors import CLIError, UsageError
 from aai_cli.follow import FollowRenderer
 from aai_cli.streaming.render import StreamRenderer, speaker_prefix
@@ -51,6 +51,16 @@ def has_capture_overrides(self) -> bool:
         return self.sample_rate is not None or self.device is not None
 
 
+def validate_output_flags(*, json_mode: bool, output_field: choices.TextOrJson | None) -> None:
+    """Reject --json combined with -o text, shared by `stream` and `agent`.
+
+    Same precedent as --llm + -o text: contradictory output shapes are a clean
+    usage error, not a silent coin-flip between plain text and NDJSON.
+    """
+    if json_mode and output_field is choices.TextOrJson.text:
+        raise UsageError("--json can't be combined with -o text; pick one output format.")
+
+
 def validate_sources(opts: SourceOptions, *, has_llm: bool, text_mode: bool) -> None:
     """Reject flag combinations that can't be honored, before any audio is opened."""
     if opts.system_audio and opts.system_audio_only:
@@ -72,6 +82,10 @@ def _validate_input_source(opts: SourceOptions) -> None:
                 "--sample-rate and --device require microphone input; use --system-audio."
             )
     elif opts.from_stdin:
+        if opts.sample:
+            # The stdin branch wins dispatch over --sample, so without this the
+            # hosted clip would be silently ignored in favor of the pipe.
+            raise UsageError("- (stdin) cannot be combined with --sample.")
         if opts.device is not None:
             raise UsageError("--device applies only to microphone input.")
     elif opts.from_file and opts.has_capture_overrides:
diff --git a/tests/test_agent_command.py b/tests/test_agent_command.py
index 96794f6f..abe38229 100644
--- a/tests/test_agent_command.py
+++ b/tests/test_agent_command.py
@@ -1,5 +1,7 @@
 import json
 
+import click.testing
+import typer.main
 from typer.testing import CliRunner
 
 from aai_cli import config
@@ -9,6 +11,11 @@
 runner = CliRunner()
 
 
+def _invoke_split(args):
+    """Invoke with stdout/stderr captured separately (typer's runner always mixes)."""
+    return click.testing.CliRunner(mix_stderr=False).invoke(typer.main.get_command(app), args)
+
+
 def _login_result():
     return LoginResult(
         api_key="sk_from_oauth", session_jwt="jwt", session_token="tok", account_id=7
@@ -35,6 +42,7 @@ def fake_run_session(*a, **k):
 
 
 def test_agent_unauthenticated_runs_login(monkeypatch):
+    monkeypatch.setattr("aai_cli.context._interactive_session", lambda: True)
     monkeypatch.setattr("aai_cli.context.run_login_flow", _login_result)
     monkeypatch.setattr("aai_cli.commands.agent.FileSource", lambda src: f"filesrc:{src}")
 
@@ -255,6 +263,54 @@ def test_agent_show_code_prints_without_session(monkeypatch):
     assert 'os.environ["ASSEMBLYAI_API_KEY"]' in result.output
 
 
+def test_agent_show_code_file_source_warns_on_stderr(monkeypatch):
+    # No faithful file-driven agent snippet exists yet; the mic-driven script must
+    # come with an explicit stderr note instead of silently ignoring the source.
+    def _boom(*a, **k):
+        raise AssertionError("must not run a session")
+
+    monkeypatch.setattr("aai_cli.commands.agent.run_session", _boom)
+    result = _invoke_split(["agent", "clip.wav", "--show-code"])
+    assert result.exit_code == 0
+    assert "uses the microphone" in result.stderr
+    # (the console wraps the line, so assert a fragment that fits in 80 cols)
+    assert "does not stream the audio" in result.stderr
+    assert "uses the microphone" not in result.stdout  # stdout stays a clean script
+    compile(result.stdout, "<show-code>", "exec")
+
+
+def test_agent_show_code_sample_warns_on_stderr():
+    result = _invoke_split(["agent", "--sample", "--show-code"])
+    assert result.exit_code == 0
+    assert "uses the microphone" in result.stderr
+
+
+def test_agent_show_code_mic_emits_no_warning():
+    result = _invoke_split(["agent", "--show-code"])
+    assert result.exit_code == 0
+    assert result.stderr == ""  # nothing to warn about: the script matches the run
+    compile(result.stdout, "<show-code>", "exec")
+
+
+def test_agent_json_with_text_output_is_usage_error():
+    # Contradictory output shapes (--json + -o text) are rejected like stream's.
+    result = runner.invoke(app, ["agent", "--json", "-o", "text"])
+    assert result.exit_code == 2
+    assert "can't be combined with -o text" in result.output
+
+
+def test_agent_headphones_notice_routes_to_stderr(monkeypatch):
+    # `aai agent | head` must not eat the advisory as transcript data: in the
+    # default human mode the notice goes to stderr, stdout stays transcript-only.
+    config.set_api_key("default", "sk_live")
+    monkeypatch.setattr("aai_cli.output.resolve_json", lambda *, explicit: False)
+    monkeypatch.setattr("aai_cli.commands.agent.run_session", lambda *a, **k: None)
+    result = _invoke_split(["agent"])
+    assert result.exit_code == 0
+    assert "headphones" in result.stderr.lower()
+    assert "headphones" not in result.stdout.lower()
+
+
 def test_agent_show_code_ignores_json_flag(monkeypatch):
     def _boom(*a, **k):
         raise AssertionError("must not run a session")
diff --git a/tests/test_agent_render.py b/tests/test_agent_render.py
index c697d8c9..9821fcd8 100644
--- a/tests/test_agent_render.py
+++ b/tests/test_agent_render.py
@@ -131,10 +131,23 @@ def test_human_close_commits_open_partial():
     assert "half a sentence" in buf.getvalue()  # committed, not dropped
 
 
-def test_human_notice_rendered():
-    r, buf = _human()
+def test_human_notice_goes_to_stderr_not_stdout():
+    # Human (default) mode is also piped sometimes (`aai agent | head`); the notice
+    # must land on stderr in every non-JSON mode so stdout carries only transcript.
+    out, err = io.StringIO(), io.StringIO()
+    console = theme.make_console(file=out, force_terminal=True, width=80)
+    r = AgentRenderer(json_mode=False, out=out, err=err, console=console)
     r.notice("Half-duplex note.\n")
-    assert "Half-duplex note." in buf.getvalue()
+    assert "Half-duplex note." in err.getvalue()
+    assert out.getvalue() == ""
+
+
+def test_json_notice_is_suppressed():
+    out, err = io.StringIO(), io.StringIO()
+    r = AgentRenderer(json_mode=True, out=out, err=err)
+    r.notice("Half-duplex note.\n")
+    assert out.getvalue() == ""
+    assert err.getvalue() == ""
 
 
 def test_human_connected_and_stopped_announce():
diff --git a/tests/test_agent_session.py b/tests/test_agent_session.py
index ae0821c4..dce45d31 100644
--- a/tests/test_agent_session.py
+++ b/tests/test_agent_session.py
@@ -1,9 +1,12 @@
 import base64
 import json
+import logging
+import types
 
 import pytest
 
 from aai_cli.agent.session import (
+    _WEBSOCKETS_LOGGERS,
     AgentRunConfig,
     VoiceAgentSession,
     _send_audio_loop,
@@ -309,6 +312,92 @@ def start(self):
     assert player.closed is False  # never opened, so never closed
 
 
+class _HandshakeRejected(Exception):
+    """Mimics websockets' InvalidStatus: a structured HTTP status on ``.response``."""
+
+    def __init__(self, status):
+        super().__init__(f"server rejected WebSocket connection: HTTP {status}")
+        self.response = types.SimpleNamespace(status_code=status)
+
+
+def _run_with_connect(connect):
+    run_session(
+        "sk",
+        renderer=FakeRenderer(),
+        player=FakePlayer(),
+        mic=[],
+        config=AgentRunConfig(voice="ivy", system_prompt="x", greeting="hi"),
+        connect=connect,
+    )
+
+
+def test_run_session_handshake_403_is_api_error_like_stream():
+    # Harmonized with `stream`: a plain handshake 403 is an API error (exit 1), not
+    # "Your API key was rejected" — 403 also covers non-credential blocks.
+    def reject(url, **kwargs):
+        raise _HandshakeRejected(403)
+
+    with pytest.raises(APIError) as exc:
+        _run_with_connect(reject)
+    assert exc.value.error_type == "api_error"
+    assert exc.value.exit_code == 1
+    assert "HTTP 403" in exc.value.message
+
+
+def test_run_session_handshake_401_is_still_auth_failure():
+    # A genuinely auth-shaped rejection (HTTP 401) keeps the rejected-key path.
+    def reject(url, **kwargs):
+        raise _HandshakeRejected(401)
+
+    with pytest.raises(NotAuthenticated) as exc:
+        _run_with_connect(reject)
+    assert exc.value.exit_code == 4
+
+
+def test_run_session_auth_worded_failure_is_still_auth_failure():
+    # The text heuristic ("unauthorized" etc.) keeps working for real bad keys.
+    def reject(url, **kwargs):
+        raise RuntimeError("connection rejected: Unauthorized")
+
+    with pytest.raises(NotAuthenticated):
+        _run_with_connect(reject)
+
+
+class _CleanWS:
+    def send(self, _msg):
+        pass
+
+    def __iter__(self):
+        return iter(())
+
+    def close(self):
+        pass
+
+
+def test_run_session_silences_websockets_loggers():
+    # websockets' sync reader thread logs teardown errors (EOFError tracebacks) via
+    # its own loggers; run_session must mute them so they never hit the user's stderr.
+    loggers = [logging.getLogger(name) for name in _WEBSOCKETS_LOGGERS]
+    previous = [lg.level for lg in loggers]
+    try:
+        for lg in loggers:
+            lg.setLevel(logging.NOTSET)
+        _run_with_connect(lambda url, **kwargs: _CleanWS())
+        for lg in loggers:
+            assert lg.level == logging.CRITICAL
+            assert not lg.isEnabledFor(logging.ERROR)  # an ERROR record is dropped
+    finally:
+        for lg, level in zip(loggers, previous, strict=True):
+            lg.setLevel(level)
+
+
+def test_websockets_logger_names_cover_the_sync_client():
+    # The sync client logs through "websockets.client"; pin that the silenced set
+    # covers it (and the parent, for any future child loggers).
+    assert "websockets.client" in _WEBSOCKETS_LOGGERS
+    assert "websockets" in _WEBSOCKETS_LOGGERS
+
+
 def test_run_session_non_auth_failure_stays_api_error():
     def boom(url, **kwargs):
         raise RuntimeError("network unreachable")
diff --git a/tests/test_code_gen_stream.py b/tests/test_code_gen_stream.py
new file mode 100644
index 00000000..2f66faf4
--- /dev/null
+++ b/tests/test_code_gen_stream.py
@@ -0,0 +1,117 @@
+"""Source fidelity of `stream --show-code` generation (mic vs stdin vs file/URL).
+
+The generated script must read the same audio input the real run would, at the
+same sample rate, and every variant must compile (`python -m py_compile` parity).
+"""
+
+from __future__ import annotations
+
+from hypothesis import given
+from hypothesis import strategies as st
+
+from aai_cli import code_gen
+
+_LLM = {"prompts": ["summarize"], "model": "m", "max_tokens": 100, "interval": 5.0}
+
+
+def _compiles(code: str) -> None:
+    # compile() is stricter than ast.parse() and is what `python file.py` runs through.
+    compile(code, "<generated>", "exec")
+
+
+# --- microphone (default) ----------------------------------------------------
+def test_mic_variant_is_unchanged_and_has_no_source_plumbing():
+    code = code_gen.stream({"sample_rate": 16000})
+    _compiles(code)
+    assert "client.stream(aai.extras.MicrophoneStream(sample_rate=16000))" in code
+    assert "print('Listening… press Ctrl-C to stop.')" in code
+    assert "import subprocess" not in code
+    assert "import sys" not in code
+    assert "stdin_chunks" not in code
+    assert "file_chunks" not in code
+
+
+def test_mic_variant_honors_sample_rate():
+    code = code_gen.stream({"sample_rate": 8000})
+    _compiles(code)
+    assert "MicrophoneStream(sample_rate=8000)" in code
+    assert "sample_rate=8000," in code  # StreamingParameters matches the capture rate
+
+
+# --- stdin (`-`) ---------------------------------------------------------------
+def test_stdin_variant_reads_stdin_not_the_mic():
+    code = code_gen.stream({"sample_rate": 16000}, source="-")
+    _compiles(code)
+    assert "client.stream(stdin_chunks())" in code
+    assert "sys.stdin.buffer.read(chunk_bytes)" in code
+    assert "import sys" in code
+    assert "MicrophoneStream" not in code
+
+
+def test_stdin_variant_honors_sample_rate():
+    code = code_gen.stream({"sample_rate": 8000}, source="-")
+    _compiles(code)
+    assert "chunk_bytes = 8000 * 2 // 10" in code
+    assert "-ar 8000" in code  # the example ffmpeg pipe matches the declared rate
+    assert "sample_rate=8000," in code
+
+
+# --- file / URL ---------------------------------------------------------------
+def test_file_variant_decodes_that_file_through_ffmpeg():
+    code = code_gen.stream({"sample_rate": 16000}, source="rec.wav")
+    _compiles(code)
+    assert "client.stream(file_chunks())" in code
+    assert "'rec.wav'" in code  # the source is embedded as the ffmpeg input
+    assert '"-ar", "16000"' in code
+    assert "chunk_bytes = 16000 * 2 // 10" in code
+    assert "time.sleep(len(data) / (16000 * 2))" in code  # ~real-time pacing
+    assert "import subprocess" in code
+    assert "import time" in code
+    assert "print('Streaming rec.wav…')" in code
+    assert "MicrophoneStream" not in code
+
+
+def test_file_variant_honors_sample_rate():
+    code = code_gen.stream({"sample_rate": 8000}, source="clip.mp3")
+    _compiles(code)
+    assert '"-ar", "8000"' in code  # decode rate == StreamingParameters.sample_rate
+    assert "sample_rate=8000," in code
+
+
+def test_url_source_is_passed_to_ffmpeg_verbatim():
+    code = code_gen.stream({}, source="https://assembly.ai/wildfires.mp3")
+    _compiles(code)
+    assert "'https://assembly.ai/wildfires.mp3'" in code
+    assert "file_chunks()" in code
+
+
+def test_file_variant_with_quotes_in_name_still_compiles():
+    code = code_gen.stream({}, source='rec\'s "weird" name.wav')
+    _compiles(code)
+
+
+# --- --llm composition ---------------------------------------------------------
+def test_llm_with_file_source_streams_file_and_flushes_summary():
+    code = code_gen.stream({"sample_rate": 16000}, llm=_LLM, source="rec.wav")
+    _compiles(code)
+    assert "client.stream(file_chunks())" in code
+    assert "run_chain" in code
+    assert "summarize(final=True)" in code
+    assert code.count("import time") == 1  # llm + file both need time; imported once
+
+
+def test_llm_with_stdin_source_keeps_both_imports():
+    code = code_gen.stream({}, llm=_LLM, source="-")
+    _compiles(code)
+    assert "client.stream(stdin_chunks())" in code
+    assert "import sys" in code
+    assert "import time" in code
+
+
+# --- fuzz: every source shape always compiles ----------------------------------
+@given(st.text(st.characters(blacklist_categories=["Cs"]), max_size=40) | st.none())
+def test_fuzz_any_source_always_compiles(source):
+    # Arbitrary file names (quotes, newlines, braces, unicode), "-" (stdin), ""
+    # and None (mic) must all yield a compilable script.
+    _compiles(code_gen.stream({"sample_rate": 16000}, source=source))
+    _compiles(code_gen.stream({"sample_rate": 16000}, llm=_LLM, source=source))
diff --git a/tests/test_microphone.py b/tests/test_microphone.py
index 1a26fe6b..7e2f141c 100644
--- a/tests/test_microphone.py
+++ b/tests/test_microphone.py
@@ -101,6 +101,26 @@ def boom(*, sample_rate, device):
         list(mic)
     assert exc.value.error_type == "mic_error"
     assert exc.value.exit_code == 1
+    assert "microphone device 99" in exc.value.message  # names the explicit device
+    assert "Invalid device" in exc.value.message  # keeps the underlying cause
+    assert exc.value.suggestion is not None
+    assert "--device" in exc.value.suggestion
+
+
+def test_default_device_error_names_default_microphone():
+    # device=None must read as "the default microphone", not the raw "device None",
+    # and carry an actionable suggestion (permissions / pick another device).
+    def boom(*, sample_rate, device):
+        raise OSError("Error querying device -1")
+
+    mic = MicrophoneSource(capture_rate=16000, stream_factory=boom)
+    with pytest.raises(CLIError) as exc:
+        list(mic)
+    assert "the default microphone" in exc.value.message
+    assert "device None" not in exc.value.message
+    assert exc.value.suggestion is not None
+    assert "permissions" in exc.value.suggestion
+    assert "python -m sounddevice" in exc.value.suggestion
 
 
 def test_closes_closeable_stream_in_finally():
diff --git a/tests/test_stream_command.py b/tests/test_stream_command.py
index 85593ccf..142e0a9f 100644
--- a/tests/test_stream_command.py
+++ b/tests/test_stream_command.py
@@ -124,6 +124,7 @@ def fake(api_key, source, *, params, on_begin=None, **_kwargs):
 
 
 def test_stream_unauthenticated_runs_login(monkeypatch):
+    monkeypatch.setattr("aai_cli.context._interactive_session", lambda: True)
     monkeypatch.setattr("aai_cli.context.run_login_flow", _login_result)
 
     def fake_stream_audio(api_key, source, *, params, **_kwargs):
@@ -389,6 +390,104 @@ def test_stream_show_code_prints_without_streaming(monkeypatch):
     assert 'os.environ["ASSEMBLYAI_API_KEY"]' in result.output
 
 
+def test_stream_show_code_file_source_streams_that_file():
+    # A file source must generate file-streaming code, not silently emit mic code.
+    # The file need not exist: generating code for it is legitimate (check_local=False).
+    result = runner.invoke(app, ["stream", "rec.wav", "--show-code"])
+    assert result.exit_code == 0
+    assert "client.stream(file_chunks())" in result.output
+    assert "'rec.wav'" in result.output  # the ffmpeg input is the file passed
+    assert "MicrophoneStream" not in result.output
+    compile(result.output, "<show-code>", "exec")  # the printed script is runnable
+
+
+def test_stream_show_code_stdin_source_reads_stdin():
+    result = runner.invoke(app, ["stream", "-", "--show-code"])
+    assert result.exit_code == 0
+    assert "client.stream(stdin_chunks())" in result.output
+    assert "sys.stdin.buffer" in result.output
+    assert "MicrophoneStream" not in result.output
+    compile(result.output, "<show-code>", "exec")
+
+
+def test_stream_show_code_sample_streams_hosted_clip():
+    result = runner.invoke(app, ["stream", "--sample", "--show-code"])
+    assert result.exit_code == 0
+    assert "wildfires.mp3" in result.output
+    assert "file_chunks()" in result.output
+    assert "MicrophoneStream" not in result.output
+
+
+def test_stream_show_code_honors_sample_rate_flag():
+    result = runner.invoke(app, ["stream", "--sample-rate", "8000", "--show-code"])
+    assert result.exit_code == 0
+    assert "MicrophoneStream(sample_rate=8000)" in result.output
+    assert "sample_rate=8000," in result.output  # params match the capture rate
+
+
+def test_stream_show_code_honors_config_sample_rate():
+    # An explicit `--config sample_rate=…` must not be overridden by the 16 kHz default.
+    result = runner.invoke(app, ["stream", "--config", "sample_rate=8000", "--show-code"])
+    assert result.exit_code == 0
+    assert "MicrophoneStream(sample_rate=8000)" in result.output
+    assert "sample_rate=8000," in result.output
+
+
+def test_stream_show_code_sample_rate_flag_beats_config():
+    result = runner.invoke(
+        app, ["stream", "--sample-rate", "8000", "--config", "sample_rate=44100", "--show-code"]
+    )
+    assert result.exit_code == 0
+    assert "MicrophoneStream(sample_rate=8000)" in result.output
+    assert "44100" not in result.output
+
+
+def test_stream_show_code_file_with_mic_flags_rejected():
+    # --show-code applies the same source validation as a real run, so the
+    # file + --sample-rate conflict errors instead of generating mic code.
+    result = runner.invoke(app, ["stream", "rec.wav", "--sample-rate", "8000", "--show-code"])
+    assert result.exit_code == 2
+    assert "--sample-rate" in result.output
+
+
+def test_stream_show_code_rejects_youtube_sources():
+    result = runner.invoke(app, ["stream", "https://youtu.be/abc", "--show-code"])
+    assert result.exit_code == 2
+    assert "YouTube" in result.output
+
+
+def test_stream_json_with_text_output_is_usage_error():
+    # Contradictory output shapes (--json + -o text) are rejected up front, before
+    # credentials, like the --llm + -o text precedent.
+    result = runner.invoke(app, ["stream", "--json", "-o", "text"])
+    assert result.exit_code == 2
+    assert "can't be combined with -o text" in result.output
+
+
+def test_stream_stdin_with_sample_rejected():
+    config.set_api_key("default", "sk_live")
+    result = runner.invoke(app, ["stream", "-", "--sample"], input=b"\x00\x00")
+    assert result.exit_code == 2
+    assert "--sample" in result.output
+
+
+def test_stream_file_source_with_sample_rejected(monkeypatch, tmp_path):
+    # A real source plus --sample is a conflict (the file would silently lose),
+    # surfaced by resolve_audio_source as a usage error before any streaming.
+    config.set_api_key("default", "sk_live")
+
+    def _boom(*a, **k):
+        raise AssertionError("must not stream a conflicting source")
+
+    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", _boom)
+    wav = tmp_path / "a.wav"
+    wav.write_bytes(b"RIFF")
+    result = runner.invoke(app, ["stream", str(wav), "--sample"])
+    assert result.exit_code == 2
+    assert "--sample" in result.output
+    assert "cannot be combined" in result.output
+
+
 def test_stream_show_code_ignores_json_flag(monkeypatch):
     def _boom(*a, **k):
         raise AssertionError("must not stream")

From e5bcdbd33963e5ba86da06e8e1fd685ea7a766f6 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 10 Jun 2026 04:53:17 +0000
Subject: [PATCH 03/11] Regenerate help-text snapshots for new
 transcribe/transcripts flags

https://claude.ai/code/session_01Uv7cEgJi2LgknkvfHP52g7
---
 .../test_cli_output_snapshots.ambr            | 40 +++++++++++--------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/tests/__snapshots__/test_cli_output_snapshots.ambr b/tests/__snapshots__/test_cli_output_snapshots.ambr
index dfe6b85b..7c08045f 100644
--- a/tests/__snapshots__/test_cli_output_snapshots.ambr
+++ b/tests/__snapshots__/test_cli_output_snapshots.ambr
@@ -170,6 +170,9 @@
   
   ╭─ Options ────────────────────────────────────────────────────────────────────╮
   │ --port              INTEGER  Local server port. [default: 3000]              │
+  │ --host              TEXT     Interface to bind. Loopback by default; pass    │
+  │                              0.0.0.0 to expose on your network.              │
+  │                              [default: 127.0.0.1]                            │
   │ --no-open                    Launch, but don't open the browser.             │
   │ --no-install                 Skip dependency install; launch directly.       │
   │ --json                       Output raw JSON.                                │
@@ -224,7 +227,8 @@
    conversation and writes no code.
   
   ╭─ Arguments ──────────────────────────────────────────────────────────────────╮
-  │   template       [TEMPLATE]   Template to scaffold (omit to pick             │
+  │   template       [TEMPLATE]   Template to scaffold: audio-transcription,     │
+  │                               live-captions, voice-agent (omit to pick       │
   │                               interactively).                                │
   │   directory      [DIRECTORY]  Target directory (default: <template>).        │
   ╰──────────────────────────────────────────────────────────────────────────────╯
@@ -529,14 +533,13 @@
   
    Usage: aai setup install [OPTIONS]
   
-   Set up your coding agent for AssemblyAI by installing three things:
+   Set up your coding agent for AssemblyAI (docs MCP server + skills).
   
-   the assemblyai-docs MCP server (live API docs, via `claude mcp add`), the
-   AssemblyAI
-   skill (via `npx skills add`), and the bundled aai-cli skill (copied from this
-   package,
-   no network). Each step is idempotent and skipped if already present unless
-   --force.
+   Installs three things: the assemblyai-docs MCP server (live API docs, via
+   `claude mcp add`), the AssemblyAI skill (via `npx skills add`), and the
+   bundled
+   aai-cli skill (copied from this package, no network). Each step is idempotent
+   and skipped if already present unless --force.
   
   ╭─ Options ────────────────────────────────────────────────────────────────────╮
   │ --scope        [user|project|local]  Config scope to register the MCP under. │
@@ -618,7 +621,9 @@
    and
    opens a cloudflared quick tunnel, printing a shareable
    https://*.trycloudflare.com
-   URL. Requires cloudflared (`brew install cloudflared`).
+   URL. Requires cloudflared (macOS: `brew install cloudflared`; other platforms:
+   https://developers.cloudflare.com/cloudflare-one/connections/connect-networks/
+   downloads/).
   
   ╭─ Options ────────────────────────────────────────────────────────────────────╮
   │ --port              INTEGER  Local server port. [default: 3000]              │
@@ -882,10 +887,12 @@
   │ --topic-detection                                    Detect IAB topics.      │
   ╰──────────────────────────────────────────────────────────────────────────────╯
   ╭─ Customization ──────────────────────────────────────────────────────────────╮
-  │ --word-boost                  TEXT     Boost a word (repeatable).            │
-  │ --custom-spelling-file        FILE     JSON map of custom spellings.         │
-  │ --audio-start                 INTEGER  Start offset in ms.                   │
-  │ --audio-end                   INTEGER  End offset in ms.                     │
+  │ --word-boost                  TEXT                  Boost a word             │
+  │                                                     (repeatable).            │
+  │ --custom-spelling-file        FILE                  JSON map of custom       │
+  │                                                     spellings.               │
+  │ --audio-start                 INTEGER RANGE [x>=0]  Start offset in ms.      │
+  │ --audio-end                   INTEGER               End offset in ms.        │
   ╰──────────────────────────────────────────────────────────────────────────────╯
   ╭─ Webhooks ───────────────────────────────────────────────────────────────────╮
   │ --webhook-url                TEXT        Webhook URL for completion.         │
@@ -968,9 +975,10 @@
    List recent transcripts.
   
   ╭─ Options ────────────────────────────────────────────────────────────────────╮
-  │ --limit        INTEGER  How many transcripts to show. [default: 10]          │
-  │ --json                  Output raw JSON.                                     │
-  │ --help                  Show this message and exit.                          │
+  │ --limit        INTEGER RANGE [x>=1]  How many transcripts to show.           │
+  │                                      [default: 10]                           │
+  │ --json                               Output raw JSON.                        │
+  │ --help                               Show this message and exit.             │
   ╰──────────────────────────────────────────────────────────────────────────────╯
   
    Examples

From e3be1d38635689a97f39e91ef69e168f88d65d29 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 10 Jun 2026 04:55:22 +0000
Subject: [PATCH 04/11] Fix QA findings in init/dev/deploy/doctor: local
 binding, guards, hints

- aai dev (and share's inner server) now binds 127.0.0.1 by default;
  --host 0.0.0.0 is an explicit opt-in, and the printed URL matches the
  actual bind (template Procfiles keep 0.0.0.0 for deploy targets)
- deploy checks for a Procfile before requiring a deploy CLI, and
  rejects --prod for non-Vercel targets
- cloudflared/flyctl install hints are platform-aware (brew on macOS,
  official install docs elsewhere)
- the init banner prints to stderr, keeping stdout pipeline-clean
- init --help enumerates the template names
- doctor names the active profile/environment and points its
  connectivity fix at the active environment's API host
- keyring-write failures suggest ASSEMBLYAI_API_KEY on headless boxes
- config validation errors compact to field: reason (no pydantic URLs)
- unknown-env errors mention the profile's stored env as a third source
- setup install's help summary is a complete sentence
- AGENTS.md: --json is never auto-enabled; command list matches reality

https://claude.ai/code/session_01Uv7cEgJi2LgknkvfHP52g7
---
 AGENTS.md                  |   4 +-
 aai_cli/commands/deploy.py |  26 ++++++++--
 aai_cli/commands/dev.py    |  16 ++++--
 aai_cli/commands/doctor.py |  26 ++++++++--
 aai_cli/commands/init.py   |  13 +++--
 aai_cli/commands/setup.py  |   9 ++--
 aai_cli/commands/share.py  |  18 ++++++-
 aai_cli/config.py          |  20 +++++++-
 aai_cli/environments.py    |   4 +-
 aai_cli/init/devserver.py  |  32 ++++++++++--
 aai_cli/init/procfile.py   |  18 +++++--
 tests/test_config.py       |  62 +++++++++++++++++++++++
 tests/test_deploy.py       | 101 ++++++++++++++++++++++++++++++++++---
 tests/test_dev.py          |  31 +++++++++++-
 tests/test_devserver.py    |  84 +++++++++++++++++++++++++++++-
 tests/test_doctor.py       |  73 ++++++++++++++++++++++++++-
 tests/test_environments.py |   4 ++
 tests/test_init_command.py |  45 ++++++++++++++++-
 tests/test_procfile.py     |  14 +++++
 tests/test_setup.py        |  12 +++++
 tests/test_share.py        |  55 +++++++++++++++++++-
 21 files changed, 619 insertions(+), 48 deletions(-)

diff --git a/AGENTS.md b/AGENTS.md
index 2c651933..9846419c 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -70,7 +70,7 @@ A Typer CLI. `aai_cli/main.py` builds the `app`, registers each command sub-app,
 
 ### Command layer
 
-Each file in `aai_cli/commands/` is a Typer sub-app (`transcribe`, `stream`, `transcripts`, `agent`, `llm`, `login`, `doctor`, `init`, `claude`). Command bodies run through `context.run_command(ctx, fn, json=...)`, which maps any `CLIError` to clean stderr output + the error's exit code. Commands never print tracebacks for expected failures.
+Each file in `aai_cli/commands/` is a Typer sub-app (`transcribe`, `stream`, `transcripts`, `agent`, `llm`, `login` (login/logout/whoami), `doctor`, `init`, `dev`, `share`, `deploy`, `setup`, `onboard`, `account` (balance/usage/limits), `keys`, `sessions`, `audit`). Command bodies run through `context.run_command(ctx, fn, json=...)`, which maps any `CLIError` to clean stderr output + the error's exit code. Commands never print tracebacks for expected failures.
 
 ### Cross-cutting state (resolution order matters)
 
@@ -78,7 +78,7 @@ Each file in `aai_cli/commands/` is a Typer sub-app (`transcribe`, `stream`, `tr
 - **`config.py`** — profiles persisted in `config.toml` (via `platformdirs`); the **API key lives only in the OS keyring** (`KEYRING_SERVICE = "assemblyai-cli"`), never in a dotfile. Key resolution order: `--api-key` flag (validation paths only) → `ASSEMBLYAI_API_KEY` env → keyring. **Run commands deliberately expose no `--api-key` flag** so keys can't leak into `ps`/shell history.
 - **`environments.py`** — a frozen `Environment` (api_base, streaming_host, llm_gateway_base, ams_base, stytch_*). `DEFAULT_ENV` is **`production`**; use `--sandbox` (or `--env sandbox000` / `AAI_ENV`) to target the sandbox. The active environment is a process-global set once at startup; precedence: `--env` → `AAI_ENV` → profile's stored env → default. A credential is only valid against the environment that minted it.
 - **`client.py`** — thin wrappers over the `assemblyai` SDK (`transcribe`, `list_transcripts`, `stream_audio`, etc.). It normalizes SDK exceptions: auth failures become a single clean `auth_failure()` `CLIError`; everything else becomes `APIError`. New SDK calls should follow this try/except shape.
-- **`errors.py`** — the `CLIError` hierarchy (each with `error_type` + `exit_code`). `output.py` emits errors to **stderr**; stdout stays clean for pipelines. `--json` (auto-enabled when piped/agent-run) switches to machine-readable output.
+- **`errors.py`** — the `CLIError` hierarchy (each with `error_type` + `exit_code`). `output.py` emits errors to **stderr**; stdout stays clean for pipelines. `--json` switches to machine-readable output; it is never auto-enabled — `output.resolve_json()` deliberately keeps human text the default even when piped or agent-run.
 
 ### Feature subsystems
 
diff --git a/aai_cli/commands/deploy.py b/aai_cli/commands/deploy.py
index f37a7ff3..487e20a6 100644
--- a/aai_cli/commands/deploy.py
+++ b/aai_cli/commands/deploy.py
@@ -3,6 +3,7 @@
 
 import shutil
 import subprocess
+import sys
 from dataclasses import dataclass
 from pathlib import Path
 
@@ -10,8 +11,9 @@
 
 from aai_cli import help_panels, output
 from aai_cli.context import AppState, run_command
-from aai_cli.errors import CLIError
+from aai_cli.errors import CLIError, UsageError
 from aai_cli.help_text import examples_epilog
+from aai_cli.init import procfile
 
 # Flattened single-command sub-typer (same pattern as `aai dev`).
 app = typer.Typer()
@@ -22,10 +24,11 @@ class Target:
     name: str  # human label, e.g. "Vercel"
     bin: str  # executable resolved via shutil.which
     flag: str  # CLI selector, e.g. "--vercel"
-    install: str  # full hint sentence shown when the CLI is missing
+    install: str  # hint sentence shown when the CLI is missing (everywhere, or macOS-only)
     deploy_args: tuple[str, ...]  # subcommand(s) appended after `bin`
     supports_prod: bool = False  # whether `--prod` adds a production flag
     post_deploy_args: tuple[str, ...] | None = None  # command run after a successful deploy
+    install_non_darwin: str | None = None  # hint off-macOS, when `install` is brew-specific
 
     def command(self, *, prod: bool) -> list[str]:
         argv = [self.bin, *self.deploy_args]
@@ -54,7 +57,9 @@ def command(self, *, prod: bool) -> list[str]:
     name="Fly",
     bin="fly",
     flag="--fly",
+    # brew is macOS-specific; elsewhere point at the official install docs.
     install="Install it with `brew install flyctl`.",
+    install_non_darwin="Install it: https://fly.io/docs/flyctl/install/",
     # `fly launch` does it all: creates the app, generates fly.toml (detecting the
     # shipped Dockerfile), and deploys — so no fly.toml needs to exist beforehand.
     deploy_args=("launch",),
@@ -74,10 +79,17 @@ def _resolve_target(selected: list[Target]) -> Target:
     return selected[0] if selected else VERCEL  # Vercel is the default
 
 
+def _install_hint(target: Target) -> str:
+    """The platform-appropriate install hint: brew on macOS, docs URL elsewhere."""
+    if target.install_non_darwin is not None and sys.platform != "darwin":
+        return target.install_non_darwin
+    return target.install
+
+
 def _require_cli(target: Target) -> None:
     if shutil.which(target.bin) is None:
         raise CLIError(
-            f"The {target.name} CLI is required to deploy. {target.install}",
+            f"The {target.name} CLI is required to deploy. {_install_hint(target)}",
             error_type="missing_dependency",
             exit_code=1,
         )
@@ -101,6 +113,14 @@ def _confirmed(target: Target, *, assume_yes: bool) -> bool:
 
 def run_deploy(*, target: Target, prod: bool, assume_yes: bool) -> None:
     """Confirm, then run the target's deploy command in the current directory."""
+    if prod and not target.supports_prod:
+        raise UsageError(
+            "--prod is only supported for Vercel deploys.",
+            suggestion=f"Drop --prod, or drop {target.flag} to deploy to Vercel.",
+        )
+    # Same not-a-project guard as `aai dev`/`aai share`, checked before CLI presence
+    # so an empty directory says "run `aai init`", not "install the Vercel CLI".
+    procfile.require_procfile(Path.cwd())
     _require_cli(target)
     if not _confirmed(target, assume_yes=assume_yes):
         output.console.print("Aborted.")
diff --git a/aai_cli/commands/dev.py b/aai_cli/commands/dev.py
index 0c5cb2e9..8e2861bb 100644
--- a/aai_cli/commands/dev.py
+++ b/aai_cli/commands/dev.py
@@ -17,7 +17,7 @@
 app = typer.Typer()
 
 
-def run_dev(*, port: int, no_install: bool, no_open: bool, json_mode: bool) -> None:
+def run_dev(*, port: int, host: str, no_install: bool, no_open: bool, json_mode: bool) -> None:
     """Boot the project's Procfile `web:` process locally, with live reload."""
     target = Path.cwd()
     use_uv = runner.has_uv()
@@ -34,8 +34,11 @@ def run_dev(*, port: int, no_install: bool, no_open: bool, json_mode: bool) -> N
     if any(s["status"] == "failed" for s in report):
         raise typer.Exit(code=1)
 
-    command = devserver.dev_command(target, web, use_uv=use_uv)
-    url = f"http://localhost:{chosen_port}"
+    command = devserver.dev_command(target, web, use_uv=use_uv, host=host)
+    # The printed URL reflects the actual bind: "localhost" for the loopback
+    # default, the literal host for an explicit --host.
+    url_host = "localhost" if host == devserver.LOCAL_HOST else host
+    url = f"http://{url_host}:{chosen_port}"
     if not json_mode:
         output.console.print(
             f"[aai.heading]Starting[/aai.heading] [aai.url]{escape(url)}[/aai.url]"
@@ -62,6 +65,11 @@ def run_dev(*, port: int, no_install: bool, no_open: bool, json_mode: bool) -> N
 def dev(
     ctx: typer.Context,
     port: int = typer.Option(3000, "--port", help="Local server port."),
+    host: str = typer.Option(
+        devserver.LOCAL_HOST,
+        "--host",
+        help="Interface to bind. Loopback by default; pass 0.0.0.0 to expose on your network.",
+    ),
     no_open: bool = typer.Option(False, "--no-open", help="Launch, but don't open the browser."),
     no_install: bool = typer.Option(
         False, "--no-install", help="Skip dependency install; launch directly."
@@ -75,6 +83,6 @@ def dev(
     """
 
     def body(_state: AppState, json_mode: bool) -> None:
-        run_dev(port=port, no_install=no_install, no_open=no_open, json_mode=json_mode)
+        run_dev(port=port, host=host, no_install=no_install, no_open=no_open, json_mode=json_mode)
 
     run_command(ctx, body, json=json_out)
diff --git a/aai_cli/commands/doctor.py b/aai_cli/commands/doctor.py
index b6fe12e9..e4839612 100644
--- a/aai_cli/commands/doctor.py
+++ b/aai_cli/commands/doctor.py
@@ -3,12 +3,12 @@
 import shutil
 import sys
 from collections.abc import Mapping, Sequence
-from typing import Protocol, TypedDict
+from typing import NotRequired, Protocol, TypedDict
 
 import typer
 from rich.markup import escape
 
-from aai_cli import client, config, help_panels, options, output, theme
+from aai_cli import client, config, environments, help_panels, options, output, theme
 from aai_cli.context import AppState, resolve_profile, run_command
 from aai_cli.errors import CLIError, NotAuthenticated
 from aai_cli.help_text import examples_epilog
@@ -28,6 +28,11 @@ class Check(TypedDict):
 
 class DoctorResult(TypedDict):
     ok: bool
+    # Which profile/environment the checks ran against. `aai doctor` always fills
+    # these in; the onboarding wizard reuses `render` for a partial check without
+    # them, so they stay optional.
+    profile: NotRequired[str]
+    environment: NotRequired[str]
     checks: list[Check]
 
 
@@ -82,7 +87,8 @@ def _check_api_key(profile: str) -> Check:
             affects=["everything"],
         )
     # validate_key doubles as the connectivity probe: it makes one cheap authed call,
-    # so a pass means the key is valid AND api.assemblyai.com is reachable.
+    # so a pass means the key is valid AND the active environment's API is reachable.
+    api_host = environments.active().api_base.removeprefix("https://")
     try:
         valid = client.validate_key(key)
     except CLIError as exc:
@@ -90,7 +96,7 @@ def _check_api_key(profile: str) -> Check:
             "api-key",
             "fail",
             f"Could not reach AssemblyAI: {exc.message}",
-            fix="Check your network/proxy and that api.assemblyai.com is reachable.",
+            fix=f"Check your network/proxy and that {api_host} is reachable.",
             affects=["everything"],
         )
     if valid:
@@ -197,6 +203,11 @@ def _check_coding_agent() -> Check:
 def render(data: DoctorResult) -> str:
     checks = data["checks"]
     lines = [output.heading("Environment check")]
+    profile, environment = data.get("profile"), data.get("environment")
+    if profile is not None and environment is not None:
+        lines.append(
+            "  " + output.hint(f"profile: {escape(profile)} · environment: {escape(environment)}")
+        )
     for c in checks:
         symbol, style = _SYMBOL.get(c["status"], (theme.SYMBOL_HINT, "aai.muted"))
         lines.append(
@@ -238,7 +249,12 @@ def body(state: AppState, json_mode: bool) -> None:
             _check_coding_agent(),
         ]
         ok = not any(c["status"] == "fail" for c in checks)
-        payload: DoctorResult = {"ok": ok, "checks": checks}
+        payload: DoctorResult = {
+            "ok": ok,
+            "profile": profile,
+            "environment": environments.active().name,
+            "checks": checks,
+        }
         output.emit(payload, render, json_mode=json_mode)
         if not ok:
             raise typer.Exit(code=1)
diff --git a/aai_cli/commands/init.py b/aai_cli/commands/init.py
index 11297cd3..1d58b285 100644
--- a/aai_cli/commands/init.py
+++ b/aai_cli/commands/init.py
@@ -182,8 +182,9 @@ def run_init(
     running dev server — it stops after install and leaves the run command as a hint.
     """
     if not json_mode:
-        # Vercel-style banner at the top of the run.
-        output.console.print(
+        # Vercel-style banner at the top of the run. Decoration goes to stderr (data →
+        # stdout): it must never pollute a piped stdout, even on an error path.
+        output.error_console.print(
             f"[aai.heading]AssemblyAI CLI[/aai.heading] [aai.muted]{__version__}[/aai.muted]"
         )
     chosen = _resolve_template(template)
@@ -243,7 +244,13 @@ def run_init(
 def init(
     ctx: typer.Context,
     template: str | None = typer.Argument(
-        None, help="Template to scaffold (omit to pick interactively)."
+        None,
+        # Enumerate the registry so the help text can never drift from the templates
+        # that actually ship.
+        help=(
+            f"Template to scaffold: {', '.join(templates.TEMPLATE_ORDER)} "
+            "(omit to pick interactively)."
+        ),
     ),
     directory: str | None = typer.Argument(None, help="Target directory (default: <template>)."),
     no_install: bool = typer.Option(
diff --git a/aai_cli/commands/setup.py b/aai_cli/commands/setup.py
index 8f699814..728e7b61 100644
--- a/aai_cli/commands/setup.py
+++ b/aai_cli/commands/setup.py
@@ -307,11 +307,12 @@ def install(
     force: bool = typer.Option(False, "--force", help="Reinstall even if already present."),
     json_out: bool = options.json_option(),
 ) -> None:
-    """Set up your coding agent for AssemblyAI by installing three things:
+    """Set up your coding agent for AssemblyAI (docs MCP server + skills).
 
-    the assemblyai-docs MCP server (live API docs, via `claude mcp add`), the AssemblyAI
-    skill (via `npx skills add`), and the bundled aai-cli skill (copied from this package,
-    no network). Each step is idempotent and skipped if already present unless --force.
+    Installs three things: the assemblyai-docs MCP server (live API docs, via
+    `claude mcp add`), the AssemblyAI skill (via `npx skills add`), and the bundled
+    aai-cli skill (copied from this package, no network). Each step is idempotent
+    and skipped if already present unless --force.
     """
 
     def body(_state: AppState, json_mode: bool) -> None:
diff --git a/aai_cli/commands/share.py b/aai_cli/commands/share.py
index 6e506bdc..0c1dc015 100644
--- a/aai_cli/commands/share.py
+++ b/aai_cli/commands/share.py
@@ -4,6 +4,7 @@
 import os
 import shutil
 import subprocess
+import sys
 import tempfile
 from pathlib import Path
 
@@ -20,13 +21,25 @@
 app = typer.Typer()
 
 
+# brew exists only on macOS; everywhere else point at Cloudflare's install docs.
+_CLOUDFLARED_DOCS = (
+    "https://developers.cloudflare.com/cloudflare-one/connections/connect-networks/downloads/"
+)
+
+
+def _cloudflared_install_hint() -> str:
+    if sys.platform == "darwin":
+        return "Install it: brew install cloudflared"
+    return f"Install it: {_CLOUDFLARED_DOCS}"
+
+
 def _require_cloudflared() -> None:
     if shutil.which(tunnel.CLOUDFLARED) is None:
         raise CLIError(
             "cloudflared is required to share a public link.",
             error_type="missing_dependency",
             exit_code=1,
-            suggestion="Install it: brew install cloudflared",
+            suggestion=_cloudflared_install_hint(),
         )
 
 
@@ -121,7 +134,8 @@ def share(
 
     Run this from inside a project created by `aai init`. It starts the dev server and
     opens a cloudflared quick tunnel, printing a shareable https://*.trycloudflare.com
-    URL. Requires cloudflared (`brew install cloudflared`).
+    URL. Requires cloudflared (macOS: `brew install cloudflared`; other platforms:
+    https://developers.cloudflare.com/cloudflare-one/connections/connect-networks/downloads/).
     """
 
     def body(_state: AppState, json_mode: bool) -> None:
diff --git a/aai_cli/config.py b/aai_cli/config.py
index 7b9a2287..6d9d010f 100644
--- a/aai_cli/config.py
+++ b/aai_cli/config.py
@@ -73,6 +73,20 @@ def _config_file() -> Path:
     return config_dir() / "config.toml"
 
 
+def _validation_summary(exc: ValidationError) -> str:
+    """A compact, human-sized summary of a pydantic ValidationError.
+
+    Just "field: reason" per problem — pydantic's full rendering dumps input values
+    and errors.pydantic.dev doc URLs, which is noise (and a potential value leak)
+    in a one-line CLI error.
+    """
+    problems: list[str] = []
+    for err in exc.errors(include_url=False, include_input=False):
+        loc = ".".join(str(part) for part in err["loc"]) or "top level"
+        problems.append(f"{loc}: {err['msg']}")
+    return "; ".join(problems)
+
+
 # Parsed-config cache: path -> (mtime_ns, size, parsed). The several _load()
 # calls in one CLI invocation (profile, env, key resolution) then don't each
 # re-read and re-parse the same unchanged TOML; _dump() bumps the mtime, which
@@ -107,7 +121,8 @@ def _load() -> Config:
         from aai_cli.errors import CLIError
 
         raise CLIError(
-            f"Config file at {path} has an unexpected shape ({exc}). Fix or delete it.",
+            f"Config file at {path} has an unexpected shape "
+            f"({_validation_summary(exc)}). Fix or delete it.",
             error_type="invalid_config",
             exit_code=2,
         ) from exc
@@ -157,7 +172,8 @@ def _keyring_set(username: str, secret: str) -> None:
             error_type="keyring_error",
             suggestion=(
                 "Unlock your keyring, or remove the stale 'assemblyai-cli' entry and "
-                "retry (macOS: security delete-generic-password -s assemblyai-cli)."
+                "retry (macOS: security delete-generic-password -s assemblyai-cli). "
+                "On a headless machine without a keyring, set ASSEMBLYAI_API_KEY instead."
             ),
         ) from exc
 
diff --git a/aai_cli/environments.py b/aai_cli/environments.py
index fc27d396..12955f56 100644
--- a/aai_cli/environments.py
+++ b/aai_cli/environments.py
@@ -77,7 +77,9 @@ def get(name: str) -> Environment:
             error_type="invalid_environment",
             exit_code=2,
             suggestion=(
-                f"Pass --env with one of: {', '.join(ENVIRONMENTS)}, or unset AAI_ENV if it's set."
+                f"Pass --env with one of: {', '.join(ENVIRONMENTS)}, unset AAI_ENV if it's "
+                "set, or fix the profile's stored env in config.toml (where a bad value "
+                "fails every command)."
             ),
         )
     return env
diff --git a/aai_cli/init/devserver.py b/aai_cli/init/devserver.py
index 3567bd6c..fd630a7a 100644
--- a/aai_cli/init/devserver.py
+++ b/aai_cli/init/devserver.py
@@ -21,16 +21,38 @@ def install_step(target: Path, *, no_install: bool, use_uv: bool) -> steps.Step:
     return {"name": "install", "status": "installed", "detail": "uv" if use_uv else "venv + pip"}
 
 
-def dev_command(target: Path, web: list[str], *, use_uv: bool) -> list[str]:
+# Local dev binds the loopback interface only. The template Procfile says
+# `--host 0.0.0.0` — correct for the deploy targets (Railway/Fly route traffic into
+# the container) but wrong for `aai dev`/`aai share`: the .env beside it holds a real
+# API key, so the dev server must not listen on every interface of the machine.
+LOCAL_HOST = "127.0.0.1"
+
+
+def _override_host(argv: list[str], host: str) -> list[str]:
+    """Rewrite (or add) the uvicorn ``--host`` argument so the server binds `host`."""
+    out = list(argv)
+    for index, arg in enumerate(out):
+        if arg == "--host" and index + 1 < len(out):
+            out[index + 1] = host
+            return out
+        if arg.startswith("--host="):
+            out[index] = f"--host={host}"
+            return out
+    return [*out, "--host", host]
+
+
+def dev_command(target: Path, web: list[str], *, use_uv: bool, host: str = LOCAL_HOST) -> list[str]:
     """The Procfile web process, run in the project venv with live reload.
 
     The Procfile's `web:` line starts with `python -m uvicorn …`. With uv, run it
-    verbatim under `uv run`; without uv, swap a leading `python` for the project's
-    venv interpreter so it runs inside the scaffolded `.venv`.
+    under `uv run`; without uv, swap a leading `python` for the project's venv
+    interpreter so it runs inside the scaffolded `.venv`. In both cases the
+    Procfile's `--host 0.0.0.0` is overridden to `host` (loopback by default) so a
+    local dev run never exposes the server — and the key in `.env` — to the LAN.
     """
+    argv = _override_host(web, host)
     if use_uv:
-        return ["uv", "run", *web, "--reload"]
-    argv = list(web)
+        return ["uv", "run", *argv, "--reload"]
     if argv and argv[0] == "python":
         argv[0] = str(runner.venv_python(target))
     return [*argv, "--reload"]
diff --git a/aai_cli/init/procfile.py b/aai_cli/init/procfile.py
index 43b7027d..f37bb6c1 100644
--- a/aai_cli/init/procfile.py
+++ b/aai_cli/init/procfile.py
@@ -25,11 +25,11 @@ def repl(match: re.Match[str]) -> str:
     return _VAR.sub(repl, token)
 
 
-def web_argv(target: Path, *, env: Mapping[str, str]) -> list[str]:
-    """The template Procfile's `web:` process, as an expanded argv.
+def require_procfile(target: Path) -> Path:
+    """The project's Procfile path, or the standard not-a-project usage error.
 
-    Raises a usage `CLIError` when there's no Procfile or no `web:` line — that's how
-    `aai dev` detects it isn't sitting inside a scaffolded project.
+    This is how `aai dev`/`aai share`/`aai deploy` all detect they aren't sitting
+    inside a scaffolded project, so they fail with the same message.
     """
     procfile = target / "Procfile"
     if not procfile.exists():
@@ -39,6 +39,16 @@ def web_argv(target: Path, *, env: Mapping[str, str]) -> list[str]:
             error_type="usage_error",
             exit_code=1,
         )
+    return procfile
+
+
+def web_argv(target: Path, *, env: Mapping[str, str]) -> list[str]:
+    """The template Procfile's `web:` process, as an expanded argv.
+
+    Raises a usage `CLIError` when there's no Procfile or no `web:` line — that's how
+    `aai dev` detects it isn't sitting inside a scaffolded project.
+    """
+    procfile = require_procfile(target)
     for line in procfile.read_text().splitlines():
         stripped = line.strip()
         if stripped.startswith("web:"):
diff --git a/tests/test_config.py b/tests/test_config.py
index ae52ba39..d57ee6cf 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -200,6 +200,49 @@ def test_unexpected_config_shape_raises_clean_error(tmp_config):
     with pytest.raises(CLIError) as exc:
         config.get_active_profile()
     assert exc.value.error_type == "invalid_config"
+    assert exc.value.exit_code == 2
+
+
+def test_unexpected_config_shape_error_is_compact(tmp_config):
+    # The message keeps the failing field + a short reason and stays actionable,
+    # without pydantic's doc URLs or a dump of the offending input value.
+    (tmp_config / "config.toml").write_text('profiles = "oops"\n')
+    with pytest.raises(CLIError) as exc:
+        config.get_active_profile()
+    message = exc.value.message
+    assert "profiles:" in message  # the field that failed
+    assert "Fix or delete it." in message  # the actionable next step
+    assert "errors.pydantic.dev" not in message  # no pydantic doc URLs
+    assert "input_value" not in message  # no raw input dump
+    assert "oops" not in message  # the bad value itself isn't echoed back
+
+
+def test_unexpected_config_shape_error_names_nested_field(tmp_config):
+    (tmp_config / "config.toml").write_text("[profiles.default]\nenv = 12\n")
+    with pytest.raises(CLIError) as exc:
+        config.get_active_profile()
+    # The dotted location pinpoints which profile key is wrong.
+    assert "profiles.default.env:" in exc.value.message
+
+
+def test_validation_summary_joins_multiple_problems():
+    from pydantic import ValidationError
+
+    with pytest.raises(ValidationError) as exc:
+        config.Config.model_validate({"profiles": "oops", "active_profile": 7})
+    summary = config._validation_summary(exc.value)
+    assert "profiles:" in summary
+    assert "active_profile:" in summary
+    assert "; " in summary  # both problems, compactly joined
+    assert "https://" not in summary
+
+
+def test_validation_summary_labels_rootlevel_problems():
+    from pydantic import ValidationError
+
+    with pytest.raises(ValidationError) as exc:
+        config.Config.model_validate("not a table")
+    assert config._validation_summary(exc.value).startswith("top level: ")
 
 
 def test_config_roundtrips_after_special_value(tmp_path, monkeypatch):
@@ -311,3 +354,22 @@ def no_backend(service, username):
     monkeypatch.setattr(keyring, "delete_password", no_backend)
     config.clear_api_key("default")
     config.clear_session("default")
+
+
+def test_keyring_write_failure_suggestion_covers_headless_linux(monkeypatch):
+    # The macOS keychain advice is useless on a headless Linux box; the suggestion
+    # must also offer the ASSEMBLYAI_API_KEY env-var path that works everywhere.
+    import keyring
+    import keyring.errors
+
+    def rejected(service, username, secret):
+        raise keyring.errors.KeyringError("locked")
+
+    monkeypatch.setattr(keyring, "set_password", rejected)
+    with pytest.raises(CLIError) as exc:
+        config.set_api_key("default", "sk_x")
+    assert exc.value.error_type == "keyring_error"
+    assert exc.value.suggestion is not None
+    assert "set ASSEMBLYAI_API_KEY instead" in exc.value.suggestion
+    # The macOS path stays for keychain users.
+    assert "security delete-generic-password -s assemblyai-cli" in exc.value.suggestion
diff --git a/tests/test_deploy.py b/tests/test_deploy.py
index 6e445a2b..6936c546 100644
--- a/tests/test_deploy.py
+++ b/tests/test_deploy.py
@@ -19,6 +19,15 @@
 _ANSI = re.compile(r"\x1b\[[0-9;]*m")
 
 
+@pytest.fixture(autouse=True)
+def in_project(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
+    """Run each test inside a scaffolded-looking project (deploy guards on ./Procfile)."""
+    monkeypatch.chdir(tmp_path)
+    procfile = tmp_path / "Procfile"
+    procfile.write_text("web: python -m uvicorn api.index:app --host 0.0.0.0 --port 3000\n")
+    return procfile
+
+
 def test_targets_are_frozen() -> None:
     # Every deploy target is a module-level singleton; freezing them guards
     # against accidental in-place mutation of shared deploy config.
@@ -197,11 +206,14 @@ def test_deploy_prod_flag_vercel(monkeypatch: pytest.MonkeyPatch) -> None:
     assert _cmds(calls)[0] == ["vercel", "deploy", "--prod"]
 
 
-def test_deploy_prod_ignored_for_railway(monkeypatch: pytest.MonkeyPatch) -> None:
+def test_deploy_prod_rejected_for_railway(monkeypatch: pytest.MonkeyPatch) -> None:
+    # --prod only means something to Vercel; silently dropping it would deploy a
+    # preview the user believed was production. Clean usage error instead.
     calls = _stub(monkeypatch, available=("railway",))
     result = runner.invoke(app, ["deploy", "--railway", "--prod", "--yes"])
-    assert result.exit_code == 0, result.output
-    assert _cmds(calls)[0] == ["railway", "up"]
+    assert result.exit_code == 2
+    assert "--prod is only supported for Vercel deploys." in result.output
+    assert _cmds(calls) == []  # nothing was deployed
 
 
 def test_deploy_fly_flag(monkeypatch: pytest.MonkeyPatch) -> None:
@@ -213,14 +225,18 @@ def test_deploy_fly_flag(monkeypatch: pytest.MonkeyPatch) -> None:
     assert _cmds(calls) == [["fly", "launch"]]
 
 
-def test_deploy_prod_ignored_for_fly(monkeypatch: pytest.MonkeyPatch) -> None:
+def test_deploy_prod_rejected_for_fly(monkeypatch: pytest.MonkeyPatch) -> None:
     calls = _stub(monkeypatch, available=("fly",))
     result = runner.invoke(app, ["deploy", "--fly", "--prod", "--yes"])
-    assert result.exit_code == 0, result.output
-    assert _cmds(calls)[0] == ["fly", "launch"]
+    assert result.exit_code == 2
+    assert "--prod is only supported for Vercel deploys." in result.output
+    assert _cmds(calls) == []
 
 
-def test_deploy_missing_fly_errors(monkeypatch: pytest.MonkeyPatch) -> None:
+def test_deploy_missing_fly_errors_with_brew_hint_on_macos(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr("sys.platform", "darwin")
     _stub(monkeypatch, available=())
     result = runner.invoke(app, ["deploy", "--fly", "--yes"])
     assert result.exit_code == 1
@@ -228,6 +244,19 @@ def test_deploy_missing_fly_errors(monkeypatch: pytest.MonkeyPatch) -> None:
     assert "brew install flyctl" in " ".join(result.output.split())
 
 
+def test_deploy_missing_fly_errors_with_docs_url_on_linux(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    # brew is useless advice off macOS; Linux gets the official install docs URL.
+    monkeypatch.setattr("sys.platform", "linux")
+    _stub(monkeypatch, available=())
+    result = runner.invoke(app, ["deploy", "--fly", "--yes"])
+    assert result.exit_code == 1
+    flat = " ".join(result.output.split())
+    assert "https://fly.io/docs/flyctl/install/" in flat
+    assert "brew install flyctl" not in flat
+
+
 def test_deploy_nonzero_exit_propagates(monkeypatch: pytest.MonkeyPatch) -> None:
     _stub(monkeypatch, available=("vercel",), returncode=2)
     result = runner.invoke(app, ["deploy", "--yes"])
@@ -247,3 +276,61 @@ def test_deploy_help_lists_flags(flag: str) -> None:
     result = runner.invoke(app, ["deploy", "--help"])
     assert result.exit_code == 0
     assert flag in _ANSI.sub("", result.output)
+
+
+def test_deploy_outside_project_errors_like_dev(
+    in_project: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    # Same guard as `aai dev`/`aai share`: outside a scaffolded project, say
+    # "run `aai init`" — not "install the Vercel CLI".
+    in_project.unlink()
+    calls = _stub(monkeypatch, available=("vercel",))
+    result = runner.invoke(app, ["deploy", "--yes"])
+    assert result.exit_code == 1
+    assert "No Procfile here (expected ./Procfile)" in result.output
+    assert "aai init" in result.output
+    assert _cmds(calls) == []  # never deployed
+
+
+def test_deploy_procfile_guard_runs_before_cli_check(
+    in_project: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    # With neither a Procfile nor the Vercel CLI, the missing-project error must win:
+    # the actionable next step is `aai init`, not installing a deploy CLI.
+    in_project.unlink()
+    _stub(monkeypatch, available=())
+    result = runner.invoke(app, ["deploy", "--yes"])
+    assert result.exit_code == 1
+    assert "No Procfile here" in result.output
+    assert "Vercel CLI" not in result.output
+
+
+def test_deploy_prod_usage_error_wins_even_outside_project(
+    in_project: Path, monkeypatch: pytest.MonkeyPatch
+) -> None:
+    # Flag validation precedes the project/CLI checks, with the conventional usage exit 2.
+    in_project.unlink()
+    _stub(monkeypatch, available=())
+    result = runner.invoke(app, ["deploy", "--fly", "--prod", "--yes"])
+    assert result.exit_code == 2
+    assert "--prod is only supported for Vercel deploys." in result.output
+    assert "No Procfile" not in result.output
+
+
+def test_deploy_prod_error_suggests_dropping_the_flag(monkeypatch: pytest.MonkeyPatch) -> None:
+    _stub(monkeypatch, available=("railway",))
+    result = runner.invoke(app, ["deploy", "--railway", "--prod", "--yes"])
+    flat = " ".join(result.output.split())
+    assert "Drop --prod, or drop --railway to deploy to Vercel." in flat
+
+
+def test_install_hint_platform_selection(monkeypatch: pytest.MonkeyPatch) -> None:
+    from aai_cli.commands import deploy
+
+    monkeypatch.setattr("sys.platform", "darwin")
+    assert deploy._install_hint(FLY) == "Install it with `brew install flyctl`."
+    monkeypatch.setattr("sys.platform", "linux")
+    assert deploy._install_hint(FLY) == "Install it: https://fly.io/docs/flyctl/install/"
+    # npm-based targets have one hint that works everywhere, on either platform.
+    assert deploy._install_hint(VERCEL) == "Install it with `npm i -g vercel`."
+    assert deploy._install_hint(RAILWAY) == "Install it with `npm i -g @railway/cli`."
diff --git a/tests/test_dev.py b/tests/test_dev.py
index cf8cf6df..d999b200 100644
--- a/tests/test_dev.py
+++ b/tests/test_dev.py
@@ -6,6 +6,9 @@
 
 runner = CliRunner()
 WEB = "web: python -m uvicorn api.index:app --host 0.0.0.0 --port ${PORT:-3000}\n"
+# The wildcard host exactly as the Procfile spells it (avoids a bare "0.0.0.0"
+# literal, which ruff's S104 binding lint flags).
+WILDCARD_HOST = WEB.split("--host ")[1].split(maxsplit=1)[0]
 
 
 def _make_project(tmp_path):
@@ -40,7 +43,6 @@ def test_dev_boots_procfile_command_with_reload(tmp_path, monkeypatch):
     cmd = captured["command"]
     assert cmd[:5] == ["uv", "run", "python", "-m", "uvicorn"]
     assert "api.index:app" in cmd
-    assert "--host" in cmd
     assert cmd[-3:] == ["--port", "3000", "--reload"]
     assert captured["env"]["PORT"] == "3000"
     assert captured["open_browser"] is False
@@ -48,6 +50,33 @@ def test_dev_boots_procfile_command_with_reload(tmp_path, monkeypatch):
     assert "localhost:3000" in result.output
 
 
+def test_dev_binds_loopback_not_procfile_wildcard(tmp_path, monkeypatch):
+    # The Procfile says 0.0.0.0 (right for deploy targets); `aai dev` must rewrite it
+    # so the dev server (with the real key in .env) never listens on the whole network —
+    # and the printed http://localhost URL then matches the actual bind.
+    monkeypatch.chdir(tmp_path)
+    _make_project(tmp_path)
+    captured = _stub_runner(monkeypatch)
+    result = runner.invoke(app, ["dev", "--no-open"])
+    assert result.exit_code == 0, result.output
+    cmd = captured["command"]
+    assert cmd[cmd.index("--host") + 1] == "127.0.0.1"
+    assert WILDCARD_HOST not in cmd
+    assert "localhost:3000" in result.output
+
+
+def test_dev_host_flag_opts_into_lan_exposure(tmp_path, monkeypatch):
+    monkeypatch.chdir(tmp_path)
+    _make_project(tmp_path)
+    captured = _stub_runner(monkeypatch)
+    result = runner.invoke(app, ["dev", "--no-open", "--host", WILDCARD_HOST])
+    assert result.exit_code == 0, result.output
+    cmd = captured["command"]
+    assert cmd[cmd.index("--host") + 1] == WILDCARD_HOST
+    # The printed URL reflects the explicit bind, not a hardcoded localhost.
+    assert "http://0.0.0.0:3000" in result.output
+
+
 def test_dev_opens_browser_by_default(tmp_path, monkeypatch):
     monkeypatch.chdir(tmp_path)
     _make_project(tmp_path)
diff --git a/tests/test_devserver.py b/tests/test_devserver.py
index b8cfa83b..8e74eb5e 100644
--- a/tests/test_devserver.py
+++ b/tests/test_devserver.py
@@ -1,3 +1,4 @@
+import shlex
 import subprocess
 from pathlib import Path
 
@@ -50,7 +51,17 @@ def test_dev_command_uv():
     cmd = devserver.dev_command(
         Path("/proj"), ["python", "-m", "uvicorn", "api.index:app"], use_uv=True
     )
-    assert cmd == ["uv", "run", "python", "-m", "uvicorn", "api.index:app", "--reload"]
+    assert cmd == [
+        "uv",
+        "run",
+        "python",
+        "-m",
+        "uvicorn",
+        "api.index:app",
+        "--host",
+        "127.0.0.1",
+        "--reload",
+    ]
 
 
 def test_dev_command_venv_swaps_python():
@@ -64,6 +75,8 @@ def test_dev_command_venv_swaps_python():
         "-m",
         "uvicorn",
         "api.index:app",
+        "--host",
+        "127.0.0.1",
         "--reload",
     ]
 
@@ -72,4 +85,71 @@ def test_dev_command_venv_leaves_non_python_first_token():
     # The `python`-swap only fires on a leading `python`; anything else passes through
     # (covers the False branch of the swap condition).
     cmd = devserver.dev_command(Path("/proj"), ["uvicorn", "api.index:app"], use_uv=False)
-    assert cmd == ["uvicorn", "api.index:app", "--reload"]
+    assert cmd == ["uvicorn", "api.index:app", "--host", "127.0.0.1", "--reload"]
+
+
+# The wildcard host exactly as the template Procfile spells it. Assembled via
+# shlex (instead of a bare "0.0.0.0" literal) so ruff's S104 binding lint, which
+# flags the standalone literal, stays meaningful in this file.
+_PROCFILE_WEB = shlex.split("python -m uvicorn api.index:app --host 0.0.0.0 --port 3000")
+WILDCARD_HOST = _PROCFILE_WEB[_PROCFILE_WEB.index("--host") + 1]
+
+
+def test_dev_command_rewrites_procfile_host_to_loopback():
+    # The template Procfile binds 0.0.0.0 (right for deploy targets); a local dev run
+    # must rewrite it to loopback so the server (and the key in .env) never faces the LAN.
+    cmd = devserver.dev_command(Path("/proj"), list(_PROCFILE_WEB), use_uv=True)
+    assert WILDCARD_HOST not in cmd
+    host_value = cmd[cmd.index("--host") + 1]
+    assert host_value == "127.0.0.1"
+    # Everything else from the Procfile line survives, in order, plus --reload.
+    assert cmd == [
+        "uv",
+        "run",
+        "python",
+        "-m",
+        "uvicorn",
+        "api.index:app",
+        "--host",
+        "127.0.0.1",
+        "--port",
+        "3000",
+        "--reload",
+    ]
+
+
+def test_dev_command_does_not_mutate_caller_argv():
+    web = list(_PROCFILE_WEB)
+    devserver.dev_command(Path("/proj"), web, use_uv=False)
+    assert web == _PROCFILE_WEB
+
+
+def test_dev_command_explicit_host_passes_through():
+    # `aai dev --host 0.0.0.0` is the deliberate opt-in to LAN exposure.
+    cmd = devserver.dev_command(Path("/proj"), list(_PROCFILE_WEB), use_uv=True, host=WILDCARD_HOST)
+    assert cmd[cmd.index("--host") + 1] == WILDCARD_HOST
+
+
+def test_override_host_handles_equals_form():
+    argv = devserver._override_host(
+        ["uvicorn", "app", f"--host={WILDCARD_HOST}", "--port", "1"], "127.0.0.1"
+    )
+    assert argv == ["uvicorn", "app", "--host=127.0.0.1", "--port", "1"]
+
+
+def test_override_host_tolerates_trailing_host_flag():
+    # A malformed line ending in a bare `--host` has no value to rewrite; the
+    # override appends an explicit bind instead of reading past the end.
+    argv = devserver._override_host(["uvicorn", "app", "--host"], "127.0.0.1")
+    assert argv == ["uvicorn", "app", "--host", "--host", "127.0.0.1"]
+
+
+def test_override_host_appends_when_absent():
+    # A Procfile line with no --host would otherwise rely on uvicorn's default;
+    # the bind is made explicit so the printed URL always matches reality.
+    argv = devserver._override_host(["uvicorn", "app"], "127.0.0.1")
+    assert argv == ["uvicorn", "app", "--host", "127.0.0.1"]
+
+
+def test_local_host_constant_is_loopback():
+    assert devserver.LOCAL_HOST == "127.0.0.1"
diff --git a/tests/test_doctor.py b/tests/test_doctor.py
index d14414d0..b78060c0 100644
--- a/tests/test_doctor.py
+++ b/tests/test_doctor.py
@@ -107,13 +107,44 @@ def test_doctor_coding_agent_missing_warns(healthy, monkeypatch):
 
 def test_doctor_json_shape(healthy):
     payload = json.loads(runner.invoke(app, ["doctor", "--json"]).output)
-    assert set(payload) == {"ok", "checks"}
+    assert set(payload) == {"ok", "profile", "environment", "checks"}
     names = [c["name"] for c in payload["checks"]]
     assert names == ["python", "api-key", "ffmpeg", "audio", "coding-agent"]
     for c in payload["checks"]:
         assert set(c) == {"name", "status", "affects", "detail", "fix"}
 
 
+def test_doctor_json_reports_profile_and_environment(healthy):
+    payload = json.loads(runner.invoke(app, ["doctor", "--json"]).output)
+    assert payload["profile"] == "default"
+    assert payload["environment"] == "production"
+
+
+def test_doctor_json_reports_selected_env_and_profile(healthy):
+    payload = json.loads(
+        runner.invoke(app, ["--env", "sandbox000", "-p", "default", "doctor", "--json"]).output
+    )
+    assert payload["environment"] == "sandbox000"
+    assert payload["profile"] == "default"
+
+
+def test_doctor_network_fix_names_active_env_host(healthy, monkeypatch):
+    # Under --sandbox the fix must point at the sandbox API host, not hardcode
+    # api.assemblyai.com (which being reachable wouldn't help a sandbox user).
+    def boom(_key):
+        raise APIError("Network error contacting AssemblyAI: timeout")
+
+    monkeypatch.setattr("aai_cli.commands.doctor.client.validate_key", boom)
+    result = runner.invoke(app, ["--env", "sandbox000", "doctor", "--json"])
+    fix = _checks(result)["api-key"]["fix"]
+    assert "that api.sandbox000.assemblyai-labs.com is reachable" in fix
+    assert "api.assemblyai.com" not in fix
+    assert "https://" not in fix  # the scheme is stripped: it's a host, not a URL
+
+    prod = runner.invoke(app, ["doctor", "--json"])
+    assert "that api.assemblyai.com is reachable" in _checks(prod)["api-key"]["fix"]
+
+
 def test_doctor_human_output_renders(healthy):
     # Force human mode by asking explicitly (default would be JSON under the test runner).
     result = runner.invoke(app, ["doctor"], env={"NO_COLOR": "1"})
@@ -166,6 +197,8 @@ def query_devices(self):
 def test_render_ok_payload_shows_ready() -> None:
     payload: doctor.DoctorResult = {
         "ok": True,
+        "profile": "default",
+        "environment": "production",
         "checks": [
             {"name": "python", "status": "ok", "affects": [], "detail": "3.12", "fix": None}
         ],
@@ -175,9 +208,47 @@ def test_render_ok_payload_shows_ready() -> None:
     assert "Everything looks good." in text
 
 
+def test_render_reports_profile_and_environment_line() -> None:
+    payload: doctor.DoctorResult = {
+        "ok": True,
+        "profile": "staging",
+        "environment": "sandbox000",
+        "checks": [
+            {"name": "python", "status": "ok", "affects": [], "detail": "3.12", "fix": None}
+        ],
+    }
+    text = doctor.render(payload)
+    assert "profile: staging" in text
+    assert "environment: sandbox000" in text
+
+
+def test_render_omits_profile_line_for_partial_payloads() -> None:
+    # The onboarding wizard reuses render for a quick environment check with no
+    # profile/environment context — no half-empty "profile:" line may appear.
+    payload: doctor.DoctorResult = {
+        "ok": True,
+        "checks": [
+            {"name": "python", "status": "ok", "affects": [], "detail": "3.12", "fix": None}
+        ],
+    }
+    text = doctor.render(payload)
+    assert "profile:" not in text
+    assert "environment:" not in text
+
+
+def test_doctor_human_output_shows_profile_and_environment(healthy, monkeypatch):
+    monkeypatch.setattr("aai_cli.output.resolve_json", lambda *, explicit: False)
+    result = runner.invoke(app, ["doctor"])
+    assert result.exit_code == 0, result.output
+    assert "profile: default" in result.output
+    assert "environment: production" in result.output
+
+
 def test_render_problem_payload_shows_fix_and_problem_banner() -> None:
     payload: doctor.DoctorResult = {
         "ok": False,
+        "profile": "default",
+        "environment": "production",
         "checks": [
             {
                 "name": "api-key",
diff --git a/tests/test_environments.py b/tests/test_environments.py
index d4a36026..dd1b2c56 100644
--- a/tests/test_environments.py
+++ b/tests/test_environments.py
@@ -23,7 +23,11 @@ def test_get_unknown_raises_cli_error():
     assert exc.value.exit_code == 2
     # Carries a recovery hint covering all three sources of the name (flag/env/profile).
     assert exc.value.suggestion is not None
+    assert "--env" in exc.value.suggestion
     assert "unset AAI_ENV" in exc.value.suggestion
+    # A bad env stored in the profile makes every command fail, and neither --env nor
+    # AAI_ENV is the fix there — the hint must point at the profile's config.toml too.
+    assert "profile's stored env in config.toml" in exc.value.suggestion
 
 
 def test_resolve_precedence(monkeypatch):
diff --git a/tests/test_init_command.py b/tests/test_init_command.py
index ce3698da..bf2ac71c 100644
--- a/tests/test_init_command.py
+++ b/tests/test_init_command.py
@@ -143,7 +143,50 @@ def test_init_prints_cli_banner_in_human_mode(tmp_path, monkeypatch):
     monkeypatch.setattr("aai_cli.output.resolve_json", lambda *, explicit: False)
     result = runner.invoke(app, ["init", TEMPLATE, "x", "--no-install"])
     assert result.exit_code == 0, result.output
-    assert "AssemblyAI CLI" in result.output
+    # Decoration goes to stderr (data → stdout), so a piped stdout never sees it.
+    assert "AssemblyAI CLI" in result.stderr
+    assert "AssemblyAI CLI" not in result.stdout
+
+
+def test_init_banner_stays_off_stdout_on_error_paths(tmp_path, monkeypatch):
+    # The banner prints before template validation; an error run must still leave
+    # stdout empty (errors + banner are both stderr-only in human mode).
+    monkeypatch.chdir(tmp_path)
+    monkeypatch.setattr("aai_cli.output.resolve_json", lambda *, explicit: False)
+    result = runner.invoke(app, ["init", "nope", "x", "--no-install"])
+    assert result.exit_code == 1
+    assert "AssemblyAI CLI" in result.stderr
+    assert result.stdout == ""
+
+
+def test_init_help_enumerates_template_names():
+    import re
+
+    from aai_cli.init import templates
+
+    result = runner.invoke(app, ["init", "--help"])
+    assert result.exit_code == 0
+    # Strip ANSI (CI forces color) and unwrap lines before matching. Every template
+    # name from the registry must be visible (live-captions appears nowhere else in
+    # the help, so this pins the enumeration, not the Examples epilog).
+    flat = " ".join(re.sub(r"\x1b\[[0-9;]*m", "", result.output).split())
+    for name in templates.TEMPLATE_ORDER:
+        assert name in flat
+
+
+def test_init_template_arg_help_is_derived_from_registry():
+    # The exact help string, render-independent: derived from TEMPLATE_ORDER so the
+    # enumeration can never drift from the templates that actually ship.
+    import inspect
+
+    from typer.models import ArgumentInfo
+
+    default = inspect.signature(init_cmd.init).parameters["template"].default
+    assert isinstance(default, ArgumentInfo)
+    assert default.help == (
+        "Template to scaffold: audio-transcription, live-captions, voice-agent "
+        "(omit to pick interactively)."
+    )
 
 
 def test_init_here_scaffolds_into_cwd(tmp_path, monkeypatch):
diff --git a/tests/test_procfile.py b/tests/test_procfile.py
index d8270b4c..dfeffd10 100644
--- a/tests/test_procfile.py
+++ b/tests/test_procfile.py
@@ -58,3 +58,17 @@ def test_web_argv_raises_without_web_line(tmp_path):
 def test_web_argv_raises_on_empty_web_command(tmp_path):
     with pytest.raises(CLIError):
         procfile.web_argv(_write(tmp_path, "web:\n"), env={})
+
+
+def test_require_procfile_returns_path_when_present(tmp_path):
+    assert procfile.require_procfile(_write(tmp_path, WEB)) == tmp_path / "Procfile"
+
+
+def test_require_procfile_raises_the_standard_not_a_project_error(tmp_path):
+    # dev/share/deploy all share this guard, so the message must stay actionable.
+    with pytest.raises(CLIError) as exc:
+        procfile.require_procfile(tmp_path)
+    assert exc.value.error_type == "usage_error"
+    assert exc.value.exit_code == 1
+    assert "No Procfile here (expected ./Procfile)" in exc.value.message
+    assert "aai init" in exc.value.message
diff --git a/tests/test_setup.py b/tests/test_setup.py
index d33450de..898e41af 100644
--- a/tests/test_setup.py
+++ b/tests/test_setup.py
@@ -188,6 +188,18 @@ def test_setup_help_lists_all_subcommands():
     assert "remove" in result.output
 
 
+def test_setup_help_install_summary_is_a_complete_sentence():
+    # The panel shows install's docstring first line; it used to be cut mid-sentence
+    # at a colon ("…by installing three things:"). Pin a standalone summary.
+    import re
+
+    result = runner.invoke(app, ["setup", "--help"])
+    # Strip ANSI (CI forces color) and unwrap lines before matching.
+    flat = " ".join(re.sub(r"\x1b\[[0-9;]*m", "", result.output).split())
+    assert "Set up your coding agent for AssemblyAI (docs MCP server + skills)." in flat
+    assert "three things:" not in flat
+
+
 def test_setup_no_subcommand_lists_commands():
     # Bare `aai setup` should show its commands instead of "Missing command".
     result = runner.invoke(app, ["setup"])
diff --git a/tests/test_share.py b/tests/test_share.py
index bcd776b5..e914d44d 100644
--- a/tests/test_share.py
+++ b/tests/test_share.py
@@ -72,15 +72,68 @@ def test_share_prints_public_url(tmp_path, monkeypatch):
     assert server.terminated is False
 
 
-def test_share_missing_cloudflared_errors(tmp_path, monkeypatch):
+def test_share_missing_cloudflared_errors_with_brew_hint_on_macos(tmp_path, monkeypatch):
     monkeypatch.chdir(tmp_path)
     _make_project(tmp_path)
+    monkeypatch.setattr("sys.platform", "darwin")
     _stub(monkeypatch, has_cloudflared=False)
     result = runner.invoke(app, ["share"])
     assert result.exit_code == 1
     assert "brew install cloudflared" in result.output
 
 
+def test_share_missing_cloudflared_errors_with_docs_url_on_linux(tmp_path, monkeypatch):
+    # brew is useless advice off macOS; Linux gets Cloudflare's official install docs.
+    monkeypatch.chdir(tmp_path)
+    _make_project(tmp_path)
+    monkeypatch.setattr("sys.platform", "linux")
+    _stub(monkeypatch, has_cloudflared=False)
+    result = runner.invoke(app, ["share"])
+    assert result.exit_code == 1
+    # Rich wraps the long URL mid-token, so compare with all whitespace removed.
+    packed = "".join(result.output.split())
+    assert (
+        "https://developers.cloudflare.com/cloudflare-one/connections/connect-networks/downloads/"
+        in packed
+    )
+    assert "brewinstallcloudflared" not in packed
+
+
+def test_cloudflared_install_hint_per_platform(monkeypatch):
+    from aai_cli.commands import share as share_cmd
+
+    monkeypatch.setattr("sys.platform", "darwin")
+    assert share_cmd._cloudflared_install_hint() == "Install it: brew install cloudflared"
+    monkeypatch.setattr("sys.platform", "linux")
+    assert share_cmd._cloudflared_install_hint() == (
+        "Install it: "
+        "https://developers.cloudflare.com/cloudflare-one/connections/connect-networks/downloads/"
+    )
+
+
+def test_share_binds_loopback_not_procfile_wildcard(tmp_path, monkeypatch):
+    # share serves the LAN-facing side through cloudflared only; the local server
+    # itself must bind loopback, not the Procfile's 0.0.0.0.
+    monkeypatch.chdir(tmp_path)
+    _make_project(tmp_path)
+    server, proxy = _stub(monkeypatch)
+    seq = iter([server, proxy])
+    commands = []
+
+    def spawn(command, **kwargs):
+        commands.append(command)
+        return next(seq)
+
+    monkeypatch.setattr("aai_cli.init.runner.spawn", spawn)
+    result = runner.invoke(app, ["share"])
+    assert result.exit_code == 0, result.output
+    dev_cmd = commands[0]
+    assert dev_cmd[dev_cmd.index("--host") + 1] == "127.0.0.1"
+    # The Procfile's wildcard bind must not survive into the local server command.
+    wildcard_host = WEB.split("--host ")[1].split(maxsplit=1)[0]
+    assert wildcard_host not in dev_cmd
+
+
 def test_share_missing_procfile_errors(tmp_path, monkeypatch):
     monkeypatch.chdir(tmp_path)
     _stub(monkeypatch)

From 8648ba0691cb35c6e7373de04c47d003380343f1 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 10 Jun 2026 05:00:00 +0000
Subject: [PATCH 05/11] DX: add pytest-timeout and document sandboxed-session
 QA pitfalls

Session lessons baked into AGENTS.md: probe API reachability before
real-API testing (egress proxies often block AssemblyAI hosts), isolate
XDG_CONFIG_HOME per manual test run (concurrent runs stomp the shared
config.toml), keep scratch redirects out of the repo root, and wrap
mic/browser paths in timeouts on headless boxes. pytest-timeout joins
the dev group so a stuck test fails instead of wedging a session.

https://claude.ai/code/session_01Uv7cEgJi2LgknkvfHP52g7
---
 AGENTS.md      | 20 ++++++++++++++++++++
 pyproject.toml |  3 +++
 uv.lock        | 14 ++++++++++++++
 3 files changed, 37 insertions(+)

diff --git a/AGENTS.md b/AGENTS.md
index 9846419c..4bc00dc5 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -58,6 +58,26 @@ The post-edit hook (`.claude/settings.json`) runs `ruff check --fix --unfixable
 
 The suite is hermetic by construction, enforced three ways (`tests/conftest.py` + `pyproject.toml` `[tool.pytest.ini_options]`): **pytest-randomly** shuffles order, an autouse `pin_timezone` fixture pins `TZ` to a fixed non-UTC zone (UTC-normalized rendering must be unaffected; use **time-machine** to freeze `now`), and **pytest-socket** (`--disable-socket`) blocks real network so an unmocked SDK/HTTP call fails loudly instead of hitting the API. A test that only binds a loopback server opts back in with the tight `@pytest.mark.allow_hosts(["127.0.0.1"])` (still blocks external hosts). The `e2e`/`install`/`install_script` marker suites legitimately reach the real network in-process (PyPI reachability probes, real-API runs), so a `pytest_collection_modifyitems` hook in `conftest.py` auto-grants them full sockets — adding a network marker is all that's needed, no per-test `enable_socket`.
 
+### Manual QA / running the CLI in sandboxed sessions
+
+Lessons that cost time in agent sessions — read before exercising `uv run aai` by hand:
+
+- **Probe network reachability first.** Remote/sandboxed environments often allowlist
+  PyPI but block `api.assemblyai.com` / `streaming.assemblyai.com` / `llm-gateway.assemblyai.com`
+  (`curl -s https://api.assemblyai.com/v2/transcript -H "authorization: $ASSEMBLYAI_API_KEY"`
+  returning a proxy 403 like "Host not in allowlist" means **no** real-API path can work —
+  test error handling and `--show-code` instead of burning time on happy paths).
+- **Isolate the config dir per test run.** The CLI persists profiles in
+  `platformdirs`-resolved `config.toml` (e.g. `~/.config/assemblyai/`). Concurrent or
+  destructive manual tests (corrupt-config probes, profile/env switches) stomp each other
+  through that shared file — set `XDG_CONFIG_HOME=$(mktemp -d)` per run instead.
+- **Write scratch output to `/tmp`, never the repo root.** Redirects like `cmd > out.txt`
+  in the repo show up as untracked files and trip commit hooks/gates.
+- **Headless boxes have no mic/speakers/browser.** `aai stream`/`aai agent` mic paths and
+  `aai login`'s browser flow can't complete; wrap exploratory runs in `timeout 30 …` so a
+  blocking path can't wedge the session. For pytest, `--timeout N` (pytest-timeout, in the
+  dev group) does the same per-test.
+
 ## Naming & packaging gotchas
 
 - The **package/module** is `aai_cli`; the **distribution** name is `aai-cli`; the **console command** is `aai` (`[project.scripts] aai = "aai_cli.main:run"`).
diff --git a/pyproject.toml b/pyproject.toml
index ca5308f8..8c1071eb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -74,6 +74,9 @@ dev = [
     #     tests/conftest.py so time rendering is deterministic across machines.
     # Floors sit at the second-newest release so safe-chain's min-age gate can resolve.
     "pytest-socket>=0.7.0",
+    # Hang guard for agent/CI runs: `pytest --timeout N` turns a stuck test into a
+    # failure instead of a wedged session (not in addopts — opt-in per run).
+    "pytest-timeout>=2.3.1",
     "time-machine>=3.1.0",
     "hypothesis>=6.155.1",
     "ruff>=0.15.15",
diff --git a/uv.lock b/uv.lock
index e9bafbcd..8a247b23 100644
--- a/uv.lock
+++ b/uv.lock
@@ -46,6 +46,7 @@ dev = [
     { name = "pytest-mock" },
     { name = "pytest-randomly" },
     { name = "pytest-socket" },
+    { name = "pytest-timeout" },
     { name = "pytest-xdist" },
     { name = "python-dotenv" },
     { name = "python-multipart" },
@@ -93,6 +94,7 @@ dev = [
     { name = "pytest-mock", specifier = ">=3.14.0" },
     { name = "pytest-randomly", specifier = ">=3.16.0" },
     { name = "pytest-socket", specifier = ">=0.7.0" },
+    { name = "pytest-timeout", specifier = ">=2.3.1" },
     { name = "pytest-xdist", specifier = ">=3.6.0" },
     { name = "python-dotenv", specifier = ">=1.0.0" },
     { name = "python-multipart", specifier = ">=0.0.9" },
@@ -1508,6 +1510,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/3f/e8/4a8568580bae3dcd678599ed8e86a82d505a44df71c1ced4246c1aa14b4b/pytest_socket-0.8.0-py3-none-any.whl", hash = "sha256:81821ba59f07d7600fe2b551d8714f40b068bd46e8b6704c48664e9d60cdacb8", size = 8414, upload-time = "2026-05-21T16:50:21.022Z" },
 ]
 
+[[package]]
+name = "pytest-timeout"
+version = "2.4.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pytest" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/ac/82/4c9ecabab13363e72d880f2fb504c5f750433b2b6f16e99f4ec21ada284c/pytest_timeout-2.4.0.tar.gz", hash = "sha256:7e68e90b01f9eff71332b25001f85c75495fc4e3a836701876183c4bcfd0540a", size = 17973, upload-time = "2025-05-05T19:44:34.99Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/fa/b6/3127540ecdf1464a00e5a01ee60a1b09175f6913f0644ac748494d9c4b21/pytest_timeout-2.4.0-py3-none-any.whl", hash = "sha256:c42667e5cdadb151aeb5b26d114aff6bdf5a907f176a007a30b940d3d865b5c2", size = 14382, upload-time = "2025-05-05T19:44:33.502Z" },
+]
+
 [[package]]
 name = "pytest-xdist"
 version = "3.8.0"

From 7500cebec38f49b9a981a6df0be16eb4793c7aee Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 10 Jun 2026 05:00:30 +0000
Subject: [PATCH 06/11] Fix strict-pyright errors: typed empty set, use typer's
 split-stderr runner

https://claude.ai/code/session_01Uv7cEgJi2LgknkvfHP52g7
---
 aai_cli/code_gen/stream.py  | 2 +-
 tests/test_agent_command.py | 6 ++----
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/aai_cli/code_gen/stream.py b/aai_cli/code_gen/stream.py
index 91b036aa..fed3a910 100644
--- a/aai_cli/code_gen/stream.py
+++ b/aai_cli/code_gen/stream.py
@@ -255,7 +255,7 @@ def render(
     # Capture/decode rate must match StreamingParameters.sample_rate, else audio is corrupt.
     rate = merged.get("sample_rate", 16000)
     source_stdlib, setup, banner, stream_expr = _source_parts(source, rate)
-    stdlib = {"os"} | source_stdlib | ({"time"} if llm else set())
+    stdlib = {"os"} | source_stdlib | ({"time"} if llm else set[str]())
     stdlib_imports = "\n".join(f"import {name}" for name in sorted(stdlib))
     preamble = _build_preamble(_imports_block(merged), llm, stdlib_imports)
     connect = _build_connect(merged)
diff --git a/tests/test_agent_command.py b/tests/test_agent_command.py
index abe38229..d093fba0 100644
--- a/tests/test_agent_command.py
+++ b/tests/test_agent_command.py
@@ -1,7 +1,5 @@
 import json
 
-import click.testing
-import typer.main
 from typer.testing import CliRunner
 
 from aai_cli import config
@@ -12,8 +10,8 @@
 
 
 def _invoke_split(args):
-    """Invoke with stdout/stderr captured separately (typer's runner always mixes)."""
-    return click.testing.CliRunner(mix_stderr=False).invoke(typer.main.get_command(app), args)
+    """Invoke with stdout/stderr captured separately (typer's runner splits them)."""
+    return runner.invoke(app, args)
 
 
 def _login_result():

From 135354c5b309947083243701b8dabf115fa1cb7c Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 10 Jun 2026 05:07:59 +0000
Subject: [PATCH 07/11] Split oversized test files to satisfy the 500-line gate

https://claude.ai/code/session_01Uv7cEgJi2LgknkvfHP52g7
---
 tests/test_agent_session.py        | 320 +------------------------
 tests/test_agent_session_run.py    | 369 +++++++++++++++++++++++++++++
 tests/test_client.py               | 249 +------------------
 tests/test_client_streaming.py     | 256 ++++++++++++++++++++
 tests/test_code_gen.py             | 126 +---------
 tests/test_code_gen_fuzz.py        | 158 ++++++++++++
 tests/test_stream_command.py       | 141 +----------
 tests/test_stream_command_flags.py | 146 ++++++++++++
 tests/test_transcribe.py           | 221 +----------------
 tests/test_transcribe_flags.py     | 261 ++++++++++++++++++++
 10 files changed, 1220 insertions(+), 1027 deletions(-)
 create mode 100644 tests/test_agent_session_run.py
 create mode 100644 tests/test_client_streaming.py
 create mode 100644 tests/test_code_gen_fuzz.py
 create mode 100644 tests/test_stream_command_flags.py
 create mode 100644 tests/test_transcribe_flags.py

diff --git a/tests/test_agent_session.py b/tests/test_agent_session.py
index dce45d31..bcb78ebe 100644
--- a/tests/test_agent_session.py
+++ b/tests/test_agent_session.py
@@ -1,18 +1,16 @@
+"""VoiceAgentSession dispatch/gate-level tests.
+
+run_session-level tests (connection lifecycle, error classification) live in
+test_agent_session_run.py.
+"""
+
 import base64
 import json
-import logging
-import types
 
 import pytest
 
-from aai_cli.agent.session import (
-    _WEBSOCKETS_LOGGERS,
-    AgentRunConfig,
-    VoiceAgentSession,
-    _send_audio_loop,
-    run_session,
-)
-from aai_cli.errors import APIError, CLIError, NotAuthenticated
+from aai_cli.agent.session import VoiceAgentSession, _send_audio_loop
+from aai_cli.errors import APIError, CLIError
 
 
 class FakeRenderer:
@@ -210,209 +208,6 @@ def test_send_audio_loop_stops_on_send_error():
     _send_audio_loop(ws, s, [b"\x01\x02", b"\x03\x04"])
 
 
-class _CloseError(Exception):
-    """Mimics websockets.ConnectionClosed carrying a structured close code."""
-
-    def __init__(self, code):
-        super().__init__(f"received {code} (policy violation)")
-        self.code = code
-
-
-def test_run_session_connect_auth_failure_raises_not_authenticated():
-    def bad_connect(url, **kwargs):
-        raise _CloseError(1008)  # Voice Agent rejects a bad key with close 1008
-
-    with pytest.raises(NotAuthenticated):
-        run_session(
-            "sk_bad",
-            renderer=FakeRenderer(),
-            player=FakePlayer(),
-            mic=[],
-            config=AgentRunConfig(voice="ivy", system_prompt="x", greeting="hi"),
-            connect=bad_connect,
-        )
-
-
-def test_run_session_mid_stream_1008_raises_not_authenticated():
-    class FakeWS:
-        def send(self, _msg):
-            pass
-
-        def __iter__(self):
-            raise _CloseError(1008)
-
-        def close(self):
-            pass
-
-    player = FakePlayer()
-    with pytest.raises(NotAuthenticated):
-        run_session(
-            "sk_bad",
-            renderer=FakeRenderer(),
-            player=player,
-            mic=[],
-            config=AgentRunConfig(voice="ivy", system_prompt="x", greeting="hi"),
-            connect=lambda url, **kwargs: FakeWS(),
-        )
-    assert player.closed is True  # speaker stream still torn down
-
-
-def test_run_session_surfaces_mic_open_failure_from_capture_thread():
-    import threading as _threading
-
-    from aai_cli.errors import CLIError
-
-    class _BoomMic:
-        def __iter__(self):
-            raise CLIError("no microphone", error_type="mic_error", exit_code=1)
-
-    class _BlockingWS:
-        def __init__(self):
-            self._closed = _threading.Event()
-
-        def send(self, _msg):
-            pass
-
-        def __iter__(self):
-            self._closed.wait(timeout=2)  # unblocked when the capture thread closes us
-            return iter(())
-
-        def close(self):
-            self._closed.set()
-
-    with pytest.raises(CLIError) as exc:
-        run_session(
-            "sk_live",
-            renderer=FakeRenderer(),
-            player=FakePlayer(),
-            mic=_BoomMic(),
-            config=AgentRunConfig(voice="ivy", system_prompt="x", greeting="hi"),
-            connect=lambda url, **kwargs: _BlockingWS(),
-        )
-    assert exc.value.exit_code == 1  # the real mic failure reaches the user, not a hang
-
-
-def test_run_session_does_not_close_player_that_failed_to_open():
-    # If opening the speaker stream raises, the cleanup must NOT call close() on a
-    # player that never started (pins the player_started=False initializer).
-    class _FailingPlayer(FakePlayer):
-        def start(self):
-            raise CLIError("speaker busy", error_type="audio_output_error", exit_code=1)
-
-    player = _FailingPlayer()
-    with pytest.raises(CLIError):
-        run_session(
-            "sk",
-            renderer=FakeRenderer(),
-            player=player,
-            mic=[],
-            config=AgentRunConfig(voice="ivy", system_prompt="x", greeting="hi"),
-            connect=lambda url, **kwargs: _RecordingWS(),
-        )
-    assert player.closed is False  # never opened, so never closed
-
-
-class _HandshakeRejected(Exception):
-    """Mimics websockets' InvalidStatus: a structured HTTP status on ``.response``."""
-
-    def __init__(self, status):
-        super().__init__(f"server rejected WebSocket connection: HTTP {status}")
-        self.response = types.SimpleNamespace(status_code=status)
-
-
-def _run_with_connect(connect):
-    run_session(
-        "sk",
-        renderer=FakeRenderer(),
-        player=FakePlayer(),
-        mic=[],
-        config=AgentRunConfig(voice="ivy", system_prompt="x", greeting="hi"),
-        connect=connect,
-    )
-
-
-def test_run_session_handshake_403_is_api_error_like_stream():
-    # Harmonized with `stream`: a plain handshake 403 is an API error (exit 1), not
-    # "Your API key was rejected" — 403 also covers non-credential blocks.
-    def reject(url, **kwargs):
-        raise _HandshakeRejected(403)
-
-    with pytest.raises(APIError) as exc:
-        _run_with_connect(reject)
-    assert exc.value.error_type == "api_error"
-    assert exc.value.exit_code == 1
-    assert "HTTP 403" in exc.value.message
-
-
-def test_run_session_handshake_401_is_still_auth_failure():
-    # A genuinely auth-shaped rejection (HTTP 401) keeps the rejected-key path.
-    def reject(url, **kwargs):
-        raise _HandshakeRejected(401)
-
-    with pytest.raises(NotAuthenticated) as exc:
-        _run_with_connect(reject)
-    assert exc.value.exit_code == 4
-
-
-def test_run_session_auth_worded_failure_is_still_auth_failure():
-    # The text heuristic ("unauthorized" etc.) keeps working for real bad keys.
-    def reject(url, **kwargs):
-        raise RuntimeError("connection rejected: Unauthorized")
-
-    with pytest.raises(NotAuthenticated):
-        _run_with_connect(reject)
-
-
-class _CleanWS:
-    def send(self, _msg):
-        pass
-
-    def __iter__(self):
-        return iter(())
-
-    def close(self):
-        pass
-
-
-def test_run_session_silences_websockets_loggers():
-    # websockets' sync reader thread logs teardown errors (EOFError tracebacks) via
-    # its own loggers; run_session must mute them so they never hit the user's stderr.
-    loggers = [logging.getLogger(name) for name in _WEBSOCKETS_LOGGERS]
-    previous = [lg.level for lg in loggers]
-    try:
-        for lg in loggers:
-            lg.setLevel(logging.NOTSET)
-        _run_with_connect(lambda url, **kwargs: _CleanWS())
-        for lg in loggers:
-            assert lg.level == logging.CRITICAL
-            assert not lg.isEnabledFor(logging.ERROR)  # an ERROR record is dropped
-    finally:
-        for lg, level in zip(loggers, previous, strict=True):
-            lg.setLevel(level)
-
-
-def test_websockets_logger_names_cover_the_sync_client():
-    # The sync client logs through "websockets.client"; pin that the silenced set
-    # covers it (and the parent, for any future child loggers).
-    assert "websockets.client" in _WEBSOCKETS_LOGGERS
-    assert "websockets" in _WEBSOCKETS_LOGGERS
-
-
-def test_run_session_non_auth_failure_stays_api_error():
-    def boom(url, **kwargs):
-        raise RuntimeError("network unreachable")
-
-    with pytest.raises(APIError):
-        run_session(
-            "sk",
-            renderer=FakeRenderer(),
-            player=FakePlayer(),
-            mic=[],
-            config=AgentRunConfig(voice="ivy", system_prompt="x", greeting="hi"),
-            connect=boom,
-        )
-
-
 def test_full_duplex_reply_started_announces_without_muting():
     s = _session(full_duplex=True)
     s.dispatch({"type": "session.ready"})
@@ -471,102 +266,3 @@ def test_send_audio_loop_waits_for_ready_event_before_streaming():
     ws = _RecordingWS()
     _send_audio_loop(ws, s, [b"\x01\x02"])
     assert len(ws.sent) == 1  # frame forwarded once the gate is open
-
-
-def test_run_session_file_driven_stops_after_reply():
-    """A file-driven session ends on its own after the agent replies (no hang)."""
-
-    class _ScriptedWS:
-        def __init__(self):
-            self.sent = []
-
-        def send(self, msg):
-            self.sent.append(msg)
-
-        def __iter__(self):
-            return iter(
-                json.dumps(e)
-                for e in (
-                    {"type": "session.ready"},
-                    {"type": "transcript.user", "text": "what time is it"},
-                    {"type": "transcript.agent", "text": "it is noon", "interrupted": False},
-                    {"type": "reply.done"},
-                    # A trailing event the loop must never reach (it should have stopped).
-                    {"type": "transcript.user", "text": "SHOULD NOT BE SEEN"},
-                )
-            )
-
-        def close(self):
-            pass
-
-    renderer = FakeRenderer()
-    run_session(
-        "sk_live",
-        renderer=renderer,
-        player=FakePlayer(),
-        mic=[],  # capture thread waits for ready, then this empty source ends at once
-        config=AgentRunConfig(
-            voice="ivy",
-            system_prompt="x",
-            greeting="",
-            full_duplex=True,
-            exit_after_reply=True,
-        ),
-        connect=lambda url, **kwargs: _ScriptedWS(),
-    )
-    finals = [c for c in renderer.calls if c[0] == "user_final"]
-    assert ("user_final", "what time is it") in finals
-    assert ("user_final", "SHOULD NOT BE SEEN") not in finals  # stopped after the reply
-
-
-def test_run_session_ws_url_follows_active_environment() -> None:
-    # The Voice Agent socket must target the active environment's host, not a
-    # hardcoded production URL. Capture the URL connect() is handed, short-
-    # circuiting with a benign close once we've seen it.
-    from aai_cli import environments
-
-    seen: dict[str, str] = {}
-
-    def capture(url, **kwargs):
-        seen["url"] = url
-        raise _CloseError(1008)
-
-    for env_name, expected in (
-        ("sandbox000", "wss://agents.sandbox000.assemblyai-labs.com/v1/ws"),
-        ("production", "wss://agents.assemblyai.com/v1/ws"),
-    ):
-        environments.set_active(environments.get(env_name))
-        with pytest.raises(NotAuthenticated):
-            run_session(
-                "sk",
-                renderer=FakeRenderer(),
-                player=FakePlayer(),
-                mic=[],
-                config=AgentRunConfig(voice="ivy", system_prompt="x", greeting="hi"),
-                connect=capture,
-            )
-        assert seen["url"] == expected
-
-
-def test_run_session_defaults_to_websockets_sync_connect(monkeypatch):
-    # With no injected connect, run_session lazily imports websockets' sync client
-    # (pins the `connect is None` default-import branch). Patch the import target so
-    # no real socket is opened; an empty message stream ends the loop immediately.
-    class _CleanWS:
-        def send(self, _msg):
-            pass
-
-        def __iter__(self):
-            return iter(())
-
-        def close(self):
-            pass
-
-    monkeypatch.setattr("websockets.sync.client.connect", lambda url, **kwargs: _CleanWS())
-    run_session(
-        "sk_live",
-        renderer=FakeRenderer(),
-        player=FakePlayer(),
-        mic=[],
-        config=AgentRunConfig(voice="ivy", system_prompt="x", greeting="hi"),
-    )
diff --git a/tests/test_agent_session_run.py b/tests/test_agent_session_run.py
new file mode 100644
index 00000000..2889064a
--- /dev/null
+++ b/tests/test_agent_session_run.py
@@ -0,0 +1,369 @@
+"""run_session-level tests: connection lifecycle, error classification, env URLs.
+
+Dispatch/gate-level VoiceAgentSession tests live in test_agent_session.py.
+"""
+
+import json
+import logging
+import types
+
+import pytest
+
+from aai_cli.agent.session import _WEBSOCKETS_LOGGERS, AgentRunConfig, run_session
+from aai_cli.errors import APIError, CLIError, NotAuthenticated
+
+
+class FakeRenderer:
+    def __init__(self):
+        self.calls = []
+
+    def connected(self):
+        self.calls.append(("connected",))
+
+    def user_partial(self, text):
+        self.calls.append(("user_partial", text))
+
+    def user_final(self, text):
+        self.calls.append(("user_final", text))
+
+    def reply_started(self):
+        self.calls.append(("reply_started",))
+
+    def agent_transcript(self, text, *, interrupted):
+        self.calls.append(("agent_transcript", text, interrupted))
+
+    def reply_done(self, *, interrupted):
+        self.calls.append(("reply_done", interrupted))
+
+
+class FakePlayer:
+    def __init__(self):
+        self.enqueued = []
+        self.flushed = 0
+        self.started = False
+        self.closed = False
+
+    def enqueue(self, pcm):
+        self.enqueued.append(pcm)
+
+    def flush(self):
+        self.flushed += 1
+
+    def start(self):
+        self.started = True
+
+    def close(self):
+        self.closed = True
+
+
+class _RecordingWS:
+    def __init__(self, fail_on_send=False):
+        self.sent = []
+        self.fail_on_send = fail_on_send
+
+    def send(self, msg):
+        if self.fail_on_send:
+            raise RuntimeError("socket closed")
+        self.sent.append(msg)
+
+
+class _CloseError(Exception):
+    """Mimics websockets.ConnectionClosed carrying a structured close code."""
+
+    def __init__(self, code):
+        super().__init__(f"received {code} (policy violation)")
+        self.code = code
+
+
+def test_run_session_connect_auth_failure_raises_not_authenticated():
+    def bad_connect(url, **kwargs):
+        raise _CloseError(1008)  # Voice Agent rejects a bad key with close 1008
+
+    with pytest.raises(NotAuthenticated):
+        run_session(
+            "sk_bad",
+            renderer=FakeRenderer(),
+            player=FakePlayer(),
+            mic=[],
+            config=AgentRunConfig(voice="ivy", system_prompt="x", greeting="hi"),
+            connect=bad_connect,
+        )
+
+
+def test_run_session_mid_stream_1008_raises_not_authenticated():
+    class FakeWS:
+        def send(self, _msg):
+            pass
+
+        def __iter__(self):
+            raise _CloseError(1008)
+
+        def close(self):
+            pass
+
+    player = FakePlayer()
+    with pytest.raises(NotAuthenticated):
+        run_session(
+            "sk_bad",
+            renderer=FakeRenderer(),
+            player=player,
+            mic=[],
+            config=AgentRunConfig(voice="ivy", system_prompt="x", greeting="hi"),
+            connect=lambda url, **kwargs: FakeWS(),
+        )
+    assert player.closed is True  # speaker stream still torn down
+
+
+def test_run_session_surfaces_mic_open_failure_from_capture_thread():
+    import threading as _threading
+
+    from aai_cli.errors import CLIError
+
+    class _BoomMic:
+        def __iter__(self):
+            raise CLIError("no microphone", error_type="mic_error", exit_code=1)
+
+    class _BlockingWS:
+        def __init__(self):
+            self._closed = _threading.Event()
+
+        def send(self, _msg):
+            pass
+
+        def __iter__(self):
+            self._closed.wait(timeout=2)  # unblocked when the capture thread closes us
+            return iter(())
+
+        def close(self):
+            self._closed.set()
+
+    with pytest.raises(CLIError) as exc:
+        run_session(
+            "sk_live",
+            renderer=FakeRenderer(),
+            player=FakePlayer(),
+            mic=_BoomMic(),
+            config=AgentRunConfig(voice="ivy", system_prompt="x", greeting="hi"),
+            connect=lambda url, **kwargs: _BlockingWS(),
+        )
+    assert exc.value.exit_code == 1  # the real mic failure reaches the user, not a hang
+
+
+def test_run_session_does_not_close_player_that_failed_to_open():
+    # If opening the speaker stream raises, the cleanup must NOT call close() on a
+    # player that never started (pins the player_started=False initializer).
+    class _FailingPlayer(FakePlayer):
+        def start(self):
+            raise CLIError("speaker busy", error_type="audio_output_error", exit_code=1)
+
+    player = _FailingPlayer()
+    with pytest.raises(CLIError):
+        run_session(
+            "sk",
+            renderer=FakeRenderer(),
+            player=player,
+            mic=[],
+            config=AgentRunConfig(voice="ivy", system_prompt="x", greeting="hi"),
+            connect=lambda url, **kwargs: _RecordingWS(),
+        )
+    assert player.closed is False  # never opened, so never closed
+
+
+class _HandshakeRejected(Exception):
+    """Mimics websockets' InvalidStatus: a structured HTTP status on ``.response``."""
+
+    def __init__(self, status):
+        super().__init__(f"server rejected WebSocket connection: HTTP {status}")
+        self.response = types.SimpleNamespace(status_code=status)
+
+
+def _run_with_connect(connect):
+    run_session(
+        "sk",
+        renderer=FakeRenderer(),
+        player=FakePlayer(),
+        mic=[],
+        config=AgentRunConfig(voice="ivy", system_prompt="x", greeting="hi"),
+        connect=connect,
+    )
+
+
+def test_run_session_handshake_403_is_api_error_like_stream():
+    # Harmonized with `stream`: a plain handshake 403 is an API error (exit 1), not
+    # "Your API key was rejected" — 403 also covers non-credential blocks.
+    def reject(url, **kwargs):
+        raise _HandshakeRejected(403)
+
+    with pytest.raises(APIError) as exc:
+        _run_with_connect(reject)
+    assert exc.value.error_type == "api_error"
+    assert exc.value.exit_code == 1
+    assert "HTTP 403" in exc.value.message
+
+
+def test_run_session_handshake_401_is_still_auth_failure():
+    # A genuinely auth-shaped rejection (HTTP 401) keeps the rejected-key path.
+    def reject(url, **kwargs):
+        raise _HandshakeRejected(401)
+
+    with pytest.raises(NotAuthenticated) as exc:
+        _run_with_connect(reject)
+    assert exc.value.exit_code == 4
+
+
+def test_run_session_auth_worded_failure_is_still_auth_failure():
+    # The text heuristic ("unauthorized" etc.) keeps working for real bad keys.
+    def reject(url, **kwargs):
+        raise RuntimeError("connection rejected: Unauthorized")
+
+    with pytest.raises(NotAuthenticated):
+        _run_with_connect(reject)
+
+
+class _CleanWS:
+    def send(self, _msg):
+        pass
+
+    def __iter__(self):
+        return iter(())
+
+    def close(self):
+        pass
+
+
+def test_run_session_silences_websockets_loggers():
+    # websockets' sync reader thread logs teardown errors (EOFError tracebacks) via
+    # its own loggers; run_session must mute them so they never hit the user's stderr.
+    loggers = [logging.getLogger(name) for name in _WEBSOCKETS_LOGGERS]
+    previous = [lg.level for lg in loggers]
+    try:
+        for lg in loggers:
+            lg.setLevel(logging.NOTSET)
+        _run_with_connect(lambda url, **kwargs: _CleanWS())
+        for lg in loggers:
+            assert lg.level == logging.CRITICAL
+            assert not lg.isEnabledFor(logging.ERROR)  # an ERROR record is dropped
+    finally:
+        for lg, level in zip(loggers, previous, strict=True):
+            lg.setLevel(level)
+
+
+def test_websockets_logger_names_cover_the_sync_client():
+    # The sync client logs through "websockets.client"; pin that the silenced set
+    # covers it (and the parent, for any future child loggers).
+    assert "websockets.client" in _WEBSOCKETS_LOGGERS
+    assert "websockets" in _WEBSOCKETS_LOGGERS
+
+
+def test_run_session_non_auth_failure_stays_api_error():
+    def boom(url, **kwargs):
+        raise RuntimeError("network unreachable")
+
+    with pytest.raises(APIError):
+        run_session(
+            "sk",
+            renderer=FakeRenderer(),
+            player=FakePlayer(),
+            mic=[],
+            config=AgentRunConfig(voice="ivy", system_prompt="x", greeting="hi"),
+            connect=boom,
+        )
+
+
+def test_run_session_file_driven_stops_after_reply():
+    """A file-driven session ends on its own after the agent replies (no hang)."""
+
+    class _ScriptedWS:
+        def __init__(self):
+            self.sent = []
+
+        def send(self, msg):
+            self.sent.append(msg)
+
+        def __iter__(self):
+            return iter(
+                json.dumps(e)
+                for e in (
+                    {"type": "session.ready"},
+                    {"type": "transcript.user", "text": "what time is it"},
+                    {"type": "transcript.agent", "text": "it is noon", "interrupted": False},
+                    {"type": "reply.done"},
+                    # A trailing event the loop must never reach (it should have stopped).
+                    {"type": "transcript.user", "text": "SHOULD NOT BE SEEN"},
+                )
+            )
+
+        def close(self):
+            pass
+
+    renderer = FakeRenderer()
+    run_session(
+        "sk_live",
+        renderer=renderer,
+        player=FakePlayer(),
+        mic=[],  # capture thread waits for ready, then this empty source ends at once
+        config=AgentRunConfig(
+            voice="ivy",
+            system_prompt="x",
+            greeting="",
+            full_duplex=True,
+            exit_after_reply=True,
+        ),
+        connect=lambda url, **kwargs: _ScriptedWS(),
+    )
+    finals = [c for c in renderer.calls if c[0] == "user_final"]
+    assert ("user_final", "what time is it") in finals
+    assert ("user_final", "SHOULD NOT BE SEEN") not in finals  # stopped after the reply
+
+
+def test_run_session_ws_url_follows_active_environment() -> None:
+    # The Voice Agent socket must target the active environment's host, not a
+    # hardcoded production URL. Capture the URL connect() is handed, short-
+    # circuiting with a benign close once we've seen it.
+    from aai_cli import environments
+
+    seen: dict[str, str] = {}
+
+    def capture(url, **kwargs):
+        seen["url"] = url
+        raise _CloseError(1008)
+
+    for env_name, expected in (
+        ("sandbox000", "wss://agents.sandbox000.assemblyai-labs.com/v1/ws"),
+        ("production", "wss://agents.assemblyai.com/v1/ws"),
+    ):
+        environments.set_active(environments.get(env_name))
+        with pytest.raises(NotAuthenticated):
+            run_session(
+                "sk",
+                renderer=FakeRenderer(),
+                player=FakePlayer(),
+                mic=[],
+                config=AgentRunConfig(voice="ivy", system_prompt="x", greeting="hi"),
+                connect=capture,
+            )
+        assert seen["url"] == expected
+
+
+def test_run_session_defaults_to_websockets_sync_connect(monkeypatch):
+    # With no injected connect, run_session lazily imports websockets' sync client
+    # (pins the `connect is None` default-import branch). Patch the import target so
+    # no real socket is opened; an empty message stream ends the loop immediately.
+    class _DefaultCleanWS:
+        def send(self, _msg):
+            pass
+
+        def __iter__(self):
+            return iter(())
+
+        def close(self):
+            pass
+
+    monkeypatch.setattr("websockets.sync.client.connect", lambda url, **kwargs: _DefaultCleanWS())
+    run_session(
+        "sk_live",
+        renderer=FakeRenderer(),
+        player=FakePlayer(),
+        mic=[],
+        config=AgentRunConfig(voice="ivy", system_prompt="x", greeting="hi"),
+    )
diff --git a/tests/test_client.py b/tests/test_client.py
index 434701f2..18f7b3e7 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -1,4 +1,7 @@
-import types as _types
+"""Non-streaming client wrapper tests (transcribe, list, validate, fields).
+
+client.stream_audio tests live in test_client_streaming.py.
+"""
 
 import assemblyai as aai
 import pytest
@@ -7,16 +10,6 @@
 from aai_cli.errors import APIError
 
 
-def _stream_params(sample_rate: int = 16000):
-    from assemblyai.streaming.v3 import SpeechModel, StreamingParameters
-
-    return StreamingParameters(
-        sample_rate=sample_rate,
-        format_turns=True,
-        speech_model=SpeechModel.universal_streaming_multilingual,
-    )
-
-
 def test_validate_key_true_on_success(mocker):
     T = mocker.patch.object(client.aai, "Transcriber", autospec=True)
     T.return_value.list_transcripts.return_value = mocker.MagicMock()
@@ -285,183 +278,6 @@ def test_transcribe_auth_error_becomes_not_authenticated(mocker):
         client.transcribe("sk_bad", "audio.mp3", config=aai.TranscriptionConfig())
 
 
-class _FakeStreamingClient:
-    last: "_FakeStreamingClient | None" = None
-
-    def __init__(self, options):
-        self.handlers = {}
-        self.connected = False
-        self.disconnected = False
-        _FakeStreamingClient.last = self
-
-    def on(self, event, handler):
-        self.handlers[event] = handler
-
-    def connect(self, params):
-        self.connected = True
-        self.params = params
-
-    def stream(self, source):
-        from assemblyai.streaming.v3 import StreamingEvents
-
-        self.handlers[StreamingEvents.Turn](
-            self, _types.SimpleNamespace(transcript="hi", end_of_turn=True)
-        )
-
-    def disconnect(self, terminate=False):
-        self.disconnected = True
-        self.terminate = terminate
-
-
-def test_stream_audio_wires_handlers_and_streams(monkeypatch):
-    monkeypatch.setattr(client, "StreamingClient", _FakeStreamingClient)
-    turns = []
-    begins = []
-    client.stream_audio(
-        "sk",
-        [b"\x00"],
-        params=_stream_params(),
-        on_begin=lambda e: begins.append(e),
-        on_turn=lambda e: turns.append(e.transcript),
-    )
-    assert turns == ["hi"]
-    assert begins == []
-    last = _FakeStreamingClient.last
-    assert last is not None
-    assert last.connected
-    assert last.disconnected  # disconnected in finally
-    assert last.params.sample_rate == 16000
-    assert last.params.format_turns is True
-    assert last.terminate is True  # graceful flush requested
-
-
-def test_stream_audio_registers_begin_handler_when_provided(monkeypatch):
-    # A provided on_begin must actually be wired to the Begin event (pins
-    # `if on_begin is not None`); inverting it would leave Begin unhandled.
-    class BeginClient(_FakeStreamingClient):
-        def stream(self, source):
-            from assemblyai.streaming.v3 import StreamingEvents
-
-            self.handlers[StreamingEvents.Begin](self, _types.SimpleNamespace(id="sess_1"))
-
-    monkeypatch.setattr(client, "StreamingClient", BeginClient)
-    begins = []
-    client.stream_audio(
-        "sk",
-        [b"\x00"],
-        params=_stream_params(),
-        on_begin=lambda e: begins.append(e.id),
-    )
-    assert begins == ["sess_1"]
-
-
-def test_stream_audio_raises_on_error_event(monkeypatch):
-    class ErrClient(_FakeStreamingClient):
-        def stream(self, source):
-            from assemblyai.streaming.v3 import StreamingEvents
-
-            self.handlers[StreamingEvents.Error](self, "boom")
-
-    monkeypatch.setattr(client, "StreamingClient", ErrClient)
-    with pytest.raises(APIError):
-        client.stream_audio("sk", [b"\x00"], params=_stream_params())
-
-
-def test_stream_audio_forwards_termination(monkeypatch):
-    class TermClient(_FakeStreamingClient):
-        def stream(self, source):
-            from assemblyai.streaming.v3 import StreamingEvents
-
-            self.handlers[StreamingEvents.Termination](
-                self, _types.SimpleNamespace(audio_duration_seconds=3.0)
-            )
-
-    monkeypatch.setattr(client, "StreamingClient", TermClient)
-    seen = []
-    client.stream_audio(
-        "sk",
-        [b"\x00"],
-        params=_stream_params(),
-        on_termination=lambda e: seen.append(e.audio_duration_seconds),
-    )
-    assert seen == [3.0]
-
-
-def test_stream_audio_connect_error_becomes_apierror(monkeypatch):
-    class ConnectFails(_FakeStreamingClient):
-        def connect(self, params):
-            raise RuntimeError("handshake refused")
-
-    monkeypatch.setattr(client, "StreamingClient", ConnectFails)
-    with pytest.raises(APIError):
-        client.stream_audio("sk", [b"\x00"], params=_stream_params())
-
-
-def test_stream_audio_connect_auth_error_becomes_not_authenticated(monkeypatch):
-    from aai_cli.errors import NotAuthenticated
-
-    class ConnectUnauthorized(_FakeStreamingClient):
-        def connect(self, params):
-            raise RuntimeError("401 Unauthorized: bad token")
-
-    monkeypatch.setattr(client, "StreamingClient", ConnectUnauthorized)
-    with pytest.raises(NotAuthenticated):
-        client.stream_audio("sk_bad", [b"\x00"], params=_stream_params())
-
-
-def test_stream_audio_auth_error_event_becomes_not_authenticated(monkeypatch):
-    from aai_cli.errors import NotAuthenticated
-
-    class AuthErrClient(_FakeStreamingClient):
-        def stream(self, source):
-            from assemblyai.streaming.v3 import StreamingEvents
-
-            self.handlers[StreamingEvents.Error](self, "Unauthorized: invalid api key")
-
-    monkeypatch.setattr(client, "StreamingClient", AuthErrClient)
-    with pytest.raises(NotAuthenticated):
-        client.stream_audio("sk_bad", [b"\x00"], params=_stream_params())
-
-
-def test_stream_audio_mid_stream_error_becomes_apierror(monkeypatch):
-    class StreamFails(_FakeStreamingClient):
-        def stream(self, source):
-            raise RuntimeError("socket dropped")
-
-    monkeypatch.setattr(client, "StreamingClient", StreamFails)
-    with pytest.raises(APIError):
-        client.stream_audio("sk", [b"\x00"], params=_stream_params())
-    last = StreamFails.last
-    assert last is not None
-    assert last.disconnected  # still disconnected in finally
-
-
-def test_stream_audio_swallows_broken_pipe_in_callback(monkeypatch):
-    # A closed downstream pipe makes a turn write raise BrokenPipeError on the SDK's
-    # reader thread; the guard must swallow it instead of dumping a thread traceback.
-    monkeypatch.setattr(client, "StreamingClient", _FakeStreamingClient)
-    # never touch the real stdout fd during the test
-    monkeypatch.setattr("aai_cli.stdio.silence_stdout", lambda: None)
-
-    def on_turn(_event):
-        raise BrokenPipeError
-
-    client.stream_audio("sk", [b"\x00"], params=_stream_params(), on_turn=on_turn)  # no raise
-
-
-def test_stream_audio_passes_through_clierror(monkeypatch):
-    from aai_cli.errors import CLIError
-
-    class StreamRaisesCLIError(_FakeStreamingClient):
-        def stream(self, source):
-            raise CLIError("boom", error_type="x", exit_code=2)
-
-    monkeypatch.setattr(client, "StreamingClient", StreamRaisesCLIError)
-    with pytest.raises(CLIError) as exc:
-        client.stream_audio("sk", [b"\x00"], params=_stream_params())
-    assert exc.value.exit_code == 2  # not rewrapped into APIError
-
-
 def test_transcribe_passes_prebuilt_config(monkeypatch, mocker):
     import assemblyai as aai
 
@@ -482,60 +298,3 @@ def transcribe(self, audio, config=None):
     client.transcribe("sk", "audio.mp3", config=cfg)
     assert captured["audio"] == "audio.mp3"
     assert captured["config"] is cfg
-
-
-def test_stream_audio_accepts_params(monkeypatch):
-    from assemblyai.streaming.v3 import SpeechModel, StreamingParameters
-
-    from aai_cli import client
-
-    captured = {}
-
-    class FakeSC:
-        def __init__(self, *a, **k):
-            pass
-
-        def on(self, *a, **k):
-            pass
-
-        def connect(self, params):
-            captured["params"] = params
-
-        def stream(self, source):
-            pass
-
-        def disconnect(self, terminate=True):
-            pass
-
-    monkeypatch.setattr("aai_cli.client.StreamingClient", FakeSC)
-    params = StreamingParameters(
-        sample_rate=16000, speech_model=SpeechModel.universal_streaming_multilingual
-    )
-    client.stream_audio("sk", iter([b""]), params=params)
-    assert captured["params"] is params
-
-
-def test_stream_audio_flushes_termination_on_disconnect(monkeypatch):
-    class DeferredTermClient(_FakeStreamingClient):
-        def stream(self, source):
-            pass  # nothing dispatched during stream; the server flushes on terminate
-
-        def disconnect(self, terminate=False):
-            self.disconnected = True
-            self.terminate = terminate
-            if terminate:
-                from assemblyai.streaming.v3 import StreamingEvents
-
-                self.handlers[StreamingEvents.Termination](
-                    self, _types.SimpleNamespace(audio_duration_seconds=5.0)
-                )
-
-    monkeypatch.setattr(client, "StreamingClient", DeferredTermClient)
-    seen = []
-    client.stream_audio(
-        "sk",
-        [b"\x00"],
-        params=_stream_params(),
-        on_termination=lambda e: seen.append(e.audio_duration_seconds),
-    )
-    assert seen == [5.0]
diff --git a/tests/test_client_streaming.py b/tests/test_client_streaming.py
new file mode 100644
index 00000000..fbee8635
--- /dev/null
+++ b/tests/test_client_streaming.py
@@ -0,0 +1,256 @@
+"""client.stream_audio tests: handler wiring, error classification, teardown.
+
+Non-streaming client wrapper tests (transcribe, list_transcripts, validate_key,
+select_transcript_field, get_transcript) live in test_client.py.
+"""
+
+import types as _types
+
+import pytest
+
+from aai_cli import client
+from aai_cli.errors import APIError
+
+
+def _stream_params(sample_rate: int = 16000):
+    from assemblyai.streaming.v3 import SpeechModel, StreamingParameters
+
+    return StreamingParameters(
+        sample_rate=sample_rate,
+        format_turns=True,
+        speech_model=SpeechModel.universal_streaming_multilingual,
+    )
+
+
+class _FakeStreamingClient:
+    last: "_FakeStreamingClient | None" = None
+
+    def __init__(self, options):
+        self.handlers = {}
+        self.connected = False
+        self.disconnected = False
+        _FakeStreamingClient.last = self
+
+    def on(self, event, handler):
+        self.handlers[event] = handler
+
+    def connect(self, params):
+        self.connected = True
+        self.params = params
+
+    def stream(self, source):
+        from assemblyai.streaming.v3 import StreamingEvents
+
+        self.handlers[StreamingEvents.Turn](
+            self, _types.SimpleNamespace(transcript="hi", end_of_turn=True)
+        )
+
+    def disconnect(self, terminate=False):
+        self.disconnected = True
+        self.terminate = terminate
+
+
+def test_stream_audio_wires_handlers_and_streams(monkeypatch):
+    monkeypatch.setattr(client, "StreamingClient", _FakeStreamingClient)
+    turns = []
+    begins = []
+    client.stream_audio(
+        "sk",
+        [b"\x00"],
+        params=_stream_params(),
+        on_begin=lambda e: begins.append(e),
+        on_turn=lambda e: turns.append(e.transcript),
+    )
+    assert turns == ["hi"]
+    assert begins == []
+    last = _FakeStreamingClient.last
+    assert last is not None
+    assert last.connected
+    assert last.disconnected  # disconnected in finally
+    assert last.params.sample_rate == 16000
+    assert last.params.format_turns is True
+    assert last.terminate is True  # graceful flush requested
+
+
+def test_stream_audio_registers_begin_handler_when_provided(monkeypatch):
+    # A provided on_begin must actually be wired to the Begin event (pins
+    # `if on_begin is not None`); inverting it would leave Begin unhandled.
+    class BeginClient(_FakeStreamingClient):
+        def stream(self, source):
+            from assemblyai.streaming.v3 import StreamingEvents
+
+            self.handlers[StreamingEvents.Begin](self, _types.SimpleNamespace(id="sess_1"))
+
+    monkeypatch.setattr(client, "StreamingClient", BeginClient)
+    begins = []
+    client.stream_audio(
+        "sk",
+        [b"\x00"],
+        params=_stream_params(),
+        on_begin=lambda e: begins.append(e.id),
+    )
+    assert begins == ["sess_1"]
+
+
+def test_stream_audio_raises_on_error_event(monkeypatch):
+    class ErrClient(_FakeStreamingClient):
+        def stream(self, source):
+            from assemblyai.streaming.v3 import StreamingEvents
+
+            self.handlers[StreamingEvents.Error](self, "boom")
+
+    monkeypatch.setattr(client, "StreamingClient", ErrClient)
+    with pytest.raises(APIError):
+        client.stream_audio("sk", [b"\x00"], params=_stream_params())
+
+
+def test_stream_audio_forwards_termination(monkeypatch):
+    class TermClient(_FakeStreamingClient):
+        def stream(self, source):
+            from assemblyai.streaming.v3 import StreamingEvents
+
+            self.handlers[StreamingEvents.Termination](
+                self, _types.SimpleNamespace(audio_duration_seconds=3.0)
+            )
+
+    monkeypatch.setattr(client, "StreamingClient", TermClient)
+    seen = []
+    client.stream_audio(
+        "sk",
+        [b"\x00"],
+        params=_stream_params(),
+        on_termination=lambda e: seen.append(e.audio_duration_seconds),
+    )
+    assert seen == [3.0]
+
+
+def test_stream_audio_connect_error_becomes_apierror(monkeypatch):
+    class ConnectFails(_FakeStreamingClient):
+        def connect(self, params):
+            raise RuntimeError("handshake refused")
+
+    monkeypatch.setattr(client, "StreamingClient", ConnectFails)
+    with pytest.raises(APIError):
+        client.stream_audio("sk", [b"\x00"], params=_stream_params())
+
+
+def test_stream_audio_connect_auth_error_becomes_not_authenticated(monkeypatch):
+    from aai_cli.errors import NotAuthenticated
+
+    class ConnectUnauthorized(_FakeStreamingClient):
+        def connect(self, params):
+            raise RuntimeError("401 Unauthorized: bad token")
+
+    monkeypatch.setattr(client, "StreamingClient", ConnectUnauthorized)
+    with pytest.raises(NotAuthenticated):
+        client.stream_audio("sk_bad", [b"\x00"], params=_stream_params())
+
+
+def test_stream_audio_auth_error_event_becomes_not_authenticated(monkeypatch):
+    from aai_cli.errors import NotAuthenticated
+
+    class AuthErrClient(_FakeStreamingClient):
+        def stream(self, source):
+            from assemblyai.streaming.v3 import StreamingEvents
+
+            self.handlers[StreamingEvents.Error](self, "Unauthorized: invalid api key")
+
+    monkeypatch.setattr(client, "StreamingClient", AuthErrClient)
+    with pytest.raises(NotAuthenticated):
+        client.stream_audio("sk_bad", [b"\x00"], params=_stream_params())
+
+
+def test_stream_audio_mid_stream_error_becomes_apierror(monkeypatch):
+    class StreamFails(_FakeStreamingClient):
+        def stream(self, source):
+            raise RuntimeError("socket dropped")
+
+    monkeypatch.setattr(client, "StreamingClient", StreamFails)
+    with pytest.raises(APIError):
+        client.stream_audio("sk", [b"\x00"], params=_stream_params())
+    last = StreamFails.last
+    assert last is not None
+    assert last.disconnected  # still disconnected in finally
+
+
+def test_stream_audio_swallows_broken_pipe_in_callback(monkeypatch):
+    # A closed downstream pipe makes a turn write raise BrokenPipeError on the SDK's
+    # reader thread; the guard must swallow it instead of dumping a thread traceback.
+    monkeypatch.setattr(client, "StreamingClient", _FakeStreamingClient)
+    # never touch the real stdout fd during the test
+    monkeypatch.setattr("aai_cli.stdio.silence_stdout", lambda: None)
+
+    def on_turn(_event):
+        raise BrokenPipeError
+
+    client.stream_audio("sk", [b"\x00"], params=_stream_params(), on_turn=on_turn)  # no raise
+
+
+def test_stream_audio_passes_through_clierror(monkeypatch):
+    from aai_cli.errors import CLIError
+
+    class StreamRaisesCLIError(_FakeStreamingClient):
+        def stream(self, source):
+            raise CLIError("boom", error_type="x", exit_code=2)
+
+    monkeypatch.setattr(client, "StreamingClient", StreamRaisesCLIError)
+    with pytest.raises(CLIError) as exc:
+        client.stream_audio("sk", [b"\x00"], params=_stream_params())
+    assert exc.value.exit_code == 2  # not rewrapped into APIError
+
+
+def test_stream_audio_accepts_params(monkeypatch):
+    from assemblyai.streaming.v3 import SpeechModel, StreamingParameters
+
+    from aai_cli import client
+
+    captured = {}
+
+    class FakeSC:
+        def __init__(self, *a, **k):
+            pass
+
+        def on(self, *a, **k):
+            pass
+
+        def connect(self, params):
+            captured["params"] = params
+
+        def stream(self, source):
+            pass
+
+        def disconnect(self, terminate=True):
+            pass
+
+    monkeypatch.setattr("aai_cli.client.StreamingClient", FakeSC)
+    params = StreamingParameters(
+        sample_rate=16000, speech_model=SpeechModel.universal_streaming_multilingual
+    )
+    client.stream_audio("sk", iter([b""]), params=params)
+    assert captured["params"] is params
+
+
+def test_stream_audio_flushes_termination_on_disconnect(monkeypatch):
+    class DeferredTermClient(_FakeStreamingClient):
+        def stream(self, source):
+            pass  # nothing dispatched during stream; the server flushes on terminate
+
+        def disconnect(self, terminate=False):
+            self.disconnected = True
+            self.terminate = terminate
+            if terminate:
+                from assemblyai.streaming.v3 import StreamingEvents
+
+                self.handlers[StreamingEvents.Termination](
+                    self, _types.SimpleNamespace(audio_duration_seconds=5.0)
+                )
+
+    monkeypatch.setattr(client, "StreamingClient", DeferredTermClient)
+    seen = []
+    client.stream_audio(
+        "sk",
+        [b"\x00"],
+        params=_stream_params(),
+        on_termination=lambda e: seen.append(e.audio_duration_seconds),
+    )
+    assert seen == [5.0]
diff --git a/tests/test_code_gen.py b/tests/test_code_gen.py
index 94b0a229..dc4b69bd 100644
--- a/tests/test_code_gen.py
+++ b/tests/test_code_gen.py
@@ -1,17 +1,17 @@
+"""Example-based code_gen tests: serializers, snippets, rendered scripts.
+
+Hypothesis fuzz/property tests live in test_code_gen_fuzz.py.
+"""
+
 from __future__ import annotations
 
 from typing import ClassVar
 
 import pytest
-from hypothesis import given, settings
-from hypothesis import strategies as st
 
 from aai_cli.code_gen import serialize
 from aai_cli.code_gen.transcribe import render as render_transcribe_code
 
-settings.register_profile("codegen", max_examples=150)
-settings.load_profile("codegen")
-
 
 def test_py_literal_basic_types():
     assert serialize.py_literal("en_us") == "'en_us'"
@@ -40,71 +40,6 @@ def test_config_kwarg_lines_empty_dict():
     assert serialize.config_kwarg_lines({}, indent=4) == []
 
 
-# ---------------------------------------------------------------------------
-# Shared, domain-driven strategy: build merged-kwargs dicts from the AUTHORITATIVE
-# field tables in config_builder. Used by every validity test below. Because the
-# field list comes from the coerce tables, any field added later is fuzzed for free.
-# ---------------------------------------------------------------------------
-from assemblyai.streaming.v3 import SpeechModel  # noqa: E402
-
-from aai_cli import config_builder  # noqa: E402
-
-# JSON-ish values that repr()->eval() round-trips (string keys, no NaN/inf).
-_json = st.recursive(
-    st.none()
-    | st.booleans()
-    | st.integers()
-    | st.floats(allow_nan=False, allow_infinity=False)
-    | st.text(st.characters(blacklist_categories=["Cs"]), max_size=8),
-    lambda children: (
-        st.lists(children, max_size=3)
-        | st.dictionaries(
-            st.text(st.characters(min_codepoint=97, max_codepoint=122), min_size=1, max_size=5),
-            children,
-            max_size=3,
-        )
-    ),
-    max_leaves=5,
-)
-
-_BY_KIND = {
-    "str": st.text(st.characters(blacklist_categories=["Cs"]), max_size=16),
-    "bool": st.booleans(),
-    "int": st.integers(),
-    "float": st.floats(allow_nan=False, allow_infinity=False),
-    "list": st.lists(st.text(st.characters(blacklist_categories=["Cs"]), max_size=8), max_size=4),
-    "json": _json,
-}
-
-
-def _value_for(field: str, kind: str):
-    # speech_model in the streaming table may be a SpeechModel enum in real merged dicts.
-    if field == "speech_model":
-        return st.sampled_from(list(SpeechModel)) | _BY_KIND["str"]
-    return _BY_KIND[kind]
-
-
-def merged_strategy(coerce_table: dict[str, str]) -> st.SearchStrategy:
-    """A hypothesis strategy yielding merged-kwargs dicts over the FULL field table."""
-    return st.fixed_dictionaries(
-        {}, optional={f: _value_for(f, kind) for f, kind in coerce_table.items()}
-    )
-
-
-@given(merged_strategy(config_builder.TRANSCRIBE_COERCE))
-def test_serializer_round_trips_full_transcribe_domain(merged):
-    lines = serialize.config_kwarg_lines(merged, indent=0)
-    src = "dict(\n" + "\n".join(lines) + "\n)"
-    assert eval(src, {"SpeechModel": SpeechModel}) == merged  # noqa: S307
-
-
-@given(merged_strategy(config_builder.STREAM_COERCE))
-def test_serializer_round_trips_full_stream_domain(merged):
-    lines = serialize.config_kwarg_lines(merged, indent=0)
-    src = "dict(\n" + "\n".join(lines) + "\n)"
-    assert eval(src, {"SpeechModel": SpeechModel}) == merged  # noqa: S307
-
-
 from aai_cli.code_gen import snippets  # noqa: E402
 
 
@@ -217,7 +152,8 @@ def test_agent_render_escapes_quotes_in_prompt():
 
 
 # ---------------------------------------------------------------------------
-# Exhaustive validity & fidelity harness (Task 10)
+# Validity & fidelity checks (the exhaustive hypothesis harness — Task 10 —
+# lives in test_code_gen_fuzz.py).
 # ---------------------------------------------------------------------------
 
 
@@ -226,40 +162,6 @@ def _compiles(code: str) -> None:
     compile(code, "<generated>", "exec")
 
 
-@given(merged_strategy(config_builder.TRANSCRIBE_COERCE))
-def test_fuzz_transcribe_always_compiles(merged):
-    _compiles(code_gen.transcribe(merged, source="audio.mp3"))
-
-
-@given(merged_strategy(config_builder.STREAM_COERCE))
-def test_fuzz_stream_always_compiles(merged):
-    _compiles(code_gen.stream(merged))
-
-
-@given(
-    voice=st.text(st.characters(blacklist_categories=["Cs"]), max_size=20),
-    system_prompt=st.text(st.characters(blacklist_categories=["Cs"]), max_size=200),
-    greeting=st.text(st.characters(blacklist_categories=["Cs"]), max_size=200),
-)
-def test_fuzz_agent_always_compiles(voice, system_prompt, greeting):
-    # Arbitrary text (quotes, newlines, backslashes, unicode) must never break the script.
-    _compiles(code_gen.agent(voice=voice, system_prompt=system_prompt, greeting=greeting))
-
-
-@given(merged_strategy(config_builder.TRANSCRIBE_COERCE))
-def test_fuzz_transcribe_config_round_trips_in_generated_code(merged):
-    # The TranscriptionConfig(...) the generated code builds must equal the merged dict.
-    code = code_gen.transcribe(merged, source="audio.mp3")
-    if not merged:
-        assert "TranscriptionConfig(" not in code
-        return
-    # repr() escapes newlines, so no kwarg line contains a literal "\n)"; the first
-    # "\n)" after the constructor opens is always the config block's closer.
-    inner = code.split("aai.TranscriptionConfig(\n", 1)[1].split("\n)", 1)[0]
-    rebuilt = eval("dict(\n" + inner + "\n)", {"SpeechModel": SpeechModel})  # noqa: S307
-    assert rebuilt == merged
-
-
 class _Stub:
     """A transcript-shaped stub exposing every attribute the snippets read."""
 
@@ -296,12 +198,6 @@ def test_every_snippet_execs_against_a_realistic_transcript() -> None:
     exec(compile(body, "<snippets>", "exec"), {"transcript": _Stub()})  # noqa: S102
 
 
-@given(merged_strategy(config_builder.TRANSCRIBE_COERCE))
-def test_fuzz_result_handling_always_execs(merged):
-    body = snippets.result_handling(merged)
-    exec(compile(body, "<snippets>", "exec"), {"transcript": _Stub(), "getattr": getattr})  # noqa: S102
-
-
 @pytest.mark.parametrize(
     ("field", "fragment"),
     [
@@ -355,14 +251,6 @@ def test_transcribe_render_unknown_output_falls_back_to_text():
     assert "print(transcript.text)" in code
 
 
-@given(
-    merged=merged_strategy(config_builder.TRANSCRIBE_COERCE),
-    field=st.sampled_from(["text", "id", "status", "utterances", "srt", "json"]),
-)
-def test_fuzz_transcribe_output_fields_always_compile(merged, field):
-    _compiles(render_transcribe_code(merged, "audio.mp3", output=field))
-
-
 def test_transcribe_show_code_includes_llm_gateway_transform():
     code = code_gen.transcribe(
         {"speaker_labels": True},
diff --git a/tests/test_code_gen_fuzz.py b/tests/test_code_gen_fuzz.py
new file mode 100644
index 00000000..f6f1a8e2
--- /dev/null
+++ b/tests/test_code_gen_fuzz.py
@@ -0,0 +1,158 @@
+"""Hypothesis fuzz/property tests for code_gen: validity and round-trip fidelity.
+
+Example-based code_gen tests live in test_code_gen.py.
+"""
+
+from __future__ import annotations
+
+from typing import ClassVar
+
+from assemblyai.streaming.v3 import SpeechModel
+from hypothesis import given, settings
+from hypothesis import strategies as st
+
+from aai_cli import code_gen, config_builder
+from aai_cli.code_gen import serialize, snippets
+from aai_cli.code_gen.transcribe import render as render_transcribe_code
+
+settings.register_profile("codegen", max_examples=150)
+settings.load_profile("codegen")
+
+# ---------------------------------------------------------------------------
+# Shared, domain-driven strategy: build merged-kwargs dicts from the AUTHORITATIVE
+# field tables in config_builder. Used by every validity test below. Because the
+# field list comes from the coerce tables, any field added later is fuzzed for free.
+# ---------------------------------------------------------------------------
+
+# JSON-ish values that repr()->eval() round-trips (string keys, no NaN/inf).
+_json = st.recursive(
+    st.none()
+    | st.booleans()
+    | st.integers()
+    | st.floats(allow_nan=False, allow_infinity=False)
+    | st.text(st.characters(blacklist_categories=["Cs"]), max_size=8),
+    lambda children: (
+        st.lists(children, max_size=3)
+        | st.dictionaries(
+            st.text(st.characters(min_codepoint=97, max_codepoint=122), min_size=1, max_size=5),
+            children,
+            max_size=3,
+        )
+    ),
+    max_leaves=5,
+)
+
+_BY_KIND = {
+    "str": st.text(st.characters(blacklist_categories=["Cs"]), max_size=16),
+    "bool": st.booleans(),
+    "int": st.integers(),
+    "float": st.floats(allow_nan=False, allow_infinity=False),
+    "list": st.lists(st.text(st.characters(blacklist_categories=["Cs"]), max_size=8), max_size=4),
+    "json": _json,
+}
+
+
+def _value_for(field: str, kind: str):
+    # speech_model in the streaming table may be a SpeechModel enum in real merged dicts.
+    if field == "speech_model":
+        return st.sampled_from(list(SpeechModel)) | _BY_KIND["str"]
+    return _BY_KIND[kind]
+
+
+def merged_strategy(coerce_table: dict[str, str]) -> st.SearchStrategy:
+    """A hypothesis strategy yielding merged-kwargs dicts over the FULL field table."""
+    return st.fixed_dictionaries(
+        {}, optional={f: _value_for(f, kind) for f, kind in coerce_table.items()}
+    )
+
+
+@given(merged_strategy(config_builder.TRANSCRIBE_COERCE))
+def test_serializer_round_trips_full_transcribe_domain(merged):
+    lines = serialize.config_kwarg_lines(merged, indent=0)
+    src = "dict(\n" + "\n".join(lines) + "\n)"
+    assert eval(src, {"SpeechModel": SpeechModel}) == merged  # noqa: S307
+
+
+@given(merged_strategy(config_builder.STREAM_COERCE))
+def test_serializer_round_trips_full_stream_domain(merged):
+    lines = serialize.config_kwarg_lines(merged, indent=0)
+    src = "dict(\n" + "\n".join(lines) + "\n)"
+    assert eval(src, {"SpeechModel": SpeechModel}) == merged  # noqa: S307
+
+
+# ---------------------------------------------------------------------------
+# Exhaustive validity & fidelity harness (Task 10)
+# ---------------------------------------------------------------------------
+
+
+def _compiles(code: str) -> None:
+    # compile() is stricter than ast.parse() and is what `python file.py` runs through.
+    compile(code, "<generated>", "exec")
+
+
+@given(merged_strategy(config_builder.TRANSCRIBE_COERCE))
+def test_fuzz_transcribe_always_compiles(merged):
+    _compiles(code_gen.transcribe(merged, source="audio.mp3"))
+
+
+@given(merged_strategy(config_builder.STREAM_COERCE))
+def test_fuzz_stream_always_compiles(merged):
+    _compiles(code_gen.stream(merged))
+
+
+@given(
+    voice=st.text(st.characters(blacklist_categories=["Cs"]), max_size=20),
+    system_prompt=st.text(st.characters(blacklist_categories=["Cs"]), max_size=200),
+    greeting=st.text(st.characters(blacklist_categories=["Cs"]), max_size=200),
+)
+def test_fuzz_agent_always_compiles(voice, system_prompt, greeting):
+    # Arbitrary text (quotes, newlines, backslashes, unicode) must never break the script.
+    _compiles(code_gen.agent(voice=voice, system_prompt=system_prompt, greeting=greeting))
+
+
+@given(merged_strategy(config_builder.TRANSCRIBE_COERCE))
+def test_fuzz_transcribe_config_round_trips_in_generated_code(merged):
+    # The TranscriptionConfig(...) the generated code builds must equal the merged dict.
+    code = code_gen.transcribe(merged, source="audio.mp3")
+    if not merged:
+        assert "TranscriptionConfig(" not in code
+        return
+    # repr() escapes newlines, so no kwarg line contains a literal "\n)"; the first
+    # "\n)" after the constructor opens is always the config block's closer.
+    inner = code.split("aai.TranscriptionConfig(\n", 1)[1].split("\n)", 1)[0]
+    rebuilt = eval("dict(\n" + inner + "\n)", {"SpeechModel": SpeechModel})  # noqa: S307
+    assert rebuilt == merged
+
+
+class _Stub:
+    """A transcript-shaped stub exposing every attribute the snippets read."""
+
+    text: ClassVar[str] = "hello world"
+    utterances: ClassVar[list[object]] = [type("U", (), {"speaker": "A", "text": "hi"})()]
+    summary: ClassVar[str] = "a summary"
+    chapters: ClassVar[list[object]] = [type("C", (), {"headline": "intro"})()]
+    auto_highlights: ClassVar[object] = type(
+        "H", (), {"results": [type("R", (), {"count": 2, "text": "k"})()]}
+    )()
+    sentiment_analysis: ClassVar[list[object]] = [
+        type("S", (), {"sentiment": "POSITIVE", "text": "good"})()
+    ]
+    entities: ClassVar[list[object]] = [
+        type("E", (), {"entity_type": "person_name", "text": "Ada"})()
+    ]
+    iab_categories: ClassVar[object] = type("I", (), {"summary": {"Tech": 0.9}})()
+    content_safety: ClassVar[object] = type("CS", (), {"summary": {"profanity": 0.1}})()
+
+
+@given(merged_strategy(config_builder.TRANSCRIBE_COERCE))
+def test_fuzz_result_handling_always_execs(merged):
+    body = snippets.result_handling(merged)
+    exec(compile(body, "<snippets>", "exec"), {"transcript": _Stub(), "getattr": getattr})  # noqa: S102
+
+
+@given(
+    merged=merged_strategy(config_builder.TRANSCRIBE_COERCE),
+    field=st.sampled_from(["text", "id", "status", "utterances", "srt", "json"]),
+)
+def test_fuzz_transcribe_output_fields_always_compile(merged, field):
+    _compiles(render_transcribe_code(merged, "audio.mp3", output=field))
diff --git a/tests/test_stream_command.py b/tests/test_stream_command.py
index 142e0a9f..1a51c9fa 100644
--- a/tests/test_stream_command.py
+++ b/tests/test_stream_command.py
@@ -1,3 +1,9 @@
+"""`aai stream` source/streaming behavior and --show-code tests.
+
+Flag-to-params mapping and conflicting-flag validation live in
+test_stream_command_flags.py.
+"""
+
 import json
 import time
 import types
@@ -172,12 +178,6 @@ def test_stream_url_source_uses_filesource(monkeypatch):
     assert seen["source"].source == "https://example.com/clip.mp3"
 
 
-def test_stream_sample_with_sample_rate_rejected():
-    config.set_api_key("default", "sk_live")
-    result = runner.invoke(app, ["stream", "--sample", "--sample-rate", "44100"])
-    assert result.exit_code == 2  # mic-only flags don't apply to a file/sample source
-
-
 def test_stream_ctrl_c_exits_cleanly(monkeypatch):
     config.set_api_key("default", "sk_live")
 
@@ -202,20 +202,6 @@ def raise_kbd(*a, **k):
     assert "Stopped." in result.output
 
 
-def test_stream_file_with_sample_rate_flag_rejected(tmp_path):
-    config.set_api_key("default", "sk_live")
-    import wave
-
-    p = tmp_path / "a.wav"
-    with wave.open(str(p), "wb") as w:
-        w.setnchannels(1)
-        w.setsampwidth(2)
-        w.setframerate(16000)
-        w.writeframes(b"\x00\x01" * 100)
-    result = runner.invoke(app, ["stream", str(p), "--sample-rate", "44100"])
-    assert result.exit_code == 2
-
-
 def test_stream_broken_pipe_exits_zero(monkeypatch):
     config.set_api_key("default", "sk_live")
 
@@ -298,83 +284,6 @@ def fake_stream(api_key, source, *, params, **kwargs):
     assert seen["src"] == str(fake)
 
 
-def test_stream_maps_turn_detection_flags(monkeypatch):
-    config.set_api_key("default", "sk_live")
-    captured = {}
-
-    def fake_stream_audio(api_key, source, *, params, **kw):
-        captured["params"] = params
-
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio)
-
-    runner.invoke(
-        app,
-        [
-            "stream",
-            "--sample",
-            "--max-turn-silence",
-            "400",
-            "--filter-profanity",
-            "--speaker-labels",
-        ],
-    )
-    params = captured["params"]
-    assert params.max_turn_silence == 400
-    assert params.filter_profanity is True
-    assert params.speaker_labels is True
-
-
-def test_stream_config_escape_hatch(monkeypatch):
-    config.set_api_key("default", "sk_live")
-    captured = {}
-    monkeypatch.setattr(
-        "aai_cli.commands.stream.client.stream_audio",
-        lambda api_key, source, *, params, **kw: captured.update(params=params),
-    )
-
-    runner.invoke(app, ["stream", "--sample", "--config", "vad_threshold=0.7"])
-    assert captured["params"].vad_threshold == 0.7
-
-
-def test_stream_maps_webhook_auth_header(monkeypatch):
-    config.set_api_key("default", "sk_live")
-    captured = {}
-    monkeypatch.setattr(
-        "aai_cli.commands.stream.client.stream_audio",
-        lambda api_key, source, *, params, **kw: captured.update(params=params),
-    )
-
-    runner.invoke(
-        app,
-        [
-            "stream",
-            "--sample",
-            "--webhook-url",
-            "https://example.com/hook",
-            "--webhook-auth-header",
-            "Authorization:Bearer xyz",
-        ],
-    )
-    params = captured["params"]
-    assert params.webhook_auth_header_name == "Authorization"
-    assert params.webhook_auth_header_value == "Bearer xyz"
-
-
-def test_stream_format_turns_tristate(monkeypatch):
-    config.set_api_key("default", "sk_live")
-    captured = {}
-    monkeypatch.setattr(
-        "aai_cli.commands.stream.client.stream_audio",
-        lambda api_key, source, *, params, **kw: captured.update(params=params),
-    )
-
-    runner.invoke(app, ["stream", "--sample"])
-    assert captured["params"].format_turns is True  # unset defaults to True
-
-    runner.invoke(app, ["stream", "--sample", "--no-format-turns"])
-    assert captured["params"].format_turns is False
-
-
 def test_stream_show_code_prints_without_streaming(monkeypatch):
     # Print-only: emits the mic-streaming script, never opens audio or streams, no auth.
     called = []
@@ -456,38 +365,6 @@ def test_stream_show_code_rejects_youtube_sources():
     assert "YouTube" in result.output
 
 
-def test_stream_json_with_text_output_is_usage_error():
-    # Contradictory output shapes (--json + -o text) are rejected up front, before
-    # credentials, like the --llm + -o text precedent.
-    result = runner.invoke(app, ["stream", "--json", "-o", "text"])
-    assert result.exit_code == 2
-    assert "can't be combined with -o text" in result.output
-
-
-def test_stream_stdin_with_sample_rejected():
-    config.set_api_key("default", "sk_live")
-    result = runner.invoke(app, ["stream", "-", "--sample"], input=b"\x00\x00")
-    assert result.exit_code == 2
-    assert "--sample" in result.output
-
-
-def test_stream_file_source_with_sample_rejected(monkeypatch, tmp_path):
-    # A real source plus --sample is a conflict (the file would silently lose),
-    # surfaced by resolve_audio_source as a usage error before any streaming.
-    config.set_api_key("default", "sk_live")
-
-    def _boom(*a, **k):
-        raise AssertionError("must not stream a conflicting source")
-
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", _boom)
-    wav = tmp_path / "a.wav"
-    wav.write_bytes(b"RIFF")
-    result = runner.invoke(app, ["stream", str(wav), "--sample"])
-    assert result.exit_code == 2
-    assert "--sample" in result.output
-    assert "cannot be combined" in result.output
-
-
 def test_stream_show_code_ignores_json_flag(monkeypatch):
     def _boom(*a, **k):
         raise AssertionError("must not stream")
@@ -516,12 +393,6 @@ def fake_stream_audio(api_key, source, *, params, on_begin=None, **_kwargs):
     assert seen["audio"] == b"\x01\x02" * 100
 
 
-def test_stream_stdin_rejects_device(monkeypatch):
-    config.set_api_key("default", "sk_live")
-    result = runner.invoke(app, ["stream", "-", "--device", "2"], input=b"\x00\x00")
-    assert result.exit_code == 2  # --device applies only to the microphone
-
-
 def test_stream_system_audio_parallel_worker_error_surfaces(monkeypatch):
     config.set_api_key("default", "sk_live")
 
diff --git a/tests/test_stream_command_flags.py b/tests/test_stream_command_flags.py
new file mode 100644
index 00000000..0e6b171e
--- /dev/null
+++ b/tests/test_stream_command_flags.py
@@ -0,0 +1,146 @@
+"""`aai stream` flag handling: flag-to-params mapping and conflicting-flag validation.
+
+Source/streaming behavior and --show-code tests live in test_stream_command.py.
+"""
+
+from typer.testing import CliRunner
+
+from aai_cli import config
+from aai_cli.main import app
+
+runner = CliRunner()
+
+
+def test_stream_maps_turn_detection_flags(monkeypatch):
+    config.set_api_key("default", "sk_live")
+    captured = {}
+
+    def fake_stream_audio(api_key, source, *, params, **kw):
+        captured["params"] = params
+
+    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio)
+
+    runner.invoke(
+        app,
+        [
+            "stream",
+            "--sample",
+            "--max-turn-silence",
+            "400",
+            "--filter-profanity",
+            "--speaker-labels",
+        ],
+    )
+    params = captured["params"]
+    assert params.max_turn_silence == 400
+    assert params.filter_profanity is True
+    assert params.speaker_labels is True
+
+
+def test_stream_config_escape_hatch(monkeypatch):
+    config.set_api_key("default", "sk_live")
+    captured = {}
+    monkeypatch.setattr(
+        "aai_cli.commands.stream.client.stream_audio",
+        lambda api_key, source, *, params, **kw: captured.update(params=params),
+    )
+
+    runner.invoke(app, ["stream", "--sample", "--config", "vad_threshold=0.7"])
+    assert captured["params"].vad_threshold == 0.7
+
+
+def test_stream_maps_webhook_auth_header(monkeypatch):
+    config.set_api_key("default", "sk_live")
+    captured = {}
+    monkeypatch.setattr(
+        "aai_cli.commands.stream.client.stream_audio",
+        lambda api_key, source, *, params, **kw: captured.update(params=params),
+    )
+
+    runner.invoke(
+        app,
+        [
+            "stream",
+            "--sample",
+            "--webhook-url",
+            "https://example.com/hook",
+            "--webhook-auth-header",
+            "Authorization:Bearer xyz",
+        ],
+    )
+    params = captured["params"]
+    assert params.webhook_auth_header_name == "Authorization"
+    assert params.webhook_auth_header_value == "Bearer xyz"
+
+
+def test_stream_format_turns_tristate(monkeypatch):
+    config.set_api_key("default", "sk_live")
+    captured = {}
+    monkeypatch.setattr(
+        "aai_cli.commands.stream.client.stream_audio",
+        lambda api_key, source, *, params, **kw: captured.update(params=params),
+    )
+
+    runner.invoke(app, ["stream", "--sample"])
+    assert captured["params"].format_turns is True  # unset defaults to True
+
+    runner.invoke(app, ["stream", "--sample", "--no-format-turns"])
+    assert captured["params"].format_turns is False
+
+
+def test_stream_sample_with_sample_rate_rejected():
+    config.set_api_key("default", "sk_live")
+    result = runner.invoke(app, ["stream", "--sample", "--sample-rate", "44100"])
+    assert result.exit_code == 2  # mic-only flags don't apply to a file/sample source
+
+
+def test_stream_file_with_sample_rate_flag_rejected(tmp_path):
+    config.set_api_key("default", "sk_live")
+    import wave
+
+    p = tmp_path / "a.wav"
+    with wave.open(str(p), "wb") as w:
+        w.setnchannels(1)
+        w.setsampwidth(2)
+        w.setframerate(16000)
+        w.writeframes(b"\x00\x01" * 100)
+    result = runner.invoke(app, ["stream", str(p), "--sample-rate", "44100"])
+    assert result.exit_code == 2
+
+
+def test_stream_json_with_text_output_is_usage_error():
+    # Contradictory output shapes (--json + -o text) are rejected up front, before
+    # credentials, like the --llm + -o text precedent.
+    result = runner.invoke(app, ["stream", "--json", "-o", "text"])
+    assert result.exit_code == 2
+    assert "can't be combined with -o text" in result.output
+
+
+def test_stream_stdin_with_sample_rejected():
+    config.set_api_key("default", "sk_live")
+    result = runner.invoke(app, ["stream", "-", "--sample"], input=b"\x00\x00")
+    assert result.exit_code == 2
+    assert "--sample" in result.output
+
+
+def test_stream_file_source_with_sample_rejected(monkeypatch, tmp_path):
+    # A real source plus --sample is a conflict (the file would silently lose),
+    # surfaced by resolve_audio_source as a usage error before any streaming.
+    config.set_api_key("default", "sk_live")
+
+    def _boom(*a, **k):
+        raise AssertionError("must not stream a conflicting source")
+
+    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", _boom)
+    wav = tmp_path / "a.wav"
+    wav.write_bytes(b"RIFF")
+    result = runner.invoke(app, ["stream", str(wav), "--sample"])
+    assert result.exit_code == 2
+    assert "--sample" in result.output
+    assert "cannot be combined" in result.output
+
+
+def test_stream_stdin_rejects_device(monkeypatch):
+    config.set_api_key("default", "sk_live")
+    result = runner.invoke(app, ["stream", "-", "--device", "2"], input=b"\x00\x00")
+    assert result.exit_code == 2  # --device applies only to the microphone
diff --git a/tests/test_transcribe.py b/tests/test_transcribe.py
index bdbf6c8f..95314f55 100644
--- a/tests/test_transcribe.py
+++ b/tests/test_transcribe.py
@@ -1,3 +1,8 @@
+"""`aai transcribe` behavior: output rendering, LLM transforms, sources, --show-code.
+
+Flag-to-config mapping and flag validation live in test_transcribe_flags.py.
+"""
+
 import json
 
 import pytest
@@ -48,10 +53,6 @@ def _fake_transcript(mocker):
     return t
 
 
-def _enum_or_str(value):
-    return getattr(value, "value", value)
-
-
 def test_transcribe_sample_prints_text(mocker):
     _auth()
     tx = mocker.patch(
@@ -66,17 +67,6 @@ def test_transcribe_sample_prints_text(mocker):
     assert audio_arg.endswith("wildfires.mp3")
 
 
-def test_transcribe_passes_speaker_labels(mocker):
-    _auth()
-    tx = mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
-        autospec=True,
-        return_value=_fake_transcript(mocker),
-    )
-    runner.invoke(app, ["transcribe", "audio.mp3", "--speaker-labels"])
-    assert tx.call_args.kwargs["config"].speaker_labels is True
-
-
 def test_transcribe_json_output(mocker):
     _auth()
     mocker.patch(
@@ -290,117 +280,6 @@ def test_transcribe_chained_prompts_human_labels_each_step(monkeypatch, mocker):
     assert "out(summarize)" in result.output
 
 
-def test_transcribe_prompt_biases_speech_model(mocker):
-    _auth()
-    tx = mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
-        autospec=True,
-        return_value=_fake_transcript(mocker),
-    )
-    result = runner.invoke(app, ["transcribe", "audio.mp3", "--prompt", "expect medical terms"])
-    assert result.exit_code == 0
-    # --prompt is the speech-model prompt, forwarded to the transcription call.
-    assert tx.call_args.kwargs["config"].prompt == "expect medical terms"
-
-
-def test_transcribe_maps_analysis_flags(mocker):
-    _auth()
-    tx = mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
-        autospec=True,
-        return_value=_fake_transcript(mocker),
-    )
-    runner.invoke(
-        app,
-        [
-            "transcribe",
-            "audio.mp3",
-            "--summarization",
-            "--summary-type",
-            "bullets",
-            "--sentiment-analysis",
-            "--topic-detection",
-        ],
-    )
-    cfg = tx.call_args.kwargs["config"]
-    assert cfg.raw.summarization is True
-    assert cfg.raw.summary_type == "bullets"
-    assert cfg.raw.sentiment_analysis is True
-    assert cfg.raw.iab_categories is True
-
-
-def test_transcribe_redact_pii_policy_csv(mocker):
-    _auth()
-    tx = mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
-        autospec=True,
-        return_value=_fake_transcript(mocker),
-    )
-    runner.invoke(
-        app,
-        [
-            "transcribe",
-            "audio.mp3",
-            "--redact-pii",
-            "--redact-pii-policy",
-            "person_name,phone_number",
-        ],
-    )
-    cfg = tx.call_args.kwargs["config"]
-    assert cfg.raw.redact_pii is True
-    assert [_enum_or_str(p) for p in cfg.raw.redact_pii_policies] == [
-        "person_name",
-        "phone_number",
-    ]
-
-
-def test_transcribe_config_escape_hatch(mocker):
-    _auth()
-    tx = mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
-        autospec=True,
-        return_value=_fake_transcript(mocker),
-    )
-    runner.invoke(app, ["transcribe", "audio.mp3", "--config", "speech_threshold=0.5"])
-    assert tx.call_args.kwargs["config"].raw.speech_threshold == 0.5
-
-
-def test_transcribe_unknown_config_field_exits_2(mocker):
-    _auth()
-    mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
-        autospec=True,
-        return_value=_fake_transcript(mocker),
-    )
-    result = runner.invoke(app, ["transcribe", "audio.mp3", "--config", "bogus=1"])
-    assert result.exit_code == 2
-    assert "bogus" in result.output
-
-
-def test_transcribe_webhook_auth_header(mocker):
-    _auth()
-    tx = mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
-        autospec=True,
-        return_value=_fake_transcript(mocker),
-    )
-    runner.invoke(
-        app,
-        [
-            "transcribe",
-            "audio.mp3",
-            "--webhook-url",
-            "https://example.com/hook",
-            "--webhook-auth-header",
-            "X-Token:secret",
-        ],
-    )
-    cfg = tx.call_args.kwargs["config"]
-    assert cfg.raw.webhook_url == "https://example.com/hook"
-    assert cfg.raw.webhook_auth_header_name == "X-Token"
-    assert cfg.raw.webhook_auth_header_value == "secret"
-
-
 def test_transcribe_youtube_url_downloads_then_transcribes(monkeypatch, mocker, tmp_path):
     _auth()
     fake = tmp_path / "vid.m4a"
@@ -503,96 +382,6 @@ def _boom(*a, **k):
     assert 'print(f"Speaker {utt.speaker}: {utt.text}")' in result.output
 
 
-def test_transcribe_negative_audio_start_exits_2(mocker):
-    _auth()
-    tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True)
-    result = runner.invoke(app, ["transcribe", "audio.mp3", "--audio-start", "-100"])
-    assert result.exit_code == 2
-    tx.assert_not_called()
-
-
-def test_transcribe_language_code_with_detection_exits_2(mocker):
-    _auth()
-    tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True)
-    result = runner.invoke(
-        app,
-        ["transcribe", "audio.mp3", "--language-code", "en_us", "--language-detection"],
-    )
-    assert result.exit_code == 2
-    assert "--language-code and --language-detection can't be combined." in result.output
-    tx.assert_not_called()
-
-
-def test_transcribe_language_flags_alone_are_accepted(mocker):
-    # Only the combination is contradictory; each flag works on its own.
-    _auth()
-    tx = mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
-        autospec=True,
-        return_value=_fake_transcript(mocker),
-    )
-    result = runner.invoke(app, ["transcribe", "audio.mp3", "--language-code", "en_us"])
-    assert result.exit_code == 0
-    assert tx.call_args.kwargs["config"].language_code == "en_us"
-    result = runner.invoke(app, ["transcribe", "audio.mp3", "--language-detection"])
-    assert result.exit_code == 0
-    assert tx.call_args.kwargs["config"].language_detection is True
-
-
-def test_transcribe_speakers_expected_without_labels_exits_2(mocker):
-    _auth()
-    tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True)
-    result = runner.invoke(app, ["transcribe", "audio.mp3", "--speakers-expected", "2"])
-    assert result.exit_code == 2
-    assert "--speakers-expected only applies when diarization is enabled." in result.output
-    assert "Add --speaker-labels." in result.output
-    tx.assert_not_called()
-
-
-def test_transcribe_speakers_expected_with_labels_is_accepted(mocker):
-    _auth()
-    tx = mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
-        autospec=True,
-        return_value=_fake_transcript(mocker),
-    )
-    result = runner.invoke(
-        app, ["transcribe", "audio.mp3", "--speaker-labels", "--speakers-expected", "2"]
-    )
-    assert result.exit_code == 0
-    assert tx.call_args.kwargs["config"].speakers_expected == 2
-
-
-def test_transcribe_speakers_expected_with_config_speaker_labels_is_accepted(mocker):
-    # Diarization enabled through the --config escape hatch counts too: the check
-    # runs on the merged config, not just the curated flag.
-    _auth()
-    tx = mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
-        autospec=True,
-        return_value=_fake_transcript(mocker),
-    )
-    result = runner.invoke(
-        app,
-        ["transcribe", "audio.mp3", "--config", "speaker_labels=true", "--speakers-expected", "2"],
-    )
-    assert result.exit_code == 0
-    assert tx.call_args.kwargs["config"].speakers_expected == 2
-
-
-def test_transcribe_unknown_pii_policy_exits_2_and_lists_valid(mocker):
-    _auth()
-    tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True)
-    result = runner.invoke(
-        app,
-        ["transcribe", "audio.mp3", "--redact-pii", "--redact-pii-policy", "not_a_policy"],
-    )
-    assert result.exit_code == 2
-    assert "Unknown PII policy(s) ['not_a_policy']" in result.output
-    assert "person_name" in result.output  # the valid values are listed
-    tx.assert_not_called()
-
-
 def test_transcribe_renders_summary_human(monkeypatch, mocker):
     _auth()
     monkeypatch.setattr("aai_cli.output.resolve_json", lambda *, explicit: False)
diff --git a/tests/test_transcribe_flags.py b/tests/test_transcribe_flags.py
new file mode 100644
index 00000000..0420646a
--- /dev/null
+++ b/tests/test_transcribe_flags.py
@@ -0,0 +1,261 @@
+"""`aai transcribe` flag handling: flag-to-config mapping and flag validation.
+
+Output rendering, LLM transforms, sources, and --show-code tests live in
+test_transcribe.py.
+"""
+
+import pytest
+from typer.testing import CliRunner
+
+from aai_cli import config
+from aai_cli.main import app
+
+runner = CliRunner()
+
+
+@pytest.fixture(autouse=True)
+def audio_file(tmp_path, monkeypatch):
+    # The command checks the local path exists before resolving credentials, so the
+    # "audio.mp3" the tests pass must be a real file; run each test in its own cwd.
+    monkeypatch.chdir(tmp_path)
+    (tmp_path / "audio.mp3").write_bytes(b"fake-audio")
+
+
+def _auth():
+    config.set_api_key("default", "sk_live")
+
+
+def _fake_transcript(mocker):
+    t = mocker.MagicMock()
+    t.id = "t_1"
+    t.text = "hello world"
+    t.status = "completed"
+    t.json_response = {"id": "t_1", "text": "hello world", "status": "completed"}
+    for attr in (
+        "summary",
+        "chapters",
+        "auto_highlights",
+        "sentiment_analysis",
+        "entities",
+        "iab_categories",
+        "content_safety",
+    ):
+        setattr(t, attr, None)
+    t.utterances = None
+    return t
+
+
+def _enum_or_str(value):
+    return getattr(value, "value", value)
+
+
+def test_transcribe_passes_speaker_labels(mocker):
+    _auth()
+    tx = mocker.patch(
+        "aai_cli.commands.transcribe.client.transcribe",
+        autospec=True,
+        return_value=_fake_transcript(mocker),
+    )
+    runner.invoke(app, ["transcribe", "audio.mp3", "--speaker-labels"])
+    assert tx.call_args.kwargs["config"].speaker_labels is True
+
+
+def test_transcribe_prompt_biases_speech_model(mocker):
+    _auth()
+    tx = mocker.patch(
+        "aai_cli.commands.transcribe.client.transcribe",
+        autospec=True,
+        return_value=_fake_transcript(mocker),
+    )
+    result = runner.invoke(app, ["transcribe", "audio.mp3", "--prompt", "expect medical terms"])
+    assert result.exit_code == 0
+    # --prompt is the speech-model prompt, forwarded to the transcription call.
+    assert tx.call_args.kwargs["config"].prompt == "expect medical terms"
+
+
+def test_transcribe_maps_analysis_flags(mocker):
+    _auth()
+    tx = mocker.patch(
+        "aai_cli.commands.transcribe.client.transcribe",
+        autospec=True,
+        return_value=_fake_transcript(mocker),
+    )
+    runner.invoke(
+        app,
+        [
+            "transcribe",
+            "audio.mp3",
+            "--summarization",
+            "--summary-type",
+            "bullets",
+            "--sentiment-analysis",
+            "--topic-detection",
+        ],
+    )
+    cfg = tx.call_args.kwargs["config"]
+    assert cfg.raw.summarization is True
+    assert cfg.raw.summary_type == "bullets"
+    assert cfg.raw.sentiment_analysis is True
+    assert cfg.raw.iab_categories is True
+
+
+def test_transcribe_redact_pii_policy_csv(mocker):
+    _auth()
+    tx = mocker.patch(
+        "aai_cli.commands.transcribe.client.transcribe",
+        autospec=True,
+        return_value=_fake_transcript(mocker),
+    )
+    runner.invoke(
+        app,
+        [
+            "transcribe",
+            "audio.mp3",
+            "--redact-pii",
+            "--redact-pii-policy",
+            "person_name,phone_number",
+        ],
+    )
+    cfg = tx.call_args.kwargs["config"]
+    assert cfg.raw.redact_pii is True
+    assert [_enum_or_str(p) for p in cfg.raw.redact_pii_policies] == [
+        "person_name",
+        "phone_number",
+    ]
+
+
+def test_transcribe_config_escape_hatch(mocker):
+    _auth()
+    tx = mocker.patch(
+        "aai_cli.commands.transcribe.client.transcribe",
+        autospec=True,
+        return_value=_fake_transcript(mocker),
+    )
+    runner.invoke(app, ["transcribe", "audio.mp3", "--config", "speech_threshold=0.5"])
+    assert tx.call_args.kwargs["config"].raw.speech_threshold == 0.5
+
+
+def test_transcribe_unknown_config_field_exits_2(mocker):
+    _auth()
+    mocker.patch(
+        "aai_cli.commands.transcribe.client.transcribe",
+        autospec=True,
+        return_value=_fake_transcript(mocker),
+    )
+    result = runner.invoke(app, ["transcribe", "audio.mp3", "--config", "bogus=1"])
+    assert result.exit_code == 2
+    assert "bogus" in result.output
+
+
+def test_transcribe_webhook_auth_header(mocker):
+    _auth()
+    tx = mocker.patch(
+        "aai_cli.commands.transcribe.client.transcribe",
+        autospec=True,
+        return_value=_fake_transcript(mocker),
+    )
+    runner.invoke(
+        app,
+        [
+            "transcribe",
+            "audio.mp3",
+            "--webhook-url",
+            "https://example.com/hook",
+            "--webhook-auth-header",
+            "X-Token:secret",
+        ],
+    )
+    cfg = tx.call_args.kwargs["config"]
+    assert cfg.raw.webhook_url == "https://example.com/hook"
+    assert cfg.raw.webhook_auth_header_name == "X-Token"
+    assert cfg.raw.webhook_auth_header_value == "secret"
+
+
+def test_transcribe_negative_audio_start_exits_2(mocker):
+    _auth()
+    tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True)
+    result = runner.invoke(app, ["transcribe", "audio.mp3", "--audio-start", "-100"])
+    assert result.exit_code == 2
+    tx.assert_not_called()
+
+
+def test_transcribe_language_code_with_detection_exits_2(mocker):
+    _auth()
+    tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True)
+    result = runner.invoke(
+        app,
+        ["transcribe", "audio.mp3", "--language-code", "en_us", "--language-detection"],
+    )
+    assert result.exit_code == 2
+    assert "--language-code and --language-detection can't be combined." in result.output
+    tx.assert_not_called()
+
+
+def test_transcribe_language_flags_alone_are_accepted(mocker):
+    # Only the combination is contradictory; each flag works on its own.
+    _auth()
+    tx = mocker.patch(
+        "aai_cli.commands.transcribe.client.transcribe",
+        autospec=True,
+        return_value=_fake_transcript(mocker),
+    )
+    result = runner.invoke(app, ["transcribe", "audio.mp3", "--language-code", "en_us"])
+    assert result.exit_code == 0
+    assert tx.call_args.kwargs["config"].language_code == "en_us"
+    result = runner.invoke(app, ["transcribe", "audio.mp3", "--language-detection"])
+    assert result.exit_code == 0
+    assert tx.call_args.kwargs["config"].language_detection is True
+
+
+def test_transcribe_speakers_expected_without_labels_exits_2(mocker):
+    _auth()
+    tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True)
+    result = runner.invoke(app, ["transcribe", "audio.mp3", "--speakers-expected", "2"])
+    assert result.exit_code == 2
+    assert "--speakers-expected only applies when diarization is enabled." in result.output
+    assert "Add --speaker-labels." in result.output
+    tx.assert_not_called()
+
+
+def test_transcribe_speakers_expected_with_labels_is_accepted(mocker):
+    _auth()
+    tx = mocker.patch(
+        "aai_cli.commands.transcribe.client.transcribe",
+        autospec=True,
+        return_value=_fake_transcript(mocker),
+    )
+    result = runner.invoke(
+        app, ["transcribe", "audio.mp3", "--speaker-labels", "--speakers-expected", "2"]
+    )
+    assert result.exit_code == 0
+    assert tx.call_args.kwargs["config"].speakers_expected == 2
+
+
+def test_transcribe_speakers_expected_with_config_speaker_labels_is_accepted(mocker):
+    # Diarization enabled through the --config escape hatch counts too: the check
+    # runs on the merged config, not just the curated flag.
+    _auth()
+    tx = mocker.patch(
+        "aai_cli.commands.transcribe.client.transcribe",
+        autospec=True,
+        return_value=_fake_transcript(mocker),
+    )
+    result = runner.invoke(
+        app,
+        ["transcribe", "audio.mp3", "--config", "speaker_labels=true", "--speakers-expected", "2"],
+    )
+    assert result.exit_code == 0
+    assert tx.call_args.kwargs["config"].speakers_expected == 2
+
+
+def test_transcribe_unknown_pii_policy_exits_2_and_lists_valid(mocker):
+    _auth()
+    tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True)
+    result = runner.invoke(
+        app,
+        ["transcribe", "audio.mp3", "--redact-pii", "--redact-pii-policy", "not_a_policy"],
+    )
+    assert result.exit_code == 2
+    assert "Unknown PII policy(s) ['not_a_policy']" in result.output
+    assert "person_name" in result.output  # the valid values are listed
+    tx.assert_not_called()

From c007b0869ab67e7d7cf378eb5f70613ca4e4bf20 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 10 Jun 2026 05:10:39 +0000
Subject: [PATCH 08/11] Refactor run_command and code_gen.transcribe.render
 under the complexity gate

Extract the auto-login flow from run_command and split render into
header/transcribe/result helpers; behavior unchanged.

https://claude.ai/code/session_01Uv7cEgJi2LgknkvfHP52g7
---
 aai_cli/code_gen/transcribe.py | 60 +++++++++++++++++++---------------
 aai_cli/context.py             | 46 ++++++++++++++++----------
 2 files changed, 62 insertions(+), 44 deletions(-)

diff --git a/aai_cli/code_gen/transcribe.py b/aai_cli/code_gen/transcribe.py
index 5527e8c4..2fa77d44 100644
--- a/aai_cli/code_gen/transcribe.py
+++ b/aai_cli/code_gen/transcribe.py
@@ -40,22 +40,23 @@ def render(
     """
     if output is not None:
         llm_gateway = None  # `-o` returns before the chain runs in the real command
-    if merged:
-        kwargs = "\n".join(serialize.config_kwarg_lines(merged, indent=4))
-        config_block = f"config = aai.TranscriptionConfig(\n{kwargs}\n)"
-        call = f"transcript = transcriber.transcribe({source!r}, config=config)"
-    else:
-        config_block = ""
-        call = f"transcript = transcriber.transcribe({source!r})"
+    parts = (
+        _header_block(llm_gateway, output)
+        + _transcribe_block(merged, source)
+        + _result_block(merged, llm_gateway, output)
+    )
+    parts.append("")
+    return "\n".join(parts)
 
-    imports = ["import assemblyai as aai"]
-    if llm_gateway:
-        imports.append("from openai import OpenAI")
 
+def _header_block(llm_gateway: dict[str, object] | None, output: str | None) -> list[str]:
+    """Imports plus the api-key (and non-default environment) settings lines."""
     stdlib_imports = ["import os"]
     if output == "json":
         stdlib_imports.insert(0, "import json")
-
+    imports = ["import assemblyai as aai"]
+    if llm_gateway:
+        imports.append("from openai import OpenAI")
     parts = [
         *stdlib_imports,
         "",
@@ -69,13 +70,19 @@ def render(
     env = environments.active()
     if env.api_base != environments.get(environments.DEFAULT_ENV).api_base:
         parts.append(f"aai.settings.base_url = {env.api_base!r}")
-    parts += [
-        "",
-        "transcriber = aai.Transcriber()",
-    ]
-    if config_block:
-        parts += ["", config_block]
-    parts += [
+    return parts
+
+
+def _transcribe_block(merged: dict[str, object], source: str) -> list[str]:
+    """The transcriber setup, optional config, the transcribe call, and error check."""
+    parts = ["", "transcriber = aai.Transcriber()"]
+    if merged:
+        kwargs = "\n".join(serialize.config_kwarg_lines(merged, indent=4))
+        parts += ["", f"config = aai.TranscriptionConfig(\n{kwargs}\n)"]
+        call = f"transcript = transcriber.transcribe({source!r}, config=config)"
+    else:
+        call = f"transcript = transcriber.transcribe({source!r})"
+    return parts + [
         "",
         call,
         "",
@@ -84,16 +91,17 @@ def render(
         "",
     ]
 
+
+def _result_block(
+    merged: dict[str, object], llm_gateway: dict[str, object] | None, output: str | None
+) -> list[str]:
+    """The printed-result lines: one ``-o`` field, the LLM chain, or the analysis sections."""
     if output is not None:
         # Unknown names fall back to the plain text, like select_transcript_field does.
-        parts.append(_OUTPUT_SNIPPETS.get(output, _OUTPUT_SNIPPETS["text"]))
-    elif llm_gateway:
-        parts += _llm_gateway_block(llm_gateway)
-    else:
-        parts.append(snippets.result_handling(merged))
-
-    parts.append("")
-    return "\n".join(parts)
+        return [_OUTPUT_SNIPPETS.get(output, _OUTPUT_SNIPPETS["text"])]
+    if llm_gateway:
+        return _llm_gateway_block(llm_gateway)
+    return [snippets.result_handling(merged)]
 
 
 def _llm_gateway_block(llm_gateway: dict[str, object]) -> list[str]:
diff --git a/aai_cli/context.py b/aai_cli/context.py
index c46c764a..d57a9268 100644
--- a/aai_cli/context.py
+++ b/aai_cli/context.py
@@ -4,6 +4,7 @@
 import sys
 from collections.abc import Callable
 from dataclasses import dataclass
+from typing import NoReturn
 
 import keyring.errors
 import typer
@@ -149,6 +150,32 @@ def _should_auto_login(ctx: typer.Context, err: NotAuthenticated) -> bool:
     return not (os.environ.get(config.ENV_API_KEY) and err.message == REJECTED_KEY_MESSAGE)
 
 
+def _auto_login_and_exit(state: AppState, json_mode: bool) -> NoReturn:
+    """Run the browser login for an unauthenticated command, then exit.
+
+    Always raises typer.Exit: with the login error's code on failure, or the
+    "signed in — run the command again" code (4) on success.
+    """
+    try:
+        # Suppressed in json_mode too: --json stderr must stay machine-readable,
+        # never mix human prose into it.
+        if not state.quiet and not json_mode:
+            output.error_console.print(
+                "[aai.muted]Not signed in; starting browser login.[/aai.muted]"
+            )
+        _persist_browser_login(state)
+    except CLIError as login_err:
+        output.emit_error(login_err, json_mode=json_mode)
+        raise typer.Exit(code=login_err.exit_code) from None
+    except (OSError, RuntimeError, keyring.errors.KeyringError) as exc:
+        persistence_err = _login_persistence_error(exc)
+        output.emit_error(persistence_err, json_mode=json_mode)
+        raise typer.Exit(code=persistence_err.exit_code) from None
+    rerun_err = _rerun_after_login_error()
+    output.emit_error(rerun_err, json_mode=json_mode)
+    raise typer.Exit(code=rerun_err.exit_code) from None
+
+
 def run_command(
     ctx: typer.Context,
     fn: Callable[[AppState, bool], None],
@@ -165,24 +192,7 @@ def run_command(
         if not auto_login or not _should_auto_login(ctx, err):
             output.emit_error(err, json_mode=json_mode)
             raise typer.Exit(code=err.exit_code) from None
-        try:
-            # Suppressed in json_mode too: --json stderr must stay machine-readable,
-            # never mix human prose into it.
-            if not state.quiet and not json_mode:
-                output.error_console.print(
-                    "[aai.muted]Not signed in; starting browser login.[/aai.muted]"
-                )
-            _persist_browser_login(state)
-        except CLIError as login_err:
-            output.emit_error(login_err, json_mode=json_mode)
-            raise typer.Exit(code=login_err.exit_code) from None
-        except (OSError, RuntimeError, keyring.errors.KeyringError) as exc:
-            persistence_err = _login_persistence_error(exc)
-            output.emit_error(persistence_err, json_mode=json_mode)
-            raise typer.Exit(code=persistence_err.exit_code) from None
-        rerun_err = _rerun_after_login_error()
-        output.emit_error(rerun_err, json_mode=json_mode)
-        raise typer.Exit(code=rerun_err.exit_code) from None
+        _auto_login_and_exit(state, json_mode)
     except CLIError as err:
         output.emit_error(err, json_mode=json_mode)
         raise typer.Exit(code=err.exit_code) from None

From f9fe9950a2fbc25ecf66b4a5864e202477bb0dff Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 10 Jun 2026 05:11:39 +0000
Subject: [PATCH 09/11] Appease ruff: iterable unpacking, keyword-only
 json_mode

https://claude.ai/code/session_01Uv7cEgJi2LgknkvfHP52g7
---
 aai_cli/code_gen/transcribe.py | 3 ++-
 aai_cli/context.py             | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/aai_cli/code_gen/transcribe.py b/aai_cli/code_gen/transcribe.py
index 2fa77d44..8c1ab5fa 100644
--- a/aai_cli/code_gen/transcribe.py
+++ b/aai_cli/code_gen/transcribe.py
@@ -82,7 +82,8 @@ def _transcribe_block(merged: dict[str, object], source: str) -> list[str]:
         call = f"transcript = transcriber.transcribe({source!r}, config=config)"
     else:
         call = f"transcript = transcriber.transcribe({source!r})"
-    return parts + [
+    return [
+        *parts,
         "",
         call,
         "",
diff --git a/aai_cli/context.py b/aai_cli/context.py
index d57a9268..13c63675 100644
--- a/aai_cli/context.py
+++ b/aai_cli/context.py
@@ -150,7 +150,7 @@ def _should_auto_login(ctx: typer.Context, err: NotAuthenticated) -> bool:
     return not (os.environ.get(config.ENV_API_KEY) and err.message == REJECTED_KEY_MESSAGE)
 
 
-def _auto_login_and_exit(state: AppState, json_mode: bool) -> NoReturn:
+def _auto_login_and_exit(state: AppState, *, json_mode: bool) -> NoReturn:
     """Run the browser login for an unauthenticated command, then exit.
 
     Always raises typer.Exit: with the login error's code on failure, or the
@@ -192,7 +192,7 @@ def run_command(
         if not auto_login or not _should_auto_login(ctx, err):
             output.emit_error(err, json_mode=json_mode)
             raise typer.Exit(code=err.exit_code) from None
-        _auto_login_and_exit(state, json_mode)
+        _auto_login_and_exit(state, json_mode=json_mode)
     except CLIError as err:
         output.emit_error(err, json_mode=json_mode)
         raise typer.Exit(code=err.exit_code) from None

From 1a11e59f7c87e70883541b27a0f70275a46ab1e3 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 10 Jun 2026 05:14:23 +0000
Subject: [PATCH 10/11] Fix two order/width-sensitive tests under the full
 randomized suite

The audit auto-login test needed the same force-interactive shim as its
eight siblings now that auto-login is TTY-gated, and the directory-source
assertion must tolerate Rich wrapping long tmp paths.

https://claude.ai/code/session_01Uv7cEgJi2LgknkvfHP52g7
---
 tests/test_audit_command.py     | 1 +
 tests/test_source_validation.py | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/test_audit_command.py b/tests/test_audit_command.py
index 4584ad66..0ca2befb 100644
--- a/tests/test_audit_command.py
+++ b/tests/test_audit_command.py
@@ -174,6 +174,7 @@ def test_audit_summarizes_all_login_rows(monkeypatch, mocker):
 
 
 def test_audit_without_session_runs_login(monkeypatch, mocker):
+    monkeypatch.setattr("aai_cli.context._interactive_session", lambda: True)
     monkeypatch.setattr("aai_cli.context.run_login_flow", _login_result)
     logs = mocker.patch(
         "aai_cli.commands.audit.ams.list_audit_logs", autospec=True, return_value={"data": []}
diff --git a/tests/test_source_validation.py b/tests/test_source_validation.py
index 5d979473..1a7997a1 100644
--- a/tests/test_source_validation.py
+++ b/tests/test_source_validation.py
@@ -72,7 +72,9 @@ def test_transcribe_directory_source_fails_before_credentials(mocker, tmp_path):
     tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True)
     result = runner.invoke(app, ["transcribe", str(tmp_path)])
     assert result.exit_code == 2
-    assert f"Not a file: {tmp_path}" in result.output
+    # Rich may wrap the long tmp path mid-message; compare on unwrapped text.
+    unwrapped = " ".join(result.output.split())
+    assert f"Not a file: {tmp_path}" in unwrapped
     assert "starting browser login" not in result.output
     tx.assert_not_called()
 

From 3b145c6e48513400bd2184d40e06496e22dfde81 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 10 Jun 2026 05:16:35 +0000
Subject: [PATCH 11/11] Cover the fractional usage-number formatting branch

https://claude.ai/code/session_01Uv7cEgJi2LgknkvfHP52g7
---
 tests/test_account_command.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tests/test_account_command.py b/tests/test_account_command.py
index 788d3514..8c96e3e8 100644
--- a/tests/test_account_command.py
+++ b/tests/test_account_command.py
@@ -356,3 +356,10 @@ def test_limits_json_passthrough_when_empty(mocker):
     result = runner.invoke(app, ["limits", "--json"])
     assert result.exit_code == 0
     assert json.loads(result.output) == {"rate_limits": []}
+
+
+def test_format_usage_number_fractional_trims_trailing_zeros():
+    # Non-integers keep up to six decimals with trailing zeros (and a bare dot) trimmed.
+    assert account._format_usage_number(1234.5) == "1,234.5"
+    assert account._format_usage_number(0.000001) == "0.000001"
+    assert account._format_usage_number(2.5000004) == "2.5"