diff --git a/.importlinter b/.importlinter
index 38ccd1be..3959aa5d 100644
--- a/.importlinter
+++ b/.importlinter
@@ -7,6 +7,7 @@ name = Core modules do not import command modules
 type = forbidden
 source_modules =
     aai_cli.agent
+    aai_cli.agent_exec
     aai_cli.argscan
     aai_cli.auth
     aai_cli.client
@@ -24,11 +25,14 @@ source_modules =
     aai_cli.help_text
     aai_cli.init
     aai_cli.llm
+    aai_cli.llm_exec
     aai_cli.microphone
     aai_cli.options
     aai_cli.output
     aai_cli.render
+    aai_cli.speak_exec
     aai_cli.stdio
+    aai_cli.stream_exec
     aai_cli.streaming
     aai_cli.telemetry
     aai_cli.theme
diff --git a/AGENTS.md b/AGENTS.md
index b65cd506..7c49469a 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -164,6 +164,8 @@ A Typer CLI. `aai_cli/main.py` builds the `app`, registers each command sub-app,
 
 Each file in `aai_cli/commands/` is a Typer sub-app (`transcribe`, `stream`, `agent`, `speak`, `llm`, `transcripts`, `login` (login/logout/whoami), `doctor`, `init`, `dev`, `share`, `deploy`, `setup`, `onboard`, `account` (balance/usage/limits), `keys`, `sessions`, `audit`, `telemetry` (status/enable/disable)). Command bodies run through `context.run_command(ctx, fn, json=...)`, which maps any `CLIError` to clean stderr output + the error's exit code. Commands never print tracebacks for expected failures.
 
+**Options/run split for flag-heavy commands** (gh-CLI style): the Typer function only parses argv into a frozen `<Cmd>Options` dataclass and hands it to a module-level `run_<cmd>(opts, state, *, json_mode)` through a thin lambda adapter in `run_command(ctx, ..., json=...)`. The five run commands follow this pattern — `aai_cli/stream_exec.py` (the reference implementation), `transcribe_exec.py`, `agent_exec.py`, `speak_exec.py`, `llm_exec.py`. Because the run path is a plain function of data, tests construct options directly (`dataclasses.replace` off a defaults instance, see `tests/test_stream_exec.py` and `tests/test_command_options_seam.py`) instead of round-tripping argv through `CliRunner`. Constructing options directly is also the cheap way to kill mutation-gate mutants on orchestration lines. Follow the split for new or heavily-reworked commands with long bodies; small commands keep the inline `body()` closure — the dataclass is pure ceremony there.
+
 ### Cross-cutting state (resolution order matters)
 
 - **`context.py`** — `AppState` (profile, env) is attached to the Typer context in the root `@app.callback()`. `run_command` is the standard command wrapper.
diff --git a/aai_cli/agent_exec.py b/aai_cli/agent_exec.py
new file mode 100644
index 00000000..8d8b2cdc
--- /dev/null
+++ b/aai_cli/agent_exec.py
@@ -0,0 +1,154 @@
+"""Run logic for `assembly agent`: the options/run split (see AGENTS.md).
+
+The command module (aai_cli/commands/agent.py) only parses argv — it builds an
+``AgentOptions`` and hands it to ``run_agent`` via ``context.run_command``, so tests
+can drive validation, --show-code, and session wiring by constructing options
+directly, with no CliRunner argv round-trip.
+"""
+
+from __future__ import annotations
+
+import contextlib
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+import typer
+
+from aai_cli import choices, client, code_gen, output
+from aai_cli.agent.audio import SAMPLE_RATE, DuplexAudio, NullPlayer
+from aai_cli.agent.render import AgentRenderer
+from aai_cli.agent.session import AgentRunConfig, run_session
+from aai_cli.agent.voices import VOICE_NAMES
+from aai_cli.context import AppState
+from aai_cli.errors import CLIError, UsageError
+from aai_cli.streaming.session import validate_output_flags
+from aai_cli.streaming.sources import FileSource
+
+
+@dataclass(frozen=True)
+class AgentOptions:
+    """Every `assembly agent` conversation flag as plain data.
+
+    ``--list-voices`` is excluded: it dispatches to its own auth-free body in the
+    command module. ``--json`` is excluded: run_command resolves it into the
+    ``json_mode`` argument.
+    """
+
+    source: str | None  # clip streamed as the user's speech; falsy (and no sample) -> live mic
+    sample: bool  # forwarded to client.resolve_audio_source(sample=...); also selects file mode
+    voice: str  # must be one of VOICE_NAMES, else run_agent raises UsageError
+    system_prompt: str  # persona text, used unless system_prompt_file is given
+    system_prompt_file: Path | None  # read as UTF-8; overrides system_prompt when set
+    greeting: str  # replaced by "" when input is file-driven
+    device: int | None  # passed to DuplexAudio; a UsageError when combined with file input
+    output_field: choices.TextOrJson | None  # fed to output.stream_output_modes with json_mode
+    show_code: bool  # print the equivalent script and return before auth or audio
+
+
+def _resolve_system_prompt(system_prompt: str, system_prompt_file: Path | None) -> str:
+    """The persona text: --system-prompt-file (read as UTF-8) overrides --system-prompt."""
+    if system_prompt_file is None:
+        return system_prompt
+    try:
+        return system_prompt_file.read_text(encoding="utf-8")
+    except (OSError, UnicodeDecodeError) as exc:  # decode errors are ValueError, not OSError
+        raise CLIError(
+            f"Could not read --system-prompt-file {system_prompt_file}: {exc}",
+            error_type="file_not_found",
+            exit_code=2,
+            suggestion="Check the path and that the file is readable.",
+        ) from exc
+
+
+def _open_audio(
+    renderer: AgentRenderer,
+    *,
+    source: str | None,
+    sample: bool,
+    device: int | None,
+    from_file: bool,
+) -> tuple[Any, Any]:
+    """Build the (mic, player) pair: FileSource + NullPlayer for a clip, else a DuplexAudio."""
+    if not from_file:
+        # Mic and speaker share a single full-duplex stream: macOS rejects two
+        # separate streams on a device, which silently kills capture.
+        live = DuplexAudio(target_rate=SAMPLE_RATE, device=device)
+        # notice() self-suppresses in JSON mode and otherwise goes to stderr, so a
+        # piped `assembly agent | …` never reads this advisory as transcript data.
+        renderer.notice(
+            "Use headphones — the mic stays open while the agent speaks, "
+            "so speakers would let it hear itself.\n"
+        )
+        return live.mic, live.player
+    # File-driven: the clip is streamed as the user's speech and the session stops
+    # after the agent replies. No greeting and full-duplex so no part of the clip is
+    # muted/dropped; NullPlayer because there is no listener for the reply audio.
+    return FileSource(client.resolve_audio_source(source, sample=sample)), NullPlayer()
+
+
+def _print_show_code(opts: AgentOptions, system_prompt_text: str) -> None:
+    """Emit the equivalent agent script on raw stdout (for `> script.py`); no auth, no audio."""
+    if bool(opts.source) or opts.sample:
+        # A faithful file-driven script would have to inline the CLI's whole
+        # ffmpeg-decode + ready-gate + exit-after-reply machinery, so the snippet
+        # stays microphone-driven. Say so on stderr rather than dropping the source
+        # silently; stderr keeps `--show-code > script.py` byte-clean.
+        note = (
+            "[aai.warn]Note:[/aai.warn] the generated script uses the microphone; "
+            "it does not stream the audio source you passed."
+        )
+        output.error_console.print(note)
+    script = code_gen.agent(opts.voice, system_prompt_text, opts.greeting)
+    output.print_code(script)
+
+
+def run_agent(opts: AgentOptions, state: AppState, *, json_mode: bool) -> None:
+    """Run one `assembly agent` conversation from parsed flags; validation precedes any auth."""
+    validate_output_flags(json_mode=json_mode, output_field=opts.output_field)
+    text_mode, json_mode = output.stream_output_modes(opts.output_field, json_mode=json_mode)
+    if opts.voice not in VOICE_NAMES:
+        raise UsageError(
+            f"Unknown voice {opts.voice!r}.",
+            suggestion="Run 'assembly agent --list-voices' to see the options.",
+        )
+    system_prompt_text = _resolve_system_prompt(opts.system_prompt, opts.system_prompt_file)
+
+    if opts.show_code:  # print-only path: returns before credentials are resolved
+        _print_show_code(opts, system_prompt_text)
+        return
+
+    from_file = bool(opts.source) or opts.sample  # file-driven vs. live-mic input
+    if from_file and opts.device is not None:
+        raise UsageError("--device applies only to microphone input.")
+    if from_file:
+        # Existence-check the clip before credentials (_open_audio resolves it again),
+        # so a typo'd path reads as "file not found" instead of triggering a login.
+        client.resolve_audio_source(opts.source, sample=opts.sample)
+    api_key = state.resolve_api_key()  # only after every local check above has passed
+
+    renderer = AgentRenderer(
+        json_mode=json_mode,
+        text_mode=text_mode,
+        mic_input=not from_file,
+    )
+    audio, player = _open_audio(
+        renderer, source=opts.source, sample=opts.sample, device=opts.device, from_file=from_file
+    )  # NOTE(review): outside the try below, so a raise here skips renderer.close() — confirm OK
+    run_config = AgentRunConfig(
+        voice=opts.voice,
+        system_prompt=system_prompt_text,
+        greeting="" if from_file else opts.greeting,
+        full_duplex=True,  # one duplex stream -> mic always open (use headphones)
+        exit_after_reply=from_file,
+    )
+    try:
+        run_session(api_key, renderer=renderer, player=player, mic=audio, config=run_config)
+    except KeyboardInterrupt:  # swallowed: an interrupt is not reported as an error
+        renderer.stopped()
+    except BrokenPipeError as exc:
+        # Downstream consumer (e.g. `| head`) closed the pipe; stop quietly.
+        raise typer.Exit(code=0) from exc
+    finally:
+        with contextlib.suppress(BrokenPipeError):  # presumably close() may hit the closed pipe
+            renderer.close()
diff --git a/aai_cli/commands/agent.py b/aai_cli/commands/agent.py
index b3e259c2..da9dbe8e 100644
--- a/aai_cli/commands/agent.py
+++ b/aai_cli/commands/agent.py
@@ -1,77 +1,23 @@
 from __future__ import annotations
 
-import contextlib
 from pathlib import Path
-from typing import Any
 
 import typer
 
-from aai_cli import choices, client, code_gen, help_panels, options, output
-from aai_cli.agent.audio import SAMPLE_RATE, DuplexAudio, NullPlayer
-from aai_cli.agent.render import AgentRenderer
-from aai_cli.agent.session import (
-    DEFAULT_GREETING,
-    DEFAULT_PROMPT,
-    AgentRunConfig,
-    run_session,
-)
+from aai_cli import agent_exec, choices, help_panels, options, output
+from aai_cli.agent.session import DEFAULT_GREETING, DEFAULT_PROMPT
 from aai_cli.agent.voices import (
     DEFAULT_VOICE,
-    VOICE_NAMES,
     VOICES,
     complete_voice,
     format_voice_list,
 )
 from aai_cli.context import AppState, run_command
-from aai_cli.errors import CLIError, UsageError
 from aai_cli.help_text import examples_epilog
-from aai_cli.streaming.session import validate_output_flags
-from aai_cli.streaming.sources import FileSource
 
 app = typer.Typer()
 
 
-def _resolve_system_prompt(system_prompt: str, system_prompt_file: Path | None) -> str:
-    """The persona text: a --system-prompt-file (if given) overrides --system-prompt."""
-    if system_prompt_file is None:
-        return system_prompt
-    try:
-        return system_prompt_file.read_text(encoding="utf-8")
-    except OSError as exc:
-        raise CLIError(
-            f"Could not read --system-prompt-file {system_prompt_file}: {exc}",
-            error_type="file_not_found",
-            exit_code=2,
-            suggestion="Check the path and that the file is readable.",
-        ) from exc
-
-
-def _open_audio(
-    renderer: AgentRenderer,
-    *,
-    source: str | None,
-    sample: bool,
-    device: int | None,
-    from_file: bool,
-) -> tuple[Any, Any]:
-    """Build the (mic, player) pair for either file-driven or live-mic input."""
-    if from_file:
-        # Stream the clip as the user's speech and stop after the agent replies.
-        # No greeting and full-duplex so no part of the clip is muted/dropped,
-        # and a NullPlayer since there is no listener for the reply audio.
-        return FileSource(client.resolve_audio_source(source, sample=sample)), NullPlayer()
-    # One full-duplex stream for mic + speaker: macOS rejects two separate
-    # streams on a device, which silently kills capture.
-    duplex = DuplexAudio(target_rate=SAMPLE_RATE, device=device)
-    # notice() self-suppresses in JSON mode and routes to stderr otherwise, so a
-    # piped `assembly agent | …` never reads this advisory as transcript data.
-    renderer.notice(
-        "Use headphones — the mic stays open while the agent speaks, "
-        "so speakers would let it hear itself.\n"
-    )
-    return duplex.mic, duplex.player
-
-
 def _emit_voice_list(_state: AppState, json_mode: bool) -> None:
     """--list-voices body, routed through run_command so --json yields a
     machine-readable array instead of the human list; needs no auth."""
@@ -149,65 +95,19 @@ def agent(
         run_command(ctx, _emit_voice_list, json=json_out)
         return
 
-    def body(state: AppState, json_mode: bool) -> None:
-        validate_output_flags(json_mode=json_mode, output_field=output_field)
-        text_mode, json_mode = output.stream_output_modes(output_field, json_mode=json_mode)
-        if voice not in VOICE_NAMES:
-            raise UsageError(
-                f"Unknown voice {voice!r}.",
-                suggestion="Run 'assembly agent --list-voices' to see the options.",
-            )
-        system_prompt_text = _resolve_system_prompt(system_prompt, system_prompt_file)
-
-        if show_code:
-            # Print-only: emit the equivalent agent script from the flags and exit
-            # without authenticating or opening audio. Raw stdout for `> script.py`.
-            if source or sample:
-                # A faithful file-driven agent script would need the CLI's whole
-                # ffmpeg-decode + ready-gate + exit-after-reply machinery, which is
-                # impractical to inline; the snippet is microphone-driven, so say so
-                # on stderr instead of silently dropping the source. stderr keeps
-                # `--show-code > script.py` byte-clean.
-                output.error_console.print(
-                    "[aai.warn]Note:[/aai.warn] the generated script uses the microphone; "
-                    "it does not stream the audio source you passed."
-                )
-            output.print_code(code_gen.agent(voice, system_prompt_text, greeting))
-            return
-
-        from_file = bool(source) or sample
-        if from_file and device is not None:
-            raise UsageError("--device applies only to microphone input.")
-        if from_file:
-            # Existence-check the clip before credentials, so a typo'd path reads as
-            # "file not found" instead of triggering a login.
-            client.resolve_audio_source(source, sample=sample)
-        api_key = state.resolve_api_key()
-
-        renderer = AgentRenderer(
-            json_mode=json_mode,
-            text_mode=text_mode,
-            mic_input=not from_file,
-        )
-        audio, player = _open_audio(
-            renderer, source=source, sample=sample, device=device, from_file=from_file
-        )
-        run_config = AgentRunConfig(
-            voice=voice,
-            system_prompt=system_prompt_text,
-            greeting="" if from_file else greeting,
-            full_duplex=True,  # one duplex stream -> mic always open (use headphones)
-            exit_after_reply=from_file,
-        )
-        try:
-            run_session(api_key, renderer=renderer, player=player, mic=audio, config=run_config)
-        except KeyboardInterrupt:
-            renderer.stopped()
-        except BrokenPipeError as exc:
-            # Downstream consumer (e.g. `| head`) closed the pipe; stop quietly.
-            raise typer.Exit(code=0) from exc
-        finally:
-            with contextlib.suppress(BrokenPipeError):
-                renderer.close()
-
-    run_command(ctx, body, json=json_out)
+    opts = agent_exec.AgentOptions(
+        source=source,
+        sample=sample,
+        voice=voice,
+        system_prompt=system_prompt,
+        system_prompt_file=system_prompt_file,
+        greeting=greeting,
+        device=device,
+        output_field=output_field,
+        show_code=show_code,
+    )
+    run_command(
+        ctx,
+        lambda state, json_mode: agent_exec.run_agent(opts, state, json_mode=json_mode),
+        json=json_out,
+    )
diff --git a/aai_cli/commands/llm.py b/aai_cli/commands/llm.py
index 2850a264..14d088fd 100644
--- a/aai_cli/commands/llm.py
+++ b/aai_cli/commands/llm.py
@@ -3,46 +3,15 @@
 from collections.abc import Callable
 
 import typer
-from rich.markup import escape
 
-from aai_cli import choices, client, help_panels, options, output, stdio
+from aai_cli import choices, help_panels, llm_exec, options, output
 from aai_cli import llm as gateway
 from aai_cli.context import AppState, run_command
 from aai_cli.errors import UsageError
-from aai_cli.follow import FollowRenderer
 from aai_cli.help_text import examples_epilog
 
 app = typer.Typer()
 
-_FOLLOW_STDIN_MESSAGE = (
-    "--follow needs transcript text piped on stdin, e.g. "
-    '`assembly stream -o text | assembly llm -f "summarize action items as I talk"`.'
-)
-
-
-def _validate_follow_args(
-    prompt: str | None, output_field: str | None, transcript_id: str | None
-) -> str:
-    """Reject flag combinations that don't apply to --follow's live-panel mode.
-
-    Returns the validated (non-empty) prompt so the caller has a plain ``str``.
-    """
-    if not prompt:
-        raise UsageError("Provide a prompt to run over the streamed transcript.")
-    if output_field is not None:
-        raise UsageError(
-            "--output applies to one-shot mode; --follow renders a live panel "
-            "(or NDJSON when piped)."
-        )
-    if transcript_id:
-        raise UsageError(
-            "--follow runs over live transcript text piped on stdin; it can't be "
-            "combined with --transcript-id."
-        )
-    if not stdio.stdin_is_piped():
-        raise UsageError(_FOLLOW_STDIN_MESSAGE)
-    return prompt
-
 
 def _emit_model_list(_state: AppState, json_mode: bool) -> None:
     """--list-models body, routed through run_command so --json yields a
@@ -67,27 +36,6 @@ def body(state: AppState, json_mode: bool) -> None:
     return body
 
 
-def _stdin_transcript_text(
-    state: AppState, json_mode: bool, transcript_id: str | None
-) -> str | None:
-    """Resolve the inline transcript text for one-shot mode.
-
-    Text piped on stdin becomes the content the prompt operates on, unless an
-    explicit --transcript-id is given — that injects server-side and takes
-    priority, so piped text is ignored with a visible warning (suppressed by
-    --quiet, structured under --json).
-    """
-    if transcript_id is None:
-        return stdio.piped_stdin_text()
-    # Same cheap local id check as `transcripts get`, before auth or network.
-    client.validate_transcript_id(transcript_id)
-    if stdio.stdin_is_piped() and not state.quiet:
-        output.emit_warning(
-            "Ignoring piped stdin; --transcript-id takes priority.", json_mode=json_mode
-        )
-    return None
-
-
 @app.command(
     rich_help_panel=help_panels.TRANSCRIPTION,
     epilog=examples_epilog(
@@ -149,62 +97,17 @@ def llm(
         run_command(ctx, _list_models_body(output_field), json=json_out)
         return
 
-    def follow_body(state: AppState, json_mode: bool) -> None:
-        prompt_text = _validate_follow_args(prompt, output_field, transcript_id)
-        api_key = state.resolve_api_key()
-
-        def ask(transcript_text: str) -> str:
-            messages = gateway.build_messages(
-                prompt_text, system=system, transcript_text=transcript_text
-            )
-            response = gateway.complete(
-                api_key, model=model, messages=messages, max_tokens=max_tokens
-            )
-            return gateway.content_of(response)
-
-        transcript: list[str] = []
-        interrupted = False
-        with FollowRenderer(json_mode=json_mode) as render:
-            # Ctrl-C is the normal "stop watching" signal -> exit cleanly (code 0).
-            try:
-                for turn in stdio.iter_piped_stdin_lines():
-                    transcript.append(turn)
-                    render(ask("\n".join(transcript)), len(transcript))
-            except KeyboardInterrupt:
-                interrupted = True
-        if not transcript and not interrupted:
-            # An empty pipe (`assembly llm -f "…" </dev/null`) would otherwise exit 0
-            # silently, having asked nothing.
-            raise UsageError(_FOLLOW_STDIN_MESSAGE)
-
-    def body(state: AppState, json_mode: bool) -> None:
-        if not prompt:
-            raise UsageError(
-                "Provide a prompt.",
-                suggestion="Or pass --list-models to see available models.",
-            )
-        prompt_text = prompt
-        stdin_text = _stdin_transcript_text(state, json_mode, transcript_id)
-        api_key = state.resolve_api_key()
-        messages = gateway.build_messages(
-            prompt_text, system=system, transcript_id=transcript_id, transcript_text=stdin_text
-        )
-        response = gateway.complete(
-            api_key,
-            model=model,
-            messages=messages,
-            max_tokens=max_tokens,
-            transcript_id=transcript_id,
-        )
-        content = gateway.content_of(response)
-        if output_field == "text":
-            # Just the answer, raw — so `… | assembly llm -o text "…" | next` composes cleanly.
-            output.emit_text(content)
-            return
-        output.emit(
-            {"model": model, "output": content, "usage": gateway.usage_of(response)},
-            lambda d: escape(str(d["output"])),
-            json_mode=json_mode or output_field == "json",
-        )
-
-    run_command(ctx, follow_body if follow else body, json=json_out)
+    opts = llm_exec.LlmOptions(
+        prompt=prompt,
+        model=model,
+        transcript_id=transcript_id,
+        system=system,
+        follow=follow,
+        output_field=output_field,
+        max_tokens=max_tokens,
+    )
+    run_command(
+        ctx,
+        lambda state, json_mode: llm_exec.run_llm(opts, state, json_mode=json_mode),
+        json=json_out,
+    )
diff --git a/aai_cli/commands/speak.py b/aai_cli/commands/speak.py
index 89b13770..948def0b 100644
--- a/aai_cli/commands/speak.py
+++ b/aai_cli/commands/speak.py
@@ -1,169 +1,16 @@
 from __future__ import annotations
 
-import sys
 from pathlib import Path
 
 import typer
 
-from aai_cli import help_panels, options, output
-from aai_cli.context import AppState, run_command
-from aai_cli.errors import CLIError, UsageError
+from aai_cli import help_panels, options, speak_exec
+from aai_cli.context import run_command
 from aai_cli.help_text import examples_epilog
-from aai_cli.tts import audio, dialogue, session
+from aai_cli.speak_exec import DEFAULT_LANGUAGE
 
 app = typer.Typer()
 
-# The streaming-TTS reference client defaults to the PocketTTS "jane" voice and
-# English, so the CLI sends the same and a bare `assembly speak` works out of the box.
-# Override either with --voice/--language.
-DEFAULT_VOICE = "jane"
-DEFAULT_LANGUAGE = "English"
-
-
-def _read_text(text: str | None) -> str:
-    """The text to speak: the non-blank argument, or piped stdin when the argument
-    is omitted entirely. A *blank* argument (e.g. "") is a usage error, never a
-    silent fall-through to stdin — so `assembly speak "$MSG"` with an empty MSG fails
-    fast instead of consuming whatever happens to be on the pipe."""
-    if text is not None and text.strip():
-        return text
-    # `text is None` (argument omitted), not merely blank: see the docstring rationale.
-    if text is None and not sys.stdin.isatty():
-        piped = sys.stdin.read().strip()
-        if piped:
-            return piped
-    raise UsageError(
-        "No text to speak.",
-        suggestion='Pass text as an argument: assembly speak "Hello" — or pipe it via stdin.',
-    )
-
-
-def _output_audio(result: session.SpeakResult, out: Path | None) -> None:
-    """Write a WAV when --out is given, else play through the speakers."""
-    if out is not None:
-        audio.write_wav(out, result.pcm, result.sample_rate)
-    else:
-        audio.play_pcm(result.pcm, result.sample_rate)
-
-
-def _disposition(out: Path | None) -> str:
-    return f"saved to {out}" if out is not None else "played"
-
-
-def _emit_single(
-    result: session.SpeakResult,
-    cfg: session.SpeakConfig,
-    out: Path | None,
-    *,
-    json_mode: bool,
-) -> None:
-    """Single-voice result: a JSON object on stdout, or a human note on stderr."""
-    duration = round(result.audio_duration_seconds, 3)
-    if json_mode:
-        output.emit_ndjson(
-            {
-                "voice": cfg.voice,
-                "language": cfg.language,
-                "sample_rate": result.sample_rate,
-                "audio_duration_seconds": duration,
-                "bytes": len(result.pcm),
-                "out": str(out) if out is not None else None,
-            }
-        )
-        return
-    output.error_console.print(
-        f"[aai.muted]Spoke {duration}s of audio ({_disposition(out)}).[/aai.muted]"
-    )
-
-
-def _emit_multi(
-    result: session.SpeakResult,
-    speakers: dict[str, str],
-    segment_count: int,
-    out: Path | None,
-    *,
-    json_mode: bool,
-) -> None:
-    """Multi-voice result: a JSON object on stdout, or a human note on stderr."""
-    duration = round(result.audio_duration_seconds, 3)
-    if json_mode:
-        output.emit_ndjson(
-            {
-                "mode": "multi",
-                "speakers": speakers,
-                "segments": segment_count,
-                "sample_rate": result.sample_rate,
-                "audio_duration_seconds": duration,
-                "bytes": len(result.pcm),
-                "out": str(out) if out is not None else None,
-            }
-        )
-        return
-    voices = ", ".join(f"{spk}={voice}" for spk, voice in speakers.items())
-    output.error_console.print(
-        f"[aai.muted]Spoke {duration}s across {len(speakers)} voices "
-        f"({voices}) ({_disposition(out)}).[/aai.muted]"
-    )
-
-
-def _speak_single(
-    api_key: str,
-    text: str,
-    voice: str,
-    language: str,
-    sample_rate: int | None,
-    out: Path | None,
-    *,
-    json_mode: bool,
-    quiet: bool,
-) -> None:
-    cfg = session.SpeakConfig(text=text, voice=voice, language=language, sample_rate=sample_rate)
-    with output.status("Synthesizing speech…", json_mode=json_mode, quiet=quiet):
-        result = session.synthesize(
-            api_key, cfg, on_warning=lambda m: output.emit_warning(m, json_mode=json_mode)
-        )
-    _output_audio(result, out)
-    _emit_single(result, cfg, out, json_mode=json_mode)
-
-
-def _speak_dialogue(
-    api_key: str,
-    text: str,
-    bare_voice: str | None,
-    overrides: dict[str, str],
-    language: str,
-    sample_rate: int | None,
-    out: Path | None,
-    *,
-    json_mode: bool,
-    quiet: bool,
-) -> None:
-    segments = dialogue.parse_segments(text)
-    if not segments:
-        raise UsageError(
-            "No text to speak.",
-            suggestion="The input had speaker labels but no spoken text.",
-        )
-    if bare_voice is not None:
-        output.emit_warning(
-            "Ignoring bare --voice in multi-speaker mode; "
-            "set a voice per speaker with --voice A=NAME.",
-            json_mode=json_mode,
-        )
-    resolved, speakers = dialogue.assign_voices(
-        segments, dialogue.DEFAULT_VOICE_ROTATION, overrides
-    )
-    with output.status("Synthesizing speech…", json_mode=json_mode, quiet=quiet):
-        result = session.synthesize_dialogue(
-            api_key,
-            resolved,
-            language=language,
-            sample_rate=sample_rate,
-            on_warning=lambda m: output.emit_warning(m, json_mode=json_mode),
-        )
-    _output_audio(result, out)
-    _emit_multi(result, speakers, len(resolved), out, json_mode=json_mode)
-
 
 @app.command(
     rich_help_panel=help_panels.TRANSCRIPTION,
@@ -221,47 +68,15 @@ def speak(
     goes before the subcommand).
     """
 
-    def body(state: AppState, json_mode: bool) -> None:
-        if not session.is_available():
-            raise CLIError(
-                "assembly speak is only available in the sandbox.",
-                error_type="unsupported_environment",
-                exit_code=2,
-                suggestion="Re-run as: assembly --sandbox speak … "
-                "(--sandbox goes before the command; or use --env sandbox000).",
-            )
-        spoken = _read_text(text)
-        api_key = state.resolve_api_key()
-        bare_voice, overrides = dialogue.parse_voice_overrides(voice)
-        if dialogue.looks_like_speaker_labeled(spoken):
-            _speak_dialogue(
-                api_key,
-                spoken,
-                bare_voice,
-                overrides,
-                language,
-                sample_rate,
-                out,
-                json_mode=json_mode,
-                quiet=state.quiet,
-            )
-        else:
-            if overrides:
-                # Mirror the inverse warning in _speak_dialogue: never drop a
-                # requested voice mapping silently.
-                output.emit_warning(
-                    "Ignoring --voice SPEAKER=VOICE mappings; input has no speaker labels.",
-                    json_mode=json_mode,
-                )
-            _speak_single(
-                api_key,
-                spoken,
-                bare_voice or DEFAULT_VOICE,
-                language,
-                sample_rate,
-                out,
-                json_mode=json_mode,
-                quiet=state.quiet,
-            )
-
-    run_command(ctx, body, json=json_out)
+    opts = speak_exec.SpeakOptions(
+        text=text,
+        voice=voice,
+        language=language,
+        sample_rate=sample_rate,
+        out=out,
+    )
+    run_command(
+        ctx,
+        lambda state, json_mode: speak_exec.run_speak(opts, state, json_mode=json_mode),
+        json=json_out,
+    )
diff --git a/aai_cli/commands/stream.py b/aai_cli/commands/stream.py
index 822018d9..4eb9bc40 100644
--- a/aai_cli/commands/stream.py
+++ b/aai_cli/commands/stream.py
@@ -1,81 +1,19 @@
 from __future__ import annotations
 
-import tempfile
 from pathlib import Path
 
 import typer
 from assemblyai.streaming.v3 import Encoding, NoiseSuppressionModel, SpeechModel
 
-from aai_cli import (
-    choices,
-    client,
-    code_gen,
-    config_builder,
-    help_panels,
-    llm,
-    options,
-    output,
-    youtube,
-)
-from aai_cli.context import AppState, run_command
-from aai_cli.errors import UsageError
-from aai_cli.follow import FollowRenderer
+from aai_cli import choices, help_panels, llm, options, stream_exec
+from aai_cli.context import run_command
 from aai_cli.help_text import examples_epilog
-from aai_cli.microphone import MicrophoneSource
-from aai_cli.streaming.macos import MacSystemAudioSource
-from aai_cli.streaming.render import StreamRenderer
-from aai_cli.streaming.session import (
-    SourceOptions,
-    StreamSession,
-    validate_output_flags,
-    validate_sources,
-)
-from aai_cli.streaming.sources import TARGET_RATE, FileSource, StdinSource
 
 app = typer.Typer()
 
 DEFAULT_SPEECH_MODEL = SpeechModel.u3_rt_pro
 
 
-def _dispatch(session: StreamSession, opts: SourceOptions) -> None:
-    """Open the right audio source(s) for the flags and stream them."""
-    if opts.from_system_audio:
-        system = MacSystemAudioSource(on_open=session.on_open)
-        if opts.system_audio_only:
-            session.run(system, system.sample_rate, source_label="system")
-        else:
-            mic = MicrophoneSource(
-                target_rate=TARGET_RATE,
-                device=opts.device,
-                capture_rate=opts.sample_rate,
-                on_open=session.on_open,
-            )
-            session.run_parallel(
-                [("system", system, system.sample_rate), ("you", mic, mic.sample_rate)]
-            )
-    elif opts.from_stdin:
-        # Raw PCM16 mono piped on stdin (e.g. `ffmpeg … -f s16le - | assembly stream -`).
-        stdin_src = StdinSource(sample_rate=opts.sample_rate or TARGET_RATE)
-        session.run(stdin_src, stdin_src.sample_rate)
-    elif opts.source and youtube.is_downloadable_url(opts.source):
-        # Fetch the audio first, then stream the local file in real time.
-        with tempfile.TemporaryDirectory(prefix="aai-yt-") as td:
-            local = youtube.download_audio(opts.source, Path(td))
-            session.run(FileSource(str(local)), TARGET_RATE)
-    elif opts.from_file:
-        file_audio = FileSource(client.resolve_audio_source(opts.source, sample=opts.sample))
-        session.run(file_audio, file_audio.sample_rate)
-    else:
-        # Capture at the device's native rate (or --sample-rate override) and tell the
-        # streaming API that rate, rather than forcing one the device may reject.
-        # "Listening…" is announced once the device is open (see StreamSession.on_open),
-        # not when the session opens — so early speech isn't lost in the gap.
-        mic = MicrophoneSource(
-            device=opts.device, capture_rate=opts.sample_rate, on_open=session.on_open
-        )
-        session.run(mic, mic.sample_rate)
-
-
 @app.command(
     rich_help_panel=help_panels.TRANSCRIPTION,
     epilog=examples_epilog(
@@ -348,102 +286,47 @@ def stream(
     in-process, refreshing the answer on every finalized turn; for a separate step
     instead, pipe the text out with -o text | assembly llm -f "…".
     """
-
-    def body(state: AppState, json_mode: bool) -> None:
-        validate_output_flags(json_mode=json_mode, output_field=output_field)
-        text_mode, json_mode = output.stream_output_modes(output_field, json_mode=json_mode)
-        opts = SourceOptions(
-            source=source,
-            sample=sample,
-            sample_rate=sample_rate,
-            device=device,
-            system_audio=system_audio,
-            system_audio_only=system_audio_only,
-        )
-        # Every streaming flag except sample_rate, which is set per source at stream time.
-        base_flags: dict[str, object] = {
-            "speech_model": config_builder.enum_value(speech_model),
-            "format_turns": format_turns if format_turns is not None else True,
-            "encoding": config_builder.enum_value(encoding),
-            "language_detection": language_detection,
-            "domain": domain,
-            "end_of_turn_confidence_threshold": end_of_turn_confidence_threshold,
-            "min_turn_silence": min_turn_silence,
-            "max_turn_silence": max_turn_silence,
-            "vad_threshold": vad_threshold,
-            "include_partial_turns": include_partial_turns,
-            "keyterms_prompt": list(keyterms_prompt) if keyterms_prompt else None,
-            "filter_profanity": filter_profanity,
-            "speaker_labels": speaker_labels,
-            "max_speakers": max_speakers,
-            "voice_focus": config_builder.enum_value(voice_focus),
-            "voice_focus_threshold": voice_focus_threshold,
-            "redact_pii": redact_pii,
-            "redact_pii_policies": config_builder.split_csv(redact_pii_policy),
-            "redact_pii_sub": redact_pii_sub,
-            "inactivity_timeout": inactivity_timeout,
-            "webhook_url": webhook_url,
-            "prompt": prompt,
-        }
-        base_flags.update(config_builder.auth_header_flags(webhook_auth_header))
-
-        if show_code:
-            # Print-only: emit a script faithful to the requested source — mic
-            # (default), stdin (-), or a file/URL — and exit without opening audio or
-            # authenticating. Raw stdout so `--show-code > script.py` is runnable.
-            # The same source validation as a real run, so e.g. a file + --sample-rate
-            # conflict errors here too instead of silently generating mic code.
-            validate_sources(opts, has_llm=bool(llm_prompt), text_mode=text_mode)
-            if opts.from_system_audio:
-                raise UsageError("--show-code does not support macOS system audio capture yet.")
-            if opts.source and youtube.is_downloadable_url(opts.source):
-                raise UsageError(
-                    "--show-code does not support downloaded sources (YouTube, podcast pages) yet.",
-                    suggestion="Download the audio first (e.g. yt-dlp) and pass the local file.",
-                )
-            code_source: str | None = None
-            if opts.from_stdin:
-                code_source = "-"
-            elif opts.from_file:
-                # check_local=False: generating code for a file you don't have yet is fine.
-                code_source = client.resolve_audio_source(
-                    opts.source, sample=opts.sample, check_local=False
-                )
-            merged = config_builder.merge_streaming_params(
-                # sample_rate precedence: --sample-rate (None is dropped by the merge)
-                # beats --config/--config-file, which beat the 16 kHz default below —
-                # so an explicit `--config sample_rate=…` is honored, not overridden.
-                flags=base_flags | {"sample_rate": opts.sample_rate},
-                overrides=config_kv,
-                config_file=config_file,
-            )
-            merged.setdefault("sample_rate", TARGET_RATE)
-            gateway = code_gen.gateway_options(
-                list(llm_prompt or []), model, max_tokens, interval=llm_interval
-            )
-            output.print_code(code_gen.stream(merged, llm=gateway, source=code_source))
-            return
-
-        # Validate the requested sources (including that a local file exists) before
-        # credentials, so a typo'd path reads as "file not found" — not as a login.
-        validate_sources(opts, has_llm=bool(llm_prompt), text_mode=text_mode)
-        if opts.from_file and not opts.from_stdin:
-            client.resolve_audio_source(opts.source, sample=opts.sample)
-        api_key = state.resolve_api_key()
-
-        llm_prompts = list(llm_prompt or [])
-        session = StreamSession(
-            api_key=api_key,
-            base_flags=base_flags,
-            overrides=config_kv,
-            config_file=config_file,
-            renderer=StreamRenderer(json_mode=json_mode, text_mode=text_mode),
-            follow=FollowRenderer(json_mode=json_mode) if llm_prompts else None,
-            llm_prompts=llm_prompts,
-            model=model,
-            max_tokens=max_tokens,
-            llm_interval=llm_interval,
-        )
-        _dispatch(session, opts)
-
-    run_command(ctx, body, json=json_out)
+    opts = stream_exec.StreamOptions(
+        source=source,
+        sample=sample,
+        sample_rate=sample_rate,
+        device=device,
+        system_audio=system_audio,
+        system_audio_only=system_audio_only,
+        speech_model=speech_model,
+        encoding=encoding,
+        language_detection=language_detection,
+        domain=domain,
+        prompt=prompt,
+        keyterms_prompt=keyterms_prompt,
+        end_of_turn_confidence_threshold=end_of_turn_confidence_threshold,
+        min_turn_silence=min_turn_silence,
+        max_turn_silence=max_turn_silence,
+        vad_threshold=vad_threshold,
+        format_turns=format_turns,
+        include_partial_turns=include_partial_turns,
+        speaker_labels=speaker_labels,
+        max_speakers=max_speakers,
+        voice_focus=voice_focus,
+        voice_focus_threshold=voice_focus_threshold,
+        inactivity_timeout=inactivity_timeout,
+        filter_profanity=filter_profanity,
+        redact_pii=redact_pii,
+        redact_pii_policy=redact_pii_policy,
+        redact_pii_sub=redact_pii_sub,
+        webhook_url=webhook_url,
+        webhook_auth_header=webhook_auth_header,
+        llm_prompt=llm_prompt,
+        llm_interval=llm_interval,
+        model=model,
+        max_tokens=max_tokens,
+        config_kv=config_kv,
+        config_file=config_file,
+        output_field=output_field,
+        show_code=show_code,
+    )
+    run_command(
+        ctx,
+        lambda state, json_mode: stream_exec.run_stream(opts, state, json_mode=json_mode),
+        json=json_out,
+    )
diff --git a/aai_cli/commands/transcribe.py b/aai_cli/commands/transcribe.py
index 6989205f..21a0320b 100644
--- a/aai_cli/commands/transcribe.py
+++ b/aai_cli/commands/transcribe.py
@@ -5,23 +5,8 @@
 import assemblyai as aai
 import typer
 
-from aai_cli import (
-    choices,
-    client,
-    code_gen,
-    config_builder,
-    help_panels,
-    llm,
-    options,
-    output,
-    transcribe_batch,
-    transcribe_exec,
-)
-
-# The package attribute `code_gen.transcribe` is the wrapper function, so the module's
-# render() (which also takes the -o output field) is imported from the submodule itself.
-from aai_cli.code_gen.transcribe import render as render_transcribe_code
-from aai_cli.context import AppState, run_command
+from aai_cli import choices, help_panels, llm, options, transcribe_exec
+from aai_cli.context import run_command
 from aai_cli.help_text import examples_epilog
 
 app = typer.Typer()
@@ -361,130 +346,59 @@ def transcribe(
 
     Curated flags cover common features; --config KEY=VALUE and --config-file reach every other field. Analysis (summary, chapters, ...) renders in human mode.
     """
-
-    def body(state: AppState, json_mode: bool) -> None:
-        transcribe_exec.validate_language_flags(
-            language_code, language_detection=language_detection
-        )
-        pii_policies = config_builder.split_csv(redact_pii_policy)
-        transcribe_exec.validate_pii_policies(pii_policies)
-        flags: dict[str, object] = {
-            "speech_model": config_builder.enum_value(speech_model),
-            "language_code": language_code,
-            "language_detection": language_detection,
-            "keyterms_prompt": list(keyterms_prompt) if keyterms_prompt else None,
-            "temperature": temperature,
-            "prompt": prompt,
-            "punctuate": punctuate,
-            "format_text": format_text,
-            "disfluencies": disfluencies,
-            "speaker_labels": speaker_labels or None,
-            "speakers_expected": speakers_expected,
-            "multichannel": multichannel,
-            "redact_pii": redact_pii,
-            "redact_pii_policies": pii_policies,
-            "redact_pii_sub": config_builder.enum_value(redact_pii_sub),
-            "redact_pii_audio": redact_pii_audio,
-            "filter_profanity": filter_profanity,
-            "content_safety": content_safety,
-            "content_safety_confidence": content_safety_confidence,
-            "speech_threshold": speech_threshold,
-            "summarization": summarization,
-            "summary_model": config_builder.enum_value(summary_model),
-            "summary_type": config_builder.enum_value(summary_type),
-            "auto_chapters": auto_chapters,
-            "sentiment_analysis": sentiment_analysis,
-            "entity_detection": entity_detection,
-            "auto_highlights": auto_highlights,
-            "iab_categories": topic_detection,
-            "word_boost": list(word_boost) if word_boost else None,
-            "custom_spelling": (
-                config_builder.load_custom_spelling(custom_spelling_file)
-                if custom_spelling_file
-                else None
-            ),
-            "audio_start_from": audio_start,
-            "audio_end_at": audio_end,
-            "webhook_url": webhook_url,
-            "speech_understanding": (
-                config_builder.translation_request(list(translate_to)) if translate_to else None
-            ),
-        }
-        flags.update(config_builder.auth_header_flags(webhook_auth_header))
-
-        transcribe_exec.validate_out_with_llm(out, llm_prompt)
-        transcribe_exec.validate_out_path(out)
-        transcribe_exec.validate_json_with_output(output_field, json_mode=json_mode)
-
-        merged = config_builder.merge_transcribe_config(
-            flags=flags, overrides=config_kv, config_file=config_file
-        )
-
-        transcribe_exec.validate_speakers_expected(merged)
-
-        sources = transcribe_batch.expand_sources(source, from_stdin=from_stdin, sample=sample)
-        if sources is not None:
-            transcribe_batch.reject_single_source_flags(
-                out=out, output_field=output_field, llm_prompt=llm_prompt, show_code=show_code
-            )
-            transcribe_batch.run_batch(
-                state.resolve_api_key(),
-                sources,
-                transcription_config=config_builder.construct_transcription_config(merged),
-                concurrency=concurrency,
-                force=force,
-                json_mode=json_mode,
-                quiet=state.quiet,
-            )
-            return
-
-        if show_code:
-            # Print-only: build the equivalent script and exit without transcribing or
-            # authenticating (raw stdout, so `--show-code > script.py` runs). No
-            # source/--sample needed — fall back to a placeholder path for a pure snippet.
-            audio = (
-                client.resolve_audio_source(source, sample=sample, check_local=False)
-                if source or sample
-                else "your-audio-file.mp3"
-            )
-            gateway = code_gen.gateway_options(list(llm_prompt or []), model, max_tokens)
-            output.print_code(
-                render_transcribe_code(
-                    merged,
-                    audio,
-                    llm_gateway=gateway,
-                    output=output_field,
-                    download_sections=list(download_sections or []),
-                )
-            )
-            return
-
-        tc = config_builder.construct_transcription_config(merged)
-
-        # A typo'd path must read as "file not found", not trigger a login.
-        transcribe_exec.check_source_exists(source, sample=sample)
-        transcribe_exec.warn_unrecognized_extension(source, json_mode=json_mode, quiet=state.quiet)
-
-        api_key = state.resolve_api_key()
-        with output.status("Transcribing…", json_mode=json_mode, quiet=state.quiet):
-            transcript = transcribe_exec.run_transcription(
-                api_key,
-                source,
-                sample=sample,
-                transcription_config=tc,
-                download_sections=list(download_sections or []),
-            )
-
-        transcribe_exec.deliver_result(
-            transcript,
-            api_key=api_key,
-            out=out,
-            output_field=output_field,
-            transform=transcribe_exec.TransformOptions(
-                prompts=list(llm_prompt or []), model=model, max_tokens=max_tokens
-            ),
-            json_mode=json_mode,
-            quiet=state.quiet,
-        )
-
-    run_command(ctx, body, json=json_out)
+    opts = transcribe_exec.TranscribeOptions(
+        source=source,
+        sample=sample,
+        from_stdin=from_stdin,
+        concurrency=concurrency,
+        force=force,
+        speech_model=speech_model,
+        language_code=language_code,
+        language_detection=language_detection,
+        keyterms_prompt=keyterms_prompt,
+        temperature=temperature,
+        prompt=prompt,
+        punctuate=punctuate,
+        format_text=format_text,
+        disfluencies=disfluencies,
+        speaker_labels=speaker_labels,
+        speakers_expected=speakers_expected,
+        multichannel=multichannel,
+        redact_pii=redact_pii,
+        redact_pii_policy=redact_pii_policy,
+        redact_pii_sub=redact_pii_sub,
+        redact_pii_audio=redact_pii_audio,
+        filter_profanity=filter_profanity,
+        content_safety=content_safety,
+        content_safety_confidence=content_safety_confidence,
+        speech_threshold=speech_threshold,
+        summarization=summarization,
+        summary_model=summary_model,
+        summary_type=summary_type,
+        auto_chapters=auto_chapters,
+        sentiment_analysis=sentiment_analysis,
+        entity_detection=entity_detection,
+        auto_highlights=auto_highlights,
+        topic_detection=topic_detection,
+        word_boost=word_boost,
+        custom_spelling_file=custom_spelling_file,
+        audio_start=audio_start,
+        audio_end=audio_end,
+        download_sections=download_sections,
+        webhook_url=webhook_url,
+        webhook_auth_header=webhook_auth_header,
+        translate_to=translate_to,
+        config_kv=config_kv,
+        config_file=config_file,
+        llm_prompt=llm_prompt,
+        model=model,
+        max_tokens=max_tokens,
+        output_field=output_field,
+        out=out,
+        show_code=show_code,
+    )
+    run_command(
+        ctx,
+        lambda state, json_mode: transcribe_exec.run_transcribe(opts, state, json_mode=json_mode),
+        json=json_out,
+    )
diff --git a/aai_cli/llm_exec.py b/aai_cli/llm_exec.py
new file mode 100644
index 00000000..d8c500e6
--- /dev/null
+++ b/aai_cli/llm_exec.py
@@ -0,0 +1,159 @@
+"""Run logic for `assembly llm`: the options/run split (see AGENTS.md).
+
+The command module (aai_cli/commands/llm.py) only parses argv — it builds an
+``LlmOptions`` and hands it to ``run_llm`` via ``context.run_command``, so tests can
+drive one-shot and --follow behavior by constructing options directly, with no
+CliRunner argv round-trip. (``aai_cli/llm.py`` is the gateway client itself and is
+rich-free by architecture contract, so the rendering-aware run path lives here.)
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from rich.markup import escape
+
+from aai_cli import choices, client, output, stdio
+from aai_cli import llm as gateway
+from aai_cli.context import AppState
+from aai_cli.errors import UsageError
+from aai_cli.follow import FollowRenderer
+
+_FOLLOW_STDIN_MESSAGE = (
+    "--follow needs transcript text piped on stdin, e.g. "
+    '`assembly stream -o text | assembly llm -f "summarize action items as I talk"`.'
+)
+
+
+@dataclass(frozen=True)
+class LlmOptions:
+    """Every `assembly llm` prompt flag as plain data.
+
+    ``--list-models`` is excluded: it dispatches to its own auth-free body in the
+    command module. ``--json`` is excluded: run_command resolves it into the
+    ``json_mode`` argument.
+    """
+
+    prompt: str | None
+    model: str
+    transcript_id: str | None
+    system: str | None
+    follow: bool
+    output_field: choices.TextOrJson | None
+    max_tokens: int
+
+
+def _validate_follow_args(
+    prompt: str | None, output_field: str | None, transcript_id: str | None
+) -> str:
+    """Reject flag combinations that don't apply to --follow's live-panel mode.
+
+    Returns the validated (non-empty) prompt so the caller has a plain ``str``.
+    """
+    if not prompt:
+        raise UsageError("Provide a prompt to run over the streamed transcript.")
+    if output_field is not None:
+        raise UsageError(
+            "--output applies to one-shot mode; --follow renders a live panel "
+            "(or NDJSON when piped)."
+        )
+    if transcript_id:
+        raise UsageError(
+            "--follow runs over live transcript text piped on stdin; it can't be "
+            "combined with --transcript-id."
+        )
+    if not stdio.stdin_is_piped():
+        raise UsageError(_FOLLOW_STDIN_MESSAGE)
+    return prompt
+
+
+def _stdin_transcript_text(
+    state: AppState, transcript_id: str | None, *, json_mode: bool
+) -> str | None:
+    """Resolve the inline transcript text for one-shot mode.
+
+    Text piped on stdin becomes the content the prompt operates on, unless an
+    explicit --transcript-id is given — that injects server-side and takes
+    priority, so piped text is ignored with a visible warning (suppressed by
+    --quiet, structured under --json).
+    """
+    if transcript_id is None:
+        return stdio.piped_stdin_text()
+    # Same cheap local id check as `transcripts get`, before auth or network.
+    client.validate_transcript_id(transcript_id)
+    if stdio.stdin_is_piped() and not state.quiet:
+        output.emit_warning(
+            "Ignoring piped stdin; --transcript-id takes priority.", json_mode=json_mode
+        )
+    return None
+
+
+def _run_follow(opts: LlmOptions, state: AppState, *, json_mode: bool) -> None:
+    prompt_text = _validate_follow_args(opts.prompt, opts.output_field, opts.transcript_id)
+    api_key = state.resolve_api_key()
+
+    def ask(transcript_text: str) -> str:
+        messages = gateway.build_messages(
+            prompt_text, system=opts.system, transcript_text=transcript_text
+        )
+        response = gateway.complete(
+            api_key, model=opts.model, messages=messages, max_tokens=opts.max_tokens
+        )
+        return gateway.content_of(response)
+
+    transcript: list[str] = []
+    interrupted = False
+    with FollowRenderer(json_mode=json_mode) as render:
+        # Ctrl-C is the normal "stop watching" signal -> exit cleanly (code 0).
+        try:
+            for turn in stdio.iter_piped_stdin_lines():
+                transcript.append(turn)
+                render(ask("\n".join(transcript)), len(transcript))
+        except KeyboardInterrupt:
+            interrupted = True
+    if not transcript and not interrupted:
+        # An empty pipe (`assembly llm -f "…" </dev/null`) would otherwise exit 0
+        # silently, having asked nothing.
+        raise UsageError(_FOLLOW_STDIN_MESSAGE)
+
+
+def _run_oneshot(opts: LlmOptions, state: AppState, *, json_mode: bool) -> None:
+    if not opts.prompt:
+        raise UsageError(
+            "Provide a prompt.",
+            suggestion="Or pass --list-models to see available models.",
+        )
+    prompt_text = opts.prompt
+    stdin_text = _stdin_transcript_text(state, opts.transcript_id, json_mode=json_mode)
+    api_key = state.resolve_api_key()
+    messages = gateway.build_messages(
+        prompt_text,
+        system=opts.system,
+        transcript_id=opts.transcript_id,
+        transcript_text=stdin_text,
+    )
+    response = gateway.complete(
+        api_key,
+        model=opts.model,
+        messages=messages,
+        max_tokens=opts.max_tokens,
+        transcript_id=opts.transcript_id,
+    )
+    content = gateway.content_of(response)
+    if opts.output_field == "text":
+        # Just the answer, raw — so `… | assembly llm -o text "…" | next` composes cleanly.
+        output.emit_text(content)
+        return
+    output.emit(
+        {"model": opts.model, "output": content, "usage": gateway.usage_of(response)},
+        lambda d: escape(str(d["output"])),
+        json_mode=json_mode or opts.output_field == "json",
+    )
+
+
+def run_llm(opts: LlmOptions, state: AppState, *, json_mode: bool) -> None:
+    """Execute one `assembly llm` invocation (one-shot or --follow) from parsed flags."""
+    if opts.follow:
+        _run_follow(opts, state, json_mode=json_mode)
+    else:
+        _run_oneshot(opts, state, json_mode=json_mode)
diff --git a/aai_cli/speak_exec.py b/aai_cli/speak_exec.py
new file mode 100644
index 00000000..96f9ca3d
--- /dev/null
+++ b/aai_cli/speak_exec.py
@@ -0,0 +1,220 @@
+"""Run logic for `assembly speak`: the options/run split (see AGENTS.md).
+
+The command module (aai_cli/commands/speak.py) only parses argv — it builds a
+``SpeakOptions`` and hands it to ``run_speak`` via ``context.run_command``, so tests
+can drive text resolution, voice assignment, and synthesis wiring by constructing
+options directly, with no CliRunner argv round-trip.
+"""
+
+from __future__ import annotations
+
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+
+from aai_cli import output
+from aai_cli.context import AppState
+from aai_cli.errors import CLIError, UsageError
+from aai_cli.tts import audio, dialogue, session
+
+# The streaming-TTS reference client defaults to the PocketTTS "jane" voice and
+# English, so the CLI sends the same and a bare `assembly speak` works out of the box.
+# Override either with --voice/--language.
+DEFAULT_VOICE = "jane"
+DEFAULT_LANGUAGE = "English"
+
+
+@dataclass(frozen=True)
+class SpeakOptions:
+    """Every `assembly speak` flag as plain data (``--json`` excluded: run_command
+    resolves it into the ``json_mode`` argument)."""
+
+    text: str | None
+    voice: list[str]
+    language: str
+    sample_rate: int | None
+    out: Path | None
+
+
+def _read_text(text: str | None) -> str:
+    """The text to speak: the non-blank argument, or piped stdin when the argument
+    is omitted entirely. A *blank* argument (e.g. "") is a usage error, never a
+    silent fall-through to stdin — so `assembly speak "$MSG"` with an empty MSG fails
+    fast instead of consuming whatever happens to be on the pipe."""
+    if text is not None and text.strip():
+        return text
+    # `text is None` (argument omitted), not merely blank: see the docstring rationale.
+    if text is None and not sys.stdin.isatty():
+        piped = sys.stdin.read().strip()
+        if piped:
+            return piped
+    raise UsageError(
+        "No text to speak.",
+        suggestion='Pass text as an argument: assembly speak "Hello" — or pipe it via stdin.',
+    )
+
+
+def _output_audio(result: session.SpeakResult, out: Path | None) -> None:
+    """Write a WAV when --out is given, else play through the speakers."""
+    if out is not None:
+        audio.write_wav(out, result.pcm, result.sample_rate)
+    else:
+        audio.play_pcm(result.pcm, result.sample_rate)
+
+
+def _disposition(out: Path | None) -> str:
+    return f"saved to {out}" if out is not None else "played"
+
+
+def _emit_single(
+    result: session.SpeakResult,
+    cfg: session.SpeakConfig,
+    out: Path | None,
+    *,
+    json_mode: bool,
+) -> None:
+    """Single-voice result: a JSON object on stdout, or a human note on stderr."""
+    duration = round(result.audio_duration_seconds, 3)
+    if json_mode:
+        output.emit_ndjson(
+            {
+                "voice": cfg.voice,
+                "language": cfg.language,
+                "sample_rate": result.sample_rate,
+                "audio_duration_seconds": duration,
+                "bytes": len(result.pcm),
+                "out": str(out) if out is not None else None,
+            }
+        )
+        return
+    output.error_console.print(
+        f"[aai.muted]Spoke {duration}s of audio ({_disposition(out)}).[/aai.muted]"
+    )
+
+
+def _emit_multi(
+    result: session.SpeakResult,
+    speakers: dict[str, str],
+    segment_count: int,
+    out: Path | None,
+    *,
+    json_mode: bool,
+) -> None:
+    """Multi-voice result: a JSON object on stdout, or a human note on stderr."""
+    duration = round(result.audio_duration_seconds, 3)
+    if json_mode:
+        output.emit_ndjson(
+            {
+                "mode": "multi",
+                "speakers": speakers,
+                "segments": segment_count,
+                "sample_rate": result.sample_rate,
+                "audio_duration_seconds": duration,
+                "bytes": len(result.pcm),
+                "out": str(out) if out is not None else None,
+            }
+        )
+        return
+    voices = ", ".join(f"{spk}={voice}" for spk, voice in speakers.items())
+    output.error_console.print(
+        f"[aai.muted]Spoke {duration}s across {len(speakers)} voices "
+        f"({voices}) ({_disposition(out)}).[/aai.muted]"
+    )
+
+
+def _speak_single(
+    api_key: str,
+    text: str,
+    voice: str,
+    opts: SpeakOptions,
+    *,
+    json_mode: bool,
+    quiet: bool,
+) -> None:
+    cfg = session.SpeakConfig(
+        text=text, voice=voice, language=opts.language, sample_rate=opts.sample_rate
+    )
+    with output.status("Synthesizing speech…", json_mode=json_mode, quiet=quiet):
+        result = session.synthesize(
+            api_key, cfg, on_warning=lambda m: output.emit_warning(m, json_mode=json_mode)
+        )
+    _output_audio(result, opts.out)
+    _emit_single(result, cfg, opts.out, json_mode=json_mode)
+
+
+def _speak_dialogue(
+    api_key: str,
+    text: str,
+    bare_voice: str | None,
+    overrides: dict[str, str],
+    opts: SpeakOptions,
+    *,
+    json_mode: bool,
+    quiet: bool,
+) -> None:
+    segments = dialogue.parse_segments(text)
+    if not segments:
+        raise UsageError(
+            "No text to speak.",
+            suggestion="The input had speaker labels but no spoken text.",
+        )
+    if bare_voice is not None:
+        output.emit_warning(
+            "Ignoring bare --voice in multi-speaker mode; "
+            "set a voice per speaker with --voice A=NAME.",
+            json_mode=json_mode,
+        )
+    resolved, speakers = dialogue.assign_voices(
+        segments, dialogue.DEFAULT_VOICE_ROTATION, overrides
+    )
+    with output.status("Synthesizing speech…", json_mode=json_mode, quiet=quiet):
+        result = session.synthesize_dialogue(
+            api_key,
+            resolved,
+            language=opts.language,
+            sample_rate=opts.sample_rate,
+            on_warning=lambda m: output.emit_warning(m, json_mode=json_mode),
+        )
+    _output_audio(result, opts.out)
+    _emit_multi(result, speakers, len(resolved), opts.out, json_mode=json_mode)
+
+
+def run_speak(opts: SpeakOptions, state: AppState, *, json_mode: bool) -> None:
+    """Execute one `assembly speak` invocation from already-parsed flags."""
+    if not session.is_available():
+        raise CLIError(
+            "assembly speak is only available in the sandbox.",
+            error_type="unsupported_environment",
+            exit_code=2,
+            suggestion="Re-run as: assembly --sandbox speak … "
+            "(--sandbox goes before the command; or use --env sandbox000).",
+        )
+    spoken = _read_text(opts.text)
+    api_key = state.resolve_api_key()
+    bare_voice, overrides = dialogue.parse_voice_overrides(opts.voice)
+    if dialogue.looks_like_speaker_labeled(spoken):
+        _speak_dialogue(
+            api_key,
+            spoken,
+            bare_voice,
+            overrides,
+            opts,
+            json_mode=json_mode,
+            quiet=state.quiet,
+        )
+    else:
+        if overrides:
+            # Mirror the inverse warning in _speak_dialogue: never drop a
+            # requested voice mapping silently.
+            output.emit_warning(
+                "Ignoring --voice SPEAKER=VOICE mappings; input has no speaker labels.",
+                json_mode=json_mode,
+            )
+        _speak_single(
+            api_key,
+            spoken,
+            bare_voice or DEFAULT_VOICE,
+            opts,
+            json_mode=json_mode,
+            quiet=state.quiet,
+        )
diff --git a/aai_cli/stream_exec.py b/aai_cli/stream_exec.py
new file mode 100644
index 00000000..f2bdf72e
--- /dev/null
+++ b/aai_cli/stream_exec.py
@@ -0,0 +1,236 @@
+"""Run logic for `assembly stream`: a gh-style options/run split.
+
+The command module (aai_cli/commands/stream.py) only parses argv — it builds a
+``StreamOptions`` and hands it to ``run_stream`` via ``context.run_command``. Keeping
+the run path a module-level function of plain data (instead of a closure over the
+Typer locals) lets tests drive validation, --show-code, and session wiring by
+constructing a ``StreamOptions`` directly, with no CliRunner argv round-trip.
+"""
+
+from __future__ import annotations
+
+import tempfile
+from dataclasses import dataclass
+from pathlib import Path
+
+from assemblyai.streaming.v3 import Encoding, NoiseSuppressionModel, SpeechModel
+
+from aai_cli import choices, client, code_gen, config_builder, output, youtube
+from aai_cli.context import AppState
+from aai_cli.errors import UsageError
+from aai_cli.follow import FollowRenderer
+from aai_cli.microphone import MicrophoneSource
+from aai_cli.streaming.macos import MacSystemAudioSource
+from aai_cli.streaming.render import StreamRenderer
+from aai_cli.streaming.session import (
+    SourceOptions,
+    StreamSession,
+    validate_output_flags,
+    validate_sources,
+)
+from aai_cli.streaming.sources import TARGET_RATE, FileSource, StdinSource
+
+
+@dataclass(frozen=True)
+class StreamOptions:
+    """Every `assembly stream` flag as plain, immutable (frozen) data.
+
+    One field per CLI flag (``--json`` excluded: run_command resolves it into the
+    ``json_mode`` argument), so a test can describe an invocation without argv.
+    """
+
+    source: str | None
+    sample: bool
+    sample_rate: int | None  # not in base_flags(); set per source at stream time
+    device: int | None
+    system_audio: bool
+    system_audio_only: bool
+    speech_model: SpeechModel
+    encoding: Encoding | None
+    language_detection: bool | None
+    domain: str | None
+    prompt: str | None
+    keyterms_prompt: list[str] | None
+    end_of_turn_confidence_threshold: float | None
+    min_turn_silence: int | None
+    max_turn_silence: int | None
+    vad_threshold: float | None
+    format_turns: bool | None  # base_flags() sends True when this is None
+    include_partial_turns: bool | None
+    speaker_labels: bool | None
+    max_speakers: int | None
+    voice_focus: NoiseSuppressionModel | None
+    voice_focus_threshold: float | None
+    inactivity_timeout: int | None
+    filter_profanity: bool | None
+    redact_pii: bool | None
+    redact_pii_policy: str | None  # base_flags() splits this CSV into "redact_pii_policies"
+    redact_pii_sub: str | None
+    webhook_url: str | None
+    webhook_auth_header: str | None
+    llm_prompt: list[str] | None  # non-empty -> run_stream attaches a FollowRenderer
+    llm_interval: float
+    model: str
+    max_tokens: int
+    config_kv: list[str] | None  # forwarded as `overrides=` alongside config_file
+    config_file: Path | None
+    output_field: choices.TextOrJson | None
+    show_code: bool  # True -> run_stream prints the SDK script and returns
+
+    def source_options(self) -> SourceOptions:
+        """The audio-input subset, in the shape the validation/dispatch helpers read."""
+        return SourceOptions(
+            source=self.source,
+            sample=self.sample,
+            sample_rate=self.sample_rate,
+            device=self.device,
+            system_audio=self.system_audio,
+            system_audio_only=self.system_audio_only,
+        )
+
+    def base_flags(self) -> dict[str, object]:
+        """Every streaming flag except sample_rate, which is set per source at stream time."""
+        flags: dict[str, object] = {
+            "speech_model": config_builder.enum_value(self.speech_model),
+            "format_turns": self.format_turns if self.format_turns is not None else True,
+            "encoding": config_builder.enum_value(self.encoding),
+            "language_detection": self.language_detection,
+            "domain": self.domain,
+            "end_of_turn_confidence_threshold": self.end_of_turn_confidence_threshold,
+            "min_turn_silence": self.min_turn_silence,
+            "max_turn_silence": self.max_turn_silence,
+            "vad_threshold": self.vad_threshold,
+            "include_partial_turns": self.include_partial_turns,
+            "keyterms_prompt": list(self.keyterms_prompt) if self.keyterms_prompt else None,
+            "filter_profanity": self.filter_profanity,
+            "speaker_labels": self.speaker_labels,
+            "max_speakers": self.max_speakers,
+            "voice_focus": config_builder.enum_value(self.voice_focus),
+            "voice_focus_threshold": self.voice_focus_threshold,
+            "redact_pii": self.redact_pii,
+            "redact_pii_policies": config_builder.split_csv(self.redact_pii_policy),
+            "redact_pii_sub": self.redact_pii_sub,
+            "inactivity_timeout": self.inactivity_timeout,
+            "webhook_url": self.webhook_url,
+            "prompt": self.prompt,
+        }
+        flags.update(config_builder.auth_header_flags(self.webhook_auth_header))
+        return flags
+
+
+def _print_show_code(
+    opts: StreamOptions,
+    sources: SourceOptions,
+    base_flags: dict[str, object],
+    *,
+    text_mode: bool,
+) -> None:
+    """Print the equivalent SDK script without opening audio or authenticating.
+
+    Emits a script faithful to the requested source — mic (default), stdin (-), or a file/URL —
+    on raw stdout, so `--show-code > script.py` is runnable. Applies the same source validation
+    as a real run, so e.g. a file + --sample-rate conflict errors here too instead of silently
+    generating mic code. Raises UsageError for system audio and downloadable-URL sources.
+    """
+    validate_sources(sources, has_llm=bool(opts.llm_prompt), text_mode=text_mode)
+    if sources.from_system_audio:
+        raise UsageError("--show-code does not support macOS system audio capture yet.")
+    if sources.source and youtube.is_downloadable_url(sources.source):
+        raise UsageError(
+            "--show-code does not support downloaded sources (YouTube, podcast pages) yet.",
+            suggestion="Download the audio first (e.g. yt-dlp) and pass the local file.",
+        )
+    code_source: str | None = None  # stays None for the microphone case
+    if sources.from_stdin:
+        code_source = "-"
+    elif sources.from_file:
+        # check_local=False: generating code for a file you don't have yet is fine.
+        code_source = client.resolve_audio_source(
+            sources.source, sample=sources.sample, check_local=False
+        )
+    merged = config_builder.merge_streaming_params(
+        # sample_rate precedence: --sample-rate (None is dropped by the merge)
+        # beats --config/--config-file, which beat the 16 kHz default below —
+        # so an explicit `--config sample_rate=…` is honored, not overridden.
+        flags=base_flags | {"sample_rate": sources.sample_rate},
+        overrides=opts.config_kv,
+        config_file=opts.config_file,
+    )
+    merged.setdefault("sample_rate", TARGET_RATE)  # only fills in when still absent
+    gateway = code_gen.gateway_options(
+        list(opts.llm_prompt or []), opts.model, opts.max_tokens, interval=opts.llm_interval
+    )
+    output.print_code(code_gen.stream(merged, llm=gateway, source=code_source))
+
+
+def _dispatch(session: StreamSession, opts: SourceOptions) -> None:
+    """Open the right audio source(s) for the flags and stream them."""
+    if opts.from_system_audio:
+        system = MacSystemAudioSource(on_open=session.on_open)
+        if opts.system_audio_only:
+            session.run(system, system.sample_rate, source_label="system")
+        else:
+            mic = MicrophoneSource(
+                target_rate=TARGET_RATE,
+                device=opts.device,
+                capture_rate=opts.sample_rate,
+                on_open=session.on_open,
+            )
+            session.run_parallel(  # two labeled sources: "system" and "you"
+                [("system", system, system.sample_rate), ("you", mic, mic.sample_rate)]
+            )
+    elif opts.from_stdin:
+        # Raw PCM16 mono piped on stdin (e.g. `ffmpeg … -f s16le - | assembly stream -`).
+        stdin_src = StdinSource(sample_rate=opts.sample_rate or TARGET_RATE)
+        session.run(stdin_src, stdin_src.sample_rate)
+    elif opts.source and youtube.is_downloadable_url(opts.source):
+        # Fetch the audio first, then stream the local file in real time.
+        with tempfile.TemporaryDirectory(prefix="aai-yt-") as td:  # deleted after the run
+            local = youtube.download_audio(opts.source, Path(td))
+            session.run(FileSource(str(local)), TARGET_RATE)  # NOTE(review): why not .sample_rate?
+    elif opts.from_file:
+        file_audio = FileSource(client.resolve_audio_source(opts.source, sample=opts.sample))
+        session.run(file_audio, file_audio.sample_rate)
+    else:
+        # Capture at the device's native rate (or --sample-rate override) and tell the
+        # streaming API that rate, rather than forcing one the device may reject.
+        # "Listening…" is announced once the device is open (see StreamSession.on_open),
+        # not when the session opens — so early speech isn't lost in the gap.
+        mic = MicrophoneSource(
+            device=opts.device, capture_rate=opts.sample_rate, on_open=session.on_open
+        )
+        session.run(mic, mic.sample_rate)
+
+
+def run_stream(opts: StreamOptions, state: AppState, *, json_mode: bool) -> None:
+    """Execute one `assembly stream` invocation from already-parsed flags."""
+    validate_output_flags(json_mode=json_mode, output_field=opts.output_field)
+    text_mode, json_mode = output.stream_output_modes(opts.output_field, json_mode=json_mode)
+    sources = opts.source_options()
+    base_flags = opts.base_flags()
+
+    if opts.show_code:
+        _print_show_code(opts, sources, base_flags, text_mode=text_mode)
+        return  # print-only path: credentials are never resolved
+
+    # Validate the requested sources (including that a local file exists) before
+    # credentials, so a typo'd path reads as "file not found" — not as a login.
+    validate_sources(sources, has_llm=bool(opts.llm_prompt), text_mode=text_mode)
+    if sources.from_file and not sources.from_stdin:
+        client.resolve_audio_source(sources.source, sample=sources.sample)  # result unused
+    api_key = state.resolve_api_key()
+
+    llm_prompts = list(opts.llm_prompt or [])  # None -> empty list
+    session = StreamSession(
+        api_key=api_key,
+        base_flags=base_flags,
+        overrides=opts.config_kv,
+        config_file=opts.config_file,
+        renderer=StreamRenderer(json_mode=json_mode, text_mode=text_mode),
+        follow=FollowRenderer(json_mode=json_mode) if llm_prompts else None,
+        llm_prompts=llm_prompts,
+        model=opts.model,
+        max_tokens=opts.max_tokens,
+        llm_interval=opts.llm_interval,
+    )
+    _dispatch(session, sources)
diff --git a/aai_cli/transcribe_exec.py b/aai_cli/transcribe_exec.py
index d0693ef6..5c602e8a 100644
--- a/aai_cli/transcribe_exec.py
+++ b/aai_cli/transcribe_exec.py
@@ -10,13 +10,26 @@
 import json
 import os
 import tempfile
+from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, NamedTuple
 
 import assemblyai as aai
 from rich.markup import escape
 
-from aai_cli import choices, client, llm, output, stdio, transcribe_render, youtube
+from aai_cli import (
+    choices,
+    client,
+    code_gen,
+    config_builder,
+    llm,
+    output,
+    stdio,
+    transcribe_render,
+    youtube,
+)
+from aai_cli.code_gen.transcribe import render as render_transcribe_code
+from aai_cli.context import AppState
 from aai_cli.errors import UsageError, mutually_exclusive
 
 # The PII policy strings the SDK accepts, validated client-side so a typo'd
@@ -225,3 +238,207 @@ def deliver_result(
         output.emit(client.transcript_json_payload(transcript), lambda d: d, json_mode=True)
     else:
         transcribe_render.render_transcript_result(transcript, output.console)
+
+
+@dataclass(frozen=True)
+class TranscribeOptions:
+    """Every `assembly transcribe` flag as plain data (options/run split, see AGENTS.md).
+
+    One field per CLI flag (``--json`` excluded: run_command resolves it into the
+    ``json_mode`` argument), so a test can describe an invocation without argv.
+    """
+
+    source: str | None
+    sample: bool
+    from_stdin: bool
+    concurrency: int
+    force: bool
+    speech_model: aai.SpeechModel | None
+    language_code: str | None
+    language_detection: bool | None
+    keyterms_prompt: list[str] | None
+    temperature: float | None
+    prompt: str | None
+    punctuate: bool | None
+    format_text: bool | None
+    disfluencies: bool | None
+    speaker_labels: bool  # plain bool; flags() maps False to None (unset)
+    speakers_expected: int | None
+    multichannel: bool | None
+    redact_pii: bool | None
+    redact_pii_policy: str | None  # CSV; not read by flags(), which takes the split list
+    redact_pii_sub: aai.PIISubstitutionPolicy | None
+    redact_pii_audio: bool | None
+    filter_profanity: bool | None
+    content_safety: bool | None
+    content_safety_confidence: int | None
+    speech_threshold: float | None
+    summarization: bool | None
+    summary_model: aai.SummarizationModel | None
+    summary_type: aai.SummarizationType | None
+    auto_chapters: bool | None
+    sentiment_analysis: bool | None
+    entity_detection: bool | None
+    auto_highlights: bool | None
+    topic_detection: bool | None  # sent as "iab_categories"
+    word_boost: list[str] | None
+    custom_spelling_file: Path | None  # loaded into "custom_spelling" when flags() runs
+    audio_start: int | None  # sent as "audio_start_from"
+    audio_end: int | None  # sent as "audio_end_at"
+    download_sections: list[str] | None
+    webhook_url: str | None
+    webhook_auth_header: str | None
+    translate_to: list[str] | None  # becomes the "speech_understanding" request
+    config_kv: list[str] | None
+    config_file: Path | None
+    llm_prompt: list[str] | None
+    model: str
+    max_tokens: int
+    output_field: choices.TranscriptOutput | None
+    out: Path | None
+    show_code: bool
+
+    def flags(self, pii_policies: list[str] | None) -> dict[str, object]:
+        """The curated flags in TranscriptionConfig field names (None = unset)."""
+        flags: dict[str, object] = {
+            "speech_model": config_builder.enum_value(self.speech_model),
+            "language_code": self.language_code,
+            "language_detection": self.language_detection,
+            "keyterms_prompt": list(self.keyterms_prompt) if self.keyterms_prompt else None,
+            "temperature": self.temperature,
+            "prompt": self.prompt,
+            "punctuate": self.punctuate,
+            "format_text": self.format_text,
+            "disfluencies": self.disfluencies,
+            "speaker_labels": self.speaker_labels or None,
+            "speakers_expected": self.speakers_expected,
+            "multichannel": self.multichannel,
+            "redact_pii": self.redact_pii,
+            "redact_pii_policies": pii_policies,  # the argument, not self.redact_pii_policy
+            "redact_pii_sub": config_builder.enum_value(self.redact_pii_sub),
+            "redact_pii_audio": self.redact_pii_audio,
+            "filter_profanity": self.filter_profanity,
+            "content_safety": self.content_safety,
+            "content_safety_confidence": self.content_safety_confidence,
+            "speech_threshold": self.speech_threshold,
+            "summarization": self.summarization,
+            "summary_model": config_builder.enum_value(self.summary_model),
+            "summary_type": config_builder.enum_value(self.summary_type),
+            "auto_chapters": self.auto_chapters,
+            "sentiment_analysis": self.sentiment_analysis,
+            "entity_detection": self.entity_detection,
+            "auto_highlights": self.auto_highlights,
+            "iab_categories": self.topic_detection,
+            "word_boost": list(self.word_boost) if self.word_boost else None,
+            "custom_spelling": (
+                config_builder.load_custom_spelling(self.custom_spelling_file)
+                if self.custom_spelling_file
+                else None
+            ),
+            "audio_start_from": self.audio_start,
+            "audio_end_at": self.audio_end,
+            "webhook_url": self.webhook_url,
+            "speech_understanding": (
+                config_builder.translation_request(list(self.translate_to))
+                if self.translate_to
+                else None
+            ),
+        }
+        flags.update(config_builder.auth_header_flags(self.webhook_auth_header))
+        return flags
+
+
+def _print_show_code(opts: TranscribeOptions, merged: dict[str, object]) -> None:
+    """Print the equivalent SDK script without transcribing or authenticating.
+
+    Raw stdout, so `--show-code > script.py` runs. No source/--sample needed — falls
+    back to a placeholder path for a pure snippet. The caller returns right after.
+    """
+    audio = (
+        client.resolve_audio_source(opts.source, sample=opts.sample, check_local=False)
+        if opts.source or opts.sample
+        else "your-audio-file.mp3"  # placeholder: neither a source nor --sample was given
+    )
+    gateway = code_gen.gateway_options(list(opts.llm_prompt or []), opts.model, opts.max_tokens)
+    output.print_code(
+        render_transcribe_code(
+            merged,
+            audio,
+            llm_gateway=gateway,
+            output=opts.output_field,
+            download_sections=list(opts.download_sections or []),
+        )
+    )
+
+
+def run_transcribe(opts: TranscribeOptions, state: AppState, *, json_mode: bool) -> None:
+    """Execute one `assembly transcribe` invocation from already-parsed flags."""
+    # Module-load order: transcribe_batch imports this module, so import it lazily.
+    from aai_cli import transcribe_batch
+
+    validate_language_flags(opts.language_code, language_detection=opts.language_detection)
+    pii_policies = config_builder.split_csv(opts.redact_pii_policy)
+    validate_pii_policies(pii_policies)
+    flags = opts.flags(pii_policies)
+
+    validate_out_with_llm(opts.out, opts.llm_prompt)
+    validate_out_path(opts.out)
+    validate_json_with_output(opts.output_field, json_mode=json_mode)
+
+    merged = config_builder.merge_transcribe_config(
+        flags=flags, overrides=opts.config_kv, config_file=opts.config_file
+    )
+    validate_speakers_expected(merged)  # checks the merged config, not just the flags
+
+    sources = transcribe_batch.expand_sources(
+        opts.source, from_stdin=opts.from_stdin, sample=opts.sample
+    )
+    if sources is not None:  # non-None -> batch run, then return
+        transcribe_batch.reject_single_source_flags(
+            out=opts.out,
+            output_field=opts.output_field,
+            llm_prompt=opts.llm_prompt,
+            show_code=opts.show_code,
+        )
+        transcribe_batch.run_batch(
+            state.resolve_api_key(),
+            sources,
+            transcription_config=config_builder.construct_transcription_config(merged),
+            concurrency=opts.concurrency,
+            force=opts.force,
+            json_mode=json_mode,
+            quiet=state.quiet,
+        )
+        return
+
+    if opts.show_code:  # print-only: returns before the source check and credentials
+        _print_show_code(opts, merged)
+        return
+
+    tc = config_builder.construct_transcription_config(merged)
+
+    # A typo'd path must read as "file not found", not trigger a login.
+    check_source_exists(opts.source, sample=opts.sample)
+    warn_unrecognized_extension(opts.source, json_mode=json_mode, quiet=state.quiet)
+
+    api_key = state.resolve_api_key()
+    with output.status("Transcribing…", json_mode=json_mode, quiet=state.quiet):
+        transcript = run_transcription(
+            api_key,
+            opts.source,
+            sample=opts.sample,
+            transcription_config=tc,
+            download_sections=list(opts.download_sections or []),
+        )
+
+    deliver_result(  # called after the status block above has exited
+        transcript,
+        api_key=api_key,
+        out=opts.out,
+        output_field=opts.output_field,
+        transform=TransformOptions(
+            prompts=list(opts.llm_prompt or []), model=opts.model, max_tokens=opts.max_tokens
+        ),
+        json_mode=json_mode,
+        quiet=state.quiet,
+    )
diff --git a/tests/test_agent_command.py b/tests/test_agent_command.py
index ae8482fc..c2d23b7b 100644
--- a/tests/test_agent_command.py
+++ b/tests/test_agent_command.py
@@ -33,7 +33,7 @@ def test_list_voices_prints_and_exits_without_connecting(monkeypatch):
     def fake_run_session(api_key, *, renderer, player, mic, config):
         called["ran"] = True
 
-    monkeypatch.setattr("aai_cli.commands.agent.run_session", fake_run_session)
+    monkeypatch.setattr("aai_cli.agent_exec.run_session", fake_run_session)
     result = runner.invoke(app, ["agent", "--list-voices"])
     assert result.exit_code == 0
     assert "ivy" in result.output
@@ -44,7 +44,7 @@ def fake_run_session(api_key, *, renderer, player, mic, config):
 
 def test_list_voices_json_emits_machine_readable_array(monkeypatch):
     monkeypatch.setattr(
-        "aai_cli.commands.agent.run_session",
+        "aai_cli.agent_exec.run_session",
         lambda *a, **k: (_ for _ in ()).throw(AssertionError("must not connect")),
     )
     result = runner.invoke(app, ["agent", "--list-voices", "--json"])
@@ -59,12 +59,12 @@ def test_list_voices_json_emits_machine_readable_array(monkeypatch):
 def test_agent_unauthenticated_runs_login(monkeypatch):
     monkeypatch.setattr("aai_cli.context._interactive_session", lambda: True)
     monkeypatch.setattr("aai_cli.context.run_login_flow", _login_result)
-    monkeypatch.setattr("aai_cli.commands.agent.FileSource", lambda src: f"filesrc:{src}")
+    monkeypatch.setattr("aai_cli.agent_exec.FileSource", lambda src: f"filesrc:{src}")
 
     def fake_run_session(api_key, *, renderer, player, mic, config):
         raise AssertionError(f"agent session should not run after auto-login: {api_key}")
 
-    monkeypatch.setattr("aai_cli.commands.agent.run_session", fake_run_session)
+    monkeypatch.setattr("aai_cli.agent_exec.run_session", fake_run_session)
     result = runner.invoke(app, ["agent", "--sample", "--json"])
     assert result.exit_code == 4
     assert config.get_api_key("default") == "sk_from_oauth"
@@ -79,7 +79,7 @@ def fake_run_session(api_key, *, renderer, player, mic, config):
         renderer.user_final("hello agent")
         renderer.agent_transcript("hello human", interrupted=False)
 
-    monkeypatch.setattr("aai_cli.commands.agent.run_session", fake_run_session)
+    monkeypatch.setattr("aai_cli.agent_exec.run_session", fake_run_session)
     result = runner.invoke(app, ["agent", "--json"])
     assert result.exit_code == 0
     lines = [json.loads(x) for x in result.output.splitlines() if x.strip()]
@@ -96,7 +96,7 @@ def fake_run_session(api_key, *, renderer, player, mic, config):
         seen["prompt"] = config.system_prompt
         seen["full_duplex"] = config.full_duplex
 
-    monkeypatch.setattr("aai_cli.commands.agent.run_session", fake_run_session)
+    monkeypatch.setattr("aai_cli.agent_exec.run_session", fake_run_session)
     prompt_file = tmp_path / "p.txt"
     prompt_file.write_text("be a pirate")
     result = runner.invoke(
@@ -120,7 +120,7 @@ def fake_run_session(api_key, *, renderer, player, mic, config):
 def test_agent_headphones_notice_in_human_mode(monkeypatch):
     config.set_api_key("default", "sk_live")
     monkeypatch.setattr("aai_cli.output.resolve_json", lambda *, explicit: False)
-    monkeypatch.setattr("aai_cli.commands.agent.run_session", lambda *a, **k: None)
+    monkeypatch.setattr("aai_cli.agent_exec.run_session", lambda *a, **k: None)
     result = runner.invoke(app, ["agent"])
     assert result.exit_code == 0
     assert "headphones" in result.output.lower()  # mic stays open -> warn to use headphones
@@ -132,21 +132,21 @@ def test_agent_ctrl_c_exits_cleanly(monkeypatch):
     def raise_kbd(*a, **k):
         raise KeyboardInterrupt
 
-    monkeypatch.setattr("aai_cli.commands.agent.run_session", raise_kbd)
+    monkeypatch.setattr("aai_cli.agent_exec.run_session", raise_kbd)
     result = runner.invoke(app, ["agent"])
     assert result.exit_code == 0
 
 
 def test_agent_unknown_voice_exits_2(monkeypatch):
     config.set_api_key("default", "sk_live")
-    monkeypatch.setattr("aai_cli.commands.agent.run_session", lambda *a, **k: None)
+    monkeypatch.setattr("aai_cli.agent_exec.run_session", lambda *a, **k: None)
     result = runner.invoke(app, ["agent", "--voice", "not-a-voice"])
     assert result.exit_code == 2
 
 
 def test_agent_prompt_file_not_found_exits_2(monkeypatch):
     config.set_api_key("default", "sk_live")
-    monkeypatch.setattr("aai_cli.commands.agent.run_session", lambda *a, **k: None)
+    monkeypatch.setattr("aai_cli.agent_exec.run_session", lambda *a, **k: None)
     result = runner.invoke(
         app, ["agent", "--system-prompt-file", "/tmp/no_such_file_xyz_voiceagent.txt"]
     )
@@ -160,7 +160,7 @@ def _capture_run_session(monkeypatch):
     def fake_run_session(api_key, *, renderer, player, mic, config):
         seen.update(renderer=renderer, player=player, mic=mic, config=config)
 
-    monkeypatch.setattr("aai_cli.commands.agent.run_session", fake_run_session)
+    monkeypatch.setattr("aai_cli.agent_exec.run_session", fake_run_session)
     return seen
 
 
@@ -169,7 +169,7 @@ def test_agent_file_source_streams_clip_and_exits_after_reply(monkeypatch, tmp_p
     wav = tmp_path / "say.wav"
     wav.write_bytes(b"RIFF")  # FileSource is faked below; contents don't matter
 
-    monkeypatch.setattr("aai_cli.commands.agent.FileSource", lambda src: f"filesrc:{src}")
+    monkeypatch.setattr("aai_cli.agent_exec.FileSource", lambda src: f"filesrc:{src}")
     seen = _capture_run_session(monkeypatch)
 
     result = runner.invoke(app, ["agent", str(wav)])
@@ -192,7 +192,7 @@ def fake_file_source(src):
         captured["src"] = src
         return "filesrc"
 
-    monkeypatch.setattr("aai_cli.commands.agent.FileSource", fake_file_source)
+    monkeypatch.setattr("aai_cli.agent_exec.FileSource", fake_file_source)
     seen = _capture_run_session(monkeypatch)
 
     result = runner.invoke(app, ["agent", "--sample"])
@@ -203,7 +203,7 @@ def fake_file_source(src):
 
 def test_agent_file_source_with_device_exits_2(monkeypatch, tmp_path):
     config.set_api_key("default", "sk_live")
-    monkeypatch.setattr("aai_cli.commands.agent.run_session", lambda *a, **k: None)
+    monkeypatch.setattr("aai_cli.agent_exec.run_session", lambda *a, **k: None)
     wav = tmp_path / "say.wav"
     wav.write_bytes(b"RIFF")
     result = runner.invoke(app, ["agent", str(wav), "--device", "1"])
@@ -213,8 +213,8 @@ def test_agent_file_source_with_device_exits_2(monkeypatch, tmp_path):
 def test_agent_file_source_no_headphones_notice(monkeypatch, tmp_path):
     config.set_api_key("default", "sk_live")
     monkeypatch.setattr("aai_cli.output.resolve_json", lambda *, explicit: False)
-    monkeypatch.setattr("aai_cli.commands.agent.FileSource", lambda src: "filesrc")
-    monkeypatch.setattr("aai_cli.commands.agent.run_session", lambda *a, **k: None)
+    monkeypatch.setattr("aai_cli.agent_exec.FileSource", lambda src: "filesrc")
+    monkeypatch.setattr("aai_cli.agent_exec.run_session", lambda *a, **k: None)
     wav = tmp_path / "say.wav"
     wav.write_bytes(b"RIFF")
     result = runner.invoke(app, ["agent", str(wav)])
@@ -225,12 +225,12 @@ def test_agent_file_source_no_headphones_notice(monkeypatch, tmp_path):
 def test_agent_file_source_no_start_talking_notice(monkeypatch, tmp_path):
     config.set_api_key("default", "sk_live")
     monkeypatch.setattr("aai_cli.output.resolve_json", lambda *, explicit: False)
-    monkeypatch.setattr("aai_cli.commands.agent.FileSource", lambda src: "filesrc")
+    monkeypatch.setattr("aai_cli.agent_exec.FileSource", lambda src: "filesrc")
 
     def fake_run_session(api_key, *, renderer, player, mic, config):
         renderer.connected()  # session.ready arrives even for a file-driven run
 
-    monkeypatch.setattr("aai_cli.commands.agent.run_session", fake_run_session)
+    monkeypatch.setattr("aai_cli.agent_exec.run_session", fake_run_session)
     wav = tmp_path / "say.wav"
     wav.write_bytes(b"RIFF")
     result = runner.invoke(app, ["agent", str(wav)])
@@ -255,12 +255,12 @@ def start(self):
         def close(self):
             pass
 
-    monkeypatch.setattr("aai_cli.commands.agent.DuplexAudio", FakeDuplex)
+    monkeypatch.setattr("aai_cli.agent_exec.DuplexAudio", FakeDuplex)
 
     def fake_run_session(api_key, *, renderer, player, mic, config):
         renderer.connected()
 
-    monkeypatch.setattr("aai_cli.commands.agent.run_session", fake_run_session)
+    monkeypatch.setattr("aai_cli.agent_exec.run_session", fake_run_session)
     result = runner.invoke(app, ["agent"])
     assert result.exit_code == 0
     assert "start talking" in result.output.lower()  # live mic -> prompt the user to speak
@@ -269,7 +269,7 @@ def fake_run_session(api_key, *, renderer, player, mic, config):
 def test_agent_show_code_prints_without_session(monkeypatch):
     # Print-only: emits the agent script, never starts a session or opens audio, no auth.
     called = []
-    monkeypatch.setattr("aai_cli.commands.agent.run_session", lambda *a, **k: called.append(True))
+    monkeypatch.setattr("aai_cli.agent_exec.run_session", lambda *a, **k: called.append(True))
     result = runner.invoke(app, ["agent", "--voice", "ivy", "--show-code"])
     assert result.exit_code == 0
     assert called == []  # never ran a session
@@ -284,7 +284,7 @@ def test_agent_show_code_file_source_warns_on_stderr(monkeypatch):
     def _boom(*a, **k):
         raise AssertionError("must not run a session")
 
-    monkeypatch.setattr("aai_cli.commands.agent.run_session", _boom)
+    monkeypatch.setattr("aai_cli.agent_exec.run_session", _boom)
     result = _invoke_split(["agent", "clip.wav", "--show-code"])
     assert result.exit_code == 0
     assert "uses the microphone" in result.stderr
@@ -319,7 +319,7 @@ def test_agent_headphones_notice_routes_to_stderr(monkeypatch):
     # default human mode the notice goes to stderr, stdout stays transcript-only.
     config.set_api_key("default", "sk_live")
     monkeypatch.setattr("aai_cli.output.resolve_json", lambda *, explicit: False)
-    monkeypatch.setattr("aai_cli.commands.agent.run_session", lambda *a, **k: None)
+    monkeypatch.setattr("aai_cli.agent_exec.run_session", lambda *a, **k: None)
     result = _invoke_split(["agent"])
     assert result.exit_code == 0
     assert "headphones" in result.stderr.lower()
@@ -331,7 +331,7 @@ def _boom(*a, **k):
         raise AssertionError("must not run a session")
 
     monkeypatch.setattr(
-        "aai_cli.commands.agent.run_session",
+        "aai_cli.agent_exec.run_session",
         _boom,
     )
     result = runner.invoke(app, ["agent", "--voice", "ivy", "--show-code", "--json"])
@@ -342,13 +342,13 @@ def _boom(*a, **k):
 def test_agent_output_text_emits_plain_transcript(monkeypatch):
     # `-o text` -> plain you:/agent: lines on stdout (pipe into assembly llm).
     config.set_api_key("default", "sk_live")
-    monkeypatch.setattr("aai_cli.commands.agent.FileSource", lambda src: "filesrc")
+    monkeypatch.setattr("aai_cli.agent_exec.FileSource", lambda src: "filesrc")
 
     def fake_run_session(api_key, *, renderer, player, mic, config):
         renderer.user_final("hello there")
         renderer.agent_transcript("hi, how can I help?", interrupted=False)
 
-    monkeypatch.setattr("aai_cli.commands.agent.run_session", fake_run_session)
+    monkeypatch.setattr("aai_cli.agent_exec.run_session", fake_run_session)
     result = runner.invoke(app, ["agent", "--sample", "-o", "text"])
     assert result.exit_code == 0
     assert "you: hello there" in result.output
@@ -370,11 +370,11 @@ def test_resolve_system_prompt_unreadable_file_raises_clierror(tmp_path):
 
     import pytest
 
-    from aai_cli.commands import agent
+    from aai_cli import agent_exec
     from aai_cli.errors import CLIError
 
     missing = Path(tmp_path) / "does-not-exist.txt"
     with pytest.raises(CLIError) as exc:
-        agent._resolve_system_prompt("fallback prompt", missing)
+        agent_exec._resolve_system_prompt("fallback prompt", missing)
     assert exc.value.exit_code == 2
     assert "system-prompt-file" in exc.value.message
diff --git a/tests/test_command_options_seam.py b/tests/test_command_options_seam.py
new file mode 100644
index 00000000..1c81af21
--- /dev/null
+++ b/tests/test_command_options_seam.py
@@ -0,0 +1,210 @@
+"""Direct tests of the options/run seams (transcribe/agent/speak/llm exec modules).
+
+Each command module parses argv into a frozen <Cmd>Options dataclass; everything
+after that is a module-level run function of plain data. These tests construct
+options directly (dataclasses.replace off a defaults instance) instead of
+round-tripping argv through CliRunner. The stream seam's tests live in
+test_stream_exec.py.
+"""
+
+from __future__ import annotations
+
+import dataclasses
+
+import pytest
+import typer
+
+from aai_cli import agent_exec, choices, config, llm, llm_exec, speak_exec, transcribe_exec
+from aai_cli.agent.session import DEFAULT_GREETING, DEFAULT_PROMPT
+from aai_cli.agent.voices import DEFAULT_VOICE
+from aai_cli.context import AppState
+from aai_cli.errors import CLIError, UsageError
+from aai_cli.options import DEFAULT_BATCH_CONCURRENCY
+
+# The CLI's flag defaults, as data. Tests override per-case with dataclasses.replace.
+TRANSCRIBE_DEFAULTS = transcribe_exec.TranscribeOptions(
+    source=None,
+    sample=False,
+    from_stdin=False,
+    concurrency=DEFAULT_BATCH_CONCURRENCY,
+    force=False,
+    speech_model=None,
+    language_code=None,
+    language_detection=None,
+    keyterms_prompt=None,
+    temperature=None,
+    prompt=None,
+    punctuate=None,
+    format_text=None,
+    disfluencies=None,
+    speaker_labels=False,  # plain bool here; .flags() reports it as None when unset
+    speakers_expected=None,
+    multichannel=None,
+    redact_pii=None,
+    redact_pii_policy=None,
+    redact_pii_sub=None,
+    redact_pii_audio=None,
+    filter_profanity=None,
+    content_safety=None,
+    content_safety_confidence=None,
+    speech_threshold=None,
+    summarization=None,
+    summary_model=None,
+    summary_type=None,
+    auto_chapters=None,
+    sentiment_analysis=None,
+    entity_detection=None,
+    auto_highlights=None,
+    topic_detection=None,
+    word_boost=None,
+    custom_spelling_file=None,
+    audio_start=None,
+    audio_end=None,
+    download_sections=None,
+    webhook_url=None,
+    webhook_auth_header=None,
+    translate_to=None,
+    config_kv=None,
+    config_file=None,
+    llm_prompt=None,
+    model=llm.DEFAULT_MODEL,
+    max_tokens=llm.DEFAULT_MAX_TOKENS,
+    output_field=None,
+    out=None,
+    show_code=False,
+)
+
+AGENT_DEFAULTS = agent_exec.AgentOptions(
+    source=None,  # presumably the live-mic path: tests here fake DuplexAudio for it
+    sample=False,
+    voice=DEFAULT_VOICE,
+    system_prompt=DEFAULT_PROMPT,
+    system_prompt_file=None,
+    greeting=DEFAULT_GREETING,
+    device=None,
+    output_field=None,
+    show_code=False,
+)
+
+SPEAK_DEFAULTS = speak_exec.SpeakOptions(
+    text=None,
+    voice=[],  # shared list: frozen=True blocks rebinding, not in-place mutation
+    language=speak_exec.DEFAULT_LANGUAGE,
+    sample_rate=None,
+    out=None,
+)
+
+LLM_DEFAULTS = llm_exec.LlmOptions(
+    prompt=None,
+    model=llm.DEFAULT_MODEL,
+    transcript_id=None,
+    system=None,
+    follow=False,  # True needs a prompt and piped stdin (pinned in test_llm_command.py)
+    output_field=None,
+    max_tokens=llm.DEFAULT_MAX_TOKENS,
+)
+
+
+@pytest.mark.parametrize(
+    "defaults",
+    [TRANSCRIBE_DEFAULTS, AGENT_DEFAULTS, SPEAK_DEFAULTS, LLM_DEFAULTS],
+    ids=["transcribe", "agent", "speak", "llm"],
+)
+def test_options_are_immutable(defaults):
+    field_name = dataclasses.fields(defaults)[0].name  # any field would do
+    with pytest.raises(dataclasses.FrozenInstanceError):
+        setattr(defaults, field_name, None)  # rebinding a field on a frozen dataclass
+
+
+def test_run_transcribe_validates_flags_before_credentials():
+    # No API key configured: a flag conflict surfaces as a usage error, not
+    # NotAuthenticated — validation runs before any credential resolution.
+    with pytest.raises(UsageError):  # NOTE(review): no match= — source=None may raise one too
+        transcribe_exec.run_transcribe(
+            dataclasses.replace(
+                TRANSCRIBE_DEFAULTS, language_code="en_us", language_detection=True
+            ),
+            AppState(),
+            json_mode=False,
+        )
+
+
+def test_transcribe_flags_drop_unset_speaker_labels():
+    # The boolean --speaker-labels flag maps to None when unset (so the request
+    # omits the field entirely), and True only when explicitly enabled.
+    assert TRANSCRIBE_DEFAULTS.flags(None)["speaker_labels"] is None  # option is False
+    enabled = dataclasses.replace(TRANSCRIBE_DEFAULTS, speaker_labels=True)
+    assert enabled.flags(None)["speaker_labels"] is True
+
+
+def test_run_agent_session_config_without_cli(monkeypatch):
+    config.set_api_key("default", "sk_live")
+    seen = {}
+
+    def fake_run_session(api_key, *, renderer, player, mic, config):
+        seen["api_key"] = api_key
+        seen["config"] = config  # the keyword argument, not the imported config module
+
+    monkeypatch.setattr(agent_exec, "run_session", fake_run_session)
+    monkeypatch.setattr(agent_exec, "DuplexAudio", _FakeDuplex)  # placeholder mic/player
+
+    agent_exec.run_agent(
+        dataclasses.replace(AGENT_DEFAULTS, greeting="Ahoy"), AppState(), json_mode=True
+    )
+    assert seen["api_key"] == "sk_live"  # the key stored at the top of this test
+    run_config = seen["config"]
+    assert run_config.voice == DEFAULT_VOICE  # untouched default carried through
+    assert run_config.greeting == "Ahoy"  # the one overridden field
+    assert run_config.full_duplex is True
+    assert run_config.exit_after_reply is False
+
+
+class _FakeDuplex:  # patched over agent_exec.DuplexAudio by the run_agent tests
+    def __init__(self, *, target_rate=None, device=None):  # keywords accepted, ignored
+        self.mic = object()  # opaque placeholder
+        self.player = object()  # opaque placeholder
+
+
+def test_run_agent_ctrl_c_stops_cleanly(monkeypatch):
+    # Ctrl-C is the normal "user hung up" signal: the session ends without an error.
+    config.set_api_key("default", "sk_live")
+
+    def raise_interrupt(api_key, *, renderer, player, mic, config):
+        raise KeyboardInterrupt
+
+    monkeypatch.setattr(agent_exec, "run_session", raise_interrupt)
+    monkeypatch.setattr(agent_exec, "DuplexAudio", _FakeDuplex)  # placeholder mic/player
+    agent_exec.run_agent(AGENT_DEFAULTS, AppState(), json_mode=True)  # no exception
+
+
+def test_run_agent_broken_pipe_exits_zero(monkeypatch):
+    # A closed downstream pipe (`assembly agent | head`) is a clean stop, not a failure.
+    config.set_api_key("default", "sk_live")
+
+    def raise_broken_pipe(api_key, *, renderer, player, mic, config):
+        raise BrokenPipeError
+
+    monkeypatch.setattr(agent_exec, "run_session", raise_broken_pipe)
+    monkeypatch.setattr(agent_exec, "DuplexAudio", _FakeDuplex)  # placeholder mic/player
+    with pytest.raises(typer.Exit) as exc:  # unlike Ctrl-C, this path raises typer.Exit
+        agent_exec.run_agent(AGENT_DEFAULTS, AppState(), json_mode=True)
+    assert exc.value.exit_code == 0
+
+
+def test_run_speak_requires_sandbox():
+    # The active environment defaults to production, which has no streaming-TTS host.
+    with pytest.raises(CLIError) as exc:  # NOTE(review): text=None — env check must run first
+        speak_exec.run_speak(SPEAK_DEFAULTS, AppState(), json_mode=False)
+    assert exc.value.exit_code == 2
+    assert "--sandbox" in (exc.value.suggestion or "")  # suggestion may be None
+
+
+def test_run_llm_follow_rejects_output_field():
+    with pytest.raises(UsageError):  # NOTE(review): no match= — any UsageError passes
+        llm_exec.run_llm(
+            dataclasses.replace(
+                LLM_DEFAULTS, follow=True, prompt="x", output_field=choices.TextOrJson.text
+            ),
+            AppState(),
+            json_mode=False,
+        )
diff --git a/tests/test_llm_command.py b/tests/test_llm_command.py
index e89cb0c1..e66c4fd5 100644
--- a/tests/test_llm_command.py
+++ b/tests/test_llm_command.py
@@ -212,7 +212,7 @@ def test_llm_transcript_id_stdin_warning_suppressed_by_quiet(monkeypatch):
 
 def test_llm_transcript_id_no_warning_when_stdin_is_a_terminal(monkeypatch):
     _auth()
-    monkeypatch.setattr("aai_cli.commands.llm.stdio.stdin_is_piped", lambda: False)
+    monkeypatch.setattr("aai_cli.llm_exec.stdio.stdin_is_piped", lambda: False)
     monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", lambda *a, **k: _payload("s"))
     result = runner.invoke(app, ["llm", "summarize", "--transcript-id", "t_9"])
     assert result.exit_code == 0
@@ -388,7 +388,7 @@ def test_llm_follow_requires_a_prompt(monkeypatch):
 def test_llm_follow_requires_piped_stdin(monkeypatch):
     # Interactively (no pipe) --follow would block forever; reject it with guidance.
     _auth()
-    monkeypatch.setattr("aai_cli.commands.llm.stdio.stdin_is_piped", lambda: False)
+    monkeypatch.setattr("aai_cli.llm_exec.stdio.stdin_is_piped", lambda: False)
     monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", lambda *a, **k: _payload())
     result = runner.invoke(app, ["llm", "summarize", "--follow", "--json"])
     assert result.exit_code == 2
@@ -424,9 +424,7 @@ def __iter__(self):
         def __next__(self):
             raise KeyboardInterrupt
 
-    monkeypatch.setattr(
-        "aai_cli.commands.llm.stdio.iter_piped_stdin_lines", lambda: _InterruptIter()
-    )
+    monkeypatch.setattr("aai_cli.llm_exec.stdio.iter_piped_stdin_lines", lambda: _InterruptIter())
     monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", lambda *a, **k: _payload())
     result = runner.invoke(app, ["llm", "summarize", "--follow", "--json"], input="")
     assert result.exit_code == 0
diff --git a/tests/test_replay_e2e.py b/tests/test_replay_e2e.py
index db07670b..f1ef49c5 100644
--- a/tests/test_replay_e2e.py
+++ b/tests/test_replay_e2e.py
@@ -35,7 +35,7 @@ def test_transcribe_sample_renders_real_transcript(monkeypatch, mocker):
     _with_api_key()
     _human(monkeypatch)
     mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=rf.transcript("transcribe_sample"),
     )
diff --git a/tests/test_source_validation.py b/tests/test_source_validation.py
index aac8277e..94b888e6 100644
--- a/tests/test_source_validation.py
+++ b/tests/test_source_validation.py
@@ -46,7 +46,7 @@ def test_resolve_audio_source_source_plus_sample_rejected_even_without_checks():
 
 def test_transcribe_source_plus_sample_exits_2(mocker, tmp_path):
     # No key configured: the conflict must fail before credential resolution.
-    tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True)
+    tx = mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True)
     clip = tmp_path / "clip.mp3"
     clip.write_bytes(b"fake")
     result = runner.invoke(app, ["transcribe", str(clip), "--sample"])
@@ -69,7 +69,7 @@ def test_resolve_audio_source_rejects_directory(tmp_path):
 def test_transcribe_directory_source_fails_before_credentials(mocker, tmp_path):
     # No key configured: a directory is batch mode, and an empty one must read as
     # "no audio files", never trigger a login (or an upload attempt).
-    tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True)
+    tx = mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True)
     result = runner.invoke(app, ["transcribe", str(tmp_path)])
     assert result.exit_code == 2
     # Rich may wrap the long tmp path mid-token (even inside a word), so compare with
@@ -122,7 +122,7 @@ def test_transcripts_get_rejects_path_traversal_id():
 def test_transcribe_missing_file_fails_before_credentials(mocker):
     # No key is configured: the path check must fire first, so the user sees
     # "file not found" instead of a login prompt (or a keyring error).
-    tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True)
+    tx = mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True)
     result = runner.invoke(app, ["transcribe", "missing.wav"])
     assert result.exit_code == 2
     assert "File not found: missing.wav" in result.output
@@ -145,7 +145,7 @@ def test_transcribe_empty_stdin_exits_2():
 def test_stream_missing_file_fails_before_credentials(monkeypatch):
     called = {"stream": False}
     monkeypatch.setattr(
-        "aai_cli.commands.stream.client.stream_audio",
+        "aai_cli.stream_exec.client.stream_audio",
         lambda *a, **k: called.__setitem__("stream", True),
     )
     result = runner.invoke(app, ["stream", "missing.wav"])
diff --git a/tests/test_speak.py b/tests/test_speak.py
index 5f510c6e..b25cc92b 100644
--- a/tests/test_speak.py
+++ b/tests/test_speak.py
@@ -47,7 +47,7 @@ def test_production_env_is_rejected_with_sandbox_hint():
 def test_plays_audio_by_default(monkeypatch, fake_synthesize):
     played: dict = {}
     monkeypatch.setattr(
-        "aai_cli.commands.speak.audio.play_pcm",
+        "aai_cli.speak_exec.audio.play_pcm",
         lambda pcm, rate, **_: played.update(pcm=pcm, rate=rate),
     )
     result = runner.invoke(app, ["--sandbox", "speak", "Hello there"])
@@ -63,12 +63,12 @@ def test_plays_audio_by_default(monkeypatch, fake_synthesize):
 
 def test_out_writes_wav_and_does_not_play(monkeypatch, tmp_path, fake_synthesize):
     monkeypatch.setattr(
-        "aai_cli.commands.speak.audio.play_pcm",
+        "aai_cli.speak_exec.audio.play_pcm",
         lambda *a, **k: pytest.fail("should not play when --out is given"),
     )
     written: dict = {}
     monkeypatch.setattr(
-        "aai_cli.commands.speak.audio.write_wav",
+        "aai_cli.speak_exec.audio.write_wav",
         lambda path, pcm, rate: written.update(path=path, pcm=pcm, rate=rate),
     )
     out = tmp_path / "x.wav"
@@ -82,7 +82,7 @@ def test_out_writes_wav_and_does_not_play(monkeypatch, tmp_path, fake_synthesize
 
 
 def test_reads_text_from_stdin_when_arg_omitted(monkeypatch, fake_synthesize):
-    monkeypatch.setattr("aai_cli.commands.speak.audio.play_pcm", lambda *a, **k: None)
+    monkeypatch.setattr("aai_cli.speak_exec.audio.play_pcm", lambda *a, **k: None)
     result = runner.invoke(app, ["--sandbox", "speak"], input="piped text\n")
     assert result.exit_code == 0
     assert fake_synthesize["cfg"].text == "piped text"
@@ -104,7 +104,7 @@ def test_blank_arg_does_not_fall_back_to_stdin(monkeypatch):
 
 
 def test_voice_and_language_flow_into_config(monkeypatch, fake_synthesize):
-    monkeypatch.setattr("aai_cli.commands.speak.audio.play_pcm", lambda *a, **k: None)
+    monkeypatch.setattr("aai_cli.speak_exec.audio.play_pcm", lambda *a, **k: None)
     result = runner.invoke(
         app, ["--sandbox", "speak", "Hi", "--voice", "jane", "--language", "English"]
     )
@@ -116,7 +116,7 @@ def test_voice_and_language_flow_into_config(monkeypatch, fake_synthesize):
 
 
 def test_json_mode_emits_metadata_object_on_stdout(monkeypatch, fake_synthesize):
-    monkeypatch.setattr("aai_cli.commands.speak.audio.play_pcm", lambda *a, **k: None)
+    monkeypatch.setattr("aai_cli.speak_exec.audio.play_pcm", lambda *a, **k: None)
     result = runner.invoke(app, ["--sandbox", "speak", "Hi", "--voice", "jane", "--json"])
     assert result.exit_code == 0
     # The behavioral split: --json yields a parseable object, not human prose.
@@ -131,7 +131,7 @@ def test_json_mode_emits_metadata_object_on_stdout(monkeypatch, fake_synthesize)
 
 
 def test_human_mode_keeps_stdout_clean(monkeypatch, fake_synthesize):
-    monkeypatch.setattr("aai_cli.commands.speak.audio.play_pcm", lambda *a, **k: None)
+    monkeypatch.setattr("aai_cli.speak_exec.audio.play_pcm", lambda *a, **k: None)
     result = runner.invoke(app, ["--sandbox", "speak", "Hi"])
     assert result.exit_code == 0
     # Human summary goes to stderr; stdout stays empty (audio went to the speaker).
@@ -150,7 +150,7 @@ def _fake(api_key, segments, *, language=None, sample_rate=None, connect=None, o
         )
 
     monkeypatch.setattr(session, "synthesize_dialogue", _fake)
-    monkeypatch.setattr("aai_cli.commands.speak.audio.play_pcm", lambda *a, **k: None)
+    monkeypatch.setattr("aai_cli.speak_exec.audio.play_pcm", lambda *a, **k: None)
     return calls
 
 
@@ -203,7 +203,7 @@ def test_dialogue_json_reports_speaker_voice_map(fake_dialogue):
 def test_dialogue_json_out_path_is_reported(fake_dialogue, monkeypatch, tmp_path):
     # With --out, the multi JSON reports the file path (not null) — pins the
     # `str(out) if out is not None else None` branch in _emit_multi.
-    monkeypatch.setattr("aai_cli.commands.speak.audio.write_wav", lambda *a, **k: None)
+    monkeypatch.setattr("aai_cli.speak_exec.audio.write_wav", lambda *a, **k: None)
     out = tmp_path / "dialogue.wav"
     text = "Speaker A: One.\nSpeaker B: Two."
     result = runner.invoke(app, ["--sandbox", "speak", "--out", str(out), "--json"], input=text)
@@ -223,7 +223,7 @@ def test_empty_speaker_labels_raises_usage_error():
 
 def test_unlabeled_text_still_uses_single_voice_path(fake_synthesize, monkeypatch):
     # A bare --voice still selects the single-voice voice for ordinary prose.
-    monkeypatch.setattr("aai_cli.commands.speak.audio.play_pcm", lambda *a, **k: None)
+    monkeypatch.setattr("aai_cli.speak_exec.audio.play_pcm", lambda *a, **k: None)
     result = runner.invoke(app, ["--sandbox", "speak", "Just prose.", "--voice", "mary"])
     assert result.exit_code == 0
     assert fake_synthesize["cfg"].voice == "mary"
@@ -235,7 +235,7 @@ def test_unlabeled_text_still_uses_single_voice_path(fake_synthesize, monkeypatc
 def test_speaker_mappings_on_unlabeled_input_warn_not_silently_drop(fake_synthesize, monkeypatch):
     # The mirror of the bare-voice-in-dialogue note: SPEAKER=VOICE mappings can't
     # apply to plain prose, and the user is told instead of the flag vanishing.
-    monkeypatch.setattr("aai_cli.commands.speak.audio.play_pcm", lambda *a, **k: None)
+    monkeypatch.setattr("aai_cli.speak_exec.audio.play_pcm", lambda *a, **k: None)
     result = runner.invoke(app, ["--sandbox", "speak", "Just prose.", "--voice", "A=vera"])
     assert result.exit_code == 0
     assert "Ignoring --voice SPEAKER=VOICE mappings" in result.stderr
@@ -245,7 +245,7 @@ def test_speaker_mappings_on_unlabeled_input_warn_not_silently_drop(fake_synthes
 
 
 def test_speaker_mappings_warning_is_structured_in_json_mode(fake_synthesize, monkeypatch):
-    monkeypatch.setattr("aai_cli.commands.speak.audio.play_pcm", lambda *a, **k: None)
+    monkeypatch.setattr("aai_cli.speak_exec.audio.play_pcm", lambda *a, **k: None)
     result = runner.invoke(
         app, ["--sandbox", "speak", "Just prose.", "--voice", "A=vera", "--json"]
     )
@@ -267,7 +267,7 @@ def test_sample_rate_must_be_positive():
 
 def test_sample_rate_floor_accepts_one(fake_synthesize, monkeypatch):
     # min=1 exactly: 1 Hz is degenerate but valid (the server enforces its own floor).
-    monkeypatch.setattr("aai_cli.commands.speak.audio.play_pcm", lambda *a, **k: None)
+    monkeypatch.setattr("aai_cli.speak_exec.audio.play_pcm", lambda *a, **k: None)
     result = runner.invoke(app, ["--sandbox", "speak", "Hi", "--sample-rate", "1"])
     assert result.exit_code == 0
     assert fake_synthesize["cfg"].sample_rate == 1
diff --git a/tests/test_stream_command.py b/tests/test_stream_command.py
index 897c309d..0f5f381f 100644
--- a/tests/test_stream_command.py
+++ b/tests/test_stream_command.py
@@ -43,7 +43,7 @@ def test_stream_help_lists_command():
 
 def test_stream_mic_renders_turns(monkeypatch):
     config.set_api_key("default", "sk_live")
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", _drive_turns)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", _drive_turns)
     result = runner.invoke(app, ["stream", "--json"])
     assert result.exit_code == 0
     lines = [json.loads(x) for x in result.output.splitlines() if x.strip()]
@@ -60,7 +60,7 @@ def fake_stream_audio(
         seen["source_type"] = type(source).__name__
         seen["rate"] = params.sample_rate
 
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio)
     import wave
 
     p = tmp_path / "a.wav"
@@ -90,7 +90,7 @@ def __iter__(self):
             captured["on_open"]()  # the SDK iterating us == the mic is now live
             return iter([b"\x00\x00"])
 
-    monkeypatch.setattr("aai_cli.commands.stream.MicrophoneSource", FakeMic)
+    monkeypatch.setattr("aai_cli.stream_exec.MicrophoneSource", FakeMic)
 
     order = []
 
@@ -103,7 +103,7 @@ def fake_stream_audio(
         list(source)  # consume the mic -> on_open fires -> "Listening…" prints
         order.append("consumed")
 
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio)
     result = runner.invoke(app, ["stream"])
     assert result.exit_code == 0
     assert "Listening" in result.output  # shown once the mic opened
@@ -118,7 +118,7 @@ def fake(api_key, source, *, params, on_begin=None, on_turn=None, on_termination
         if on_begin:
             on_begin(types.SimpleNamespace(id="x"))
 
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake)
     import wave
 
     p = tmp_path / "a.wav"
@@ -141,7 +141,7 @@ def fake_stream_audio(
     ):
         raise AssertionError(f"streaming should not start after auto-login: {api_key}")
 
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio)
     result = runner.invoke(app, ["stream", "--json"])
     assert result.exit_code == 4
     assert config.get_api_key("default") == "sk_from_oauth"
@@ -162,7 +162,7 @@ def test_stream_sample_uses_hosted_clip(monkeypatch):
     config.set_api_key("default", "sk_live")
     monkeypatch.setattr("aai_cli.streaming.sources.shutil.which", lambda _n: "/usr/bin/ffmpeg")
     seen = {}
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", _capture_source(seen))
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", _capture_source(seen))
     result = runner.invoke(app, ["stream", "--sample"])
     assert result.exit_code == 0
     assert type(seen["source"]).__name__ == "FileSource"
@@ -174,7 +174,7 @@ def test_stream_url_source_uses_filesource(monkeypatch):
     config.set_api_key("default", "sk_live")
     monkeypatch.setattr("aai_cli.streaming.sources.shutil.which", lambda _n: "/usr/bin/ffmpeg")
     seen = {}
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", _capture_source(seen))
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", _capture_source(seen))
     result = runner.invoke(app, ["stream", "https://example.com/clip.mp3"])
     assert result.exit_code == 0
     assert type(seen["source"]).__name__ == "FileSource"
@@ -187,7 +187,7 @@ def test_stream_ctrl_c_exits_cleanly(monkeypatch):
     def raise_kbd(*a, **k):
         raise KeyboardInterrupt
 
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", raise_kbd)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", raise_kbd)
     result = runner.invoke(app, ["stream"])
     assert result.exit_code == 0
 
@@ -199,7 +199,7 @@ def test_stream_ctrl_c_human_mode_prints_stopped(monkeypatch):
     def raise_kbd(*a, **k):
         raise KeyboardInterrupt
 
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", raise_kbd)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", raise_kbd)
     result = runner.invoke(app, ["stream"])
     assert result.exit_code == 0
     assert "Stopped." in result.output
@@ -211,7 +211,7 @@ def test_stream_broken_pipe_exits_zero(monkeypatch):
     def raise_broken_pipe(*a, **k):
         raise BrokenPipeError
 
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", raise_broken_pipe)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", raise_broken_pipe)
     result = runner.invoke(app, ["stream"])
     assert result.exit_code == 0
 
@@ -232,7 +232,7 @@ def fake(api_key, source, *, params, on_begin=None, on_turn=None, on_termination
         if on_termination:
             on_termination(types.SimpleNamespace(audio_duration_seconds=2.0))
 
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake)
     p = tmp_path / "a.wav"
     with wave.open(str(p), "wb") as w:
         w.setnchannels(1)
@@ -254,7 +254,7 @@ def test_stream_prompt_biases_speech_model(monkeypatch):
     def fake(api_key, source, *, params, on_begin=None, on_turn=None, on_termination=None):
         seen["prompt"] = params.prompt
 
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake)
     result = runner.invoke(app, ["stream", "--prompt", "expect crypto jargon", "--json"])
     assert result.exit_code == 0
     # --prompt is the speech-model prompt, forwarded to the streaming session.
@@ -271,14 +271,14 @@ def test_stream_youtube_url_downloads_then_streams(monkeypatch, tmp_path):
         w.setsampwidth(2)
         w.setframerate(16000)
         w.writeframes(b"\x00\x01" * 100)
-    monkeypatch.setattr("aai_cli.commands.stream.youtube.download_audio", lambda url, d: fake)
+    monkeypatch.setattr("aai_cli.stream_exec.youtube.download_audio", lambda url, d: fake)
     seen = {}
 
     def fake_stream(api_key, source, *, params, on_begin=None, on_turn=None, on_termination=None):
         seen["source_type"] = type(source).__name__
         seen["src"] = getattr(source, "source", None)
 
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream)
     result = runner.invoke(app, ["stream", "https://youtu.be/abc"])
     assert result.exit_code == 0
     assert seen["source_type"] == "FileSource"  # streamed the downloaded local file
@@ -295,14 +295,14 @@ def test_stream_podcast_page_url_downloads_then_streams(monkeypatch, tmp_path):
         w.setsampwidth(2)
         w.setframerate(16000)
         w.writeframes(b"\x00\x01" * 100)
-    monkeypatch.setattr("aai_cli.commands.stream.youtube.download_audio", lambda url, d: fake)
+    monkeypatch.setattr("aai_cli.stream_exec.youtube.download_audio", lambda url, d: fake)
     seen = {}
 
     def fake_stream(api_key, source, *, params, on_begin=None, on_turn=None, on_termination=None):
         seen["source_type"] = type(source).__name__
         seen["src"] = getattr(source, "source", None)
 
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream)
     result = runner.invoke(app, ["stream", "https://www.spreaker.com/episode/12345"])
     assert result.exit_code == 0
     assert seen["source_type"] == "FileSource"  # streamed the downloaded local file
@@ -317,11 +317,11 @@ def test_stream_downloadable_url_resolves_credentials_before_downloading(monkeyp
     monkeypatch.setattr("aai_cli.context._interactive_session", lambda: False)
     downloads = []
     monkeypatch.setattr(
-        "aai_cli.commands.stream.youtube.download_audio",
+        "aai_cli.stream_exec.youtube.download_audio",
         lambda url, dest: downloads.append(url),
     )
     monkeypatch.setattr(
-        "aai_cli.commands.stream.client.stream_audio",
+        "aai_cli.stream_exec.client.stream_audio",
         lambda *a, **k: pytest.fail("must not stream without credentials"),
     )
     result = runner.invoke(app, ["stream", "https://youtu.be/abc"])
@@ -351,7 +351,7 @@ def fake_stream_audio(
         seen["rate"] = params.sample_rate
         b"".join(source)  # drain the StdinSource
 
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio)
     result = runner.invoke(app, ["stream", "-", "--sample-rate", "1"], input=b"\x00\x00")
     assert result.exit_code == 0
     assert seen["rate"] == 1
@@ -367,7 +367,7 @@ def fake_stream_audio(
         seen["rate"] = params.sample_rate
         seen["audio"] = b"".join(source)  # consume the StdinSource
 
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio)
     result = runner.invoke(app, ["stream", "-"], input=b"\x01\x02" * 100)
     assert result.exit_code == 0
     assert seen["rate"] == 16000  # default raw-PCM rate
@@ -398,9 +398,9 @@ def fake_stream_audio(
             raise APIError("mic failed")
         time.sleep(0.2)
 
-    monkeypatch.setattr("aai_cli.commands.stream.MacSystemAudioSource", FakeSystemAudio)
-    monkeypatch.setattr("aai_cli.commands.stream.MicrophoneSource", FakeMic)
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio)
+    monkeypatch.setattr("aai_cli.stream_exec.MacSystemAudioSource", FakeSystemAudio)
+    monkeypatch.setattr("aai_cli.stream_exec.MicrophoneSource", FakeMic)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio)
     result = runner.invoke(app, ["stream", "--system-audio", "--json"])
     assert result.exit_code == 1
     assert "mic failed" in result.output
@@ -417,7 +417,7 @@ def fake_stream_audio(
             on_turn(types.SimpleNamespace(transcript="partial", end_of_turn=False))
             on_turn(types.SimpleNamespace(transcript="hello world", end_of_turn=True))
 
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio)
     result = runner.invoke(app, ["stream", "-", "-o", "text"], input=b"\x00\x00")
     assert result.exit_code == 0
     # Final turn only, plain text; partials and JSON envelopes are not on stdout.
diff --git a/tests/test_stream_command_flags.py b/tests/test_stream_command_flags.py
index 35527c3c..d8144589 100644
--- a/tests/test_stream_command_flags.py
+++ b/tests/test_stream_command_flags.py
@@ -20,7 +20,7 @@ def fake_stream_audio(
     ):
         captured["params"] = params
 
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio)
 
     runner.invoke(
         app,
@@ -43,7 +43,7 @@ def test_stream_config_escape_hatch(monkeypatch):
     config.set_api_key("default", "sk_live")
     captured = {}
     monkeypatch.setattr(
-        "aai_cli.commands.stream.client.stream_audio",
+        "aai_cli.stream_exec.client.stream_audio",
         lambda api_key, source, *, params, **kw: captured.update(params=params),
     )
 
@@ -55,7 +55,7 @@ def test_stream_maps_webhook_auth_header(monkeypatch):
     config.set_api_key("default", "sk_live")
     captured = {}
     monkeypatch.setattr(
-        "aai_cli.commands.stream.client.stream_audio",
+        "aai_cli.stream_exec.client.stream_audio",
         lambda api_key, source, *, params, **kw: captured.update(params=params),
     )
 
@@ -79,7 +79,7 @@ def test_stream_format_turns_tristate(monkeypatch):
     config.set_api_key("default", "sk_live")
     captured = {}
     monkeypatch.setattr(
-        "aai_cli.commands.stream.client.stream_audio",
+        "aai_cli.stream_exec.client.stream_audio",
         lambda api_key, source, *, params, **kw: captured.update(params=params),
     )
 
@@ -134,7 +134,7 @@ def test_stream_file_source_with_sample_rejected(monkeypatch, tmp_path):
     def _boom(*a, **k):
         raise AssertionError("must not stream a conflicting source")
 
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", _boom)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", _boom)
     wav = tmp_path / "a.wav"
     wav.write_bytes(b"RIFF")
     result = runner.invoke(app, ["stream", str(wav), "--sample"])
diff --git a/tests/test_stream_exec.py b/tests/test_stream_exec.py
new file mode 100644
index 00000000..2e945522
--- /dev/null
+++ b/tests/test_stream_exec.py
@@ -0,0 +1,117 @@
+"""Direct tests of the `assembly stream` options/run seam (aai_cli.stream_exec).
+
+The command module only parses argv into a StreamOptions; everything after that is
+run_stream, a plain function of data. These tests drive validation, flag mapping,
+and session wiring by constructing options directly — no CliRunner argv round-trip,
+no merged-stream output parsing.
+"""
+
+from __future__ import annotations
+
+import dataclasses
+
+import pytest
+
+from aai_cli import config, llm, stream_exec
+from aai_cli.commands.stream import DEFAULT_SPEECH_MODEL
+from aai_cli.context import AppState
+from aai_cli.errors import UsageError
+
+# The CLI's flag defaults, as data. Tests override per-case with dataclasses.replace.
+DEFAULTS = stream_exec.StreamOptions(
+    source=None,
+    sample=False,
+    sample_rate=None,
+    device=None,
+    system_audio=False,
+    system_audio_only=False,
+    speech_model=DEFAULT_SPEECH_MODEL,
+    encoding=None,
+    language_detection=None,
+    domain=None,
+    prompt=None,
+    keyterms_prompt=None,
+    end_of_turn_confidence_threshold=None,
+    min_turn_silence=None,
+    max_turn_silence=None,
+    vad_threshold=None,
+    format_turns=None,
+    include_partial_turns=None,
+    speaker_labels=None,
+    max_speakers=None,
+    voice_focus=None,
+    voice_focus_threshold=None,
+    inactivity_timeout=None,
+    filter_profanity=None,
+    redact_pii=None,
+    redact_pii_policy=None,
+    redact_pii_sub=None,
+    webhook_url=None,
+    webhook_auth_header=None,
+    llm_prompt=None,
+    llm_interval=10.0,  # NOTE(review): bare literal, unlike the constants below — confirm vs CLI
+    model=llm.DEFAULT_MODEL,
+    max_tokens=llm.DEFAULT_MAX_TOKENS,
+    config_kv=None,
+    config_file=None,
+    output_field=None,
+    show_code=False,
+)
+
+
+class FakeMic:
+    """Stand-in for MicrophoneSource: mirrors its keyword signature (see microphone.py)."""
+
+    def __init__(self, *, target_rate=None, device=None, capture_rate=None, on_open=None):
+        self.sample_rate = capture_rate or 16000  # 16000 whenever capture_rate is falsy
+        self.device = device  # target_rate and on_open are accepted but not stored
+
+    def __iter__(self):
+        return iter([b"\x00\x00"])  # a single two-byte chunk per iteration
+
+
+def test_run_stream_maps_flags_to_params_without_cli(monkeypatch):
+    # The seam's payoff: assert the flag->StreamingParameters mapping by constructing
+    # options directly, instead of threading a giant argv through CliRunner.
+    config.set_api_key("default", "sk_live")
+    seen = {}  # filled in by the fake below; stays empty if streaming is never reached
+
+    def fake_stream_audio(api_key, source, *, params, **_kwargs):
+        seen["api_key"] = api_key
+        seen["params"] = params
+
+    monkeypatch.setattr(stream_exec.client, "stream_audio", fake_stream_audio)
+    monkeypatch.setattr(stream_exec, "MicrophoneSource", FakeMic)  # never open a real microphone
+
+    stream_exec.run_stream(
+        dataclasses.replace(
+            DEFAULTS,
+            domain="medical-v1",
+            prompt="expect drug names",
+            keyterms_prompt=["AssemblyAI"],
+        ),
+        AppState(),
+        json_mode=True,
+    )
+    assert seen["api_key"] == "sk_live"  # the key stored via config.set_api_key above
+    params = seen["params"]
+    assert params.domain == "medical-v1"
+    assert params.prompt == "expect drug names"
+    assert params.keyterms_prompt == ["AssemblyAI"]
+
+
+def test_run_stream_validates_before_resolving_credentials():
+    # No API key is configured, and system_audio + system_audio_only are both set: the
+    # flag conflict must surface as UsageError, not NotAuthenticated — validation runs first.
+    with pytest.raises(UsageError):  # NOTE(review): assumes an isolated, key-less config
+        stream_exec.run_stream(
+            dataclasses.replace(DEFAULTS, system_audio=True, system_audio_only=True),
+            AppState(),
+            json_mode=False,
+        )
+
+
+def test_stream_options_are_immutable():
+    field_name = "sample"  # presumably a variable so linters don't flag a constant setattr
+    with pytest.raises(dataclasses.FrozenInstanceError):
+        setattr(DEFAULTS, field_name, True)  # assigning to any field must raise
diff --git a/tests/test_stream_llm.py b/tests/test_stream_llm.py
index 9806b143..503d23a4 100644
--- a/tests/test_stream_llm.py
+++ b/tests/test_stream_llm.py
@@ -27,8 +27,8 @@ def fake_run_chain(api_key, prompts, *, transcript_text, model, max_tokens):
         seen["max_tokens"] = max_tokens
         return f"answer:{transcript_text}"
 
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake)
-    monkeypatch.setattr("aai_cli.commands.stream.llm.run_chain", fake_run_chain)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake)
+    monkeypatch.setattr("aai_cli.llm.run_chain", fake_run_chain)
     result = runner.invoke(
         app,
         [
@@ -67,8 +67,8 @@ def fake_run_chain(api_key, prompts, *, transcript_text, model, max_tokens):
         seen["prompts"] = prompts
         return "done"
 
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake)
-    monkeypatch.setattr("aai_cli.commands.stream.llm.run_chain", fake_run_chain)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake)
+    monkeypatch.setattr("aai_cli.llm.run_chain", fake_run_chain)
     result = runner.invoke(
         app, ["stream", "--llm", "summarize", "--llm", "translate to french", "--json"]
     )
@@ -79,7 +79,7 @@ def fake_run_chain(api_key, prompts, *, transcript_text, model, max_tokens):
 def test_stream_llm_rejects_output_text(monkeypatch):
     config.set_api_key("default", "sk_live")
     monkeypatch.setattr(
-        "aai_cli.commands.stream.client.stream_audio",
+        "aai_cli.stream_exec.client.stream_audio",
         lambda *a, **k: (_ for _ in ()).throw(AssertionError("must not stream")),
     )
     result = runner.invoke(app, ["stream", "--llm", "summarize", "-o", "text"])
@@ -100,8 +100,8 @@ def fake_run_chain(api_key, prompts, *, transcript_text, model, max_tokens):
         called["ran"] = True
         return "x"
 
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake)
-    monkeypatch.setattr("aai_cli.commands.stream.llm.run_chain", fake_run_chain)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake)
+    monkeypatch.setattr("aai_cli.llm.run_chain", fake_run_chain)
     result = runner.invoke(app, ["stream", "--json"])
     assert result.exit_code == 0
     assert called["ran"] is False  # no --llm -> no gateway call
@@ -111,7 +111,7 @@ def test_stream_show_code_with_llm_emits_follow_loop(monkeypatch):
     def _boom(*a, **k):
         raise AssertionError("must not stream")
 
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", _boom)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", _boom)
     result = runner.invoke(app, ["stream", "--llm", "summarize", "--show-code"])
     assert result.exit_code == 0
     assert "from openai import OpenAI" in result.output
@@ -127,9 +127,9 @@ def _eot_turn(text):
 def _llm_session(*, interval, clock, monkeypatch, emitted):
     import io
 
-    from aai_cli.commands.stream import StreamSession
     from aai_cli.follow import FollowRenderer
     from aai_cli.streaming.render import StreamRenderer
+    from aai_cli.streaming.session import StreamSession
 
     # Capture each follow refresh (json mode emits one NDJSON object per refresh) and
     # make run_chain echo the transcript it summarized so assertions read the cadence.
@@ -218,8 +218,8 @@ def test_maybe_summarize_is_noop_without_follow():
     # is never run (no gateway call) regardless of transcript content.
     import io
 
-    from aai_cli.commands.stream import StreamSession
     from aai_cli.streaming.render import StreamRenderer
+    from aai_cli.streaming.session import StreamSession
 
     session = StreamSession(
         api_key="sk",
diff --git a/tests/test_stream_session.py b/tests/test_stream_session.py
index 2bb4d830..73226d84 100644
--- a/tests/test_stream_session.py
+++ b/tests/test_stream_session.py
@@ -24,8 +24,8 @@ def test_stream_session_listening_notice_latches(monkeypatch):
     # callback fires repeatedly (pins the `self._listening_started = True` latch).
     import io
 
-    from aai_cli.commands.stream import StreamSession
     from aai_cli.streaming.render import StreamRenderer
+    from aai_cli.streaming.session import StreamSession
 
     renderer = StreamRenderer(json_mode=False, out=io.StringIO())
     calls = {"n": 0}
@@ -53,9 +53,9 @@ def test_stream_session_closes_renderer_on_error(monkeypatch):
 
     import pytest
 
-    from aai_cli.commands.stream import StreamSession
     from aai_cli.errors import CLIError
     from aai_cli.streaming.render import StreamRenderer
+    from aai_cli.streaming.session import StreamSession
 
     renderer = StreamRenderer(json_mode=False, out=io.StringIO())
     closed = {"n": 0}
@@ -64,7 +64,7 @@ def test_stream_session_closes_renderer_on_error(monkeypatch):
     def boom(*_args, **_kwargs):
         raise CLIError("stream blew up")
 
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", boom)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", boom)
     session = StreamSession(
         api_key="sk",
         base_flags={},
@@ -122,9 +122,9 @@ def fake_stream_audio(
         if on_turn:
             on_turn(types.SimpleNamespace(transcript=source_type, end_of_turn=True))
 
-    monkeypatch.setattr("aai_cli.commands.stream.MacSystemAudioSource", FakeSystemAudio)
-    monkeypatch.setattr("aai_cli.commands.stream.MicrophoneSource", FakeMic)
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio)
+    monkeypatch.setattr("aai_cli.stream_exec.MacSystemAudioSource", FakeSystemAudio)
+    monkeypatch.setattr("aai_cli.stream_exec.MicrophoneSource", FakeMic)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio)
     result = runner.invoke(app, ["stream", "--system-audio", "--json"])
     assert result.exit_code == 0
     assert set(source_types) == {"FakeSystemAudio", "FakeMic"}
@@ -154,9 +154,9 @@ def __iter__(self):
     def fail_mic(**_kwargs):
         raise AssertionError("system-audio-only must not open the microphone")
 
-    monkeypatch.setattr("aai_cli.commands.stream.MacSystemAudioSource", FakeSystemAudio)
-    monkeypatch.setattr("aai_cli.commands.stream.MicrophoneSource", fail_mic)
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", _capture_source(seen))
+    monkeypatch.setattr("aai_cli.stream_exec.MacSystemAudioSource", FakeSystemAudio)
+    monkeypatch.setattr("aai_cli.stream_exec.MicrophoneSource", fail_mic)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", _capture_source(seen))
     result = runner.invoke(app, ["stream", "--system-audio-only", "--json"])
     assert result.exit_code == 0
     assert type(seen["source"]).__name__ == "FakeSystemAudio"
@@ -195,9 +195,9 @@ def fake_stream_audio(
     ):
         list(source)
 
-    monkeypatch.setattr("aai_cli.commands.stream.MacSystemAudioSource", FakeSystemAudio)
-    monkeypatch.setattr("aai_cli.commands.stream.MicrophoneSource", FakeMic)
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio)
+    monkeypatch.setattr("aai_cli.stream_exec.MacSystemAudioSource", FakeSystemAudio)
+    monkeypatch.setattr("aai_cli.stream_exec.MicrophoneSource", FakeMic)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio)
     result = runner.invoke(
         app,
         ["stream", "--system-audio", "--device", "2", "--sample-rate", "44100", "--json"],
@@ -235,10 +235,10 @@ def fake_run_chain(api_key, prompts, *, transcript_text, model, max_tokens):
         transcript_inputs.append(transcript_text)
         return "summary"
 
-    monkeypatch.setattr("aai_cli.commands.stream.MacSystemAudioSource", FakeSystemAudio)
-    monkeypatch.setattr("aai_cli.commands.stream.MicrophoneSource", FakeMic)
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio)
-    monkeypatch.setattr("aai_cli.commands.stream.llm.run_chain", fake_run_chain)
+    monkeypatch.setattr("aai_cli.stream_exec.MacSystemAudioSource", FakeSystemAudio)
+    monkeypatch.setattr("aai_cli.stream_exec.MicrophoneSource", FakeMic)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio)
+    monkeypatch.setattr("aai_cli.llm.run_chain", fake_run_chain)
     result = runner.invoke(app, ["stream", "--system-audio", "--llm", "summarize", "--json"])
     assert result.exit_code == 0
     assert any("System: FakeSystemAudio" in value for value in transcript_inputs)
@@ -271,9 +271,9 @@ def fake_stream_audio(
         chunk = next(iter(source))
         speaker_labels_by_chunk[chunk] = params.speaker_labels
 
-    monkeypatch.setattr("aai_cli.commands.stream.MacSystemAudioSource", FakeSystemAudio)
-    monkeypatch.setattr("aai_cli.commands.stream.MicrophoneSource", FakeMic)
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio)
+    monkeypatch.setattr("aai_cli.stream_exec.MacSystemAudioSource", FakeSystemAudio)
+    monkeypatch.setattr("aai_cli.stream_exec.MicrophoneSource", FakeMic)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio)
     result = runner.invoke(app, ["stream", "--system-audio", "--speaker-labels", "--json"])
     assert result.exit_code == 0
     assert speaker_labels_by_chunk[b"system"] is True
@@ -319,9 +319,9 @@ def fake_stream_audio(
     ):
         raise APIError(f"{type(source).__name__} failed")
 
-    monkeypatch.setattr("aai_cli.commands.stream.MacSystemAudioSource", FakeSystemAudio)
-    monkeypatch.setattr("aai_cli.commands.stream.MicrophoneSource", FakeMic)
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio)
+    monkeypatch.setattr("aai_cli.stream_exec.MacSystemAudioSource", FakeSystemAudio)
+    monkeypatch.setattr("aai_cli.stream_exec.MicrophoneSource", FakeMic)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio)
     monkeypatch.setattr("aai_cli.streaming.session.threading.Thread", ImmediateThread)
     result = runner.invoke(app, ["stream", "--system-audio", "--json"])
     assert result.exit_code == 1
@@ -367,9 +367,9 @@ def join(self, timeout=None):
     def fake_stream_audio(api_key, source, *, params, **_kwargs):
         raise RuntimeError("event parsing blew up")
 
-    monkeypatch.setattr("aai_cli.commands.stream.MacSystemAudioSource", FakeSystemAudio)
-    monkeypatch.setattr("aai_cli.commands.stream.MicrophoneSource", FakeMic)
-    monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio)
+    monkeypatch.setattr("aai_cli.stream_exec.MacSystemAudioSource", FakeSystemAudio)
+    monkeypatch.setattr("aai_cli.stream_exec.MicrophoneSource", FakeMic)
+    monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio)
     monkeypatch.setattr("aai_cli.streaming.session.threading.Thread", ImmediateThread)
     result = runner.invoke(app, ["stream", "--system-audio", "--json"])
     assert result.exit_code == 1
@@ -398,8 +398,8 @@ def __init__(self, *, target, args, daemon):
         def start(self):
             raise KeyboardInterrupt
 
-    monkeypatch.setattr("aai_cli.commands.stream.MacSystemAudioSource", FakeSystemAudio)
-    monkeypatch.setattr("aai_cli.commands.stream.MicrophoneSource", FakeMic)
+    monkeypatch.setattr("aai_cli.stream_exec.MacSystemAudioSource", FakeSystemAudio)
+    monkeypatch.setattr("aai_cli.stream_exec.MicrophoneSource", FakeMic)
     monkeypatch.setattr("aai_cli.streaming.session.threading.Thread", InterruptingThread)
     result = runner.invoke(app, ["stream", "--system-audio"])
     assert result.exit_code == 0
@@ -424,8 +424,8 @@ def __init__(self, *, target, args, daemon):
         def start(self):
             raise BrokenPipeError
 
-    monkeypatch.setattr("aai_cli.commands.stream.MacSystemAudioSource", FakeSystemAudio)
-    monkeypatch.setattr("aai_cli.commands.stream.MicrophoneSource", FakeMic)
+    monkeypatch.setattr("aai_cli.stream_exec.MacSystemAudioSource", FakeSystemAudio)
+    monkeypatch.setattr("aai_cli.stream_exec.MicrophoneSource", FakeMic)
     monkeypatch.setattr("aai_cli.streaming.session.threading.Thread", BrokenPipeThread)
     result = runner.invoke(app, ["stream", "--system-audio"])
     assert result.exit_code == 0
diff --git a/tests/test_stream_show_code.py b/tests/test_stream_show_code.py
index c2f2f675..79e1589d 100644
--- a/tests/test_stream_show_code.py
+++ b/tests/test_stream_show_code.py
@@ -15,7 +15,7 @@ def test_stream_show_code_prints_without_streaming(monkeypatch):
     # Print-only: emits the mic-streaming script, never opens audio or streams, no auth.
     called = []
     monkeypatch.setattr(
-        "aai_cli.commands.stream.client.stream_audio",
+        "aai_cli.stream_exec.client.stream_audio",
         lambda *a, **k: called.append(True),
     )
     result = runner.invoke(app, ["stream", "--show-code"])
@@ -101,7 +101,7 @@ def _boom(*a, **k):
         raise AssertionError("must not stream")
 
     monkeypatch.setattr(
-        "aai_cli.commands.stream.client.stream_audio",
+        "aai_cli.stream_exec.client.stream_audio",
         _boom,
     )
     result = runner.invoke(app, ["stream", "--show-code", "--json"])
diff --git a/tests/test_transcribe.py b/tests/test_transcribe.py
index fb7bea2a..a847c0ad 100644
--- a/tests/test_transcribe.py
+++ b/tests/test_transcribe.py
@@ -57,7 +57,7 @@ def _fake_transcript(mocker):
 def test_transcribe_sample_prints_text(mocker):
     _auth()
     tx = mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -71,7 +71,7 @@ def test_transcribe_sample_prints_text(mocker):
 def test_transcribe_json_output(mocker):
     _auth()
     mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -83,7 +83,7 @@ def test_transcribe_unauthenticated_runs_login_then_transcribes(monkeypatch, moc
     monkeypatch.setattr("aai_cli.context._interactive_session", lambda: True)
     monkeypatch.setattr("aai_cli.context.run_login_flow", _login_result)
     tx = mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -97,7 +97,7 @@ def test_transcribe_unauthenticated_runs_login_then_transcribes(monkeypatch, moc
 def test_transcribe_output_text_field(mocker):
     _auth()
     mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -109,7 +109,7 @@ def test_transcribe_output_text_field(mocker):
 def test_transcribe_output_id_field(mocker):
     _auth()
     mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -122,7 +122,7 @@ def test_transcribe_output_srt_field(mocker):
     _auth()
     t = _fake_transcript(mocker)
     t.export_subtitles_srt.return_value = "1\n00:00:00,000 --> 00:00:02,000\nhello world\n"
-    mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True, return_value=t)
+    mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=t)
     result = runner.invoke(app, ["transcribe", "audio.mp3", "-o", "srt"])
     assert result.exit_code == 0
     assert "00:00:00,000 --> 00:00:02,000" in result.output  # SRT body, pipe-friendly
@@ -132,7 +132,7 @@ def test_transcribe_output_srt_field(mocker):
 def test_transcribe_output_invalid_exits_2(mocker):
     _auth()
     mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -151,7 +151,7 @@ def fake_transcribe(api_key, audio, *, config):
         seen["bytes"] = pathlib.Path(audio).read_bytes()
         return _fake_transcript(mocker)
 
-    monkeypatch.setattr("aai_cli.commands.transcribe.client.transcribe", fake_transcribe)
+    monkeypatch.setattr("aai_cli.transcribe_exec.client.transcribe", fake_transcribe)
     result = runner.invoke(app, ["transcribe", "-", "-o", "text"], input=b"RIFFfake-wav-bytes")
     assert result.exit_code == 0
     assert result.output.strip() == "hello world"
@@ -165,7 +165,7 @@ def test_transcribe_status_renders_enum_value(mocker):
     t = _fake_transcript(mocker)
     t.status = aai.TranscriptStatus.completed
     t.json_response = None
-    mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True, return_value=t)
+    mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=t)
     result = runner.invoke(app, ["transcribe", "audio.mp3", "--json"])
     assert result.exit_code == 0
     assert '"status": "completed"' in result.output
@@ -182,7 +182,7 @@ def fake_transform(api_key, *, prompt, model, transcript_id, max_tokens, transcr
         return "a short summary"
 
     mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -211,7 +211,7 @@ def fake_transform(
         return f"out({prompt})"
 
     mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -243,7 +243,7 @@ def test_transcribe_prompt_human_shows_only_transform(monkeypatch, mocker):
     _auth()
     monkeypatch.setattr("aai_cli.output.resolve_json", lambda *, explicit: False)
     mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -263,7 +263,7 @@ def test_transcribe_chained_prompts_human_labels_each_step(monkeypatch, mocker):
     _auth()
     monkeypatch.setattr("aai_cli.output.resolve_json", lambda *, explicit: False)
     mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -290,7 +290,7 @@ def test_transcribe_youtube_url_downloads_then_transcribes(monkeypatch, mocker,
         lambda url, d, *, download_sections=None: fake,
     )
     tx = mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -312,7 +312,7 @@ def _capture(url, d, *, download_sections=None):
 
     monkeypatch.setattr("aai_cli.transcribe_exec.youtube.download_audio", _capture)
     mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -335,7 +335,7 @@ def test_transcribe_podcast_page_url_downloads_then_transcribes(monkeypatch, moc
         lambda url, d, *, download_sections=None: fake,
     )
     tx = mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -354,7 +354,7 @@ def _no_download(url, d, *, download_sections=None):
 
     monkeypatch.setattr("aai_cli.transcribe_exec.youtube.download_audio", _no_download)
     tx = mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -369,7 +369,7 @@ def test_transcribe_renders_summary_human(monkeypatch, mocker):
     t = _fake_transcript(mocker)
     t.summary = "three bullet summary"
     t.chapters = []
-    mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True, return_value=t)
+    mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=t)
     result = runner.invoke(app, ["transcribe", "audio.mp3", "--summarization"])
     assert result.exit_code == 0
     assert "Summary:" in result.output
diff --git a/tests/test_transcribe_batch.py b/tests/test_transcribe_batch.py
index 69705071..3cc2e367 100644
--- a/tests/test_transcribe_batch.py
+++ b/tests/test_transcribe_batch.py
@@ -17,7 +17,7 @@
 
 runner = CliRunner()
 
-_TRANSCRIBE = "aai_cli.commands.transcribe.client.transcribe"
+_TRANSCRIBE = "aai_cli.transcribe_exec.client.transcribe"
 
 
 @pytest.fixture(autouse=True)
diff --git a/tests/test_transcribe_batch_sources.py b/tests/test_transcribe_batch_sources.py
index e6836d21..5553e993 100644
--- a/tests/test_transcribe_batch_sources.py
+++ b/tests/test_transcribe_batch_sources.py
@@ -16,7 +16,7 @@
 
 runner = CliRunner()
 
-_TRANSCRIBE = "aai_cli.commands.transcribe.client.transcribe"
+_TRANSCRIBE = "aai_cli.transcribe_exec.client.transcribe"
 
 
 @pytest.fixture(autouse=True)
diff --git a/tests/test_transcribe_flags.py b/tests/test_transcribe_flags.py
index 5087ea02..4fec6677 100644
--- a/tests/test_transcribe_flags.py
+++ b/tests/test_transcribe_flags.py
@@ -52,7 +52,7 @@ def _enum_or_str(value):
 def test_transcribe_passes_speaker_labels(mocker):
     _auth()
     tx = mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -63,7 +63,7 @@ def test_transcribe_passes_speaker_labels(mocker):
 def test_transcribe_prompt_biases_speech_model(mocker):
     _auth()
     tx = mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -76,7 +76,7 @@ def test_transcribe_prompt_biases_speech_model(mocker):
 def test_transcribe_maps_analysis_flags(mocker):
     _auth()
     tx = mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -102,7 +102,7 @@ def test_transcribe_maps_analysis_flags(mocker):
 def test_transcribe_redact_pii_policy_csv(mocker):
     _auth()
     tx = mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -127,7 +127,7 @@ def test_transcribe_redact_pii_policy_csv(mocker):
 def test_transcribe_config_escape_hatch(mocker):
     _auth()
     tx = mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -138,7 +138,7 @@ def test_transcribe_config_escape_hatch(mocker):
 def test_transcribe_unknown_config_field_exits_2(mocker):
     _auth()
     mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -150,7 +150,7 @@ def test_transcribe_unknown_config_field_exits_2(mocker):
 def test_transcribe_webhook_auth_header(mocker):
     _auth()
     tx = mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -173,7 +173,7 @@ def test_transcribe_webhook_auth_header(mocker):
 
 def test_transcribe_negative_audio_start_exits_2(mocker):
     _auth()
-    tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True)
+    tx = mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True)
     result = runner.invoke(app, ["transcribe", "audio.mp3", "--audio-start", "-100"])
     assert result.exit_code == 2
     tx.assert_not_called()
@@ -181,7 +181,7 @@ def test_transcribe_negative_audio_start_exits_2(mocker):
 
 def test_transcribe_language_code_with_detection_exits_2(mocker):
     _auth()
-    tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True)
+    tx = mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True)
     result = runner.invoke(
         app,
         ["transcribe", "audio.mp3", "--language-code", "en_us", "--language-detection"],
@@ -196,7 +196,7 @@ def test_transcribe_language_flags_alone_are_accepted(mocker):
     # Only the combination is contradictory; each flag works on its own.
     _auth()
     tx = mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -210,7 +210,7 @@ def test_transcribe_language_flags_alone_are_accepted(mocker):
 
 def test_transcribe_speakers_expected_without_labels_exits_2(mocker):
     _auth()
-    tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True)
+    tx = mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True)
     result = runner.invoke(app, ["transcribe", "audio.mp3", "--speakers-expected", "2"])
     assert result.exit_code == 2
     assert "--speakers-expected only applies when diarization is enabled." in result.output
@@ -221,7 +221,7 @@ def test_transcribe_speakers_expected_without_labels_exits_2(mocker):
 def test_transcribe_speakers_expected_with_labels_is_accepted(mocker):
     _auth()
     tx = mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -237,7 +237,7 @@ def test_transcribe_speakers_expected_with_config_speaker_labels_is_accepted(moc
     # runs on the merged config, not just the curated flag.
     _auth()
     tx = mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -254,7 +254,7 @@ def test_transcribe_temperature_out_of_range_exits_2(mocker, value):
     # The API documents temperature as 0 (most deterministic) to 1 (least); reject
     # out-of-range values client-side instead of letting them flow to the request.
     _auth()
-    tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True)
+    tx = mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True)
     result = runner.invoke(app, ["transcribe", "audio.mp3", "--temperature", value])
     assert result.exit_code == 2
     tx.assert_not_called()
@@ -264,7 +264,7 @@ def test_transcribe_temperature_out_of_range_exits_2(mocker, value):
 def test_transcribe_temperature_bounds_are_inclusive(mocker, value):
     _auth()
     tx = mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -275,7 +275,7 @@ def test_transcribe_temperature_bounds_are_inclusive(mocker, value):
 
 def test_transcribe_negative_audio_end_exits_2(mocker):
     _auth()
-    tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True)
+    tx = mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True)
     result = runner.invoke(app, ["transcribe", "audio.mp3", "--audio-end", "-100"])
     assert result.exit_code == 2
     tx.assert_not_called()
@@ -284,7 +284,7 @@ def test_transcribe_negative_audio_end_exits_2(mocker):
 def test_transcribe_audio_end_zero_is_accepted(mocker):
     _auth()
     tx = mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -297,7 +297,7 @@ def test_transcribe_json_with_non_json_output_field_exits_2(mocker):
     # --json means "the full JSON payload" (same as -o json); -o text contradicts it
     # and must not silently win.
     _auth()
-    tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True)
+    tx = mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True)
     result = runner.invoke(app, ["transcribe", "audio.mp3", "-o", "text", "--json"])
     assert result.exit_code == 2
     assert "--json and -o text can't be combined." in result.output
@@ -310,7 +310,7 @@ def test_transcribe_json_with_o_json_is_accepted(mocker):
 
     _auth()
     mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -323,7 +323,7 @@ def test_transcribe_warns_on_non_audio_extension(mocker, tmp_path):
     _auth()
     (tmp_path / "notes.txt").write_bytes(b"fake")
     mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -337,7 +337,7 @@ def test_transcribe_non_audio_warning_suppressed_by_quiet(mocker, tmp_path):
     _auth()
     (tmp_path / "notes.txt").write_bytes(b"fake")
     mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -352,7 +352,7 @@ def test_transcribe_non_audio_warning_is_structured_under_json(mocker, tmp_path)
     _auth()
     (tmp_path / "notes.txt").write_bytes(b"fake")
     mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -370,7 +370,7 @@ def test_transcribe_no_warning_for_audio_or_extensionless_files(mocker, tmp_path
     _auth()
     (tmp_path / name).write_bytes(b"fake")
     mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -384,7 +384,7 @@ def test_transcribe_no_warning_for_urls_or_sample(mocker, argv):
     # Remote sources aren't local files; the extension heuristic doesn't apply.
     _auth()
     mocker.patch(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         autospec=True,
         return_value=_fake_transcript(mocker),
     )
@@ -395,7 +395,7 @@ def test_transcribe_no_warning_for_urls_or_sample(mocker, argv):
 
 def test_transcribe_unknown_pii_policy_exits_2_and_lists_valid(mocker):
     _auth()
-    tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True)
+    tx = mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True)
     result = runner.invoke(
         app,
         ["transcribe", "audio.mp3", "--redact-pii", "--redact-pii-policy", "not_a_policy"],
diff --git a/tests/test_transcribe_out.py b/tests/test_transcribe_out.py
index 25591081..69abb69a 100644
--- a/tests/test_transcribe_out.py
+++ b/tests/test_transcribe_out.py
@@ -18,7 +18,7 @@ def audio_file(tmp_path, monkeypatch):
     (tmp_path / "audio.mp3").write_bytes(b"fake-audio")
 
 
-_TRANSCRIBE = "aai_cli.commands.transcribe.client.transcribe"
+_TRANSCRIBE = "aai_cli.transcribe_exec.client.transcribe"
 
 
 def _auth():
diff --git a/tests/test_transcribe_show_code.py b/tests/test_transcribe_show_code.py
index d28e1aaf..b5fe3032 100644
--- a/tests/test_transcribe_show_code.py
+++ b/tests/test_transcribe_show_code.py
@@ -15,7 +15,7 @@ def test_transcribe_show_code_prints_without_transcribing(monkeypatch):
     # Print-only: emits code, never calls the API, needs no auth.
     called = []
     monkeypatch.setattr(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         lambda *a, **k: called.append(True),
     )
     result = runner.invoke(app, ["transcribe", "--sample", "--speaker-labels", "--show-code"])
@@ -34,7 +34,7 @@ def test_transcribe_show_code_includes_download_sections(monkeypatch):
     def _boom(*a, **k):
         raise AssertionError("must not transcribe")
 
-    monkeypatch.setattr("aai_cli.commands.transcribe.client.transcribe", _boom)
+    monkeypatch.setattr("aai_cli.transcribe_exec.client.transcribe", _boom)
     result = runner.invoke(
         app,
         ["transcribe", "https://youtu.be/abc", "--download-sections", "*0:00-5:00", "--show-code"],
@@ -50,7 +50,7 @@ def test_transcribe_show_code_without_source_uses_placeholder(monkeypatch):
     def _boom(*a, **k):
         raise AssertionError("must not transcribe")
 
-    monkeypatch.setattr("aai_cli.commands.transcribe.client.transcribe", _boom)
+    monkeypatch.setattr("aai_cli.transcribe_exec.client.transcribe", _boom)
     result = runner.invoke(app, ["transcribe", "--show-code"])
     assert result.exit_code == 0
     assert "import assemblyai as aai" in result.output
@@ -63,7 +63,7 @@ def _boom(*a, **k):
         raise AssertionError("must not transcribe")
 
     monkeypatch.setattr(
-        "aai_cli.commands.transcribe.client.transcribe",
+        "aai_cli.transcribe_exec.client.transcribe",
         _boom,
     )
     result = runner.invoke(app, ["transcribe", "--sample", "--show-code", "--json"])
@@ -77,7 +77,7 @@ def test_transcribe_show_code_includes_llm_gateway_without_running(monkeypatch):
     def _boom(*a, **k):
         raise AssertionError("must not call the API")
 
-    monkeypatch.setattr("aai_cli.commands.transcribe.client.transcribe", _boom)
+    monkeypatch.setattr("aai_cli.transcribe_exec.client.transcribe", _boom)
     monkeypatch.setattr("aai_cli.commands.transcribe.llm.transform_transcript", _boom)
     result = runner.invoke(
         app,
@@ -94,7 +94,7 @@ def test_transcribe_show_code_output_srt_generates_export(monkeypatch):
     def _boom(*a, **k):
         raise AssertionError("must not transcribe")
 
-    monkeypatch.setattr("aai_cli.commands.transcribe.client.transcribe", _boom)
+    monkeypatch.setattr("aai_cli.transcribe_exec.client.transcribe", _boom)
     result = runner.invoke(app, ["transcribe", "--sample", "-o", "srt", "--show-code"])
     assert result.exit_code == 0
     compile(result.output, "<generated>", "exec")  # the emitted script is runnable
@@ -106,7 +106,7 @@ def test_transcribe_show_code_output_utterances_generates_loop(monkeypatch):
     def _boom(*a, **k):
         raise AssertionError("must not transcribe")
 
-    monkeypatch.setattr("aai_cli.commands.transcribe.client.transcribe", _boom)
+    monkeypatch.setattr("aai_cli.transcribe_exec.client.transcribe", _boom)
     result = runner.invoke(app, ["transcribe", "--sample", "-o", "utterances", "--show-code"])
     assert result.exit_code == 0
     compile(result.output, "<generated>", "exec")