diff --git a/.importlinter b/.importlinter
index 15dd3336..9a666fdc 100644
--- a/.importlinter
+++ b/.importlinter
@@ -20,6 +20,7 @@ source_modules =
     aai_cli.context
     aai_cli.debuglog
     aai_cli.dictate_exec
+    aai_cli.dub_exec
     aai_cli.environments
     aai_cli.errors
     aai_cli.eval_data
@@ -65,6 +66,7 @@ modules =
     aai_cli.commands.dev
     aai_cli.commands.dictate
     aai_cli.commands.doctor
+    aai_cli.commands.dub
     aai_cli.commands.evaluate
     aai_cli.commands.init
     aai_cli.commands.keys
diff --git a/README.md b/README.md
index 50b32b85..a83cb5bb 100644
--- a/README.md
+++ b/README.md
@@ -32,6 +32,14 @@ assembly transcribe "https://www.youtube.com/watch?v=awmCtXzFsJo" --speaker-labe
 
 `speak` auto-detects `Speaker A:` labels, merges each speaker's turns, and rotates voices. (`speak` is sandbox-only today, hence `--sandbox`.)
 
+**Dub a video into another language** — the whole platform in one command: transcription with utterance timestamps, per-utterance LLM translation, TTS for each line (one voice per speaker), and ffmpeg swapping the new track in for the original audio:
+
+```sh
+assembly --sandbox dub talk.mp4 --lang de
+```
+
+The video stream is copied untouched; each dubbed line starts at its original timestamp, or right after the previous line when that one runs long. (Sandbox-only, like `speak`.)
+
 **Turn a podcast into audio** — Apple and Spotify podcast pages work too (yt-dlp ingestion):
 
 ```sh
@@ -183,6 +191,7 @@ assembly init                  # scaffold a starter app
 - **Voice agent**: `assembly agent` runs a full-duplex spoken conversation in your terminal.
 - **LLM Gateway**: `assembly llm` prompts an LLM over a transcript, stdin, or a live stream (`assembly stream --llm "summarize as I talk"`).
 - **Transcript-driven clipping**: `assembly clip` cuts an audio/video file (or a YouTube/podcast URL) with ffmpeg by diarized speaker (`--speaker A`), text match (`--search "pricing"`), LLM pick (`--llm "the three best moments"`), or explicit time range (`--range 1:30-2:45`) — transcribing on the fly, reusing a finished transcript with `-t ID`, or reading one from a pipe (`assembly transcribe x.mp4 --speaker-labels --json | assembly clip x.mp4 -t - --llm "…"`). Clip boundaries snap into nearby silence (ffmpeg `silencedetect`) so cuts don't land mid-word; `--no-snap` cuts at the exact selected times.
+- **Dubbing**: `assembly dub` re-voices an audio/video file in another language (`assembly --sandbox dub talk.mp4 --lang de`): diarized transcription, per-utterance LLM translation, streaming TTS per speaker, and an ffmpeg track-swap that leaves the video untouched. Sandbox-only today, like `speak`.
 - **Model evaluation**: `assembly eval` transcribes a Hugging Face dataset (with built-in aliases for common benchmarks: `assembly eval tedlium`) or a local `.csv`/`.jsonl` manifest and scores WER against its references — handy for picking a speech model.
 - **Starter apps**: `assembly init` scaffolds a self-contained FastAPI + HTML app (`audio-transcription`, `live-captions`, `voice-agent`); `assembly dev` runs it, `assembly share` exposes it on a public URL, and `assembly deploy` ships it to Vercel, Railway, or Fly.io.
 - **Webhook testing**: `assembly webhooks listen` opens a public dev URL (cloudflared quick tunnel) that prints webhook deliveries as they arrive and can forward them to your local app with `--forward-to`.
diff --git a/aai_cli/commands/dub.py b/aai_cli/commands/dub.py
new file mode 100644
index 00000000..1d3dc552
--- /dev/null
+++ b/aai_cli/commands/dub.py
@@ -0,0 +1,106 @@
+from __future__ import annotations
+
+from pathlib import Path
+
+import typer
+
+from aai_cli import dub_exec, help_panels, llm, options
+from aai_cli.context import run_command
+from aai_cli.help_text import examples_epilog
+
+app = typer.Typer()
+
+
+@app.command(
+    rich_help_panel=help_panels.TRANSCRIPTION,
+    # --sandbox is a root flag, so it must come before the subcommand in every example.
+    epilog=examples_epilog(
+        [
+            ("Dub a talk into German (sandbox only)", "assembly --sandbox dub talk.mp4 --lang de"),
+            ("Use a language name instead of a code", "assembly --sandbox dub talk.mp4 -l Spanish"),
+            (
+                "Dub every speaker with one voice",
+                "assembly --sandbox dub talk.mp4 -l fr --voice paul",
+            ),
+            (
+                "Pin a voice per diarized speaker",
+                "assembly --sandbox dub panel.mp4 -l de --voice A=jane --voice B=paul",
+            ),
+            (
+                "Reuse a finished transcript instead of re-transcribing",
+                "assembly --sandbox dub talk.mp4 -l de -t TRANSCRIPT_ID",
+            ),
+            (
+                "Choose the output file",
+                "assembly --sandbox dub talk.mp4 -l de --out talk-german.mp4",
+            ),
+        ]
+    ),
+)
+def dub(
+    ctx: typer.Context,
+    media: str = typer.Argument(
+        ...,
+        help="Local audio/video file to dub (the video stream is copied untouched).",
+    ),
+    lang: str = typer.Option(
+        ...,
+        "--lang",
+        "-l",
+        help="Target language: an ISO code (de, fr, es, …) or a language name (German).",
+    ),
+    transcript_id: str | None = typer.Option(
+        None,
+        "--transcript-id",
+        "-t",
+        help="Reuse an existing diarized transcript of this media instead of "
+        "transcribing it again.",
+    ),
+    voice: list[str] = typer.Option(
+        [],
+        "--voice",
+        help="Voice id for every speaker (e.g. jane, michael, paul), or SPEAKER=VOICE "
+        "to pin a diarized speaker (repeatable, e.g. --voice A=jane).",
+    ),
+    model: str = typer.Option(
+        llm.DEFAULT_MODEL,
+        "--model",
+        help="LLM Gateway model that translates the utterances.",
+        rich_help_panel=help_panels.OPT_LLM,
+        autocompletion=llm.complete_model,
+    ),
+    max_tokens: int = typer.Option(
+        llm.DEFAULT_MAX_TOKENS,
+        "--max-tokens",
+        help="Max tokens per utterance translation.",
+        rich_help_panel=help_panels.OPT_LLM,
+    ),
+    out: Path | None = typer.Option(
+        None, "--out", help="Output file (default: <name>.dub.<lang><ext> next to the input)."
+    ),
+    json_out: bool = options.json_option("Emit JSON describing the dubbed file."),
+) -> None:
+    """Dub a video or audio file into another language (sandbox only).
+
+    The whole platform in one command: the media is transcribed with diarized
+    utterance timestamps, each utterance is translated by an LLM Gateway model,
+    the translations are synthesized with streaming TTS (one voice per
+    speaker), and ffmpeg lays the new audio over the original — video copied
+    untouched. Streaming TTS only exists in the sandbox today — run it as
+    'assembly --sandbox dub' (--sandbox goes before the subcommand). Requires
+    ffmpeg.
+    """
+    opts = dub_exec.DubOptions(
+        media=media,
+        language=lang,
+        transcript_id=transcript_id,
+        voice=voice,
+        model=model,
+        max_tokens=max_tokens,
+        out=out,
+    )
+    run_command(
+        ctx,
+        lambda state, json_mode: dub_exec.run_dub(opts, state, json_mode=json_mode),
+        json=json_out,
+    )
diff --git a/aai_cli/dub_exec.py b/aai_cli/dub_exec.py
new file mode 100644
index 00000000..26040d49
--- /dev/null
+++ b/aai_cli/dub_exec.py
@@ -0,0 +1,420 @@
+"""Run logic for `assembly dub`: transcribe → translate → synthesize → ffmpeg track-swap.
+
+The command module (aai_cli/commands/dub.py) only parses argv — it builds a
+``DubOptions`` and hands it to ``run_dub`` via ``context.run_command`` (the
+options/run split, see AGENTS.md), so tests drive the whole pipeline by
+constructing options directly.
+
+The pipeline runs the platform end to end in one command: the media is
+transcribed with diarized utterance timestamps, each utterance is translated to
+the target language by an LLM Gateway model, each translation is synthesized
+with streaming TTS (one voice per speaker), the segments are laid out on a
+silence timeline at their original start times, and ffmpeg swaps the new track
+over the original media (video stream copied untouched). Streaming TTS only
+exists in the sandbox today, so — like `assembly speak` — the command is
+sandbox-only.
+"""
+
+from __future__ import annotations
+
+import re
+import shutil
+import subprocess
+import tempfile
+from dataclasses import dataclass
+from pathlib import Path
+
+import assemblyai as aai
+from rich.markup import escape
+
+from aai_cli import client, environments, jsonshape, output
+from aai_cli import llm as gateway
+from aai_cli.context import AppState
+from aai_cli.errors import APIError, CLIError, UsageError
+from aai_cli.tts import audio, dialogue, session
+from aai_cli.tts.session import SpeakConfig
+
+# ISO-639-1 codes accepted by --lang, mapped to the language *name* both the
+# translation prompt and the streaming-TTS `language` param expect. A value not
+# listed passes through as typed, so a full name ("German") — or an unlisted
+# language the gateway can translate to — still works.
+LANGUAGE_NAMES = {
+    "ar": "Arabic",
+    "de": "German",
+    "en": "English",
+    "es": "Spanish",
+    "fr": "French",
+    "hi": "Hindi",
+    "it": "Italian",
+    "ja": "Japanese",
+    "ko": "Korean",
+    "nl": "Dutch",
+    "pl": "Polish",
+    "pt": "Portuguese",
+    "ru": "Russian",
+    "tr": "Turkish",
+    "vi": "Vietnamese",
+    "zh": "Chinese",
+}
+
+# System prompt for the per-utterance translation calls. Length matters: the dub
+# replaces speech that occupied a fixed window, so the model is told to keep the
+# spoken length close to the original.
+TRANSLATION_SYSTEM_TEMPLATE = (
+    "You translate dialogue for dubbing. Translate the user's text to {language}. "
+    "Keep the meaning and register, and stay close to the original spoken length so "
+    "the dub fits the original timing. Reply with only the translated text — no "
+    "quotes, notes, or extra commentary."
+)
+
+
+@dataclass(frozen=True)
+class DubOptions:
+    """Every `assembly dub` flag as plain data (``--json`` excluded: run_command
+    resolves it into the ``json_mode`` argument)."""
+
+    media: str
+    language: str
+    transcript_id: str | None
+    voice: list[str]
+    model: str
+    max_tokens: int
+    out: Path | None
+
+
+def resolve_language(value: str) -> str:
+    """The target language name: an ISO code maps to its name, anything else
+    passes through as typed (the gateway accepts more languages than the map)."""
+    cleaned = value.strip()
+    if not cleaned:
+        raise UsageError(
+            "--lang needs a language.",
+            suggestion="Pass an ISO code (--lang de) or a language name (--lang German).",
+        )
+    return LANGUAGE_NAMES.get(cleaned.casefold(), cleaned)
+
+
+def default_out_path(media: Path, language: str) -> Path:
+    """The default output file: ``<stem>.dub.<language-slug><ext>`` next to the input."""
+    slug = re.sub(r"[^a-z0-9]+", "-", language.casefold()).strip("-")
+    return media.parent / f"{media.stem}.dub.{slug}{media.suffix}"
+
+
+def assemble_timeline(
+    placed: list[tuple[int, bytes]],
+    sample_rate: int,
+    total_seconds: float | None,
+) -> bytes:
+    """Lay each ``(start_ms, pcm)`` segment onto a silence timeline.
+
+    Gaps before a segment's start are filled with silence; a segment whose
+    predecessor overran its start time is appended immediately (the dub drifts
+    rather than dropping speech). The tail is padded out to ``total_seconds``
+    (the source duration) so the dubbed track never ends early.
+    """
+    pcm = bytearray()
+    for start_ms, segment in placed:
+        gap = start_ms / 1000 - _pcm_seconds(pcm, sample_rate)
+        if gap > 0:
+            pcm.extend(audio.silence(sample_rate, gap))
+        pcm.extend(segment)
+    if total_seconds is not None:
+        tail = total_seconds - _pcm_seconds(pcm, sample_rate)
+        if tail > 0:
+            pcm.extend(audio.silence(sample_rate, tail))
+    return bytes(pcm)
+
+
+def _pcm_seconds(pcm: bytes | bytearray, sample_rate: int) -> float:
+    """Seconds of audio in 16-bit mono PCM: two bytes per sample."""
+    return len(pcm) / 2 / sample_rate
+
+
+def _require_sandbox() -> None:
+    """`assembly dub` synthesizes with streaming TTS, which is sandbox-only today."""
+    if not session.is_available():
+        raise CLIError(
+            "assembly dub is only available in the sandbox (it uses streaming TTS).",
+            error_type="unsupported_environment",
+            exit_code=2,
+            suggestion="Re-run as: assembly --sandbox dub … "
+            f"(--sandbox goes before the command; or use --env {environments.SANDBOX_ENV}).",
+        )
+
+
+def _validate_media(media: Path) -> None:
+    """Reject a missing local source before credential resolution, so a typo'd
+    path reads as "file not found", never as a login prompt or an ffmpeg error."""
+    if not media.exists():
+        raise CLIError(
+            f"File not found: {media}",
+            error_type="file_not_found",
+            exit_code=2,
+            suggestion="Check the path. assembly dub needs a local audio/video file.",
+        )
+    if not media.is_file():
+        raise CLIError(
+            f"Not a file: {media}",
+            error_type="not_a_file",
+            exit_code=2,
+            suggestion="Pass a media file, not a directory.",
+        )
+
+
+def _validate_out(out: Path, media: Path) -> None:
+    """The dub must never overwrite its own input: ffmpeg would read and write the
+    same file concurrently, corrupting it."""
+    if out.resolve() == media.resolve():
+        raise UsageError(
+            "--out would overwrite the input file.",
+            suggestion="Pick a different output path.",
+        )
+
+
+def _require_ffmpeg() -> str:
+    """The ffmpeg executable; checked before any (billed) transcription work."""
+    path = shutil.which("ffmpeg")
+    if path is None:
+        raise CLIError(
+            "ffmpeg is required to write the dubbed file, but it isn't on PATH.",
+            error_type="missing_dependency",
+            suggestion="Install it (brew install ffmpeg / apt install ffmpeg) and re-run.",
+        )
+    return path
+
+
+def _run_ffmpeg(args: list[str]) -> subprocess.CompletedProcess[str]:
+    """Boundary seam for tests: one ffmpeg invocation, output captured."""
+    return subprocess.run(args, capture_output=True, text=True, check=False)
+
+
+def _mux(ffmpeg: str, media: Path, track: Path, out: Path) -> None:
+    """Swap ``track`` in as the audio of ``media``, writing ``out``.
+
+    ``-map 0:v?`` carries the video stream over untouched (``-c:v copy``) when
+    there is one, and maps nothing for audio-only input, so the same invocation
+    dubs both a video and a plain audio file. ``-y`` makes a re-run overwrite
+    its own earlier output instead of stalling on ffmpeg's prompt.
+    """
+    result = _run_ffmpeg(
+        [
+            ffmpeg,
+            "-hide_banner",
+            "-loglevel",
+            "error",
+            "-y",
+            "-i",
+            str(media),
+            "-i",
+            str(track),
+            "-map",
+            "0:v?",
+            "-map",
+            "1:a",
+            "-c:v",
+            "copy",
+            str(out),
+        ]
+    )
+    if result.returncode != 0:
+        detail = result.stderr.strip().splitlines()
+        reason = detail[-1] if detail else f"ffmpeg exited with code {result.returncode}"
+        raise CLIError(
+            f"Could not write {out.name}: {reason}",
+            error_type="dub_failed",
+            suggestion="Check that the input is a readable audio/video file.",
+        )
+
+
+@dataclass(frozen=True)
+class _Utterance:
+    """One diarized utterance reduced to the fields the dub pipeline needs."""
+
+    start_ms: int
+    speaker: str
+    text: str
+
+
+def _resolve_transcript(
+    opts: DubOptions, media: Path, state: AppState, *, json_mode: bool
+) -> object:
+    """The diarized transcript driving the dub: fetched by id, or made fresh from
+    the (already local) media file — always with speaker labels, so each speaker
+    can keep a distinct voice in the dub."""
+    if opts.transcript_id is not None:
+        return client.get_transcript(state.resolve_api_key(), opts.transcript_id)
+    config = aai.TranscriptionConfig(speaker_labels=True)
+    api_key = state.resolve_api_key()
+    with output.status("Transcribing for dubbing…", json_mode=json_mode, quiet=state.quiet):
+        return client.transcribe(api_key, str(media), config=config)
+
+
+def _utterances_of(transcript: object) -> list[_Utterance]:
+    """The transcript's spoken utterances, with empty-text ones dropped."""
+    utterances = [
+        _Utterance(
+            start_ms=jsonshape.as_int(getattr(item, "start", 0)),
+            speaker=str(getattr(item, "speaker", None) or "A"),
+            text=str(getattr(item, "text", "") or "").strip(),
+        )
+        for item in jsonshape.object_list(getattr(transcript, "utterances", None))
+    ]
+    spoken = [utterance for utterance in utterances if utterance.text]
+    if not spoken:
+        transcript_id = str(getattr(transcript, "id", ""))
+        raise CLIError(
+            f"Transcript {transcript_id} has no utterances to dub.",
+            error_type="no_utterances",
+            exit_code=2,
+            suggestion=(
+                "Dubbing needs a diarized transcript. Pass a --transcript-id created "
+                "with --speaker-labels, or drop -t to let dub transcribe the file."
+            ),
+        )
+    return spoken
+
+
+def _total_seconds(transcript: object) -> float | None:
+    """The source duration in seconds (used to pad the dubbed track's tail)."""
+    duration = getattr(transcript, "audio_duration", None)
+    if isinstance(duration, int | float) and not isinstance(duration, bool):
+        return float(duration)
+    return None
+
+
+def _translate(
+    api_key: str,
+    utterances: list[_Utterance],
+    language: str,
+    opts: DubOptions,
+    *,
+    json_mode: bool,
+    quiet: bool,
+) -> list[str]:
+    """Translate each utterance to ``language`` with the LLM Gateway, in order.
+
+    One call per utterance keeps the translation↔timestamp alignment exact —
+    no reply-parsing step that could shift a line against its window.
+    """
+    system = TRANSLATION_SYSTEM_TEMPLATE.format(language=language)
+    translating = f"Translating {len(utterances)} utterance(s) to {language} with {opts.model}…"
+    translations: list[str] = []
+    with output.status(translating, json_mode=json_mode, quiet=quiet):
+        for index, utterance in enumerate(utterances, 1):
+            messages = gateway.build_messages(utterance.text, system=system)
+            response = gateway.complete(
+                api_key, model=opts.model, messages=messages, max_tokens=opts.max_tokens
+            )
+            translated = gateway.content_of(response).strip()
+            if not translated:
+                raise APIError(
+                    f"The model returned an empty translation for utterance {index} "
+                    f"({utterance.text[:50]!r})."
+                )
+            translations.append(translated)
+    return translations
+
+
+def _synthesize(
+    api_key: str,
+    segments: list[tuple[str, str]],
+    language: str,
+    *,
+    json_mode: bool,
+    quiet: bool,
+) -> tuple[list[bytes], int]:
+    """Synthesize each ``(voice, text)`` segment; returns the PCM list + sample rate.
+
+    Every segment must come back at one rate — the timeline math places segments
+    by sample position, so a mid-run rate change would silently shift timing.
+    """
+    synthesizing = f"Synthesizing {len(segments)} segment(s)…"
+    with output.status(synthesizing, json_mode=json_mode, quiet=quiet):
+        results = [
+            session.synthesize(
+                api_key,
+                SpeakConfig(text=text, voice=voice, language=language),
+                on_warning=lambda m: output.emit_warning(m, json_mode=json_mode),
+            )
+            for voice, text in segments
+        ]
+    rates = {result.sample_rate for result in results}
+    if len(rates) > 1:
+        raise APIError(f"TTS service returned mixed sample rates ({sorted(rates)}).")
+    # `segments` is never empty (_utterances_of raised otherwise), so results[0] exists.
+    return [result.pcm for result in results], results[0].sample_rate
+
+
+def _assign_voices(
+    utterances: list[_Utterance],
+    translations: list[str],
+    voice_values: list[str],
+) -> tuple[list[tuple[str, str]], dict[str, str]]:
+    """Resolve each translated utterance to ``(voice, text)`` plus the speaker→voice map.
+
+    A bare ``--voice`` dubs every speaker with that one voice; ``SPEAKER=VOICE``
+    mappings pin individual speakers; everyone else takes the rotation in
+    first-appearance order (the same rules as `assembly speak`).
+    """
+    bare_voice, overrides = dialogue.parse_voice_overrides(voice_values)
+    rotation = (bare_voice,) if bare_voice is not None else dialogue.DEFAULT_VOICE_ROTATION
+    segments = [
+        dialogue.Segment(utterance.speaker, translated)
+        # strict=True is an invariant guard only: _translate returns exactly one
+        # translation per utterance, so the lengths can never differ.
+        for utterance, translated in zip(utterances, translations, strict=True)  # pragma: no mutate
+    ]
+    return dialogue.assign_voices(segments, rotation, overrides)
+
+
+def run_dub(opts: DubOptions, state: AppState, *, json_mode: bool) -> None:
+    """Execute one `assembly dub` invocation from already-parsed flags."""
+    language = resolve_language(opts.language)
+    _require_sandbox()
+    media = Path(opts.media)
+    _validate_media(media)
+    out = opts.out if opts.out is not None else default_out_path(media, language)
+    _validate_out(out, media)
+    ffmpeg = _require_ffmpeg()
+
+    transcript = _resolve_transcript(opts, media, state, json_mode=json_mode)
+    transcript_id = str(getattr(transcript, "id", ""))
+    utterances = _utterances_of(transcript)
+    api_key = state.resolve_api_key()
+    translations = _translate(
+        api_key, utterances, language, opts, json_mode=json_mode, quiet=state.quiet
+    )
+    resolved, speakers = _assign_voices(utterances, translations, opts.voice)
+    pcm_segments, sample_rate = _synthesize(
+        api_key, resolved, language, json_mode=json_mode, quiet=state.quiet
+    )
+
+    # strict=True is an invariant guard only: _synthesize returns one PCM per segment.
+    starts = (u.start_ms for u in utterances)
+    placed = list(zip(starts, pcm_segments, strict=True))  # pragma: no mutate
+    track = assemble_timeline(placed, sample_rate, _total_seconds(transcript))
+    with tempfile.TemporaryDirectory(prefix="aai-dub-") as tmp:
+        wav = Path(tmp) / "dub.wav"
+        audio.write_wav(wav, track, sample_rate)
+        with output.status("Writing the dubbed file…", json_mode=json_mode, quiet=state.quiet):
+            _mux(ffmpeg, media, wav, out)
+
+    duration = round(_pcm_seconds(track, sample_rate), 3)
+    voices = ", ".join(f"{speaker}={voice}" for speaker, voice in speakers.items())
+    payload: dict[str, object] = {
+        "source": opts.media,
+        "out": str(out),
+        "language": language,
+        "transcript_id": transcript_id,
+        "utterances": len(utterances),
+        "speakers": speakers,
+        "sample_rate": sample_rate,
+        "audio_duration_seconds": duration,
+    }
+    output.emit(
+        payload,
+        lambda _: output.success(
+            f"{escape(str(out))}  dubbed to {language} ({len(utterances)} utterances, {voices})"
+        ),
+        json_mode=json_mode,
+    )
diff --git a/aai_cli/main.py b/aai_cli/main.py
index e3d9def5..001c2434 100644
--- a/aai_cli/main.py
+++ b/aai_cli/main.py
@@ -30,6 +30,7 @@
     dev,
     dictate,
     doctor,
+    dub,
     evaluate,
     init,
     keys,
@@ -72,6 +73,7 @@
     "speak",
     "llm",
     "clip",
+    "dub",
     "eval",
     "webhooks",
     # Setup & Tools — get set up & maintain
@@ -412,6 +414,7 @@ def main(
 app.add_typer(speak.app)
 app.add_typer(llm.app)
 app.add_typer(clip.app)
+app.add_typer(dub.app)
 app.add_typer(evaluate.app)  # eval
 app.add_typer(account.app)  # balance, usage, limits
 app.add_typer(login.app)  # login, logout, whoami
diff --git a/tests/__snapshots__/test_snapshots_help_run.ambr b/tests/__snapshots__/test_snapshots_help_run.ambr
index 98c8b63d..b4476fdd 100644
--- a/tests/__snapshots__/test_snapshots_help_run.ambr
+++ b/tests/__snapshots__/test_snapshots_help_run.ambr
@@ -212,6 +212,68 @@
   
   
   
+  '''
+# ---
+# name: test_command_help_matches_snapshot[dub]
+  '''
+  
+   Usage: assembly dub [OPTIONS] MEDIA
+  
+   Dub a video or audio file into another language (sandbox only).
+  
+   The whole platform in one command: the media is transcribed with diarized
+   utterance timestamps, each utterance is translated by an LLM Gateway model,
+   the translations are synthesized with streaming TTS (one voice per
+   speaker), and ffmpeg lays the new audio over the original — video copied
+   untouched. Streaming TTS only exists in the sandbox today — run it as
+   'assembly --sandbox dub' (--sandbox goes before the subcommand). Requires
+   ffmpeg.
+  
+  ╭─ Arguments ──────────────────────────────────────────────────────────────────╮
+  │ *    media      TEXT  Local audio/video file to dub (the video stream is     │
+  │                       copied untouched).                                     │
+  │                       [required]                                             │
+  ╰──────────────────────────────────────────────────────────────────────────────╯
+  ╭─ Options ────────────────────────────────────────────────────────────────────╮
+  │ *  --lang           -l      TEXT  Target language: an ISO code (de, fr, es,  │
+  │                                   …) or a language name (German).            │
+  │                                   [required]                                 │
+  │    --transcript-id  -t      TEXT  Reuse an existing diarized transcript of   │
+  │                                   this media instead of transcribing it      │
+  │                                   again.                                     │
+  │    --voice                  TEXT  Voice id for every speaker (e.g. jane,     │
+  │                                   michael, paul), or SPEAKER=VOICE to pin a  │
+  │                                   diarized speaker (repeatable, e.g. --voice │
+  │                                   A=jane).                                   │
+  │    --out                    PATH  Output file (default:                      │
+  │                                   <name>.dub.<lang><ext> next to the input). │
+  │    --json           -j            Emit JSON describing the dubbed file.      │
+  │    --help                         Show this message and exit.                │
+  ╰──────────────────────────────────────────────────────────────────────────────╯
+  ╭─ LLM Transform ──────────────────────────────────────────────────────────────╮
+  │ --model             TEXT     LLM Gateway model that translates the           │
+  │                              utterances.                                     │
+  │                              [default: claude-haiku-4-5-20251001]            │
+  │ --max-tokens        INTEGER  Max tokens per utterance translation.           │
+  │                              [default: 1000]                                 │
+  ╰──────────────────────────────────────────────────────────────────────────────╯
+  
+   Examples
+   Dub a talk into German (sandbox only)
+   $ assembly --sandbox dub talk.mp4 --lang de
+   Use a language name instead of a code
+   $ assembly --sandbox dub talk.mp4 -l Spanish
+   Dub every speaker with one voice
+   $ assembly --sandbox dub talk.mp4 -l fr --voice paul
+   Pin a voice per diarized speaker
+   $ assembly --sandbox dub panel.mp4 -l de --voice A=jane --voice B=paul
+   Reuse a finished transcript instead of re-transcribing
+   $ assembly --sandbox dub talk.mp4 -l de -t TRANSCRIPT_ID
+   Choose the output file
+   $ assembly --sandbox dub talk.mp4 -l de --out talk-german.mp4
+  
+  
+  
   '''
 # ---
 # name: test_command_help_matches_snapshot[eval]
diff --git a/tests/_dub_helpers.py b/tests/_dub_helpers.py
new file mode 100644
index 00000000..d1670d9e
--- /dev/null
+++ b/tests/_dub_helpers.py
@@ -0,0 +1,129 @@
+"""Shared builders for the `assembly dub` test modules.
+
+The dub suite is split across test_dub_exec.py (pure helpers + validation),
+test_dub_pipeline.py (the faked transcribe → translate → synthesize → mux
+runs), and test_dub_command.py (argv parsing); the option defaults, transcript
+fakes, and boundary recorders they share live here.
+"""
+
+from __future__ import annotations
+
+import re
+import subprocess
+import wave
+from pathlib import Path
+from types import SimpleNamespace
+
+import pytest
+
+from aai_cli import client, config, dub_exec, llm
+from aai_cli.dub_exec import DubOptions
+from aai_cli.tts import session
+from aai_cli.tts.session import SpeakResult
+
+# The CLI's flag defaults, as data. Tests override per-case with dataclasses.replace.
+DEFAULTS = DubOptions(
+    media="talk.mp4",
+    language="de",
+    transcript_id=None,
+    voice=[],
+    model=llm.DEFAULT_MODEL,
+    max_tokens=llm.DEFAULT_MAX_TOKENS,
+    out=None,
+)
+
+SAMPLE_RATE = 100  # tiny rate keeps the timeline byte math exact and readable
+
+_ANSI_SGR = re.compile(r"\x1b\[[0-9;]*m")
+
+
+def plain(text: str) -> str:
+    """Strip SGR color codes (CI forces color on, splitting flags like --lang
+    with style sequences) for substring assertions."""
+    return _ANSI_SGR.sub("", text)
+
+
+def utterance(start, speaker, text):
+    return SimpleNamespace(start=start, end=None, speaker=speaker, text=text)
+
+
+def fake_transcript(utterances, *, audio_duration=5):
+    return SimpleNamespace(id="tr_dub", utterances=utterances, audio_duration=audio_duration)
+
+
+def completion(text):
+    """The slice of an OpenAI ChatCompletion that gateway.content_of reads."""
+    return SimpleNamespace(choices=[SimpleNamespace(message=SimpleNamespace(content=text))])
+
+
+def write_media(tmp_path: Path) -> Path:
+    path = tmp_path / "talk.mp4"
+    path.write_bytes(b"\x00fake-media")
+    return path
+
+
+def enable_sandbox(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.setattr(session, "is_available", lambda: True)
+
+
+def patch_api_key(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.setattr(config, "resolve_api_key", lambda **_: "test-key")
+
+
+def record_transcribe(monkeypatch: pytest.MonkeyPatch) -> dict[str, object]:
+    """Record the transcription request and return a two-speaker transcript."""
+    calls: dict[str, object] = {}
+
+    def _fake(api_key, audio, *, config):
+        calls["api_key"] = api_key
+        calls["audio"] = audio
+        calls["config"] = config
+        return fake_transcript([utterance(1000, "A", "Hello."), utterance(3000, "B", "World.")])
+
+    monkeypatch.setattr(client, "transcribe", _fake)
+    return calls
+
+
+def record_translate(monkeypatch: pytest.MonkeyPatch) -> list[dict[str, object]]:
+    """Record each gateway call and reply with a marked 'DE:<text>' translation."""
+    calls: list[dict[str, object]] = []
+
+    def _fake(api_key, *, model, messages, max_tokens=llm.DEFAULT_MAX_TOKENS, transcript_id=None):
+        calls.append({"model": model, "messages": messages, "max_tokens": max_tokens})
+        return completion(f"DE:{messages[-1]['content']}")
+
+    monkeypatch.setattr(llm, "complete", _fake)
+    return calls
+
+
+def record_synthesize(monkeypatch: pytest.MonkeyPatch) -> list[object]:
+    """Record each TTS request; segment i comes back as 100 bytes of 0xA1+i."""
+    calls: list[object] = []
+
+    def _fake(api_key, cfg, *, connect=None, on_warning=None):
+        calls.append(cfg)
+        pcm = bytes([0xA0 + len(calls)]) * 100
+        return SpeakResult(pcm=pcm, sample_rate=SAMPLE_RATE, audio_duration_seconds=0.5)
+
+    monkeypatch.setattr(session, "synthesize", _fake)
+    return calls
+
+
+def record_ffmpeg(monkeypatch: pytest.MonkeyPatch) -> dict[str, object]:
+    """Resolve ffmpeg and record the invocation plus the WAV it was handed.
+
+    The temp WAV is deleted right after the mux, so its contents are captured
+    here, while the file still exists.
+    """
+    monkeypatch.setattr("shutil.which", lambda name: f"/usr/bin/{name}")
+    recorded: dict[str, object] = {}
+
+    def run(args: list[str]) -> subprocess.CompletedProcess[str]:
+        recorded["args"] = args
+        with wave.open(args[8], "rb") as wav:  # args[8] is the dub.wav input
+            recorded["wav_params"] = wav.getparams()
+            recorded["wav_frames"] = wav.readframes(wav.getnframes())
+        return subprocess.CompletedProcess(args=args, returncode=0, stdout="", stderr="")
+
+    monkeypatch.setattr(dub_exec, "_run_ffmpeg", run)
+    return recorded
diff --git a/tests/_snapshot_surface.py b/tests/_snapshot_surface.py
index 8d0c26b9..447a63d1 100644
--- a/tests/_snapshot_surface.py
+++ b/tests/_snapshot_surface.py
@@ -24,7 +24,18 @@
 HELP_GROUPS: dict[str, frozenset[str]] = {
     "build": frozenset({"onboard", "init", "dev", "share", "deploy"}),
     "run": frozenset(
-        {"transcribe", "stream", "dictate", "agent", "speak", "llm", "clip", "eval", "webhooks"}
+        {
+            "transcribe",
+            "stream",
+            "dictate",
+            "agent",
+            "speak",
+            "llm",
+            "clip",
+            "dub",
+            "eval",
+            "webhooks",
+        }
     ),
     "tools": frozenset({"doctor", "setup", "telemetry", "_update-check"}),
     "history": frozenset({"transcripts", "sessions"}),
diff --git a/tests/test_dub_command.py b/tests/test_dub_command.py
new file mode 100644
index 00000000..b30cb784
--- /dev/null
+++ b/tests/test_dub_command.py
@@ -0,0 +1,97 @@
+"""Argv parsing tests for `assembly dub` (aai_cli/commands/dub.py): the command
+module only builds a DubOptions and hands it to dub_exec.run_dub, so these
+tests pin the flag -> options mapping and the end-to-end sandbox guard; the
+pipeline itself is covered in test_dub_exec.py."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pytest
+from typer.testing import CliRunner
+
+from aai_cli import dub_exec, llm
+from aai_cli.main import app
+from tests._dub_helpers import plain
+
+runner = CliRunner()
+
+
+@pytest.fixture
+def captured_run(monkeypatch: pytest.MonkeyPatch):
+    """Swap dub_exec.run_dub for a recorder; returns the dict it fills (opts, json_mode)."""
+    seen: dict[str, object] = {}
+
+    def fake_run(opts, state, *, json_mode):  # state is accepted but not recorded
+        seen["opts"] = opts
+        seen["json_mode"] = json_mode
+
+    monkeypatch.setattr(dub_exec, "run_dub", fake_run)
+    return seen
+
+
+def test_lang_is_required():
+    result = runner.invoke(app, ["dub", "talk.mp4"])
+    assert result.exit_code == 2
+    assert "--lang" in plain(result.output)
+
+
+def test_production_env_is_rejected_with_sandbox_hint():
+    result = runner.invoke(app, ["dub", "talk.mp4", "--lang", "de"])  # default = production
+    assert result.exit_code == 2
+    output = plain(result.output)
+    assert "only available in the sandbox" in output
+    # The suggestion spells out the exact corrected invocation: --sandbox is a root
+    # flag, so it must go before the command, not after it.
+    assert "Re-run as: assembly --sandbox dub" in output
+
+
+def test_defaults_map_to_options(captured_run):
+    result = runner.invoke(app, ["dub", "talk.mp4", "--lang", "de"])
+    assert result.exit_code == 0  # run_dub is faked, so production is not rejected
+    assert captured_run["json_mode"] is False
+    assert captured_run["opts"] == dub_exec.DubOptions(
+        media="talk.mp4",
+        language="de",
+        transcript_id=None,
+        voice=[],
+        model=llm.DEFAULT_MODEL,
+        max_tokens=llm.DEFAULT_MAX_TOKENS,
+        out=None,
+    )
+
+
+def test_every_flag_maps_to_options(captured_run):
+    result = runner.invoke(
+        app,
+        [
+            "dub",
+            "talk.mp4",
+            "--lang",
+            "German",
+            "-t",
+            "tr_1",
+            "--voice",
+            "A=jane",
+            "--voice",
+            "paul",
+            "--model",
+            "gpt-5",
+            "--max-tokens",
+            "7",
+            "--out",
+            "dubbed.mp4",
+            "--json",
+        ],
+    )
+    assert result.exit_code == 0
+    assert captured_run["json_mode"] is True
+    assert captured_run["opts"] == dub_exec.DubOptions(
+        media="talk.mp4",
+        language="German",
+        transcript_id="tr_1",
+        voice=["A=jane", "paul"],
+        model="gpt-5",
+        max_tokens=7,
+        out=Path("dubbed.mp4"),
+    )
diff --git a/tests/test_dub_exec.py b/tests/test_dub_exec.py
new file mode 100644
index 00000000..99cf99fc
--- /dev/null
+++ b/tests/test_dub_exec.py
@@ -0,0 +1,238 @@
+"""Direct tests of the `assembly dub` options/run seam (aai_cli/dub_exec.py):
+the pure helpers (language resolution, output naming, timeline assembly,
+utterance extraction) and run_dub's validation order. Constructed-options
+tests (dataclasses.replace off the shared defaults) avoid any argv
+round-trip. The faked pipeline runs live in test_dub_pipeline.py; argv
+parsing in test_dub_command.py."""
+
+from __future__ import annotations
+
+import dataclasses
+import sys
+from pathlib import Path
+from types import SimpleNamespace
+
+import pytest
+
+from aai_cli import dub_exec
+from aai_cli.context import AppState
+from aai_cli.errors import CLIError, UsageError
+from tests._dub_helpers import (
+    DEFAULTS,
+    enable_sandbox,
+    fake_transcript,
+    patch_api_key,
+    utterance,
+    write_media,
+)
+
+
+@pytest.fixture
+def media(tmp_path: Path) -> Path:
+    return write_media(tmp_path)
+
+
+@pytest.fixture
+def sandbox(monkeypatch: pytest.MonkeyPatch):
+    enable_sandbox(monkeypatch)
+
+
+@pytest.fixture(autouse=True)
+def _fake_key(monkeypatch: pytest.MonkeyPatch):
+    patch_api_key(monkeypatch)
+
+
+# --- records and pure helpers --------------------------------------------------
+
+
+@pytest.mark.parametrize(
+    "instance",
+    [DEFAULTS, dub_exec._Utterance(start_ms=0, speaker="A", text="hi")],
+    ids=["options", "utterance"],
+)
+def test_records_are_immutable(instance):
+    field_name = dataclasses.fields(instance)[0].name
+    with pytest.raises(dataclasses.FrozenInstanceError):
+        setattr(instance, field_name, None)
+
+
+def test_language_names_map_codes_to_names():
+    # An independent copy of the expected table: a silently edited entry in the
+    # shipped map must fail here, not just round-trip through itself.
+    assert dub_exec.LANGUAGE_NAMES == {
+        "ar": "Arabic",
+        "de": "German",
+        "en": "English",
+        "es": "Spanish",
+        "fr": "French",
+        "hi": "Hindi",
+        "it": "Italian",
+        "ja": "Japanese",
+        "ko": "Korean",
+        "nl": "Dutch",
+        "pl": "Polish",
+        "pt": "Portuguese",
+        "ru": "Russian",
+        "tr": "Turkish",
+        "vi": "Vietnamese",
+        "zh": "Chinese",
+    }
+
+
+@pytest.mark.parametrize(
+    ("value", "expected"),
+    [
+        ("de", "German"),
+        (" DE ", "German"),  # codes are trimmed and case-insensitive
+        ("German", "German"),  # a full name passes through
+        (" Klingon ", "Klingon"),  # unlisted languages pass through, trimmed
+    ],
+)
+def test_resolve_language(value, expected):
+    assert dub_exec.resolve_language(value) == expected
+
+
+def test_resolve_language_rejects_blank():
+    with pytest.raises(UsageError) as exc:
+        dub_exec.resolve_language("   ")
+    assert "--lang needs a language" in exc.value.message
+    assert "--lang de" in (exc.value.suggestion or "")
+
+
+@pytest.mark.parametrize(
+    ("language", "expected"),
+    [
+        ("German", "talk.dub.german.mp4"),
+        ("Brazilian Portuguese", "talk.dub.brazilian-portuguese.mp4"),
+    ],
+)
+def test_default_out_path(language, expected):
+    out = dub_exec.default_out_path(Path("/x/talk.mp4"), language)
+    assert out == Path("/x") / expected
+
+
+def test_assemble_timeline_fills_gaps_and_pads_tail():
+    # rate 1000: one second of 16-bit mono PCM is 2000 bytes.
+    track = dub_exec.assemble_timeline([(500, b"\x01\x02")], 1000, total_seconds=1.0)
+    # 0.5 s leading silence, the segment, then a 0.499 s tail pad to 1.0 s.
+    assert track == b"\x00" * 1000 + b"\x01\x02" + b"\x00" * 998
+
+
+def test_assemble_timeline_overlap_appends_without_silence():
+    # The first segment runs to 0.1 s; the second "starts" at 0.05 s, so it is
+    # appended immediately (the dub drifts) rather than overlapping or crashing.
+    placed = [(0, b"\x01" * 200), (50, b"\x02\x02")]
+    track = dub_exec.assemble_timeline(placed, 1000, total_seconds=None)
+    assert track == b"\x01" * 200 + b"\x02\x02"
+
+
+def test_assemble_timeline_skips_tail_when_track_is_long_enough():
+    track = dub_exec.assemble_timeline([(0, b"\x01" * 200)], 1000, total_seconds=0.05)
+    assert track == b"\x01" * 200
+
+
+def test_utterances_of_defaults_and_filtering():
+    transcript = fake_transcript(
+        [
+            SimpleNamespace(speaker=None, text="Hi"),  # no start attr -> 0; speaker None -> "A"
+            utterance(2000, "B", None),  # no text -> dropped
+            utterance(3000, "B", "   "),  # blank text -> dropped
+            utterance(4000, "C", "  Bye  "),  # kept, with the text stripped
+        ]
+    )
+    assert dub_exec._utterances_of(transcript) == [
+        dub_exec._Utterance(start_ms=0, speaker="A", text="Hi"),
+        dub_exec._Utterance(start_ms=4000, speaker="C", text="Bye"),
+    ]
+
+
+@pytest.mark.parametrize(
+    "utterances",
+    [None, [], [utterance(0, "A", "")]],
+    ids=["missing", "empty", "all-blank"],
+)
+def test_utterances_of_requires_spoken_utterances(utterances):
+    with pytest.raises(CLIError) as exc:
+        dub_exec._utterances_of(SimpleNamespace(id="tr_x", utterances=utterances))
+    assert exc.value.error_type == "no_utterances"
+    assert exc.value.exit_code == 2
+    assert "Transcript tr_x has no utterances to dub" in exc.value.message
+    assert "--speaker-labels" in (exc.value.suggestion or "")
+
+
+@pytest.mark.parametrize(
+    ("duration", "expected"),
+    [(12, 12.0), (4.5, 4.5), (None, None), (True, None), ("90", None)],
+    ids=["int", "float", "none", "bool", "str"],
+)
+def test_total_seconds(duration, expected):
+    transcript = SimpleNamespace(audio_duration=duration)
+    assert dub_exec._total_seconds(transcript) == expected
+
+
+def test_run_ffmpeg_captures_output_and_does_not_raise():
+    # The real boundary (not the fake): output is captured as text and a non-zero
+    # exit must not raise — _mux turns the exit code into a CLIError itself.
+    result = dub_exec._run_ffmpeg(
+        [
+            sys.executable,
+            "-c",
+            "import sys; print('out'); print('err', file=sys.stderr); sys.exit(3)",
+        ]
+    )
+    assert result.returncode == 3
+    assert result.stdout == "out\n"
+    assert result.stderr == "err\n"
+
+
+# --- validation order (cheap local checks before any credential or network) ----
+
+
+def test_run_dub_rejects_blank_language_first():
+    opts = dataclasses.replace(DEFAULTS, language=" ")
+    with pytest.raises(UsageError):  # not the sandbox CLIError: language wins
+        dub_exec.run_dub(opts, AppState(), json_mode=False)
+
+
+def test_run_dub_requires_sandbox():
+    # The active environment defaults to production, which has no streaming-TTS host.
+    with pytest.raises(CLIError) as exc:
+        dub_exec.run_dub(DEFAULTS, AppState(), json_mode=False)
+    assert exc.value.error_type == "unsupported_environment"
+    assert exc.value.exit_code == 2
+    assert "only available in the sandbox" in exc.value.message
+    assert "Re-run as: assembly --sandbox dub" in (exc.value.suggestion or "")
+
+
+def test_run_dub_rejects_missing_file(sandbox, tmp_path):
+    opts = dataclasses.replace(DEFAULTS, media=str(tmp_path / "nope.mp4"))
+    with pytest.raises(CLIError) as exc:
+        dub_exec.run_dub(opts, AppState(), json_mode=False)
+    assert exc.value.error_type == "file_not_found"
+    assert exc.value.exit_code == 2
+    assert "local audio/video file" in (exc.value.suggestion or "")
+
+
+def test_run_dub_rejects_directory(sandbox, tmp_path):
+    opts = dataclasses.replace(DEFAULTS, media=str(tmp_path))
+    with pytest.raises(CLIError) as exc:
+        dub_exec.run_dub(opts, AppState(), json_mode=False)
+    assert exc.value.error_type == "not_a_file"
+    assert exc.value.exit_code == 2
+    assert "not a directory" in (exc.value.suggestion or "")
+
+
+def test_run_dub_refuses_to_overwrite_the_input(sandbox, media):
+    opts = dataclasses.replace(DEFAULTS, media=str(media), out=media)
+    with pytest.raises(UsageError) as exc:
+        dub_exec.run_dub(opts, AppState(), json_mode=False)
+    assert "overwrite the input file" in exc.value.message
+
+
+def test_run_dub_requires_ffmpeg(sandbox, media, monkeypatch):
+    monkeypatch.setattr("shutil.which", lambda name: None)  # no executable resolves
+    opts = dataclasses.replace(DEFAULTS, media=str(media))
+    with pytest.raises(CLIError) as exc:
+        dub_exec.run_dub(opts, AppState(), json_mode=False)
+    assert exc.value.error_type == "missing_dependency"
+    assert "ffmpeg" in exc.value.message
diff --git a/tests/test_dub_pipeline.py b/tests/test_dub_pipeline.py
new file mode 100644
index 00000000..78990d0b
--- /dev/null
+++ b/tests/test_dub_pipeline.py
@@ -0,0 +1,281 @@
+"""Faked end-to-end runs of the `assembly dub` pipeline (aai_cli/dub_exec.py):
+the transcribe → translate → synthesize → ffmpeg mux orchestration, voice
+assignment, and the failure modes of each boundary. The LLM Gateway, streaming
+TTS, and ffmpeg are faked at the modules dub_exec calls into (`llm.complete`,
+`session.synthesize`, `client.transcribe`) and at `dub_exec._run_ffmpeg`; the
+pure helpers and validation order live in test_dub_exec.py."""
+
+from __future__ import annotations
+
+import dataclasses
+import json
+import subprocess
+from pathlib import Path
+from types import SimpleNamespace
+
+import pytest
+
+from aai_cli import client, dub_exec, llm
+from aai_cli.context import AppState
+from aai_cli.errors import APIError, CLIError
+from aai_cli.tts import session
+from aai_cli.tts.session import SpeakResult
+from tests._dub_helpers import (
+    DEFAULTS,
+    SAMPLE_RATE,
+    completion,
+    enable_sandbox,
+    fake_transcript,
+    patch_api_key,
+    plain,
+    record_ffmpeg,
+    record_synthesize,
+    record_transcribe,
+    record_translate,
+    utterance,
+    write_media,
+)
+
+
+@pytest.fixture
+def media(tmp_path: Path) -> Path:
+    return write_media(tmp_path)
+
+
+@pytest.fixture(autouse=True)
+def _sandbox_and_key(monkeypatch: pytest.MonkeyPatch):
+    enable_sandbox(monkeypatch)
+    patch_api_key(monkeypatch)
+
+
+@pytest.fixture
+def fake_transcribe(monkeypatch: pytest.MonkeyPatch):
+    return record_transcribe(monkeypatch)
+
+
+@pytest.fixture
+def fake_translate(monkeypatch: pytest.MonkeyPatch):
+    return record_translate(monkeypatch)
+
+
+@pytest.fixture
+def fake_synthesize(monkeypatch: pytest.MonkeyPatch):
+    return record_synthesize(monkeypatch)
+
+
+@pytest.fixture
+def fake_ffmpeg(monkeypatch: pytest.MonkeyPatch):
+    return record_ffmpeg(monkeypatch)
+
+
+def _run(opts, *, json_mode):
+    dub_exec.run_dub(opts, AppState(), json_mode=json_mode)
+
+
+def test_run_dub_pipeline_end_to_end(
+    media, fake_transcribe, fake_translate, fake_synthesize, fake_ffmpeg, capsys
+):
+    opts = dataclasses.replace(DEFAULTS, media=str(media))
+    _run(opts, json_mode=True)
+
+    # Transcription: the local file, diarized so speakers keep distinct voices.
+    assert fake_transcribe["audio"] == str(media)
+    assert fake_transcribe["config"].speaker_labels is True
+
+    # Translation: one gateway call per utterance, in order, with the dubbing
+    # system prompt naming the resolved language ("de" -> "German").
+    assert [c["messages"][-1]["content"] for c in fake_translate] == ["Hello.", "World."]
+    for call in fake_translate:
+        assert call["model"] == llm.DEFAULT_MODEL
+        assert call["max_tokens"] == llm.DEFAULT_MAX_TOKENS
+        system = call["messages"][0]
+        assert system["role"] == "system"
+        assert "dubbing" in system["content"]
+        assert "German" in system["content"]
+
+    # Synthesis: the translated text, rotation voices in speaker order, target language.
+    assert [(cfg.voice, cfg.text) for cfg in fake_synthesize] == [
+        ("jane", "DE:Hello."),
+        ("michael", "DE:World."),
+    ]
+    assert all(cfg.language == "German" for cfg in fake_synthesize)
+
+    # The dubbed track: silence to 1.0 s, segment 1, silence to 3.0 s, segment 2,
+    # then a tail pad out to the source's 5 s duration (rate 100 -> 200 bytes/s).
+    expected_track = b"\x00" * 200 + b"\xa1" * 100 + b"\x00" * 300 + b"\xa2" * 100 + b"\x00" * 300
+    assert fake_ffmpeg["wav_frames"] == expected_track
+    params = fake_ffmpeg["wav_params"]
+    assert (params.nchannels, params.sampwidth, params.framerate) == (1, 2, SAMPLE_RATE)
+
+    # The mux: video copied, WAV swapped in as the only audio, default out path.
+    out = media.parent / "talk.dub.german.mp4"
+    wav_path = fake_ffmpeg["args"][8]
+    assert fake_ffmpeg["args"] == [
+        "/usr/bin/ffmpeg",
+        "-hide_banner",
+        "-loglevel",
+        "error",
+        "-y",
+        "-i",
+        str(media),
+        "-i",
+        wav_path,
+        "-map",
+        "0:v?",
+        "-map",
+        "1:a",
+        "-c:v",
+        "copy",
+        str(out),
+    ]
+    assert wav_path.endswith("dub.wav")
+
+    payload = json.loads(capsys.readouterr().out)
+    assert payload == {
+        "source": str(media),
+        "out": str(out),
+        "language": "German",
+        "transcript_id": "tr_dub",
+        "utterances": 2,
+        "speakers": {"A": "jane", "B": "michael"},
+        "sample_rate": SAMPLE_RATE,
+        "audio_duration_seconds": 5.0,
+    }
+
+
+def test_run_dub_human_summary(
+    media, fake_transcribe, fake_translate, fake_synthesize, fake_ffmpeg, capsys
+):
+    # A short --out keeps the one-line summary under the 80-column console width:
+    # with the default (tmp_path-prefixed) out path, Rich would hard-wrap the line
+    # mid-word and these substring asserts would depend on where the break lands.
+    opts = dataclasses.replace(DEFAULTS, media=str(media), out=Path("dub.de.mp4"))
+    _run(opts, json_mode=False)
+    # plain(): under FORCE_COLOR (CI) Rich's repr highlighter interleaves style
+    # codes inside the line ("(2 utterances" renders with the 2 colored).
+    out = plain(capsys.readouterr().out)
+    assert "dub.de.mp4" in out
+    assert "dubbed to German" in out
+    assert "2 utterances" in out
+    assert "A=jane, B=michael" in out
+
+
+def test_bare_voice_dubs_every_speaker(
+    media, fake_transcribe, fake_translate, fake_synthesize, fake_ffmpeg
+):
+    opts = dataclasses.replace(DEFAULTS, media=str(media), voice=["paul"])
+    _run(opts, json_mode=True)
+    assert [cfg.voice for cfg in fake_synthesize] == ["paul", "paul"]
+
+
+def test_voice_overrides_pin_speakers_without_consuming_rotation(
+    media, fake_transcribe, fake_translate, fake_synthesize, fake_ffmpeg
+):
+    opts = dataclasses.replace(DEFAULTS, media=str(media), voice=["A=mary"])
+    _run(opts, json_mode=True)
+    # A is pinned; B still takes the first rotation voice (overrides don't consume slots).
+    assert [cfg.voice for cfg in fake_synthesize] == ["mary", "jane"]
+
+
+def test_transcript_id_reuses_existing_transcript(
+    media, fake_translate, fake_ffmpeg, monkeypatch, capsys
+):
+    fetched: dict[str, str] = {}
+
+    def get_transcript(api_key, transcript_id):
+        fetched["id"] = transcript_id
+        return SimpleNamespace(
+            id=transcript_id,
+            utterances=[utterance(0, "A", "Hello.")],
+            audio_duration=None,  # duration unknown -> no tail pad
+        )
+
+    monkeypatch.setattr(client, "get_transcript", get_transcript)
+    monkeypatch.setattr(
+        client,
+        "transcribe",
+        lambda *a, **k: pytest.fail("must not re-transcribe with --transcript-id"),
+    )
+    monkeypatch.setattr(
+        session,
+        "synthesize",
+        lambda api_key, cfg, **_: SpeakResult(
+            pcm=b"\xaa" * 2000, sample_rate=300, audio_duration_seconds=0.0
+        ),
+    )
+
+    opts = dataclasses.replace(DEFAULTS, media=str(media), transcript_id="tr_99")
+    _run(opts, json_mode=True)
+    assert fetched["id"] == "tr_99"
+    payload = json.loads(capsys.readouterr().out)
+    assert payload["transcript_id"] == "tr_99"
+    # 1000 samples at 300 Hz, rounded to milliseconds: 3.3333... -> 3.333.
+    assert payload["audio_duration_seconds"] == 3.333
+
+
+def test_empty_translation_is_an_api_error(media, fake_synthesize, fake_ffmpeg, monkeypatch):
+    long_text = "a" * 50 + "TAIL!"  # 55 chars; the 50-char preview must drop "TAIL!"
+    transcript = fake_transcript([utterance(0, "A", "Hello."), utterance(1000, "B", long_text)])
+    monkeypatch.setattr(client, "transcribe", lambda *a, **k: transcript)
+    replies = iter(["Hallo.", "   "])  # the second reply is whitespace-only
+    monkeypatch.setattr(llm, "complete", lambda *a, **k: completion(next(replies)))
+
+    opts = dataclasses.replace(DEFAULTS, media=str(media))
+    with pytest.raises(APIError) as exc:
+        _run(opts, json_mode=False)
+    # The 1-based index and the (50-char) text preview pin which utterance failed.
+    assert f"empty translation for utterance 2 ({'a' * 50!r})." in exc.value.message
+
+
+def test_mixed_sample_rates_are_an_api_error(
+    media, fake_transcribe, fake_translate, fake_ffmpeg, monkeypatch
+):
+    rates = iter([100, 200])  # one rate per synthesize call, so the two segments disagree
+    monkeypatch.setattr(
+        session,
+        "synthesize",
+        lambda api_key, cfg, **_: SpeakResult(
+            pcm=b"\x01\x02", sample_rate=next(rates), audio_duration_seconds=0.0
+        ),
+    )
+    opts = dataclasses.replace(DEFAULTS, media=str(media))
+    with pytest.raises(APIError) as exc:
+        _run(opts, json_mode=False)
+    assert "mixed sample rates ([100, 200])" in exc.value.message
+
+
+def test_ffmpeg_failure_reports_last_stderr_line(
+    media, fake_transcribe, fake_translate, fake_synthesize, monkeypatch
+):
+    monkeypatch.setattr("shutil.which", lambda name: "/usr/bin/ffmpeg")
+    monkeypatch.setattr(
+        dub_exec,
+        "_run_ffmpeg",
+        lambda args: subprocess.CompletedProcess(
+            args=args, returncode=1, stdout="", stderr="noise\nInvalid data found\n"
+        ),
+    )
+    opts = dataclasses.replace(DEFAULTS, media=str(media))
+    with pytest.raises(CLIError) as exc:
+        _run(opts, json_mode=False)
+    assert exc.value.error_type == "dub_failed"
+    assert "Could not write talk.dub.german.mp4" in exc.value.message
+    # The last stderr line is the reason ffmpeg gives; earlier noise is dropped.
+    assert "Invalid data found" in exc.value.message
+    assert "noise" not in exc.value.message
+    assert "readable audio/video file" in (exc.value.suggestion or "")
+
+
+def test_ffmpeg_silent_failure_reports_exit_code(
+    media, fake_transcribe, fake_translate, fake_synthesize, monkeypatch
+):
+    monkeypatch.setattr("shutil.which", lambda name: "/usr/bin/ffmpeg")
+    monkeypatch.setattr(
+        dub_exec,
+        "_run_ffmpeg",
+        lambda args: subprocess.CompletedProcess(args=args, returncode=3, stdout="", stderr=""),
+    )
+    opts = dataclasses.replace(DEFAULTS, media=str(media))
+    with pytest.raises(CLIError) as exc:
+        _run(opts, json_mode=False)
+    assert "ffmpeg exited with code 3" in exc.value.message
diff --git a/tests/test_smoke.py b/tests/test_smoke.py
index 48e5bc79..bc16fee1 100644
--- a/tests/test_smoke.py
+++ b/tests/test_smoke.py
@@ -158,6 +158,7 @@ def test_help_lists_commands_in_workflow_order():
         "speak",
         "llm",
         "clip",
+        "dub",
         "eval",
         "webhooks",
         # Setup & Tools