Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion aai_cli/commands/dub.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@
"Dub the full video from YouTube",
'assembly --sandbox dub "https://youtube.com/watch?v=ID" -l de --video',
),
(
"Dub only the first 15 minutes of a YouTube video",
'assembly --sandbox dub "https://youtube.com/watch?v=ID" -l de --video '
'--download-sections "*0:00-15:00"',
),
(
"Dub every speaker with one voice",
"assembly --sandbox dub talk.mp4 -l fr --voice paul",
Expand Down Expand Up @@ -90,6 +95,13 @@ def dub(
help="Download the full video (not just the audio track) for a URL source, "
"so the dub keeps the picture. Local files keep their video already.",
),
download_sections: list[str] = typer.Option(
[],
"--download-sections",
help="For a URL source, download (and dub) only part of it (yt-dlp "
'"--download-sections" syntax, e.g. "*0:00-15:00" for the first fifteen '
"minutes; repeatable).",
),
json_out: bool = options.json_option("Emit JSON describing the dubbed file."),
) -> None:
"""Dub a video or audio file into another language (sandbox only).
Expand All @@ -99,7 +111,8 @@ def dub(
the translations are synthesized with streaming TTS (one voice per
speaker), and ffmpeg lays the new audio over the original — video copied
untouched. A YouTube/media-page URL is downloaded first (audio only, or
the full video with --video). Streaming TTS only exists in the sandbox
the full video with --video; --download-sections fetches and dubs only a
time slice of it). Streaming TTS only exists in the sandbox
today — run it as 'assembly --sandbox dub' (--sandbox goes before the
subcommand). Requires ffmpeg.
"""
Expand All @@ -112,6 +125,7 @@ def dub(
max_tokens=max_tokens,
out=out,
video=video,
download_sections=download_sections,
)
run_command(
ctx,
Expand Down
14 changes: 11 additions & 3 deletions aai_cli/dub_exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ class DubOptions:
max_tokens: int
out: Path | None
video: bool
download_sections: list[str]


def resolve_language(value: str) -> str:
Expand Down Expand Up @@ -377,16 +378,23 @@ def run_dub(opts: DubOptions, state: AppState, *, json_mode: bool) -> None:
language = resolve_language(opts.language)
_require_sandbox()
youtube.validate_video_flag(opts.media, video=opts.video)
youtube.validate_sections_flag(opts.media, opts.download_sections)
if youtube.is_downloadable_url(opts.media):
# A media-page URL (YouTube, podcast page, …) is downloaded once — the
# audio track by default, the full video with --video so the dub keeps
# the picture — and dubbed locally. ffmpeg is checked before the
# download so a missing dependency fails before any fetch.
# the picture, only the --download-sections slices when given — and
# dubbed locally. ffmpeg is checked before the download so a missing
# dependency fails before any fetch.
ffmpeg = _require_ffmpeg()
downloading = "Downloading video…" if opts.video else "Downloading audio…"
with tempfile.TemporaryDirectory(prefix="aai-dub-src-") as td:
with output.status(downloading, json_mode=json_mode, quiet=state.quiet):
local = youtube.download_media(opts.media, Path(td), video=opts.video)
local = youtube.download_media(
opts.media,
Path(td),
video=opts.video,
download_sections=opts.download_sections,
)
# The download dir is temporary, so the default output lands in the
# current directory — never next to the temp file.
out = (
Expand Down
16 changes: 16 additions & 0 deletions aai_cli/youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,22 @@ def validate_video_flag(source: str, *, video: bool) -> None:
)


def validate_sections_flag(source: str, sections: list[str]) -> None:
    """Refuse ``--download-sections`` unless ``source`` is a downloadable URL.

    Section specs only narrow what yt-dlp fetches from a media-page download. A
    local file (or a direct URL the API fetches itself) never goes through that
    download, so the flag could only be ignored there — and instead of dropping
    a requested flag silently, the mismatch is raised as a ``UsageError``.
    """
    if not sections:
        # Nothing was requested, so any kind of source is acceptable.
        return
    if is_downloadable_url(source):
        return
    message = (
        "--download-sections only applies to a downloadable URL source "
        "(YouTube, media pages, …)."
    )
    hint = (
        "Cut a local file first (assembly clip <file> --range START-END), "
        "then use the cut."
    )
    raise UsageError(message, suggestion=hint)


def _ytdlp_options(
dest_dir: Path, *, video: bool, download_sections: list[str] | None
) -> dict[str, object]:
Expand Down
50 changes: 30 additions & 20 deletions tests/__snapshots__/test_snapshots_help_run.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,8 @@
the translations are synthesized with streaming TTS (one voice per
speaker), and ffmpeg lays the new audio over the original — video copied
untouched. A YouTube/media-page URL is downloaded first (audio only, or
the full video with --video). Streaming TTS only exists in the sandbox
the full video with --video; --download-sections fetches and dubs only a
time slice of it). Streaming TTS only exists in the sandbox
today — run it as 'assembly --sandbox dub' (--sandbox goes before the
subcommand). Requires ffmpeg.

Expand All @@ -305,25 +306,31 @@
│ [required] │
╰──────────────────────────────────────────────────────────────────────────────╯
╭─ Options ────────────────────────────────────────────────────────────────────╮
│ * --lang -l TEXT Target language: an ISO code (de, fr, es, │
│ …) or a language name (German). │
│ [required] │
│ --transcript-id -t TEXT Reuse an existing diarized transcript of │
│ this media instead of transcribing it │
│ again. │
│ --voice TEXT Voice id for every speaker (e.g. jane, │
│ michael, paul), or SPEAKER=VOICE to pin a │
│ diarized speaker (repeatable, e.g. --voice │
│ A=jane). Default: the target language's │
│ native voice(s). │
│ --out PATH Output file (default: │
│ <name>.dub.<lang><ext> next to the input). │
│ --video Download the full video (not just the │
│ audio track) for a URL source, so the dub │
│ keeps the picture. Local files keep their │
│ video already. │
│ --json -j Emit JSON describing the dubbed file. │
│ --help Show this message and exit. │
│ * --lang -l TEXT Target language: an ISO code (de, fr, │
│ es, …) or a language name (German). │
│ [required] │
│ --transcript-id -t TEXT Reuse an existing diarized transcript │
│ of this media instead of transcribing │
│ it again. │
│ --voice TEXT Voice id for every speaker (e.g. jane, │
│ michael, paul), or SPEAKER=VOICE to │
│ pin a diarized speaker (repeatable, │
│ e.g. --voice A=jane). Default: the │
│ target language's native voice(s). │
│ --out PATH Output file (default: │
│ <name>.dub.<lang><ext> next to the │
│ input). │
│ --video Download the full video (not just the │
│ audio track) for a URL source, so the │
│ dub keeps the picture. Local files │
│ keep their video already. │
│ --download-sections TEXT For a URL source, download (and dub) │
│ only part of it (yt-dlp │
│ "--download-sections" syntax, e.g. │
│ "*0:00-15:00" for the first fifteen │
│ minutes; repeatable). │
│ --json -j Emit JSON describing the dubbed file. │
│ --help Show this message and exit. │
╰──────────────────────────────────────────────────────────────────────────────╯
╭─ LLM Transform ──────────────────────────────────────────────────────────────╮
│ --model TEXT LLM Gateway model that translates the │
Expand All @@ -340,6 +347,9 @@
$ assembly --sandbox dub talk.mp4 -l Spanish
Dub the full video from YouTube
$ assembly --sandbox dub "https://youtube.com/watch?v=ID" -l de --video
Dub only the first 15 minutes of a YouTube video
$ assembly --sandbox dub "https://youtube.com/watch?v=ID" -l de --video
--download-sections "*0:00-15:00"
Dub every speaker with one voice
$ assembly --sandbox dub talk.mp4 -l fr --voice paul
Pin a voice per diarized speaker
Expand Down
1 change: 1 addition & 0 deletions tests/_dub_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
max_tokens=llm.DEFAULT_MAX_TOKENS,
out=None,
video=False,
download_sections=[],
)

SAMPLE_RATE = 100 # tiny rate keeps the timeline byte math exact and readable
Expand Down
6 changes: 6 additions & 0 deletions tests/test_dub_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def test_defaults_map_to_options(captured_run):
max_tokens=llm.DEFAULT_MAX_TOKENS,
out=None,
video=False,
download_sections=[],
)


Expand All @@ -83,6 +84,10 @@ def test_every_flag_maps_to_options(captured_run):
"--out",
"dubbed.mp4",
"--video",
"--download-sections",
"*0:00-15:00",
"--download-sections",
"intro",
"--json",
],
)
Expand All @@ -97,4 +102,5 @@ def test_every_flag_maps_to_options(captured_run):
max_tokens=7,
out=Path("dubbed.mp4"),
video=True,
download_sections=["*0:00-15:00", "intro"],
)
35 changes: 33 additions & 2 deletions tests/test_dub_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,9 +314,10 @@ def fake_download(monkeypatch: pytest.MonkeyPatch):
"""Stand in for yt-dlp: 'download' a fixed media file into the temp dir."""
seen: dict[str, object] = {}

def download(url, dest_dir, *, video=False):
def download(url, dest_dir, *, video=False, download_sections=None):
seen["url"] = url
seen["video"] = video
seen["download_sections"] = download_sections
path = dest_dir / ("vid123.mp4" if video else "vid123.m4a")
path.write_bytes(b"\x00media")
seen["path"] = path
Expand All @@ -339,9 +340,11 @@ def test_run_dub_youtube_downloads_and_dubs_into_cwd(
monkeypatch.chdir(tmp_path)
opts = dataclasses.replace(DEFAULTS, media=YT_URL)
_run(opts, json_mode=True)
# Audio-only download by default; the downloaded temp file feeds the pipeline.
# Audio-only download by default — the whole source, no section slicing —
# and the downloaded temp file feeds the pipeline.
assert fake_download["url"] == YT_URL
assert fake_download["video"] is False
assert fake_download["download_sections"] == []
assert fake_transcribe["audio"] == str(fake_download["path"])
# ffmpeg muxes over the downloaded file; the default output lands in the cwd,
# named after the download (the temp dir is gone after the run).
Expand Down Expand Up @@ -420,6 +423,34 @@ def test_run_dub_youtube_honors_explicit_out(
assert fake_ffmpeg["args"][-1] == str(out)


def test_run_dub_youtube_download_sections_slice_the_download(
    tmp_path,
    fake_download,
    fake_transcribe,
    fake_translate,
    fake_synthesize,
    fake_ffmpeg,
    capsys,
    monkeypatch,
):
    """Section specs given for a URL source are handed to the downloader as-is."""
    monkeypatch.chdir(tmp_path)
    sliced = dataclasses.replace(
        DEFAULTS,
        media=YT_URL,
        download_sections=["*0:00-15:00"],
    )
    _run(sliced, json_mode=True)
    # The stand-in downloader records what it was called with: the specs must
    # arrive unmodified, so only that slice is fetched (and dubbed).
    received = fake_download["download_sections"]
    assert received == ["*0:00-15:00"]


def test_run_dub_download_sections_require_a_url_source(media, monkeypatch):
    """``--download-sections`` on a local file is rejected rather than ignored.

    A local file is never downloaded, so the slice specs would silently do
    nothing; the run must fail with a usage error that names the local-file
    alternative.
    """
    monkeypatch.setattr("shutil.which", lambda name: "/usr/bin/ffmpeg")
    local_with_sections = dataclasses.replace(
        DEFAULTS,
        media=str(media),
        download_sections=["*0:00-15:00"],
    )
    with pytest.raises(UsageError) as raised:
        _run(local_with_sections, json_mode=False)
    error = raised.value
    assert "--download-sections only applies to a downloadable URL source" in error.message
    assert "assembly clip" in (error.suggestion or "")


def test_run_dub_video_requires_a_url_source(media, monkeypatch):
# A local file's video stream is already copied into the dub, so --video
# would be a silent no-op — it is rejected instead.
Expand Down
20 changes: 20 additions & 0 deletions tests/test_youtube.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,26 @@ def test_validate_video_flag_without_video_is_a_no_op(source):
youtube.validate_video_flag(source, video=False) # no exception


def test_validate_sections_flag_accepts_downloadable_urls():
    """A YouTube URL combined with section specs passes validation."""
    url = "https://youtu.be/abc123"
    specs = ["*0:00-15:00"]
    youtube.validate_sections_flag(url, specs)  # must not raise


@pytest.mark.parametrize("source", ["talk.mp4", "https://example.com/episode.mp3"])
def test_validate_sections_flag_rejects_non_downloadable_sources(source):
    """Section specs on a source that is never downloaded raise a usage error.

    The specs only shape what a media-page download fetches. A local file (or a
    direct URL the API fetches itself) is never downloaded, so the flag would be
    dropped silently — which a requested flag must never be.
    """
    with pytest.raises(UsageError) as raised:
        youtube.validate_sections_flag(source, ["*0:00-15:00"])
    error = raised.value
    assert "--download-sections only applies to a downloadable URL source" in error.message
    assert "assembly clip" in (error.suggestion or "")


@pytest.mark.parametrize("source", ["talk.mp4", "https://youtu.be/abc123"])
def test_validate_sections_flag_without_sections_is_a_no_op(source):
    """With no section specs requested, validation accepts either source kind."""
    no_sections: list[str] = []
    youtube.validate_sections_flag(source, no_sections)  # must not raise


def test_download_media_routes_ytdlp_output_to_silent_logger(tmp_path, monkeypatch, capsys):
# yt-dlp's default logger writes its own "ERROR: …" line to stderr before the CLI's
# clean error, duplicating the message; the passed logger must swallow everything.
Expand Down
Loading