diff --git a/aai_cli/commands/dub.py b/aai_cli/commands/dub.py index d7ba6418..9db04c36 100644 --- a/aai_cli/commands/dub.py +++ b/aai_cli/commands/dub.py @@ -22,6 +22,11 @@ "Dub the full video from YouTube", 'assembly --sandbox dub "https://youtube.com/watch?v=ID" -l de --video', ), + ( + "Dub only the first 15 minutes of a YouTube video", + 'assembly --sandbox dub "https://youtube.com/watch?v=ID" -l de --video ' + '--download-sections "*0:00-15:00"', + ), ( "Dub every speaker with one voice", "assembly --sandbox dub talk.mp4 -l fr --voice paul", @@ -90,6 +95,13 @@ def dub( help="Download the full video (not just the audio track) for a URL source, " "so the dub keeps the picture. Local files keep their video already.", ), + download_sections: list[str] = typer.Option( + [], + "--download-sections", + help="For a URL source, download (and dub) only part of it (yt-dlp " + '"--download-sections" syntax, e.g. "*0:00-15:00" for the first fifteen ' + "minutes; repeatable).", + ), json_out: bool = options.json_option("Emit JSON describing the dubbed file."), ) -> None: """Dub a video or audio file into another language (sandbox only). @@ -99,7 +111,8 @@ def dub( the translations are synthesized with streaming TTS (one voice per speaker), and ffmpeg lays the new audio over the original — video copied untouched. A YouTube/media-page URL is downloaded first (audio only, or - the full video with --video). Streaming TTS only exists in the sandbox + the full video with --video; --download-sections fetches and dubs only a + time slice of it). Streaming TTS only exists in the sandbox today — run it as 'assembly --sandbox dub' (--sandbox goes before the subcommand). Requires ffmpeg. """ @@ -112,6 +125,7 @@ def dub( max_tokens=max_tokens, out=out, video=video, + download_sections=download_sections, ) run_command( ctx, diff --git a/aai_cli/dub_exec.py b/aai_cli/dub_exec.py index d59349ee..5440fcc0 100644 --- a/aai_cli/dub_exec.py +++ b/aai_cli/dub_exec.py @@ -82,6 +82,7 @@ class DubOptions: max_tokens: int out: Path | None video: bool + download_sections: list[str] def resolve_language(value: str) -> str: @@ -377,16 +378,23 @@ def run_dub(opts: DubOptions, state: AppState, *, json_mode: bool) -> None: language = resolve_language(opts.language) _require_sandbox() youtube.validate_video_flag(opts.media, video=opts.video) + youtube.validate_sections_flag(opts.media, opts.download_sections) if youtube.is_downloadable_url(opts.media): # A media-page URL (YouTube, podcast page, …) is downloaded once — the # audio track by default, the full video with --video so the dub keeps - # the picture — and dubbed locally. ffmpeg is checked before the - # download so a missing dependency fails before any fetch. + # the picture, only the --download-sections slices when given — and + # dubbed locally. ffmpeg is checked before the download so a missing + # dependency fails before any fetch. ffmpeg = _require_ffmpeg() downloading = "Downloading video…" if opts.video else "Downloading audio…" with tempfile.TemporaryDirectory(prefix="aai-dub-src-") as td: with output.status(downloading, json_mode=json_mode, quiet=state.quiet): - local = youtube.download_media(opts.media, Path(td), video=opts.video) + local = youtube.download_media( + opts.media, + Path(td), + video=opts.video, + download_sections=opts.download_sections, + ) # The download dir is temporary, so the default output lands in the # current directory — never next to the temp file. out = ( diff --git a/aai_cli/youtube.py b/aai_cli/youtube.py index bfc9afeb..2b84725c 100644 --- a/aai_cli/youtube.py +++ b/aai_cli/youtube.py @@ -158,6 +158,22 @@ def validate_video_flag(source: str, *, video: bool) -> None: ) +def validate_sections_flag(source: str, sections: list[str]) -> None: + """Reject ``--download-sections`` for a source that isn't a downloadable URL. + + The specs select which parts of a media-page download yt-dlp fetches; a local + file (or a direct URL the API fetches itself) is never downloaded, so the flag + would be a silent no-op there — and a requested flag is never dropped silently. + """ + if sections and not is_downloadable_url(source): + raise UsageError( + "--download-sections only applies to a downloadable URL source " + "(YouTube, media pages, …).", + suggestion="Cut a local file first (assembly clip --range START-END), " + "then use the cut.", + ) + + def _ytdlp_options( dest_dir: Path, *, video: bool, download_sections: list[str] | None ) -> dict[str, object]: diff --git a/tests/__snapshots__/test_snapshots_help_run.ambr b/tests/__snapshots__/test_snapshots_help_run.ambr index 29b6a7ed..c0bdd962 100644 --- a/tests/__snapshots__/test_snapshots_help_run.ambr +++ b/tests/__snapshots__/test_snapshots_help_run.ambr @@ -294,7 +294,8 @@ the translations are synthesized with streaming TTS (one voice per speaker), and ffmpeg lays the new audio over the original — video copied untouched. A YouTube/media-page URL is downloaded first (audio only, or - the full video with --video). Streaming TTS only exists in the sandbox + the full video with --video; --download-sections fetches and dubs only a + time slice of it). Streaming TTS only exists in the sandbox today — run it as 'assembly --sandbox dub' (--sandbox goes before the subcommand). Requires ffmpeg. @@ -305,25 +306,31 @@ │ [required] │ ╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ Options ────────────────────────────────────────────────────────────────────╮ - │ * --lang -l TEXT Target language: an ISO code (de, fr, es, │ - │ …) or a language name (German). │ - │ [required] │ - │ --transcript-id -t TEXT Reuse an existing diarized transcript of │ - │ this media instead of transcribing it │ - │ again. │ - │ --voice TEXT Voice id for every speaker (e.g. jane, │ - │ michael, paul), or SPEAKER=VOICE to pin a │ - │ diarized speaker (repeatable, e.g. --voice │ - │ A=jane). Default: the target language's │ - │ native voice(s). │ - │ --out PATH Output file (default: │ - │ .dub. next to the input). │ - │ --video Download the full video (not just the │ - │ audio track) for a URL source, so the dub │ - │ keeps the picture. Local files keep their │ - │ video already. │ - │ --json -j Emit JSON describing the dubbed file. │ - │ --help Show this message and exit. │ + │ * --lang -l TEXT Target language: an ISO code (de, fr, │ + │ es, …) or a language name (German). │ + │ [required] │ + │ --transcript-id -t TEXT Reuse an existing diarized transcript │ + │ of this media instead of transcribing │ + │ it again. │ + │ --voice TEXT Voice id for every speaker (e.g. jane, │ + │ michael, paul), or SPEAKER=VOICE to │ + │ pin a diarized speaker (repeatable, │ + │ e.g. --voice A=jane). Default: the │ + │ target language's native voice(s). │ + │ --out PATH Output file (default: │ + │ .dub. next to the │ + │ input). │ + │ --video Download the full video (not just the │ + │ audio track) for a URL source, so the │ + │ dub keeps the picture. Local files │ + │ keep their video already. │ + │ --download-sections TEXT For a URL source, download (and dub) │ + │ only part of it (yt-dlp │ + │ "--download-sections" syntax, e.g. │ + │ "*0:00-15:00" for the first fifteen │ + │ minutes; repeatable). │ + │ --json -j Emit JSON describing the dubbed file. │ + │ --help Show this message and exit. │ ╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ LLM Transform ──────────────────────────────────────────────────────────────╮ │ --model TEXT LLM Gateway model that translates the │ @@ -340,6 +347,9 @@ $ assembly --sandbox dub talk.mp4 -l Spanish Dub the full video from YouTube $ assembly --sandbox dub "https://youtube.com/watch?v=ID" -l de --video + Dub only the first 15 minutes of a YouTube video + $ assembly --sandbox dub "https://youtube.com/watch?v=ID" -l de --video + --download-sections "*0:00-15:00" Dub every speaker with one voice $ assembly --sandbox dub talk.mp4 -l fr --voice paul Pin a voice per diarized speaker diff --git a/tests/_dub_helpers.py b/tests/_dub_helpers.py index 4ee57eee..1845be24 100644 --- a/tests/_dub_helpers.py +++ b/tests/_dub_helpers.py @@ -31,6 +31,7 @@ max_tokens=llm.DEFAULT_MAX_TOKENS, out=None, video=False, + download_sections=[], ) SAMPLE_RATE = 100 # tiny rate keeps the timeline byte math exact and readable diff --git a/tests/test_dub_command.py b/tests/test_dub_command.py index c0f418b1..644d6fac 100644 --- a/tests/test_dub_command.py +++ b/tests/test_dub_command.py @@ -59,6 +59,7 @@ def test_defaults_map_to_options(captured_run): max_tokens=llm.DEFAULT_MAX_TOKENS, out=None, video=False, + download_sections=[], ) @@ -83,6 +84,10 @@ def test_every_flag_maps_to_options(captured_run): "--out", "dubbed.mp4", "--video", + "--download-sections", + "*0:00-15:00", + "--download-sections", + "intro", "--json", ], ) @@ -97,4 +102,5 @@ def test_every_flag_maps_to_options(captured_run): max_tokens=7, out=Path("dubbed.mp4"), video=True, + download_sections=["*0:00-15:00", "intro"], ) diff --git a/tests/test_dub_pipeline.py b/tests/test_dub_pipeline.py index 66a0b244..a7216328 100644 --- a/tests/test_dub_pipeline.py +++ b/tests/test_dub_pipeline.py @@ -314,9 +314,10 @@ def fake_download(monkeypatch: pytest.MonkeyPatch): """Stand in for yt-dlp: 'download' a fixed media file into the temp dir.""" seen: dict[str, object] = {} - def download(url, dest_dir, *, video=False): + def download(url, dest_dir, *, video=False, download_sections=None): seen["url"] = url seen["video"] = video + seen["download_sections"] = download_sections path = dest_dir / ("vid123.mp4" if video else "vid123.m4a") path.write_bytes(b"\x00media") seen["path"] = path @@ -339,9 +340,11 @@ def test_run_dub_youtube_downloads_and_dubs_into_cwd( monkeypatch.chdir(tmp_path) opts = dataclasses.replace(DEFAULTS, media=YT_URL) _run(opts, json_mode=True) - # Audio-only download by default; the downloaded temp file feeds the pipeline. + # Audio-only download by default — the whole source, no section slicing — + # and the downloaded temp file feeds the pipeline. assert fake_download["url"] == YT_URL assert fake_download["video"] is False + assert fake_download["download_sections"] == [] assert fake_transcribe["audio"] == str(fake_download["path"]) # ffmpeg muxes over the downloaded file; the default output lands in the cwd, # named after the download (the temp dir is gone after the run). @@ -420,6 +423,34 @@ def test_run_dub_youtube_honors_explicit_out( assert fake_ffmpeg["args"][-1] == str(out) +def test_run_dub_youtube_download_sections_slice_the_download( + tmp_path, + fake_download, + fake_transcribe, + fake_translate, + fake_synthesize, + fake_ffmpeg, + capsys, + monkeypatch, +): + monkeypatch.chdir(tmp_path) + opts = dataclasses.replace(DEFAULTS, media=YT_URL, download_sections=["*0:00-15:00"]) + _run(opts, json_mode=True) + # The specs reach yt-dlp verbatim, so only that slice is fetched (and dubbed). + assert fake_download["download_sections"] == ["*0:00-15:00"] + + +def test_run_dub_download_sections_require_a_url_source(media, monkeypatch): + # A local file is never downloaded, so the slice specs would be a silent + # no-op — they are rejected instead, with the local-file alternative named. + monkeypatch.setattr("shutil.which", lambda name: "/usr/bin/ffmpeg") + opts = dataclasses.replace(DEFAULTS, media=str(media), download_sections=["*0:00-15:00"]) + with pytest.raises(UsageError) as exc: + _run(opts, json_mode=False) + assert "--download-sections only applies to a downloadable URL source" in exc.value.message + assert "assembly clip" in (exc.value.suggestion or "") + + def test_run_dub_video_requires_a_url_source(media, monkeypatch): # A local file's video stream is already copied into the dub, so --video # would be a silent no-op — it is rejected instead. diff --git a/tests/test_youtube.py b/tests/test_youtube.py index de05e0fa..e4cdafa7 100644 --- a/tests/test_youtube.py +++ b/tests/test_youtube.py @@ -178,6 +178,26 @@ def test_validate_video_flag_without_video_is_a_no_op(source): youtube.validate_video_flag(source, video=False) # no exception +def test_validate_sections_flag_accepts_downloadable_urls(): + youtube.validate_sections_flag("https://youtu.be/abc123", ["*0:00-15:00"]) # no exception + + +@pytest.mark.parametrize("source", ["talk.mp4", "https://example.com/episode.mp3"]) +def test_validate_sections_flag_rejects_non_downloadable_sources(source): + # The specs only shape what a media-page download fetches; a local file (or a + # direct URL the API fetches itself) is never downloaded, so the flag would be + # silently dropped — and a requested flag is never dropped silently. + with pytest.raises(UsageError) as exc: + youtube.validate_sections_flag(source, ["*0:00-15:00"]) + assert "--download-sections only applies to a downloadable URL source" in exc.value.message + assert "assembly clip" in (exc.value.suggestion or "") + + +@pytest.mark.parametrize("source", ["talk.mp4", "https://youtu.be/abc123"]) +def test_validate_sections_flag_without_sections_is_a_no_op(source): + youtube.validate_sections_flag(source, []) # no exception + + def test_download_media_routes_ytdlp_output_to_silent_logger(tmp_path, monkeypatch, capsys): # yt-dlp's default logger writes its own "ERROR: …" line to stderr before the CLI's # clean error, duplicating the message; the passed logger must swallow everything.