diff --git a/README.md b/README.md index 68693ada..e3117282 100644 --- a/README.md +++ b/README.md @@ -134,16 +134,33 @@ assembly stream -o text | grep --line-buffered -i alex \ assembly transcribe --sample --llm "summarize" --llm "translate the summary to French" ``` -**Score diarization quality across several videos** — pipe a list of URLs into batch mode (`--from-stdin`), transcribe them in parallel with speaker labels, have an LLM judge each transcript, then use `--llm-reduce` to run one prompt over all the results for a single aggregate verdict: +**Score diarization quality across several videos** — pass a hand-picked list of URLs straight on the command line (batch mode), transcribe them in parallel with speaker labels, have an LLM judge each transcript, then use `--llm-reduce` to run one prompt over all the results for a single aggregate verdict: ```sh -printf '%s\n' \ +assembly transcribe \ https://youtu.be/RC5zRvqnRm8 \ https://youtu.be/u9S41Kplsbs \ https://youtu.be/mP31CdpGzUY \ -| assembly transcribe --from-stdin --concurrency 3 --speaker-labels \ - --llm 'Judge diarization quality; output JSON {speaker_count, issues, score}' \ - --llm-reduce 'Rank these videos worst-to-best and summarize the failure modes' + --concurrency 3 --speaker-labels \ + --llm 'Judge diarization quality; output JSON {speaker_count, issues, score}' \ + --llm-reduce 'Rank these videos worst-to-best and summarize the failure modes' +``` + +(Prefer to stream a generated list in? `--from-stdin` reads one source per line, so `find . -name '*.wav' | assembly transcribe --from-stdin …` works too.) 
+ +**Map-reduce a batch of talks** — extract structured notes from each video (`--llm`, a map), then reduce across all of them with a stronger model (`--model`): + +```sh +assembly transcribe \ + https://youtu.be/LCEmiRjPEtQ \ + https://youtu.be/1yvBqasHLZs \ + https://youtu.be/MiqLoAZFRSE \ + https://youtu.be/s7_NlkBwdj8 \ + https://youtu.be/60iW8FZ7MJU \ + https://youtu.be/V979Wd1gmTU \ + --concurrency 6 \ + --llm 'Extract JSON {thesis, key_claims[]}' \ + --llm-reduce 'Where do the speakers disagree?' --model claude-opus-4-7 ``` **Summarize your recent transcripts and surface the themes** — pipe a list of past transcripts into `transcripts get`, summarize each (`--llm`, a map), then reduce them all into one answer (`--llm-reduce`): @@ -285,10 +302,12 @@ assembly transcribe video.mp4 -o srt # captions assembly transcribe call.mp3 --speaker-labels --summarization --json ``` -Transcribe in batches — a directory, a glob, a piped list, or a whole podcast -RSS feed (every episode becomes one source), resumable on re-run: +Transcribe in batches — a hand-picked list, a directory, a glob, a piped list, +or a whole podcast RSS feed (every episode becomes one source), resumable on +re-run: ```sh +assembly transcribe a.mp3 b.mp3 https://youtu.be/dtp6b76pMak # a hand-picked list assembly transcribe ./recordings assembly transcribe "s3://bucket/calls/*.mp3" # needs: pip install s3fs assembly transcribe "https://feeds.simplecast.com/54nAGcIl" # every episode in the feed diff --git a/aai_cli/app/transcribe/run.py b/aai_cli/app/transcribe/run.py index e26458bf..2b190dc5 100644 --- a/aai_cli/app/transcribe/run.py +++ b/aai_cli/app/transcribe/run.py @@ -185,7 +185,7 @@ class TranscribeOptions: ``json_mode`` argument), so a test can describe an invocation without argv. 
""" - source: str | None + sources: list[str] sample: bool from_stdin: bool concurrency: int @@ -237,6 +237,17 @@ class TranscribeOptions: out: Path | None show_code: bool + @property + def single_source(self) -> str | None: + """The lone source for the single-source path, or ``None``. + + The positional argument is variadic: zero sources (``--sample`` or audio + piped on stdin) or exactly one feed the single-source path; two or more + always route to batch (see ``expand_sources``), so this collapses the 0/1 + case to the scalar the single-source helpers expect. + """ + return self.sources[0] if len(self.sources) == 1 else None + def flags(self, pii_policies: list[str] | None) -> dict[str, object]: """The curated flags in TranscriptionConfig field names (None = unset).""" flags: dict[str, object] = { @@ -302,14 +313,15 @@ def _print_show_code(opts: TranscribeOptions, merged: dict[str, object]) -> None Raw stdout, so `--show-code > script.py` runs. No source/--sample needed — fall back to a placeholder path for a pure snippet. """ - if opts.source and remotefs.is_remote_url(opts.source): + source = opts.single_source + if source and remotefs.is_remote_url(source): raise UsageError( "--show-code does not support bucket URLs (s3://, gs://, …) yet.", suggestion="Download the audio first and pass the local file.", ) audio = ( - client.resolve_audio_source(opts.source, sample=opts.sample, check_local=False) - if opts.source or opts.sample + client.resolve_audio_source(source, sample=opts.sample, check_local=False) + if source or opts.sample else "your-audio-file.mp3" ) gateway = code_gen.gateway_options( @@ -348,22 +360,22 @@ def run_transcribe(opts: TranscribeOptions, state: AppState, *, json_mode: bool) # --download-sections only slices a downloadable-URL fetch; for a local file, # stdin, remote bucket, or directory batch it would be dropped silently — reject # it up front like `clip`/`dub` rather than billing a full-file transcription. 
- youtube.validate_sections_flag(opts.source, list(opts.download_sections or [])) + youtube.validate_sections_flag(opts.single_source, list(opts.download_sections or [])) merged = config_builder.merge_transcribe_config( flags=flags, overrides=opts.config_kv, config_file=opts.config_file ) transcribe_validate.validate_speakers_expected(merged) - sources = transcribe_sources.expand_sources( - opts.source, + batch_sources = transcribe_sources.expand_sources( + opts.sources, from_stdin=opts.from_stdin, sample=opts.sample, # --show-code must never touch the network; skip the feed probe and treat a # URL as a single source for code generation. detect_feeds=not opts.show_code, ) - if sources is not None: + if batch_sources is not None: transcribe_sources.reject_single_source_flags( out=opts.out, output_field=opts.output_field, @@ -371,7 +383,7 @@ def run_transcribe(opts: TranscribeOptions, state: AppState, *, json_mode: bool) ) transcribe_batch.run_batch( state.resolve_api_key(), - sources, + batch_sources, transcription_config=config_builder.construct_transcription_config(merged), concurrency=opts.concurrency, force=opts.force, @@ -388,16 +400,16 @@ def run_transcribe(opts: TranscribeOptions, state: AppState, *, json_mode: bool) tc = config_builder.construct_transcription_config(merged) # A typo'd path must read as "file not found", not trigger a login. 
- check_source_exists(opts.source, sample=opts.sample) + check_source_exists(opts.single_source, sample=opts.sample) transcribe_validate.warn_unrecognized_extension( - opts.source, json_mode=json_mode, quiet=state.quiet + opts.single_source, json_mode=json_mode, quiet=state.quiet ) api_key = state.resolve_api_key() with output.status("Transcribing…", json_mode=json_mode, quiet=state.quiet): transcript = run_transcription( api_key, - opts.source, + opts.single_source, sample=opts.sample, transcription_config=tc, download_sections=list(opts.download_sections or []), diff --git a/aai_cli/app/transcribe/sources.py b/aai_cli/app/transcribe/sources.py index bead0a25..d5655e7e 100644 --- a/aai_cli/app/transcribe/sources.py +++ b/aai_cli/app/transcribe/sources.py @@ -50,20 +50,25 @@ def expand_sources( - source: str | None, *, from_stdin: bool, sample: bool, detect_feeds: bool = True + sources: list[str], *, from_stdin: bool, sample: bool, detect_feeds: bool = True ) -> list[str] | None: """The batch source list, or ``None`` when this is a single-source invocation. - Batch mode triggers on ``--from-stdin``, a directory (scanned recursively for - audio files), a glob pattern that names no existing file, a bucket URL that is - a glob or trailing-slash folder, or an http(s) URL that turns out to be a - podcast RSS/Atom feed (each episode becomes one batch source). A plain file, - direct media URL, ``-`` (audio piped on stdin), or ``--sample`` stays on the - single-source path. ``detect_feeds=False`` skips the feed probe (and its - network fetch) for paths that must not touch the network, e.g. ``--show-code``. 
+ Batch mode triggers on ``--from-stdin``, **two or more positional sources** + (each taken literally — a hand-picked list, no glob/feed expansion), a + directory (scanned recursively for audio files), a glob pattern that names no + existing file, a bucket URL that is a glob or trailing-slash folder, or an + http(s) URL that turns out to be a podcast RSS/Atom feed (each episode becomes + one batch source). A lone plain file, direct media URL, ``-`` (audio piped on + stdin), or ``--sample`` stays on the single-source path. ``detect_feeds=False`` + skips the feed probe (and its network fetch) for paths that must not touch the + network, e.g. ``--show-code``. """ if from_stdin: - return _stdin_sources(source, sample=sample) + return _stdin_sources(sources, sample=sample) + if len(sources) > 1: + return _explicit_sources(sources, sample=sample) + source = sources[0] if sources else None # `not source` (rather than `is None`) also catches the empty string — e.g. an # unset shell variable in `assembly transcribe "$FILE"`. `Path("")` is `Path(".")`, # so it would otherwise fall into the directory branch and batch-transcribe the @@ -92,8 +97,26 @@ def _local_sources(source: str) -> list[str] | None: return None -def _stdin_sources(source: str | None, *, sample: bool) -> list[str]: - if source is not None or sample: +def _explicit_sources(sources: list[str], *, sample: bool) -> list[str]: + """Several explicit positional sources (``assembly transcribe a.mp3 b.mp3 …``). + + An as-is batch list, so a caller can hand-pick files/URLs without quoting a glob + or piping a stdin list. Each is taken literally — no per-source glob, directory + scan, or feed probe — since the user already enumerated exactly what to run. 
+ + Raises ``UsageError`` if ``--sample`` is also given or any entry is empty. + """ + if sample: + raise UsageError("Pass either --sample or your own sources, not both.") + # Mirror the single-source guard: an empty entry (e.g. an unset "$FILE") is a + # usage error, never a source to batch. + if "" in sources: + raise UsageError("Empty source in the list; is a shell variable unset?") + return list(dict.fromkeys(sources)) # dedupe, keep order + + +def _stdin_sources(sources: list[str], *, sample: bool) -> list[str]: + if sources or sample: raise UsageError( "--from-stdin reads sources from stdin; don't also pass a source or --sample." ) diff --git a/aai_cli/commands/transcribe.py b/aai_cli/commands/transcribe.py index 139a4f14..b6f48655 100644 --- a/aai_cli/commands/transcribe.py +++ b/aai_cli/commands/transcribe.py @@ -27,6 +27,10 @@ ("Transcribe a local file", "assembly transcribe call.mp3"), ("Batch-transcribe a folder", "assembly transcribe ./recordings"), ("Batch-transcribe a glob", 'assembly transcribe "calls/*.mp3"'), + ( + "Batch-transcribe a hand-picked list", + "assembly transcribe a.mp3 https://youtu.be/dtp6b76pMak --concurrency 3", + ), ("Batch-transcribe an S3 prefix", 'assembly transcribe "s3://bucket/calls/*.mp3"'), ("Try it with the hosted sample", "assembly transcribe --sample"), ("Transcribe a YouTube video", "assembly transcribe https://youtu.be/dtp6b76pMak"), @@ -45,10 +49,11 @@ ) def transcribe( ctx: typer.Context, - source: str | None = typer.Argument( + sources: list[str] | None = typer.Argument( None, help="Audio file, URL, YouTube/podcast URL, podcast RSS feed, bucket URL " - "(s3://, gs://, …), or a directory/glob (batch mode)", + "(s3://, gs://, …), or a directory/glob (batch mode). Pass several to " + "batch-transcribe a hand-picked list", ), sample: bool = typer.Option(False, "--sample", help="Use the hosted wildfires.mp3 sample"), # batch mode @@ -365,12 +370,12 @@ def transcribe( Save with --out FILE, or pipe one field with -o text. YouTube and podcast-page URLs (any page yt-dlp can extract) are downloaded first, then transcribed. - Batch mode: pass a directory or glob (or pipe a list with --from-stdin) to - transcribe many sources concurrently. 
A podcast RSS/Atom feed URL also expands - to batch mode — every episode enclosure becomes one source. Each source gets a - .aai.json sidecar with the full result (including any --llm responses), and a - re-run skips sources already transcribed — with changed --llm prompts it - replays just the LLM step, never a second transcription. + Batch mode: pass several sources, a directory or glob (or pipe a list with + --from-stdin) to transcribe many sources concurrently. A podcast RSS/Atom feed + URL also expands to batch mode — every episode enclosure becomes one source. + Each source gets a .aai.json sidecar with the full result (including any --llm + responses), and a re-run skips sources already transcribed — with changed + --llm prompts it replays just the LLM step, never a second transcription. Bucket URLs (s3://, gs://, az://, sftp://, …) work for single files and for batches (a glob, or a folder ending in /); install the matching fsspec @@ -379,7 +384,7 @@ def transcribe( Curated flags cover common features; --config KEY=VALUE and --config-file reach every other field. Analysis (summary, chapters, ...) renders in human mode. """ opts = transcribe_exec.TranscribeOptions( - source=source, + sources=list(sources) if sources else [], sample=sample, from_stdin=from_stdin, concurrency=concurrency, diff --git a/aai_cli/skills/aai-cli/references/transcription.md b/aai_cli/skills/aai-cli/references/transcription.md index 291fd250..087fd0b2 100644 --- a/aai_cli/skills/aai-cli/references/transcription.md +++ b/aai_cli/skills/aai-cli/references/transcription.md @@ -5,11 +5,14 @@ Five commands. All accept `--json` (auto-enabled when piped); `transcribe`, `transcribe`, `stream`, and `agent` accept `--show-code` to print equivalent Python SDK code without calling the API. 
-## `assembly transcribe [SOURCE]` — file / URL / YouTube / podcast page / RSS feed +## `assembly transcribe [SOURCE]...` — file / URL / YouTube / podcast page / RSS feed `SOURCE` is a local file path, public URL, or a media-page URL yt-dlp can extract (YouTube, Apple Podcasts, Spreaker, SoundCloud, …) — those are downloaded first. -A podcast RSS/Atom feed URL expands into a resumable batch run over every episode +Pass **several sources** to batch-transcribe a hand-picked list on the command +line (each taken literally, no glob/feed expansion) — the clean alternative to +`--from-stdin`. A directory, glob, or bucket folder also expands to a batch, and a +podcast RSS/Atom feed URL expands into a resumable batch run over every episode enclosure (one `.aai.json` sidecar apiece). Use `--sample` for the hosted `wildfires.mp3`. Analysis results (summary, chapters, sentiment, …) render automatically in human mode. @@ -39,6 +42,7 @@ assembly transcribe --sample assembly transcribe call.mp3 --speaker-labels --speakers-expected 2 --redact-pii assembly transcribe call.mp3 -o text assembly transcribe call.mp3 --show-code +assembly transcribe a.mp3 b.mp3 https://youtu.be/dtp6b76pMak --concurrency 3 # hand-picked batch assembly transcribe "https://feeds.simplecast.com/54nAGcIl" # every episode in the feed ``` diff --git a/tests/__snapshots__/test_snapshots_help_run.ambr b/tests/__snapshots__/test_snapshots_help_run.ambr index 2e67662e..9de75cf5 100644 --- a/tests/__snapshots__/test_snapshots_help_run.ambr +++ b/tests/__snapshots__/test_snapshots_help_run.ambr @@ -887,7 +887,7 @@ # name: test_command_help_matches_snapshot[t] ''' - Usage: assembly t [OPTIONS] [SOURCE] + Usage: assembly t [OPTIONS] [SOURCES]... Transcribe a file, URL, or YouTube/podcast link — or a whole batch @@ -897,12 +897,12 @@ Save with --out FILE, or pipe one field with -o text. YouTube and podcast-page URLs (any page yt-dlp can extract) are downloaded first, then transcribed. 
- Batch mode: pass a directory or glob (or pipe a list with --from-stdin) to - transcribe many sources concurrently. A podcast RSS/Atom feed URL also expands - to batch mode — every episode enclosure becomes one source. Each source gets a - .aai.json sidecar with the full result (including any --llm responses), and a - re-run skips sources already transcribed — with changed --llm prompts it - replays just the LLM step, never a second transcription. + Batch mode: pass several sources, a directory or glob (or pipe a list with + --from-stdin) to transcribe many sources concurrently. A podcast RSS/Atom feed + URL also expands to batch mode — every episode enclosure becomes one source. + Each source gets a .aai.json sidecar with the full result (including any --llm + responses), and a re-run skips sources already transcribed — with changed + --llm prompts it replays just the LLM step, never a second transcription. Bucket URLs (s3://, gs://, az://, sftp://, …) work for single files and for batches (a glob, or a folder ending in /); install the matching fsspec @@ -913,9 +913,10 @@ mode. ╭─ Arguments ──────────────────────────────────────────────────────────────────╮ - │ source [SOURCE] Audio file, URL, YouTube/podcast URL, podcast RSS │ - │ feed, bucket URL (s3://, gs://, …), or a │ - │ directory/glob (batch mode) │ + │ sources [SOURCES]... Audio file, URL, YouTube/podcast URL, podcast │ + │ RSS feed, bucket URL (s3://, gs://, …), or a │ + │ directory/glob (batch mode). Pass several to │ + │ batch-transcribe a hand-picked list │ ╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ Options ────────────────────────────────────────────────────────────────────╮ │ --sample Use the hosted │ @@ -1072,7 +1073,7 @@ # name: test_command_help_matches_snapshot[transcribe] ''' - Usage: assembly transcribe [OPTIONS] [SOURCE] + Usage: assembly transcribe [OPTIONS] [SOURCES]... 
Transcribe a file, URL, or YouTube/podcast link — or a whole batch @@ -1082,12 +1083,12 @@ Save with --out FILE, or pipe one field with -o text. YouTube and podcast-page URLs (any page yt-dlp can extract) are downloaded first, then transcribed. - Batch mode: pass a directory or glob (or pipe a list with --from-stdin) to - transcribe many sources concurrently. A podcast RSS/Atom feed URL also expands - to batch mode — every episode enclosure becomes one source. Each source gets a - .aai.json sidecar with the full result (including any --llm responses), and a - re-run skips sources already transcribed — with changed --llm prompts it - replays just the LLM step, never a second transcription. + Batch mode: pass several sources, a directory or glob (or pipe a list with + --from-stdin) to transcribe many sources concurrently. A podcast RSS/Atom feed + URL also expands to batch mode — every episode enclosure becomes one source. + Each source gets a .aai.json sidecar with the full result (including any --llm + responses), and a re-run skips sources already transcribed — with changed + --llm prompts it replays just the LLM step, never a second transcription. Bucket URLs (s3://, gs://, az://, sftp://, …) work for single files and for batches (a glob, or a folder ending in /); install the matching fsspec @@ -1098,9 +1099,10 @@ mode. ╭─ Arguments ──────────────────────────────────────────────────────────────────╮ - │ source [SOURCE] Audio file, URL, YouTube/podcast URL, podcast RSS │ - │ feed, bucket URL (s3://, gs://, …), or a │ - │ directory/glob (batch mode) │ + │ sources [SOURCES]... Audio file, URL, YouTube/podcast URL, podcast │ + │ RSS feed, bucket URL (s3://, gs://, …), or a │ + │ directory/glob (batch mode). 
Pass several to │ + │ batch-transcribe a hand-picked list │ ╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ Options ────────────────────────────────────────────────────────────────────╮ │ --sample Use the hosted │ @@ -1253,6 +1255,8 @@ $ assembly transcribe ./recordings Batch-transcribe a glob $ assembly transcribe "calls/*.mp3" + Batch-transcribe a hand-picked list + $ assembly transcribe a.mp3 https://youtu.be/dtp6b76pMak --concurrency 3 Batch-transcribe an S3 prefix $ assembly transcribe "s3://bucket/calls/*.mp3" Try it with the hosted sample diff --git a/tests/test_command_options_seam.py b/tests/test_command_options_seam.py index 1a7d657d..88c03d70 100644 --- a/tests/test_command_options_seam.py +++ b/tests/test_command_options_seam.py @@ -27,7 +27,7 @@ # The CLI's flag defaults, as data. Tests override per-case with dataclasses.replace. TRANSCRIBE_DEFAULTS = transcribe_exec.TranscribeOptions( - source=None, + sources=[], sample=False, from_stdin=False, concurrency=DEFAULT_BATCH_CONCURRENCY, @@ -135,6 +135,15 @@ def test_run_transcribe_validates_flags_before_credentials(): ) +def test_single_source_collapses_zero_or_one_positional(): + # The positional argument is variadic; the single-source path wants a scalar. + assert TRANSCRIBE_DEFAULTS.single_source is None # zero sources (e.g. --sample) + one = dataclasses.replace(TRANSCRIBE_DEFAULTS, sources=["a.mp3"]) + assert one.single_source == "a.mp3" + many = dataclasses.replace(TRANSCRIBE_DEFAULTS, sources=["a.mp3", "b.mp3"]) + assert many.single_source is None # two or more route to batch instead + + def test_transcribe_flags_drop_unset_speaker_labels(): # The boolean --speaker-labels flag maps to None when unset (so the request # omits the field entirely), and True only when explicitly enabled. 
diff --git a/tests/test_transcribe_batch_sources.py b/tests/test_transcribe_batch_sources.py index c7be8c0a..60b89068 100644 --- a/tests/test_transcribe_batch_sources.py +++ b/tests/test_transcribe_batch_sources.py @@ -133,7 +133,7 @@ def test_stdin_source_list_dedupes_preserving_order(monkeypatch): import io monkeypatch.setattr("sys.stdin", io.StringIO("b.mp3\na.mp3\nb.mp3\n")) - assert transcribe_sources.expand_sources(None, from_stdin=True, sample=False) == [ + assert transcribe_sources.expand_sources([], from_stdin=True, sample=False) == [ "b.mp3", "a.mp3", ] @@ -160,9 +160,33 @@ def test_from_stdin_rejects_sample(): assert "--from-stdin reads sources from stdin" in result.output -@pytest.mark.parametrize("source", ["-", "https://example.com/a.mp3", None, ""]) -def test_non_batch_sources_return_none(source): - assert transcribe_sources.expand_sources(source, from_stdin=False, sample=False) is None +@pytest.mark.parametrize("sources", [["-"], ["https://example.com/a.mp3"], [], [""]]) +def test_non_batch_sources_return_none(sources): + assert transcribe_sources.expand_sources(sources, from_stdin=False, sample=False) is None + + +def test_multiple_positional_sources_form_an_as_is_batch_list(): + # Several explicit positional sources are a hand-picked batch: deduped, order kept, + # and taken literally (a lone source of the same value would instead stay single). 
+ assert transcribe_sources.expand_sources( + ["b.mp3", "a.mp3", "b.mp3"], from_stdin=False, sample=False + ) == ["b.mp3", "a.mp3"] + + +def test_multiple_sources_with_sample_is_rejected(): + with pytest.raises(UsageError, match="Pass either --sample or your own sources"): + transcribe_sources.expand_sources(["a.mp3", "b.mp3"], from_stdin=False, sample=True) + + +def test_multiple_positional_sources_transcribe_as_a_batch(tmp_path, mocker, monkeypatch): + _auth() + (tmp_path / "a.mp3").write_bytes(b"a") + (tmp_path / "b.mp3").write_bytes(b"b") + seen = _patch_transcribe(mocker, monkeypatch) + # The user's "pass a list of URLs in" ask: hand-pick the sources on argv, no stdin. + result = runner.invoke(app, ["transcribe", "a.mp3", "b.mp3", "--json"]) + assert result.exit_code == 0, result.output + assert sorted(seen) == ["a.mp3", "b.mp3"] def test_empty_source_is_rejected_not_treated_as_cwd(tmp_path, mocker, monkeypatch): @@ -179,13 +203,13 @@ def test_empty_source_is_rejected_not_treated_as_cwd(tmp_path, mocker, monkeypat def test_sample_returns_none_even_without_source(): - assert transcribe_sources.expand_sources(None, from_stdin=False, sample=True) is None + assert transcribe_sources.expand_sources([], from_stdin=False, sample=True) is None def test_expand_sources_directory_error_message_names_the_path(tmp_path): (tmp_path / "calls").mkdir() with pytest.raises(UsageError, match="No audio files found under calls"): - transcribe_sources.expand_sources("calls", from_stdin=False, sample=False) + transcribe_sources.expand_sources(["calls"], from_stdin=False, sample=False) @pytest.mark.parametrize( @@ -299,7 +323,7 @@ def test_remote_glob_without_matches_exits_2(memory_fs): def test_plain_remote_file_url_stays_single_source(memory_fs): # No glob and no trailing slash: a bucket URL is one file, like a local path. 
for url in ("memory://calls/a.mp3", "memory://calls"): - assert transcribe_sources.expand_sources(url, from_stdin=False, sample=False) is None + assert transcribe_sources.expand_sources([url], from_stdin=False, sample=False) is None def test_sidecar_path_for_remote_url_is_slug_plus_hash(): diff --git a/tests/test_transcribe_feed.py b/tests/test_transcribe_feed.py index 5b3bf879..0dd2e40f 100644 --- a/tests/test_transcribe_feed.py +++ b/tests/test_transcribe_feed.py @@ -232,7 +232,7 @@ def test_expand_sources_routes_feed_url_to_batch(monkeypatch): feed, "feed_episode_urls", lambda url: ["https://x/a.mp3", "https://x/b.mp3"] ) assert transcribe_sources.expand_sources( - "https://feeds.example.com/show", from_stdin=False, sample=False + ["https://feeds.example.com/show"], from_stdin=False, sample=False ) == ["https://x/a.mp3", "https://x/b.mp3"] @@ -243,7 +243,7 @@ def _boom(url): monkeypatch.setattr(feed, "feed_episode_urls", _boom) assert ( transcribe_sources.expand_sources( - "https://feeds.example.com/show", from_stdin=False, sample=False, detect_feeds=False + ["https://feeds.example.com/show"], from_stdin=False, sample=False, detect_feeds=False ) is None ) diff --git a/tests/test_transcribe_reduce.py b/tests/test_transcribe_reduce.py index 412b5be8..410a99b3 100644 --- a/tests/test_transcribe_reduce.py +++ b/tests/test_transcribe_reduce.py @@ -47,7 +47,7 @@ def _ndjson(result): _DEFAULT_OPTS = transcribe_run.TranscribeOptions( - source=None, + sources=[], sample=False, from_stdin=False, concurrency=2,