AssemblyAI · alexkroman · Jun 16, 2026 · Jun 16, 2026 · Jun 16, 2026
diff --git a/README.md b/README.md
@@ -134,16 +134,33 @@ assembly stream -o text | grep --line-buffered -i alex \
 assembly transcribe --sample --llm "summarize" --llm "translate the summary to French"
 ```
 
-**Score diarization quality across several videos** — pipe a list of URLs into batch mode (`--from-stdin`), transcribe them in parallel with speaker labels, have an LLM judge each transcript, then use `--llm-reduce` to run one prompt over all the results for a single aggregate verdict:
+**Score diarization quality across several videos** — pass a hand-picked list of URLs straight on the command line (batch mode), transcribe them in parallel with speaker labels, have an LLM judge each transcript, then use `--llm-reduce` to run one prompt over all the results for a single aggregate verdict:
 
 ```sh
-printf '%s\n' \
+assembly transcribe \
   https://youtu.be/RC5zRvqnRm8 \
   https://youtu.be/u9S41Kplsbs \
   https://youtu.be/mP31CdpGzUY \
-| assembly transcribe --from-stdin --concurrency 3 --speaker-labels \
-    --llm 'Judge diarization quality; output JSON {speaker_count, issues, score}' \
-    --llm-reduce 'Rank these videos worst-to-best and summarize the failure modes'
+  --concurrency 3 --speaker-labels \
+  --llm 'Judge diarization quality; output JSON {speaker_count, issues, score}' \
+  --llm-reduce 'Rank these videos worst-to-best and summarize the failure modes'
+```
+
+(Prefer to stream a generated list in? `--from-stdin` reads one source per line, so `find . -name '*.wav' | assembly transcribe --from-stdin …` works too.)
+
+**Map-reduce a batch of talks** — extract structured notes from each video (`--llm`, a map), then reduce across all of them (`--llm-reduce`) with a stronger model (`--model`):
+
+```sh
+assembly transcribe \
+  https://youtu.be/LCEmiRjPEtQ \
+  https://youtu.be/1yvBqasHLZs \
+  https://youtu.be/MiqLoAZFRSE \
+  https://youtu.be/s7_NlkBwdj8 \
+  https://youtu.be/60iW8FZ7MJU \
+  https://youtu.be/V979Wd1gmTU \
+  --concurrency 6 \
+  --llm 'Extract JSON {thesis, key_claims[]}' \
+  --llm-reduce 'Where do the speakers disagree?' --model claude-opus-4-7
 ```
 
 **Summarize your recent transcripts and surface the themes** — pipe a list of past transcripts into `transcripts get`, summarize each (`--llm`, a map), then reduce them all into one answer (`--llm-reduce`):
@@ -285,10 +302,12 @@ assembly transcribe video.mp4 -o srt   # captions
 assembly transcribe call.mp3 --speaker-labels --summarization --json
 ```
 
-Transcribe in batches — a directory, a glob, a piped list, or a whole podcast
-RSS feed (every episode becomes one source), resumable on re-run:
+Transcribe in batches — a hand-picked list, a directory, a glob, a piped list,
+or a whole podcast RSS feed (every episode becomes one source), resumable on
+re-run:
 
 ```sh
+assembly transcribe a.mp3 b.mp3 https://youtu.be/dtp6b76pMak   # a hand-picked list
 assembly transcribe ./recordings
 assembly transcribe "s3://bucket/calls/*.mp3"   # needs: pip install s3fs
 assembly transcribe "https://feeds.simplecast.com/54nAGcIl"   # every episode in the feed

diff --git a/aai_cli/app/transcribe/run.py b/aai_cli/app/transcribe/run.py
@@ -185,7 +185,7 @@ class TranscribeOptions:
     ``json_mode`` argument), so a test can describe an invocation without argv.
     """
 
-    source: str | None
+    sources: list[str]
     sample: bool
     from_stdin: bool
     concurrency: int
@@ -237,6 +237,17 @@ class TranscribeOptions:
     out: Path | None
     show_code: bool
 
+    @property
+    def single_source(self) -> str | None:
+        """The lone source for the single-source path, or ``None``.
+
+        The positional argument is variadic: zero sources (``--sample`` or audio
+        piped on stdin) or exactly one feed the single-source path; two or more
+        always route to batch (see ``expand_sources``), so this collapses the 0/1
+        case to the scalar the single-source helpers expect.
+        """
+        return self.sources[0] if len(self.sources) == 1 else None
+
     def flags(self, pii_policies: list[str] | None) -> dict[str, object]:
         """The curated flags in TranscriptionConfig field names (None = unset)."""
         flags: dict[str, object] = {
@@ -302,14 +313,15 @@ def _print_show_code(opts: TranscribeOptions, merged: dict[str, object]) -> None
     Raw stdout, so `--show-code > script.py` runs. No source/--sample needed — fall
     back to a placeholder path for a pure snippet.
     """
-    if opts.source and remotefs.is_remote_url(opts.source):
+    source = opts.single_source
+    if source and remotefs.is_remote_url(source):
         raise UsageError(
             "--show-code does not support bucket URLs (s3://, gs://, …) yet.",
             suggestion="Download the audio first and pass the local file.",
         )
     audio = (
-        client.resolve_audio_source(opts.source, sample=opts.sample, check_local=False)
-        if opts.source or opts.sample
+        client.resolve_audio_source(source, sample=opts.sample, check_local=False)
+        if source or opts.sample
         else "your-audio-file.mp3"
     )
     gateway = code_gen.gateway_options(
@@ -348,30 +360,30 @@ def run_transcribe(opts: TranscribeOptions, state: AppState, *, json_mode: bool)
     # --download-sections only slices a downloadable-URL fetch; for a local file,
     # stdin, remote bucket, or directory batch it would be dropped silently — reject
     # it up front like `clip`/`dub` rather than billing a full-file transcription.
-    youtube.validate_sections_flag(opts.source, list(opts.download_sections or []))
+    youtube.validate_sections_flag(opts.single_source, list(opts.download_sections or []))
 
     merged = config_builder.merge_transcribe_config(
         flags=flags, overrides=opts.config_kv, config_file=opts.config_file
     )
     transcribe_validate.validate_speakers_expected(merged)
 
-    sources = transcribe_sources.expand_sources(
-        opts.source,
+    batch_sources = transcribe_sources.expand_sources(
+        opts.sources,
         from_stdin=opts.from_stdin,
         sample=opts.sample,
         # --show-code must never touch the network; skip the feed probe and treat a
         # URL as a single source for code generation.
         detect_feeds=not opts.show_code,
     )
-    if sources is not None:
+    if batch_sources is not None:
         transcribe_sources.reject_single_source_flags(
             out=opts.out,
             output_field=opts.output_field,
             show_code=opts.show_code,
         )
         transcribe_batch.run_batch(
             state.resolve_api_key(),
-            sources,
+            batch_sources,
             transcription_config=config_builder.construct_transcription_config(merged),
             concurrency=opts.concurrency,
             force=opts.force,
@@ -388,16 +400,16 @@ def run_transcribe(opts: TranscribeOptions, state: AppState, *, json_mode: bool)
     tc = config_builder.construct_transcription_config(merged)
 
     # A typo'd path must read as "file not found", not trigger a login.
-    check_source_exists(opts.source, sample=opts.sample)
+    check_source_exists(opts.single_source, sample=opts.sample)
     transcribe_validate.warn_unrecognized_extension(
-        opts.source, json_mode=json_mode, quiet=state.quiet
+        opts.single_source, json_mode=json_mode, quiet=state.quiet
     )
 
     api_key = state.resolve_api_key()
     with output.status("Transcribing…", json_mode=json_mode, quiet=state.quiet):
         transcript = run_transcription(
             api_key,
-            opts.source,
+            opts.single_source,
             sample=opts.sample,
             transcription_config=tc,
             download_sections=list(opts.download_sections or []),

diff --git a/aai_cli/app/transcribe/sources.py b/aai_cli/app/transcribe/sources.py
@@ -50,20 +50,25 @@
 
 
 def expand_sources(
-    source: str | None, *, from_stdin: bool, sample: bool, detect_feeds: bool = True
+    sources: list[str], *, from_stdin: bool, sample: bool, detect_feeds: bool = True
 ) -> list[str] | None:
     """The batch source list, or ``None`` when this is a single-source invocation.
 
-    Batch mode triggers on ``--from-stdin``, a directory (scanned recursively for
-    audio files), a glob pattern that names no existing file, a bucket URL that is
-    a glob or trailing-slash folder, or an http(s) URL that turns out to be a
-    podcast RSS/Atom feed (each episode becomes one batch source). A plain file,
-    direct media URL, ``-`` (audio piped on stdin), or ``--sample`` stays on the
-    single-source path. ``detect_feeds=False`` skips the feed probe (and its
-    network fetch) for paths that must not touch the network, e.g. ``--show-code``.
+    Batch mode triggers on ``--from-stdin``, **two or more positional sources**
+    (each taken literally — a hand-picked list, no glob/feed expansion), a
+    directory (scanned recursively for audio files), a glob pattern that names no
+    existing file, a bucket URL that is a glob or trailing-slash folder, or an
+    http(s) URL that turns out to be a podcast RSS/Atom feed (each episode becomes
+    one batch source). A lone plain file, direct media URL, ``-`` (audio piped on
+    stdin), or ``--sample`` stays on the single-source path. ``detect_feeds=False``
+    skips the feed probe (and its network fetch) for paths that must not touch the
+    network, e.g. ``--show-code``.
     """
     if from_stdin:
-        return _stdin_sources(source, sample=sample)
+        return _stdin_sources(sources, sample=sample)
+    if len(sources) > 1:
+        return _explicit_sources(sources, sample=sample)
+    source = sources[0] if sources else None
     # `not source` (rather than `is None`) also catches the empty string — e.g. an
     # unset shell variable in `assembly transcribe "$FILE"`. `Path("")` is `Path(".")`,
     # so it would otherwise fall into the directory branch and batch-transcribe the
@@ -92,8 +97,20 @@ def _local_sources(source: str) -> list[str] | None:
     return None
 
 
-def _stdin_sources(source: str | None, *, sample: bool) -> list[str]:
-    if source is not None or sample:
+def _explicit_sources(sources: list[str], *, sample: bool) -> list[str]:
+    """Several explicit positional sources (``assembly transcribe a.mp3 b.mp3 …``).
+
+    An as-is batch list, so a caller can hand-pick files/URLs without quoting a glob
+    or piping a stdin list. Each is taken literally — no per-source glob, directory
+    scan, or feed probe — since the user already enumerated exactly what to run.
+    """
+    if sample:
+        raise UsageError("Pass either --sample or your own sources, not both.")
+    return list(dict.fromkeys(sources))  # dedupe, keep order
+
+
+def _stdin_sources(sources: list[str], *, sample: bool) -> list[str]:
+    if sources or sample:
         raise UsageError(
             "--from-stdin reads sources from stdin; don't also pass a source or --sample."
         )

diff --git a/aai_cli/commands/transcribe.py b/aai_cli/commands/transcribe.py
@@ -27,6 +27,10 @@
             ("Transcribe a local file", "assembly transcribe call.mp3"),
             ("Batch-transcribe a folder", "assembly transcribe ./recordings"),
             ("Batch-transcribe a glob", 'assembly transcribe "calls/*.mp3"'),
+            (
+                "Batch-transcribe a hand-picked list",
+                "assembly transcribe a.mp3 https://youtu.be/dtp6b76pMak --concurrency 3",
+            ),
             ("Batch-transcribe an S3 prefix", 'assembly transcribe "s3://bucket/calls/*.mp3"'),
             ("Try it with the hosted sample", "assembly transcribe --sample"),
             ("Transcribe a YouTube video", "assembly transcribe https://youtu.be/dtp6b76pMak"),
@@ -45,10 +49,11 @@
 )
 def transcribe(
     ctx: typer.Context,
-    source: str | None = typer.Argument(
+    sources: list[str] | None = typer.Argument(
         None,
         help="Audio file, URL, YouTube/podcast URL, podcast RSS feed, bucket URL "
-        "(s3://, gs://, …), or a directory/glob (batch mode)",
+        "(s3://, gs://, …), or a directory/glob (batch mode). Pass several to "
+        "batch-transcribe a hand-picked list",
     ),
     sample: bool = typer.Option(False, "--sample", help="Use the hosted wildfires.mp3 sample"),
     # batch mode
@@ -365,12 +370,12 @@ def transcribe(
     Save with --out FILE, or pipe one field with -o text. YouTube and podcast-page
     URLs (any page yt-dlp can extract) are downloaded first, then transcribed.
 
-    Batch mode: pass a directory or glob (or pipe a list with --from-stdin) to
-    transcribe many sources concurrently. A podcast RSS/Atom feed URL also expands
-    to batch mode — every episode enclosure becomes one source. Each source gets a
-    .aai.json sidecar with the full result (including any --llm responses), and a
-    re-run skips sources already transcribed — with changed --llm prompts it
-    replays just the LLM step, never a second transcription.
+    Batch mode: pass several sources, a directory or glob (or pipe a list with
+    --from-stdin) to transcribe many sources concurrently. A podcast RSS/Atom feed
+    URL also expands to batch mode — every episode enclosure becomes one source.
+    Each source gets a .aai.json sidecar with the full result (including any --llm
+    responses), and a re-run skips sources already transcribed — with changed
+    --llm prompts it replays just the LLM step, never a second transcription.
 
     Bucket URLs (s3://, gs://, az://, sftp://, …) work for single files and for
     batches (a glob, or a folder ending in /); install the matching fsspec
@@ -379,7 +384,7 @@ def transcribe(
     Curated flags cover common features; --config KEY=VALUE and --config-file reach every other field. Analysis (summary, chapters, ...) renders in human mode.
     """
     opts = transcribe_exec.TranscribeOptions(
-        source=source,
+        sources=list(sources) if sources else [],
         sample=sample,
         from_stdin=from_stdin,
         concurrency=concurrency,

diff --git a/aai_cli/skills/aai-cli/references/transcription.md b/aai_cli/skills/aai-cli/references/transcription.md
@@ -5,11 +5,14 @@ Five commands. All accept `--json` (auto-enabled when piped); `transcribe`,
 `transcribe`, `stream`, and `agent` accept `--show-code` to print equivalent
 Python SDK code without calling the API.
 
-## `assembly transcribe [SOURCE]` — file / URL / YouTube / podcast page / RSS feed
+## `assembly transcribe [SOURCE]...` — file / URL / YouTube / podcast page / RSS feed
 
 `SOURCE` is a local file path, public URL, or a media-page URL yt-dlp can extract
 (YouTube, Apple Podcasts, Spreaker, SoundCloud, …) — those are downloaded first.
-A podcast RSS/Atom feed URL expands into a resumable batch run over every episode
+Pass **several sources** to batch-transcribe a hand-picked list on the command
+line (each taken literally: no glob, directory, or feed expansion) — the clean alternative to
+`--from-stdin`. A directory, glob, or bucket folder also expands to a batch, and a
+podcast RSS/Atom feed URL expands into a resumable batch run over every episode
 enclosure (one `.aai.json` sidecar apiece). Use `--sample` for the hosted
 `wildfires.mp3`. Analysis results (summary, chapters, sentiment, …) render
 automatically in human mode.
@@ -39,6 +42,7 @@ assembly transcribe --sample
 assembly transcribe call.mp3 --speaker-labels --speakers-expected 2 --redact-pii
 assembly transcribe call.mp3 -o text
 assembly transcribe call.mp3 --show-code
+assembly transcribe a.mp3 b.mp3 https://youtu.be/dtp6b76pMak --concurrency 3   # hand-picked batch
 assembly transcribe "https://feeds.simplecast.com/54nAGcIl"   # every episode in the feed
 ```