From 61461de2ef83fc4caa017556e0e18197c2ce839b Mon Sep 17 00:00:00 2001
From: Alex Kroman <alex@assemblyai.com>
Date: Wed, 10 Jun 2026 07:57:20 -0700
Subject: [PATCH] Fix YouTube --show-code, Vercel deploy, stream poll default,
 root --help

- code_gen: download YouTube audio with yt-dlp before upload (raw URL fails)
- init templates: ship vercel.json pinning the FastAPI framework preset so
  `aai deploy` doesn't hit Vercel's "services" framework error
- stream: default --llm-interval 30s -> 10s
- main: drop the redundant Authentication block from `aai --help`

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 aai_cli/code_gen/transcribe.py                | 39 ++++++++++++++-----
 aai_cli/commands/stream.py                    |  2 +-
 .../templates/audio-transcription/README.md   |  6 +--
 .../templates/audio-transcription/vercel.json |  4 ++
 .../init/templates/live-captions/README.md    |  9 +++--
 .../init/templates/live-captions/vercel.json  |  4 ++
 aai_cli/init/templates/voice-agent/README.md  |  9 +++--
 .../init/templates/voice-agent/vercel.json    |  4 ++
 aai_cli/main.py                               |  5 +--
 .../test_cli_output_snapshots.ambr            |  2 +-
 tests/test_code_gen.py                        | 32 +++++++++++++++
 tests/test_help_rendering.py                  | 10 -----
 tests/test_init_scaffold.py                   |  4 +-
 tests/test_init_template_contract.py          | 15 +++++++
 14 files changed, 108 insertions(+), 37 deletions(-)
 create mode 100644 aai_cli/init/templates/audio-transcription/vercel.json
 create mode 100644 aai_cli/init/templates/live-captions/vercel.json
 create mode 100644 aai_cli/init/templates/voice-agent/vercel.json

diff --git a/aai_cli/code_gen/transcribe.py b/aai_cli/code_gen/transcribe.py
index 8c1ab5fa..e191fe95 100644
--- a/aai_cli/code_gen/transcribe.py
+++ b/aai_cli/code_gen/transcribe.py
@@ -2,7 +2,7 @@
 
 from typing import cast
 
-from aai_cli import environments, llm
+from aai_cli import environments, llm, youtube
 from aai_cli.code_gen import serialize, snippets
 
 # ``-o/--output`` choice -> printed-result code, mirroring the run path's
@@ -40,21 +40,29 @@ def render(
     """
     if output is not None:
         llm_gateway = None  # `-o` returns before the chain runs in the real command
+    is_youtube = youtube.is_youtube_url(source)
     parts = (
-        _header_block(llm_gateway, output)
-        + _transcribe_block(merged, source)
+        _header_block(llm_gateway, output, is_youtube=is_youtube)
+        + _transcribe_block(merged, source, is_youtube=is_youtube)
         + _result_block(merged, llm_gateway, output)
     )
     parts.append("")
     return "\n".join(parts)
 
 
-def _header_block(llm_gateway: dict[str, object] | None, output: str | None) -> list[str]:
+def _header_block(
+    llm_gateway: dict[str, object] | None, output: str | None, *, is_youtube: bool
+) -> list[str]:
     """Imports plus the api-key (and non-default environment) settings lines."""
     stdlib_imports = ["import os"]
+    if is_youtube:
+        # The YouTube path downloads audio to a temp dir before uploading.
+        stdlib_imports += ["import tempfile"]
     if output == "json":
         stdlib_imports.insert(0, "import json")
     imports = ["import assemblyai as aai"]
+    if is_youtube:
+        imports.append("import yt_dlp")
     if llm_gateway:
         imports.append("from openai import OpenAI")
     parts = [
@@ -73,20 +81,33 @@ def _header_block(llm_gateway: dict[str, object] | None, output: str | None) ->
     return parts
 
 
-def _transcribe_block(merged: dict[str, object], source: str) -> list[str]:
+def _transcribe_block(merged: dict[str, object], source: str, *, is_youtube: bool) -> list[str]:
     """The transcriber setup, optional config, the transcribe call, and error check."""
     parts = ["", "transcriber = aai.Transcriber()"]
+    config_arg = ""
     if merged:
         kwargs = "\n".join(serialize.config_kwarg_lines(merged, indent=4))
         parts += ["", f"config = aai.TranscriptionConfig(\n{kwargs}\n)"]
-        call = f"transcript = transcriber.transcribe({source!r}, config=config)"
+        config_arg = ", config=config"
+    if is_youtube:
+        # AssemblyAI can't read a YouTube watch URL itself, so download the audio
+        # with yt-dlp into a temp dir and upload the local file — what the CLI does.
+        parts += [
+            "",
+            "# AssemblyAI can't fetch a YouTube URL itself; download the audio first.",
+            "with tempfile.TemporaryDirectory() as _tmp:",
+            "    with yt_dlp.YoutubeDL(",
+            '        {"format": "bestaudio/best", "outtmpl": f"{_tmp}/%(id)s.%(ext)s"}',
+            "    ) as _ydl:",
+            f"        _info = _ydl.extract_info({source!r}, download=True)",
+            "        _audio = _ydl.prepare_filename(_info)",
+            f"    transcript = transcriber.transcribe(_audio{config_arg})",
+        ]
     else:
-        call = f"transcript = transcriber.transcribe({source!r})"
+        parts += ["", f"transcript = transcriber.transcribe({source!r}{config_arg})"]
     return [
         *parts,
         "",
-        call,
-        "",
         "if transcript.status == aai.TranscriptStatus.error:",
         "    raise RuntimeError(transcript.error)",
         "",
diff --git a/aai_cli/commands/stream.py b/aai_cli/commands/stream.py
index dc74b610..d650bf15 100644
--- a/aai_cli/commands/stream.py
+++ b/aai_cli/commands/stream.py
@@ -290,7 +290,7 @@ def stream(
         rich_help_panel=help_panels.OPT_LLM,
     ),
     llm_interval: float = typer.Option(
-        30.0,
+        10.0,
         "--llm-interval",
         help="Seconds between --llm summary refreshes (0 refreshes on every turn).",
         min=0.0,
diff --git a/aai_cli/init/templates/audio-transcription/README.md b/aai_cli/init/templates/audio-transcription/README.md
index 83f559d7..628d5e40 100644
--- a/aai_cli/init/templates/audio-transcription/README.md
+++ b/aai_cli/init/templates/audio-transcription/README.md
@@ -16,9 +16,9 @@ aai dev   # installs deps if needed, starts the server, opens http://localhost:3
 
 Push this folder to a Git repo and import it on Vercel. Set `ASSEMBLYAI_API_KEY`
 as a Vercel environment variable (the local `.env` is git-ignored and not deployed).
-No extra config is needed (no `vercel.json`): Vercel runs `api/index.py` as the
-function, and that FastAPI app serves both the page and assets (from `static/`)
-and the API.
+The shipped `vercel.json` pins the FastAPI framework preset, so Vercel builds
+`api/index.py` as the function and routes every request to that FastAPI app, which
+serves the page and assets (from `static/`) as well as the API.
 
 ## Deploy elsewhere
 
diff --git a/aai_cli/init/templates/audio-transcription/vercel.json b/aai_cli/init/templates/audio-transcription/vercel.json
new file mode 100644
index 00000000..10e8a7c1
--- /dev/null
+++ b/aai_cli/init/templates/audio-transcription/vercel.json
@@ -0,0 +1,4 @@
+{
+  "$schema": "https://openapi.vercel.sh/vercel.json",
+  "framework": "fastapi"
+}
diff --git a/aai_cli/init/templates/live-captions/README.md b/aai_cli/init/templates/live-captions/README.md
index 8186be7e..52336b0b 100644
--- a/aai_cli/init/templates/live-captions/README.md
+++ b/aai_cli/init/templates/live-captions/README.md
@@ -17,10 +17,11 @@ aai dev   # opens http://localhost:3000 (allow microphone access)
 ## Deploy to Vercel
 
 Push this folder to a Git repo and import it on Vercel. Set `ASSEMBLYAI_API_KEY` as a
-Vercel environment variable (the local `.env` is git-ignored). No extra config is needed
-(no `vercel.json`): Vercel runs `api/index.py` as the function, and that FastAPI app
-serves the page and assets (from `static/`) plus the `/api/token` route. The WebSocket
-runs browser → AssemblyAI, so nothing long-running is needed.
+Vercel environment variable (the local `.env` is git-ignored). The shipped `vercel.json`
+pins the FastAPI framework preset, so Vercel builds `api/index.py` as the function and
+routes every request to that FastAPI app, which serves the page and assets (from
+`static/`) plus the `/api/token` route. The WebSocket connects the browser directly to
+AssemblyAI, so the Vercel function never has to hold a long-running connection.
 
 ## Deploy elsewhere
 
diff --git a/aai_cli/init/templates/live-captions/vercel.json b/aai_cli/init/templates/live-captions/vercel.json
new file mode 100644
index 00000000..10e8a7c1
--- /dev/null
+++ b/aai_cli/init/templates/live-captions/vercel.json
@@ -0,0 +1,4 @@
+{
+  "$schema": "https://openapi.vercel.sh/vercel.json",
+  "framework": "fastapi"
+}
diff --git a/aai_cli/init/templates/voice-agent/README.md b/aai_cli/init/templates/voice-agent/README.md
index 8f4103e7..d05141bf 100644
--- a/aai_cli/init/templates/voice-agent/README.md
+++ b/aai_cli/init/templates/voice-agent/README.md
@@ -18,10 +18,11 @@ The Voice Agent API requires a plan with access enabled.
 ## Deploy to Vercel
 
 Push this folder to a Git repo and import it on Vercel. Set `ASSEMBLYAI_API_KEY` as a
-Vercel environment variable (the local `.env` is git-ignored). No extra config is needed
-(no `vercel.json`): Vercel runs `api/index.py` as the function, and that FastAPI app
-serves the page and assets (from `static/`) plus the `/api/token` route. The WebSocket
-runs browser → AssemblyAI, so nothing long-running is needed.
+Vercel environment variable (the local `.env` is git-ignored). The shipped `vercel.json`
+pins the FastAPI framework preset, so Vercel builds `api/index.py` as the function and
+routes every request to that FastAPI app, which serves the page and assets (from
+`static/`) plus the `/api/token` route. The WebSocket connects the browser directly to
+AssemblyAI, so the Vercel function never has to hold a long-running connection.
 
 ## Deploy elsewhere
 
diff --git a/aai_cli/init/templates/voice-agent/vercel.json b/aai_cli/init/templates/voice-agent/vercel.json
new file mode 100644
index 00000000..10e8a7c1
--- /dev/null
+++ b/aai_cli/init/templates/voice-agent/vercel.json
@@ -0,0 +1,4 @@
+{
+  "$schema": "https://openapi.vercel.sh/vercel.json",
+  "framework": "fastapi"
+}
diff --git a/aai_cli/main.py b/aai_cli/main.py
index ff881efe..11bd8f11 100644
--- a/aai_cli/main.py
+++ b/aai_cli/main.py
@@ -254,10 +254,7 @@ def _offer_or_help(ctx: typer.Context, state: AppState) -> None:
                 'aai transcribe call.mp3 --llm "summarize action items"',
             ),
         ]
-    )
-    + "\n\n[bold]Authentication[/bold]\n\n"
-    "Run 'aai login', or set ASSEMBLYAI_API_KEY (used before the stored key). "
-    "--env or AAI_ENV selects the backend: production, sandbox000.",
+    ),
 )
 def main(
     ctx: typer.Context,
diff --git a/tests/__snapshots__/test_cli_output_snapshots.ambr b/tests/__snapshots__/test_cli_output_snapshots.ambr
index 92fd6ac8..d41d7ae9 100644
--- a/tests/__snapshots__/test_cli_output_snapshots.ambr
+++ b/tests/__snapshots__/test_cli_output_snapshots.ambr
@@ -756,7 +756,7 @@
   │ --llm-interval        FLOAT RANGE [x>=0.0]  Seconds between --llm summary    │
   │                                             refreshes (0 refreshes on every  │
   │                                             turn).                           │
-  │                                             [default: 30.0]                  │
+  │                                             [default: 10.0]                  │
   │ --model               TEXT                  LLM Gateway model.               │
   │                                             [default:                        │
   │                                             claude-haiku-4-5-20251001]       │
diff --git a/tests/test_code_gen.py b/tests/test_code_gen.py
index dc4b69bd..70e2bb34 100644
--- a/tests/test_code_gen.py
+++ b/tests/test_code_gen.py
@@ -102,6 +102,38 @@ def test_transcribe_render_no_config_is_minimal():
     assert "TranscriptionConfig(" not in code  # no kwargs -> no config object
 
 
+def test_transcribe_render_youtube_downloads_before_upload():
+    # AssemblyAI can't fetch a YouTube watch URL itself, so the generated script must
+    # download the audio with yt-dlp first and upload the local file (mirroring the CLI),
+    # not hand the raw URL to transcribe() — which would fail with a download error.
+    code = code_gen.transcribe({}, source="https://www.youtube.com/watch?v=ZRcpnM26nJM")
+    ast.parse(code)
+    assert "import yt_dlp" in code
+    assert "import tempfile" in code
+    assert "yt_dlp.YoutubeDL(" in code
+    assert "extract_info('https://www.youtube.com/watch?v=ZRcpnM26nJM', download=True)" in code
+    # The transcribe call takes the downloaded local path, never the YouTube URL.
+    assert "transcriber.transcribe(_audio)" in code
+    assert "transcribe('https://www.youtube.com" not in code
+    assert 'transcribe("https://www.youtube.com' not in code
+
+
+def test_transcribe_render_youtube_passes_config_to_local_upload():
+    # With a config object the download still wraps the upload, and config flows through.
+    code = code_gen.transcribe({"speaker_labels": True}, source="https://youtu.be/abc123")
+    ast.parse(code)
+    assert "transcriber.transcribe(_audio, config=config)" in code
+
+
+def test_transcribe_render_plain_url_is_not_downloaded():
+    # A non-YouTube http(s) URL is uploaded straight through — no yt-dlp scaffolding.
+    code = code_gen.transcribe({}, source="https://assembly.ai/wildfires.mp3")
+    ast.parse(code)
+    assert "yt_dlp" not in code
+    assert "tempfile" not in code
+    assert "transcriber.transcribe('https://assembly.ai/wildfires.mp3')" in code
+
+
 def test_stream_render_parses_and_is_runnable_shape():
     from assemblyai.streaming.v3 import SpeechModel
 
diff --git a/tests/test_help_rendering.py b/tests/test_help_rendering.py
index eef39823..c467d816 100644
--- a/tests/test_help_rendering.py
+++ b/tests/test_help_rendering.py
@@ -37,16 +37,6 @@ def test_no_flag_name_is_clipped_at_80_columns(argv):
     assert not _CLIPPED_FLAG.search(plain), _CLIPPED_FLAG.search(plain)
 
 
-def test_root_help_documents_authentication():
-    # The shared "how do I authenticate / pick a backend" line: the env vars must be
-    # discoverable from the CLI itself, not only from external docs.
-    result = runner.invoke(app, ["--help"])
-    assert result.exit_code == 0
-    plain = _plain(result.output)
-    assert "ASSEMBLYAI_API_KEY" in plain
-    assert "AAI_ENV" in plain
-
-
 def test_unknown_flag_suggestion_renders_clean():
     # Vendored Click formats this as a stringified 1-tuple ("('(Possible options:
     # --json)',)"); main.py folds the suggestion into the message instead.
diff --git a/tests/test_init_scaffold.py b/tests/test_init_scaffold.py
index 2c6f25a7..f28d20f8 100644
--- a/tests/test_init_scaffold.py
+++ b/tests/test_init_scaffold.py
@@ -33,7 +33,9 @@ def test_scaffold_copies_files_and_renames_dotfiles(tmp_path):
     scaffold.scaffold("audio-transcription", target, api_key="sk-real-key")
     assert (target / "api" / "index.py").exists()
     assert (target / "static" / "index.html").exists()
-    assert not (target / "vercel.json").exists()
+    # vercel.json ships in the scaffold: it pins the FastAPI framework preset so the
+    # `aai deploy` -> `vercel deploy` path doesn't auto-detect the "services" framework.
+    assert (target / "vercel.json").exists()
     # dotfile templates are renamed to their dotted names
     assert (target / ".gitignore").exists()
     assert (target / ".env.example").exists()
diff --git a/tests/test_init_template_contract.py b/tests/test_init_template_contract.py
index 4468caeb..e4b85089 100644
--- a/tests/test_init_template_contract.py
+++ b/tests/test_init_template_contract.py
@@ -1,4 +1,5 @@
 import ast
+import json
 import re
 from pathlib import Path
 
@@ -36,10 +37,24 @@ def test_required_files_present(template_dir):
         "runtime.txt",
         "Dockerfile",
         "dockerignore",
+        "vercel.json",
     ):
         assert (template_dir / rel).exists(), f"{template_dir.name} missing {rel}"
 
 
+def test_vercel_json_pins_fastapi_framework(template_dir):
+    """Vercel's zero-config Python detection now resolves an `api/` + Dockerfile layout
+    to the multi-service `services` framework, which fails the deploy ("no services
+    declared"). Pinning the FastAPI preset makes Vercel build `api/index.py` and route
+    every request to the ASGI app — and stops auto-detection from ever picking
+    `services` again."""
+    config = json.loads((template_dir / "vercel.json").read_text())
+    assert config.get("framework") == "fastapi", (
+        f'{template_dir.name}: vercel.json must pin "framework": "fastapi" so Vercel '
+        f'never auto-detects the "services" framework; got {config.get("framework")!r}'
+    )
+
+
 def test_dockerfile_runs_uvicorn_on_platform_port(template_dir):
     """Fly/Railway/Render(Docker)/Cloudflare-Containers build this image. It must run
     uvicorn on the app, bind 0.0.0.0, and honor the platform's injected ${PORT}."""