From 61461de2ef83fc4caa017556e0e18197c2ce839b Mon Sep 17 00:00:00 2001 From: Alex Kroman Date: Wed, 10 Jun 2026 07:57:20 -0700 Subject: [PATCH] Fix YouTube --show-code, Vercel deploy, stream poll default, --llm help - code_gen: download YouTube audio with yt-dlp before upload (raw URL fails) - init templates: ship vercel.json pinning the FastAPI framework preset so `aai deploy` doesn't hit Vercel's "services" framework error - stream: default --llm-interval 30s -> 10s - main: drop the redundant Authentication block from `aai --help` Co-Authored-By: Claude Opus 4.8 (1M context) --- aai_cli/code_gen/transcribe.py | 39 ++++++++++++++----- aai_cli/commands/stream.py | 2 +- .../templates/audio-transcription/README.md | 6 +-- .../templates/audio-transcription/vercel.json | 4 ++ .../init/templates/live-captions/README.md | 9 +++-- .../init/templates/live-captions/vercel.json | 4 ++ aai_cli/init/templates/voice-agent/README.md | 9 +++-- .../init/templates/voice-agent/vercel.json | 4 ++ aai_cli/main.py | 5 +-- .../test_cli_output_snapshots.ambr | 2 +- tests/test_code_gen.py | 32 +++++++++++++++ tests/test_help_rendering.py | 10 ----- tests/test_init_scaffold.py | 4 +- tests/test_init_template_contract.py | 15 +++++++ 14 files changed, 108 insertions(+), 37 deletions(-) create mode 100644 aai_cli/init/templates/audio-transcription/vercel.json create mode 100644 aai_cli/init/templates/live-captions/vercel.json create mode 100644 aai_cli/init/templates/voice-agent/vercel.json diff --git a/aai_cli/code_gen/transcribe.py b/aai_cli/code_gen/transcribe.py index 8c1ab5fa..e191fe95 100644 --- a/aai_cli/code_gen/transcribe.py +++ b/aai_cli/code_gen/transcribe.py @@ -2,7 +2,7 @@ from typing import cast -from aai_cli import environments, llm +from aai_cli import environments, llm, youtube from aai_cli.code_gen import serialize, snippets # ``-o/--output`` choice -> printed-result code, mirroring the run path's @@ -40,21 +40,29 @@ def render( """ if output is not None: llm_gateway = None # `-o` returns before the chain runs in the real command + is_youtube = youtube.is_youtube_url(source) parts = ( - _header_block(llm_gateway, output) - + _transcribe_block(merged, source) + _header_block(llm_gateway, output, is_youtube=is_youtube) + + _transcribe_block(merged, source, is_youtube=is_youtube) + _result_block(merged, llm_gateway, output) ) parts.append("") return "\n".join(parts) -def _header_block(llm_gateway: dict[str, object] | None, output: str | None) -> list[str]: +def _header_block( + llm_gateway: dict[str, object] | None, output: str | None, *, is_youtube: bool +) -> list[str]: """Imports plus the api-key (and non-default environment) settings lines.""" stdlib_imports = ["import os"] + if is_youtube: + # The YouTube path downloads audio to a temp dir before uploading. + stdlib_imports += ["import tempfile"] if output == "json": stdlib_imports.insert(0, "import json") imports = ["import assemblyai as aai"] + if is_youtube: + imports.append("import yt_dlp") if llm_gateway: imports.append("from openai import OpenAI") parts = [ @@ -73,20 +81,33 @@ def _header_block(llm_gateway: dict[str, object] | None, output: str | None) -> return parts -def _transcribe_block(merged: dict[str, object], source: str) -> list[str]: +def _transcribe_block(merged: dict[str, object], source: str, *, is_youtube: bool) -> list[str]: """The transcriber setup, optional config, the transcribe call, and error check.""" parts = ["", "transcriber = aai.Transcriber()"] + config_arg = "" if merged: kwargs = "\n".join(serialize.config_kwarg_lines(merged, indent=4)) parts += ["", f"config = aai.TranscriptionConfig(\n{kwargs}\n)"] - call = f"transcript = transcriber.transcribe({source!r}, config=config)" + config_arg = ", config=config" + if is_youtube: + # AssemblyAI can't read a YouTube watch URL itself, so download the audio + # with yt-dlp into a temp dir and upload the local file — what the CLI does. + parts += [ + "", + "# AssemblyAI can't fetch a YouTube URL itself; download the audio first.", + "with tempfile.TemporaryDirectory() as _tmp:", + " with yt_dlp.YoutubeDL(", + ' {"format": "bestaudio/best", "outtmpl": f"{_tmp}/%(id)s.%(ext)s"}', + " ) as _ydl:", + f" _info = _ydl.extract_info({source!r}, download=True)", + " _audio = _ydl.prepare_filename(_info)", + f" transcript = transcriber.transcribe(_audio{config_arg})", + ] else: - call = f"transcript = transcriber.transcribe({source!r})" + parts += ["", f"transcript = transcriber.transcribe({source!r}{config_arg})"] return [ *parts, "", - call, - "", "if transcript.status == aai.TranscriptStatus.error:", " raise RuntimeError(transcript.error)", "", diff --git a/aai_cli/commands/stream.py b/aai_cli/commands/stream.py index dc74b610..d650bf15 100644 --- a/aai_cli/commands/stream.py +++ b/aai_cli/commands/stream.py @@ -290,7 +290,7 @@ def stream( rich_help_panel=help_panels.OPT_LLM, ), llm_interval: float = typer.Option( - 30.0, + 10.0, "--llm-interval", help="Seconds between --llm summary refreshes (0 refreshes on every turn).", min=0.0, diff --git a/aai_cli/init/templates/audio-transcription/README.md b/aai_cli/init/templates/audio-transcription/README.md index 83f559d7..628d5e40 100644 --- a/aai_cli/init/templates/audio-transcription/README.md +++ b/aai_cli/init/templates/audio-transcription/README.md @@ -16,9 +16,9 @@ aai dev # installs deps if needed, starts the server, opens http://localhost:3 Push this folder to a Git repo and import it on Vercel. Set `ASSEMBLYAI_API_KEY` as a Vercel environment variable (the local `.env` is git-ignored and not deployed). -No extra config is needed (no `vercel.json`): Vercel runs `api/index.py` as the -function, and that FastAPI app serves both the page and assets (from `static/`) -and the API. +The shipped `vercel.json` pins the FastAPI framework preset, so Vercel builds +`api/index.py` as the function and routes every request to that FastAPI app, which +serves both the page and assets (from `static/`) and the API. ## Deploy elsewhere diff --git a/aai_cli/init/templates/audio-transcription/vercel.json b/aai_cli/init/templates/audio-transcription/vercel.json new file mode 100644 index 00000000..10e8a7c1 --- /dev/null +++ b/aai_cli/init/templates/audio-transcription/vercel.json @@ -0,0 +1,4 @@ +{ + "$schema": "https://openapi.vercel.sh/vercel.json", + "framework": "fastapi" +} diff --git a/aai_cli/init/templates/live-captions/README.md b/aai_cli/init/templates/live-captions/README.md index 8186be7e..52336b0b 100644 --- a/aai_cli/init/templates/live-captions/README.md +++ b/aai_cli/init/templates/live-captions/README.md @@ -17,10 +17,11 @@ aai dev # opens http://localhost:3000 (allow microphone access) ## Deploy to Vercel Push this folder to a Git repo and import it on Vercel. Set `ASSEMBLYAI_API_KEY` as a -Vercel environment variable (the local `.env` is git-ignored). No extra config is needed -(no `vercel.json`): Vercel runs `api/index.py` as the function, and that FastAPI app -serves the page and assets (from `static/`) plus the `/api/token` route. The WebSocket -runs browser → AssemblyAI, so nothing long-running is needed. +Vercel environment variable (the local `.env` is git-ignored). The shipped `vercel.json` +pins the FastAPI framework preset, so Vercel builds `api/index.py` as the function and +routes every request to that FastAPI app, which serves the page and assets (from +`static/`) plus the `/api/token` route. The WebSocket runs browser → AssemblyAI, so +nothing long-running is needed. ## Deploy elsewhere diff --git a/aai_cli/init/templates/live-captions/vercel.json b/aai_cli/init/templates/live-captions/vercel.json new file mode 100644 index 00000000..10e8a7c1 --- /dev/null +++ b/aai_cli/init/templates/live-captions/vercel.json @@ -0,0 +1,4 @@ +{ + "$schema": "https://openapi.vercel.sh/vercel.json", + "framework": "fastapi" +} diff --git a/aai_cli/init/templates/voice-agent/README.md b/aai_cli/init/templates/voice-agent/README.md index 8f4103e7..d05141bf 100644 --- a/aai_cli/init/templates/voice-agent/README.md +++ b/aai_cli/init/templates/voice-agent/README.md @@ -18,10 +18,11 @@ The Voice Agent API requires a plan with access enabled. ## Deploy to Vercel Push this folder to a Git repo and import it on Vercel. Set `ASSEMBLYAI_API_KEY` as a -Vercel environment variable (the local `.env` is git-ignored). No extra config is needed -(no `vercel.json`): Vercel runs `api/index.py` as the function, and that FastAPI app -serves the page and assets (from `static/`) plus the `/api/token` route. The WebSocket -runs browser → AssemblyAI, so nothing long-running is needed. +Vercel environment variable (the local `.env` is git-ignored). The shipped `vercel.json` +pins the FastAPI framework preset, so Vercel builds `api/index.py` as the function and +routes every request to that FastAPI app, which serves the page and assets (from +`static/`) plus the `/api/token` route. The WebSocket runs browser → AssemblyAI, so +nothing long-running is needed. ## Deploy elsewhere diff --git a/aai_cli/init/templates/voice-agent/vercel.json b/aai_cli/init/templates/voice-agent/vercel.json new file mode 100644 index 00000000..10e8a7c1 --- /dev/null +++ b/aai_cli/init/templates/voice-agent/vercel.json @@ -0,0 +1,4 @@ +{ + "$schema": "https://openapi.vercel.sh/vercel.json", + "framework": "fastapi" +} diff --git a/aai_cli/main.py b/aai_cli/main.py index ff881efe..11bd8f11 100644 --- a/aai_cli/main.py +++ b/aai_cli/main.py @@ -254,10 +254,7 @@ def _offer_or_help(ctx: typer.Context, state: AppState) -> None: 'aai transcribe call.mp3 --llm "summarize action items"', ), ] - ) - + "\n\n[bold]Authentication[/bold]\n\n" - "Run 'aai login', or set ASSEMBLYAI_API_KEY (used before the stored key). " - "--env or AAI_ENV selects the backend: production, sandbox000.", + ), ) def main( ctx: typer.Context, diff --git a/tests/__snapshots__/test_cli_output_snapshots.ambr b/tests/__snapshots__/test_cli_output_snapshots.ambr index 92fd6ac8..d41d7ae9 100644 --- a/tests/__snapshots__/test_cli_output_snapshots.ambr +++ b/tests/__snapshots__/test_cli_output_snapshots.ambr @@ -756,7 +756,7 @@ │ --llm-interval FLOAT RANGE [x>=0.0] Seconds between --llm summary │ │ refreshes (0 refreshes on every │ │ turn). │ - │ [default: 30.0] │ + │ [default: 10.0] │ │ --model TEXT LLM Gateway model. │ │ [default: │ │ claude-haiku-4-5-20251001] │ diff --git a/tests/test_code_gen.py b/tests/test_code_gen.py index dc4b69bd..70e2bb34 100644 --- a/tests/test_code_gen.py +++ b/tests/test_code_gen.py @@ -102,6 +102,38 @@ def test_transcribe_render_no_config_is_minimal(): assert "TranscriptionConfig(" not in code # no kwargs -> no config object +def test_transcribe_render_youtube_downloads_before_upload(): + # AssemblyAI can't fetch a YouTube watch URL itself, so the generated script must + # download the audio with yt-dlp first and upload the local file (mirroring the CLI), + # not hand the raw URL to transcribe() — which would fail with a download error. + code = code_gen.transcribe({}, source="https://www.youtube.com/watch?v=ZRcpnM26nJM") + ast.parse(code) + assert "import yt_dlp" in code + assert "import tempfile" in code + assert "yt_dlp.YoutubeDL(" in code + assert "extract_info('https://www.youtube.com/watch?v=ZRcpnM26nJM', download=True)" in code + # The transcribe call takes the downloaded local path, never the YouTube URL. + assert "transcriber.transcribe(_audio)" in code + assert "transcribe('https://www.youtube.com" not in code + assert 'transcribe("https://www.youtube.com' not in code + + +def test_transcribe_render_youtube_passes_config_to_local_upload(): + # With a config object the download still wraps the upload, and config flows through. + code = code_gen.transcribe({"speaker_labels": True}, source="https://youtu.be/abc123") + ast.parse(code) + assert "transcriber.transcribe(_audio, config=config)" in code + + +def test_transcribe_render_plain_url_is_not_downloaded(): + # A non-YouTube http(s) URL is uploaded straight through — no yt-dlp scaffolding. + code = code_gen.transcribe({}, source="https://assembly.ai/wildfires.mp3") + ast.parse(code) + assert "yt_dlp" not in code + assert "tempfile" not in code + assert "transcriber.transcribe('https://assembly.ai/wildfires.mp3')" in code + + def test_stream_render_parses_and_is_runnable_shape(): from assemblyai.streaming.v3 import SpeechModel diff --git a/tests/test_help_rendering.py b/tests/test_help_rendering.py index eef39823..c467d816 100644 --- a/tests/test_help_rendering.py +++ b/tests/test_help_rendering.py @@ -37,16 +37,6 @@ def test_no_flag_name_is_clipped_at_80_columns(argv): assert not _CLIPPED_FLAG.search(plain), _CLIPPED_FLAG.search(plain) -def test_root_help_documents_authentication(): - # The shared "how do I authenticate / pick a backend" line: the env vars must be - # discoverable from the CLI itself, not only from external docs. - result = runner.invoke(app, ["--help"]) - assert result.exit_code == 0 - plain = _plain(result.output) - assert "ASSEMBLYAI_API_KEY" in plain - assert "AAI_ENV" in plain - - def test_unknown_flag_suggestion_renders_clean(): # Vendored Click formats this as a stringified 1-tuple ("('(Possible options: # --json)',)"); main.py folds the suggestion into the message instead. diff --git a/tests/test_init_scaffold.py b/tests/test_init_scaffold.py index 2c6f25a7..f28d20f8 100644 --- a/tests/test_init_scaffold.py +++ b/tests/test_init_scaffold.py @@ -33,7 +33,9 @@ def test_scaffold_copies_files_and_renames_dotfiles(tmp_path): scaffold.scaffold("audio-transcription", target, api_key="sk-real-key") assert (target / "api" / "index.py").exists() assert (target / "static" / "index.html").exists() - assert not (target / "vercel.json").exists() + # vercel.json ships in the scaffold: it pins the FastAPI framework preset so the + # `aai deploy` -> `vercel deploy` path doesn't auto-detect the "services" framework. + assert (target / "vercel.json").exists() # dotfile templates are renamed to their dotted names assert (target / ".gitignore").exists() assert (target / ".env.example").exists() diff --git a/tests/test_init_template_contract.py b/tests/test_init_template_contract.py index 4468caeb..e4b85089 100644 --- a/tests/test_init_template_contract.py +++ b/tests/test_init_template_contract.py @@ -1,4 +1,5 @@ import ast +import json import re from pathlib import Path @@ -36,10 +37,24 @@ def test_required_files_present(template_dir): "runtime.txt", "Dockerfile", "dockerignore", + "vercel.json", ): assert (template_dir / rel).exists(), f"{template_dir.name} missing {rel}" +def test_vercel_json_pins_fastapi_framework(template_dir): + """Vercel's zero-config Python detection now resolves an `api/` + Dockerfile layout + to the multi-service `services` framework, which fails the deploy ("no services + declared"). Pinning the FastAPI preset makes Vercel build `api/index.py` and route + every request to the ASGI app — and stops auto-detection from ever picking + `services` again.""" + config = json.loads((template_dir / "vercel.json").read_text()) + assert config.get("framework") == "fastapi", ( + f'{template_dir.name}: vercel.json must pin "framework": "fastapi" so Vercel ' + f'never auto-detects the "services" framework; got {config.get("framework")!r}' + ) + + def test_dockerfile_runs_uvicorn_on_platform_port(template_dir): """Fly/Railway/Render(Docker)/Cloudflare-Containers build this image. It must run uvicorn on the app, bind 0.0.0.0, and honor the platform's injected ${PORT}."""