diff --git a/.importlinter b/.importlinter index 38ccd1be..3959aa5d 100644 --- a/.importlinter +++ b/.importlinter @@ -7,6 +7,7 @@ name = Core modules do not import command modules type = forbidden source_modules = aai_cli.agent + aai_cli.agent_exec aai_cli.argscan aai_cli.auth aai_cli.client @@ -24,11 +25,14 @@ source_modules = aai_cli.help_text aai_cli.init aai_cli.llm + aai_cli.llm_exec aai_cli.microphone aai_cli.options aai_cli.output aai_cli.render + aai_cli.speak_exec aai_cli.stdio + aai_cli.stream_exec aai_cli.streaming aai_cli.telemetry aai_cli.theme diff --git a/AGENTS.md b/AGENTS.md index b65cd506..7c49469a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -164,6 +164,8 @@ A Typer CLI. `aai_cli/main.py` builds the `app`, registers each command sub-app, Each file in `aai_cli/commands/` is a Typer sub-app (`transcribe`, `stream`, `agent`, `speak`, `llm`, `transcripts`, `login` (login/logout/whoami), `doctor`, `init`, `dev`, `share`, `deploy`, `setup`, `onboard`, `account` (balance/usage/limits), `keys`, `sessions`, `audit`, `telemetry` (status/enable/disable)). Command bodies run through `context.run_command(ctx, fn, json=...)`, which maps any `CLIError` to clean stderr output + the error's exit code. Commands never print tracebacks for expected failures. +**Options/run split for flag-heavy commands** (gh-CLI style): the Typer function only parses argv into a frozen `<Command>Options` dataclass and hands it to a module-level `run_<command>(opts, state, *, json_mode)` through a thin lambda adapter in `run_command(ctx, ..., json=...)`. The five run commands follow it — `aai_cli/stream_exec.py` (the reference implementation), `transcribe_exec.py`, `agent_exec.py`, `speak_exec.py`, `llm_exec.py`. 
Because the run path is a plain function of data, tests construct options directly (`dataclasses.replace` off a defaults instance, see `tests/test_stream_exec.py` and `tests/test_command_options_seam.py`) instead of round-tripping argv through `CliRunner` — which is also the cheap way to kill mutation-gate mutants on orchestration lines. Follow this for new or heavily-reworked commands with long bodies; small commands keep the inline `body()` closure — the dataclass is pure ceremony there. + ### Cross-cutting state (resolution order matters) - **`context.py`** — `AppState` (profile, env) is attached to the Typer context in the root `@app.callback()`. `run_command` is the standard command wrapper. diff --git a/aai_cli/agent_exec.py b/aai_cli/agent_exec.py new file mode 100644 index 00000000..8d8b2cdc --- /dev/null +++ b/aai_cli/agent_exec.py @@ -0,0 +1,154 @@ +"""Run logic for `assembly agent`: the options/run split (see AGENTS.md). + +The command module (aai_cli/commands/agent.py) only parses argv — it builds an +``AgentOptions`` and hands it to ``run_agent`` via ``context.run_command``, so tests +can drive validation, --show-code, and session wiring by constructing options +directly, with no CliRunner argv round-trip. +""" + +from __future__ import annotations + +import contextlib +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +import typer + +from aai_cli import choices, client, code_gen, output +from aai_cli.agent.audio import SAMPLE_RATE, DuplexAudio, NullPlayer +from aai_cli.agent.render import AgentRenderer +from aai_cli.agent.session import AgentRunConfig, run_session +from aai_cli.agent.voices import VOICE_NAMES +from aai_cli.context import AppState +from aai_cli.errors import CLIError, UsageError +from aai_cli.streaming.session import validate_output_flags +from aai_cli.streaming.sources import FileSource + + +@dataclass(frozen=True) +class AgentOptions: + """Every `assembly agent` conversation flag as plain data. 
+ + ``--list-voices`` is excluded: it dispatches to its own auth-free body in the + command module. ``--json`` is excluded: run_command resolves it into the + ``json_mode`` argument. + """ + + source: str | None + sample: bool + voice: str + system_prompt: str + system_prompt_file: Path | None + greeting: str + device: int | None + output_field: choices.TextOrJson | None + show_code: bool + + +def _resolve_system_prompt(system_prompt: str, system_prompt_file: Path | None) -> str: + """The persona text: a --system-prompt-file (if given) overrides --system-prompt.""" + if system_prompt_file is None: + return system_prompt + try: + return system_prompt_file.read_text(encoding="utf-8") + except OSError as exc: + raise CLIError( + f"Could not read --system-prompt-file {system_prompt_file}: {exc}", + error_type="file_not_found", + exit_code=2, + suggestion="Check the path and that the file is readable.", + ) from exc + + +def _open_audio( + renderer: AgentRenderer, + *, + source: str | None, + sample: bool, + device: int | None, + from_file: bool, +) -> tuple[Any, Any]: + """Build the (mic, player) pair for either file-driven or live-mic input.""" + if from_file: + # Stream the clip as the user's speech and stop after the agent replies. + # No greeting and full-duplex so no part of the clip is muted/dropped, + # and a NullPlayer since there is no listener for the reply audio. + return FileSource(client.resolve_audio_source(source, sample=sample)), NullPlayer() + # One full-duplex stream for mic + speaker: macOS rejects two separate + # streams on a device, which silently kills capture. + duplex = DuplexAudio(target_rate=SAMPLE_RATE, device=device) + # notice() self-suppresses in JSON mode and routes to stderr otherwise, so a + # piped `assembly agent | …` never reads this advisory as transcript data. 
+ renderer.notice( + "Use headphones — the mic stays open while the agent speaks, " + "so speakers would let it hear itself.\n" + ) + return duplex.mic, duplex.player + + +def _print_show_code(opts: AgentOptions, system_prompt_text: str) -> None: + """Print the equivalent agent script and exit without authenticating or opening + audio. Raw stdout for `> script.py`.""" + if opts.source or opts.sample: + # A faithful file-driven agent script would need the CLI's whole + # ffmpeg-decode + ready-gate + exit-after-reply machinery, which is + # impractical to inline; the snippet is microphone-driven, so say so + # on stderr instead of silently dropping the source. stderr keeps + # `--show-code > script.py` byte-clean. + output.error_console.print( + "[aai.warn]Note:[/aai.warn] the generated script uses the microphone; " + "it does not stream the audio source you passed." + ) + output.print_code(code_gen.agent(opts.voice, system_prompt_text, opts.greeting)) + + +def run_agent(opts: AgentOptions, state: AppState, *, json_mode: bool) -> None: + """Execute one `assembly agent` conversation from already-parsed flags.""" + validate_output_flags(json_mode=json_mode, output_field=opts.output_field) + text_mode, json_mode = output.stream_output_modes(opts.output_field, json_mode=json_mode) + if opts.voice not in VOICE_NAMES: + raise UsageError( + f"Unknown voice {opts.voice!r}.", + suggestion="Run 'assembly agent --list-voices' to see the options.", + ) + system_prompt_text = _resolve_system_prompt(opts.system_prompt, opts.system_prompt_file) + + if opts.show_code: + _print_show_code(opts, system_prompt_text) + return + + from_file = bool(opts.source) or opts.sample + if from_file and opts.device is not None: + raise UsageError("--device applies only to microphone input.") + if from_file: + # Existence-check the clip before credentials, so a typo'd path reads as + # "file not found" instead of triggering a login. 
+ client.resolve_audio_source(opts.source, sample=opts.sample) + api_key = state.resolve_api_key() + + renderer = AgentRenderer( + json_mode=json_mode, + text_mode=text_mode, + mic_input=not from_file, + ) + audio, player = _open_audio( + renderer, source=opts.source, sample=opts.sample, device=opts.device, from_file=from_file + ) + run_config = AgentRunConfig( + voice=opts.voice, + system_prompt=system_prompt_text, + greeting="" if from_file else opts.greeting, + full_duplex=True, # one duplex stream -> mic always open (use headphones) + exit_after_reply=from_file, + ) + try: + run_session(api_key, renderer=renderer, player=player, mic=audio, config=run_config) + except KeyboardInterrupt: + renderer.stopped() + except BrokenPipeError as exc: + # Downstream consumer (e.g. `| head`) closed the pipe; stop quietly. + raise typer.Exit(code=0) from exc + finally: + with contextlib.suppress(BrokenPipeError): + renderer.close() diff --git a/aai_cli/commands/agent.py b/aai_cli/commands/agent.py index b3e259c2..da9dbe8e 100644 --- a/aai_cli/commands/agent.py +++ b/aai_cli/commands/agent.py @@ -1,77 +1,23 @@ from __future__ import annotations -import contextlib from pathlib import Path -from typing import Any import typer -from aai_cli import choices, client, code_gen, help_panels, options, output -from aai_cli.agent.audio import SAMPLE_RATE, DuplexAudio, NullPlayer -from aai_cli.agent.render import AgentRenderer -from aai_cli.agent.session import ( - DEFAULT_GREETING, - DEFAULT_PROMPT, - AgentRunConfig, - run_session, -) +from aai_cli import agent_exec, choices, help_panels, options, output +from aai_cli.agent.session import DEFAULT_GREETING, DEFAULT_PROMPT from aai_cli.agent.voices import ( DEFAULT_VOICE, - VOICE_NAMES, VOICES, complete_voice, format_voice_list, ) from aai_cli.context import AppState, run_command -from aai_cli.errors import CLIError, UsageError from aai_cli.help_text import examples_epilog -from aai_cli.streaming.session import validate_output_flags -from 
aai_cli.streaming.sources import FileSource app = typer.Typer() -def _resolve_system_prompt(system_prompt: str, system_prompt_file: Path | None) -> str: - """The persona text: a --system-prompt-file (if given) overrides --system-prompt.""" - if system_prompt_file is None: - return system_prompt - try: - return system_prompt_file.read_text(encoding="utf-8") - except OSError as exc: - raise CLIError( - f"Could not read --system-prompt-file {system_prompt_file}: {exc}", - error_type="file_not_found", - exit_code=2, - suggestion="Check the path and that the file is readable.", - ) from exc - - -def _open_audio( - renderer: AgentRenderer, - *, - source: str | None, - sample: bool, - device: int | None, - from_file: bool, -) -> tuple[Any, Any]: - """Build the (mic, player) pair for either file-driven or live-mic input.""" - if from_file: - # Stream the clip as the user's speech and stop after the agent replies. - # No greeting and full-duplex so no part of the clip is muted/dropped, - # and a NullPlayer since there is no listener for the reply audio. - return FileSource(client.resolve_audio_source(source, sample=sample)), NullPlayer() - # One full-duplex stream for mic + speaker: macOS rejects two separate - # streams on a device, which silently kills capture. - duplex = DuplexAudio(target_rate=SAMPLE_RATE, device=device) - # notice() self-suppresses in JSON mode and routes to stderr otherwise, so a - # piped `assembly agent | …` never reads this advisory as transcript data. 
- renderer.notice( - "Use headphones — the mic stays open while the agent speaks, " - "so speakers would let it hear itself.\n" - ) - return duplex.mic, duplex.player - - def _emit_voice_list(_state: AppState, json_mode: bool) -> None: """--list-voices body, routed through run_command so --json yields a machine-readable array instead of the human list; needs no auth.""" @@ -149,65 +95,19 @@ def agent( run_command(ctx, _emit_voice_list, json=json_out) return - def body(state: AppState, json_mode: bool) -> None: - validate_output_flags(json_mode=json_mode, output_field=output_field) - text_mode, json_mode = output.stream_output_modes(output_field, json_mode=json_mode) - if voice not in VOICE_NAMES: - raise UsageError( - f"Unknown voice {voice!r}.", - suggestion="Run 'assembly agent --list-voices' to see the options.", - ) - system_prompt_text = _resolve_system_prompt(system_prompt, system_prompt_file) - - if show_code: - # Print-only: emit the equivalent agent script from the flags and exit - # without authenticating or opening audio. Raw stdout for `> script.py`. - if source or sample: - # A faithful file-driven agent script would need the CLI's whole - # ffmpeg-decode + ready-gate + exit-after-reply machinery, which is - # impractical to inline; the snippet is microphone-driven, so say so - # on stderr instead of silently dropping the source. stderr keeps - # `--show-code > script.py` byte-clean. - output.error_console.print( - "[aai.warn]Note:[/aai.warn] the generated script uses the microphone; " - "it does not stream the audio source you passed." - ) - output.print_code(code_gen.agent(voice, system_prompt_text, greeting)) - return - - from_file = bool(source) or sample - if from_file and device is not None: - raise UsageError("--device applies only to microphone input.") - if from_file: - # Existence-check the clip before credentials, so a typo'd path reads as - # "file not found" instead of triggering a login. 
- client.resolve_audio_source(source, sample=sample) - api_key = state.resolve_api_key() - - renderer = AgentRenderer( - json_mode=json_mode, - text_mode=text_mode, - mic_input=not from_file, - ) - audio, player = _open_audio( - renderer, source=source, sample=sample, device=device, from_file=from_file - ) - run_config = AgentRunConfig( - voice=voice, - system_prompt=system_prompt_text, - greeting="" if from_file else greeting, - full_duplex=True, # one duplex stream -> mic always open (use headphones) - exit_after_reply=from_file, - ) - try: - run_session(api_key, renderer=renderer, player=player, mic=audio, config=run_config) - except KeyboardInterrupt: - renderer.stopped() - except BrokenPipeError as exc: - # Downstream consumer (e.g. `| head`) closed the pipe; stop quietly. - raise typer.Exit(code=0) from exc - finally: - with contextlib.suppress(BrokenPipeError): - renderer.close() - - run_command(ctx, body, json=json_out) + opts = agent_exec.AgentOptions( + source=source, + sample=sample, + voice=voice, + system_prompt=system_prompt, + system_prompt_file=system_prompt_file, + greeting=greeting, + device=device, + output_field=output_field, + show_code=show_code, + ) + run_command( + ctx, + lambda state, json_mode: agent_exec.run_agent(opts, state, json_mode=json_mode), + json=json_out, + ) diff --git a/aai_cli/commands/llm.py b/aai_cli/commands/llm.py index 2850a264..14d088fd 100644 --- a/aai_cli/commands/llm.py +++ b/aai_cli/commands/llm.py @@ -3,46 +3,15 @@ from collections.abc import Callable import typer -from rich.markup import escape -from aai_cli import choices, client, help_panels, options, output, stdio +from aai_cli import choices, help_panels, llm_exec, options, output from aai_cli import llm as gateway from aai_cli.context import AppState, run_command from aai_cli.errors import UsageError -from aai_cli.follow import FollowRenderer from aai_cli.help_text import examples_epilog app = typer.Typer() -_FOLLOW_STDIN_MESSAGE = ( - "--follow needs 
transcript text piped on stdin, e.g. " - '`assembly stream -o text | assembly llm -f "summarize action items as I talk"`.' -) - - -def _validate_follow_args( - prompt: str | None, output_field: str | None, transcript_id: str | None -) -> str: - """Reject flag combinations that don't apply to --follow's live-panel mode. - - Returns the validated (non-empty) prompt so the caller has a plain ``str``. - """ - if not prompt: - raise UsageError("Provide a prompt to run over the streamed transcript.") - if output_field is not None: - raise UsageError( - "--output applies to one-shot mode; --follow renders a live panel " - "(or NDJSON when piped)." - ) - if transcript_id: - raise UsageError( - "--follow runs over live transcript text piped on stdin; it can't be " - "combined with --transcript-id." - ) - if not stdio.stdin_is_piped(): - raise UsageError(_FOLLOW_STDIN_MESSAGE) - return prompt - def _emit_model_list(_state: AppState, json_mode: bool) -> None: """--list-models body, routed through run_command so --json yields a @@ -67,27 +36,6 @@ def body(state: AppState, json_mode: bool) -> None: return body -def _stdin_transcript_text( - state: AppState, json_mode: bool, transcript_id: str | None -) -> str | None: - """Resolve the inline transcript text for one-shot mode. - - Text piped on stdin becomes the content the prompt operates on, unless an - explicit --transcript-id is given — that injects server-side and takes - priority, so piped text is ignored with a visible warning (suppressed by - --quiet, structured under --json). - """ - if transcript_id is None: - return stdio.piped_stdin_text() - # Same cheap local id check as `transcripts get`, before auth or network. 
- client.validate_transcript_id(transcript_id) - if stdio.stdin_is_piped() and not state.quiet: - output.emit_warning( - "Ignoring piped stdin; --transcript-id takes priority.", json_mode=json_mode - ) - return None - - @app.command( rich_help_panel=help_panels.TRANSCRIPTION, epilog=examples_epilog( @@ -149,62 +97,17 @@ def llm( run_command(ctx, _list_models_body(output_field), json=json_out) return - def follow_body(state: AppState, json_mode: bool) -> None: - prompt_text = _validate_follow_args(prompt, output_field, transcript_id) - api_key = state.resolve_api_key() - - def ask(transcript_text: str) -> str: - messages = gateway.build_messages( - prompt_text, system=system, transcript_text=transcript_text - ) - response = gateway.complete( - api_key, model=model, messages=messages, max_tokens=max_tokens - ) - return gateway.content_of(response) - - transcript: list[str] = [] - interrupted = False - with FollowRenderer(json_mode=json_mode) as render: - # Ctrl-C is the normal "stop watching" signal -> exit cleanly (code 0). - try: - for turn in stdio.iter_piped_stdin_lines(): - transcript.append(turn) - render(ask("\n".join(transcript)), len(transcript)) - except KeyboardInterrupt: - interrupted = True - if not transcript and not interrupted: - # An empty pipe (`assembly llm -f "…" None: - if not prompt: - raise UsageError( - "Provide a prompt.", - suggestion="Or pass --list-models to see available models.", - ) - prompt_text = prompt - stdin_text = _stdin_transcript_text(state, json_mode, transcript_id) - api_key = state.resolve_api_key() - messages = gateway.build_messages( - prompt_text, system=system, transcript_id=transcript_id, transcript_text=stdin_text - ) - response = gateway.complete( - api_key, - model=model, - messages=messages, - max_tokens=max_tokens, - transcript_id=transcript_id, - ) - content = gateway.content_of(response) - if output_field == "text": - # Just the answer, raw — so `… | assembly llm -o text "…" | next` composes cleanly. 
- output.emit_text(content) - return - output.emit( - {"model": model, "output": content, "usage": gateway.usage_of(response)}, - lambda d: escape(str(d["output"])), - json_mode=json_mode or output_field == "json", - ) - - run_command(ctx, follow_body if follow else body, json=json_out) + opts = llm_exec.LlmOptions( + prompt=prompt, + model=model, + transcript_id=transcript_id, + system=system, + follow=follow, + output_field=output_field, + max_tokens=max_tokens, + ) + run_command( + ctx, + lambda state, json_mode: llm_exec.run_llm(opts, state, json_mode=json_mode), + json=json_out, + ) diff --git a/aai_cli/commands/speak.py b/aai_cli/commands/speak.py index 89b13770..948def0b 100644 --- a/aai_cli/commands/speak.py +++ b/aai_cli/commands/speak.py @@ -1,169 +1,16 @@ from __future__ import annotations -import sys from pathlib import Path import typer -from aai_cli import help_panels, options, output -from aai_cli.context import AppState, run_command -from aai_cli.errors import CLIError, UsageError +from aai_cli import help_panels, options, speak_exec +from aai_cli.context import run_command from aai_cli.help_text import examples_epilog -from aai_cli.tts import audio, dialogue, session +from aai_cli.speak_exec import DEFAULT_LANGUAGE app = typer.Typer() -# The streaming-TTS reference client defaults to the PocketTTS "jane" voice and -# English, so the CLI sends the same and a bare `assembly speak` works out of the box. -# Override either with --voice/--language. -DEFAULT_VOICE = "jane" -DEFAULT_LANGUAGE = "English" - - -def _read_text(text: str | None) -> str: - """The text to speak: the non-blank argument, or piped stdin when the argument - is omitted entirely. A *blank* argument (e.g. 
"") is a usage error, never a - silent fall-through to stdin — so `assembly speak "$MSG"` with an empty MSG fails - fast instead of consuming whatever happens to be on the pipe.""" - if text is not None and text.strip(): - return text - # `text is None` (argument omitted), not merely blank: see the docstring rationale. - if text is None and not sys.stdin.isatty(): - piped = sys.stdin.read().strip() - if piped: - return piped - raise UsageError( - "No text to speak.", - suggestion='Pass text as an argument: assembly speak "Hello" — or pipe it via stdin.', - ) - - -def _output_audio(result: session.SpeakResult, out: Path | None) -> None: - """Write a WAV when --out is given, else play through the speakers.""" - if out is not None: - audio.write_wav(out, result.pcm, result.sample_rate) - else: - audio.play_pcm(result.pcm, result.sample_rate) - - -def _disposition(out: Path | None) -> str: - return f"saved to {out}" if out is not None else "played" - - -def _emit_single( - result: session.SpeakResult, - cfg: session.SpeakConfig, - out: Path | None, - *, - json_mode: bool, -) -> None: - """Single-voice result: a JSON object on stdout, or a human note on stderr.""" - duration = round(result.audio_duration_seconds, 3) - if json_mode: - output.emit_ndjson( - { - "voice": cfg.voice, - "language": cfg.language, - "sample_rate": result.sample_rate, - "audio_duration_seconds": duration, - "bytes": len(result.pcm), - "out": str(out) if out is not None else None, - } - ) - return - output.error_console.print( - f"[aai.muted]Spoke {duration}s of audio ({_disposition(out)}).[/aai.muted]" - ) - - -def _emit_multi( - result: session.SpeakResult, - speakers: dict[str, str], - segment_count: int, - out: Path | None, - *, - json_mode: bool, -) -> None: - """Multi-voice result: a JSON object on stdout, or a human note on stderr.""" - duration = round(result.audio_duration_seconds, 3) - if json_mode: - output.emit_ndjson( - { - "mode": "multi", - "speakers": speakers, - "segments": 
segment_count, - "sample_rate": result.sample_rate, - "audio_duration_seconds": duration, - "bytes": len(result.pcm), - "out": str(out) if out is not None else None, - } - ) - return - voices = ", ".join(f"{spk}={voice}" for spk, voice in speakers.items()) - output.error_console.print( - f"[aai.muted]Spoke {duration}s across {len(speakers)} voices " - f"({voices}) ({_disposition(out)}).[/aai.muted]" - ) - - -def _speak_single( - api_key: str, - text: str, - voice: str, - language: str, - sample_rate: int | None, - out: Path | None, - *, - json_mode: bool, - quiet: bool, -) -> None: - cfg = session.SpeakConfig(text=text, voice=voice, language=language, sample_rate=sample_rate) - with output.status("Synthesizing speech…", json_mode=json_mode, quiet=quiet): - result = session.synthesize( - api_key, cfg, on_warning=lambda m: output.emit_warning(m, json_mode=json_mode) - ) - _output_audio(result, out) - _emit_single(result, cfg, out, json_mode=json_mode) - - -def _speak_dialogue( - api_key: str, - text: str, - bare_voice: str | None, - overrides: dict[str, str], - language: str, - sample_rate: int | None, - out: Path | None, - *, - json_mode: bool, - quiet: bool, -) -> None: - segments = dialogue.parse_segments(text) - if not segments: - raise UsageError( - "No text to speak.", - suggestion="The input had speaker labels but no spoken text.", - ) - if bare_voice is not None: - output.emit_warning( - "Ignoring bare --voice in multi-speaker mode; " - "set a voice per speaker with --voice A=NAME.", - json_mode=json_mode, - ) - resolved, speakers = dialogue.assign_voices( - segments, dialogue.DEFAULT_VOICE_ROTATION, overrides - ) - with output.status("Synthesizing speech…", json_mode=json_mode, quiet=quiet): - result = session.synthesize_dialogue( - api_key, - resolved, - language=language, - sample_rate=sample_rate, - on_warning=lambda m: output.emit_warning(m, json_mode=json_mode), - ) - _output_audio(result, out) - _emit_multi(result, speakers, len(resolved), out, 
json_mode=json_mode) - @app.command( rich_help_panel=help_panels.TRANSCRIPTION, @@ -221,47 +68,15 @@ def speak( goes before the subcommand). """ - def body(state: AppState, json_mode: bool) -> None: - if not session.is_available(): - raise CLIError( - "assembly speak is only available in the sandbox.", - error_type="unsupported_environment", - exit_code=2, - suggestion="Re-run as: assembly --sandbox speak … " - "(--sandbox goes before the command; or use --env sandbox000).", - ) - spoken = _read_text(text) - api_key = state.resolve_api_key() - bare_voice, overrides = dialogue.parse_voice_overrides(voice) - if dialogue.looks_like_speaker_labeled(spoken): - _speak_dialogue( - api_key, - spoken, - bare_voice, - overrides, - language, - sample_rate, - out, - json_mode=json_mode, - quiet=state.quiet, - ) - else: - if overrides: - # Mirror the inverse warning in _speak_dialogue: never drop a - # requested voice mapping silently. - output.emit_warning( - "Ignoring --voice SPEAKER=VOICE mappings; input has no speaker labels.", - json_mode=json_mode, - ) - _speak_single( - api_key, - spoken, - bare_voice or DEFAULT_VOICE, - language, - sample_rate, - out, - json_mode=json_mode, - quiet=state.quiet, - ) - - run_command(ctx, body, json=json_out) + opts = speak_exec.SpeakOptions( + text=text, + voice=voice, + language=language, + sample_rate=sample_rate, + out=out, + ) + run_command( + ctx, + lambda state, json_mode: speak_exec.run_speak(opts, state, json_mode=json_mode), + json=json_out, + ) diff --git a/aai_cli/commands/stream.py b/aai_cli/commands/stream.py index 822018d9..4eb9bc40 100644 --- a/aai_cli/commands/stream.py +++ b/aai_cli/commands/stream.py @@ -1,81 +1,19 @@ from __future__ import annotations -import tempfile from pathlib import Path import typer from assemblyai.streaming.v3 import Encoding, NoiseSuppressionModel, SpeechModel -from aai_cli import ( - choices, - client, - code_gen, - config_builder, - help_panels, - llm, - options, - output, - youtube, -) -from 
aai_cli.context import AppState, run_command -from aai_cli.errors import UsageError -from aai_cli.follow import FollowRenderer +from aai_cli import choices, help_panels, llm, options, stream_exec +from aai_cli.context import run_command from aai_cli.help_text import examples_epilog -from aai_cli.microphone import MicrophoneSource -from aai_cli.streaming.macos import MacSystemAudioSource -from aai_cli.streaming.render import StreamRenderer -from aai_cli.streaming.session import ( - SourceOptions, - StreamSession, - validate_output_flags, - validate_sources, -) -from aai_cli.streaming.sources import TARGET_RATE, FileSource, StdinSource app = typer.Typer() DEFAULT_SPEECH_MODEL = SpeechModel.u3_rt_pro -def _dispatch(session: StreamSession, opts: SourceOptions) -> None: - """Open the right audio source(s) for the flags and stream them.""" - if opts.from_system_audio: - system = MacSystemAudioSource(on_open=session.on_open) - if opts.system_audio_only: - session.run(system, system.sample_rate, source_label="system") - else: - mic = MicrophoneSource( - target_rate=TARGET_RATE, - device=opts.device, - capture_rate=opts.sample_rate, - on_open=session.on_open, - ) - session.run_parallel( - [("system", system, system.sample_rate), ("you", mic, mic.sample_rate)] - ) - elif opts.from_stdin: - # Raw PCM16 mono piped on stdin (e.g. `ffmpeg … -f s16le - | assembly stream -`). - stdin_src = StdinSource(sample_rate=opts.sample_rate or TARGET_RATE) - session.run(stdin_src, stdin_src.sample_rate) - elif opts.source and youtube.is_downloadable_url(opts.source): - # Fetch the audio first, then stream the local file in real time. 
- with tempfile.TemporaryDirectory(prefix="aai-yt-") as td: - local = youtube.download_audio(opts.source, Path(td)) - session.run(FileSource(str(local)), TARGET_RATE) - elif opts.from_file: - file_audio = FileSource(client.resolve_audio_source(opts.source, sample=opts.sample)) - session.run(file_audio, file_audio.sample_rate) - else: - # Capture at the device's native rate (or --sample-rate override) and tell the - # streaming API that rate, rather than forcing one the device may reject. - # "Listening…" is announced once the device is open (see StreamSession.on_open), - # not when the session opens — so early speech isn't lost in the gap. - mic = MicrophoneSource( - device=opts.device, capture_rate=opts.sample_rate, on_open=session.on_open - ) - session.run(mic, mic.sample_rate) - - @app.command( rich_help_panel=help_panels.TRANSCRIPTION, epilog=examples_epilog( @@ -348,102 +286,47 @@ def stream( in-process, refreshing the answer on every finalized turn; for a separate step instead, pipe the text out with -o text | assembly llm -f "…". """ - - def body(state: AppState, json_mode: bool) -> None: - validate_output_flags(json_mode=json_mode, output_field=output_field) - text_mode, json_mode = output.stream_output_modes(output_field, json_mode=json_mode) - opts = SourceOptions( - source=source, - sample=sample, - sample_rate=sample_rate, - device=device, - system_audio=system_audio, - system_audio_only=system_audio_only, - ) - # Every streaming flag except sample_rate, which is set per source at stream time. 
- base_flags: dict[str, object] = { - "speech_model": config_builder.enum_value(speech_model), - "format_turns": format_turns if format_turns is not None else True, - "encoding": config_builder.enum_value(encoding), - "language_detection": language_detection, - "domain": domain, - "end_of_turn_confidence_threshold": end_of_turn_confidence_threshold, - "min_turn_silence": min_turn_silence, - "max_turn_silence": max_turn_silence, - "vad_threshold": vad_threshold, - "include_partial_turns": include_partial_turns, - "keyterms_prompt": list(keyterms_prompt) if keyterms_prompt else None, - "filter_profanity": filter_profanity, - "speaker_labels": speaker_labels, - "max_speakers": max_speakers, - "voice_focus": config_builder.enum_value(voice_focus), - "voice_focus_threshold": voice_focus_threshold, - "redact_pii": redact_pii, - "redact_pii_policies": config_builder.split_csv(redact_pii_policy), - "redact_pii_sub": redact_pii_sub, - "inactivity_timeout": inactivity_timeout, - "webhook_url": webhook_url, - "prompt": prompt, - } - base_flags.update(config_builder.auth_header_flags(webhook_auth_header)) - - if show_code: - # Print-only: emit a script faithful to the requested source — mic - # (default), stdin (-), or a file/URL — and exit without opening audio or - # authenticating. Raw stdout so `--show-code > script.py` is runnable. - # The same source validation as a real run, so e.g. a file + --sample-rate - # conflict errors here too instead of silently generating mic code. - validate_sources(opts, has_llm=bool(llm_prompt), text_mode=text_mode) - if opts.from_system_audio: - raise UsageError("--show-code does not support macOS system audio capture yet.") - if opts.source and youtube.is_downloadable_url(opts.source): - raise UsageError( - "--show-code does not support downloaded sources (YouTube, podcast pages) yet.", - suggestion="Download the audio first (e.g. 
yt-dlp) and pass the local file.", - ) - code_source: str | None = None - if opts.from_stdin: - code_source = "-" - elif opts.from_file: - # check_local=False: generating code for a file you don't have yet is fine. - code_source = client.resolve_audio_source( - opts.source, sample=opts.sample, check_local=False - ) - merged = config_builder.merge_streaming_params( - # sample_rate precedence: --sample-rate (None is dropped by the merge) - # beats --config/--config-file, which beat the 16 kHz default below — - # so an explicit `--config sample_rate=…` is honored, not overridden. - flags=base_flags | {"sample_rate": opts.sample_rate}, - overrides=config_kv, - config_file=config_file, - ) - merged.setdefault("sample_rate", TARGET_RATE) - gateway = code_gen.gateway_options( - list(llm_prompt or []), model, max_tokens, interval=llm_interval - ) - output.print_code(code_gen.stream(merged, llm=gateway, source=code_source)) - return - - # Validate the requested sources (including that a local file exists) before - # credentials, so a typo'd path reads as "file not found" — not as a login. 
- validate_sources(opts, has_llm=bool(llm_prompt), text_mode=text_mode) - if opts.from_file and not opts.from_stdin: - client.resolve_audio_source(opts.source, sample=opts.sample) - api_key = state.resolve_api_key() - - llm_prompts = list(llm_prompt or []) - session = StreamSession( - api_key=api_key, - base_flags=base_flags, - overrides=config_kv, - config_file=config_file, - renderer=StreamRenderer(json_mode=json_mode, text_mode=text_mode), - follow=FollowRenderer(json_mode=json_mode) if llm_prompts else None, - llm_prompts=llm_prompts, - model=model, - max_tokens=max_tokens, - llm_interval=llm_interval, - ) - _dispatch(session, opts) - - run_command(ctx, body, json=json_out) + opts = stream_exec.StreamOptions( + source=source, + sample=sample, + sample_rate=sample_rate, + device=device, + system_audio=system_audio, + system_audio_only=system_audio_only, + speech_model=speech_model, + encoding=encoding, + language_detection=language_detection, + domain=domain, + prompt=prompt, + keyterms_prompt=keyterms_prompt, + end_of_turn_confidence_threshold=end_of_turn_confidence_threshold, + min_turn_silence=min_turn_silence, + max_turn_silence=max_turn_silence, + vad_threshold=vad_threshold, + format_turns=format_turns, + include_partial_turns=include_partial_turns, + speaker_labels=speaker_labels, + max_speakers=max_speakers, + voice_focus=voice_focus, + voice_focus_threshold=voice_focus_threshold, + inactivity_timeout=inactivity_timeout, + filter_profanity=filter_profanity, + redact_pii=redact_pii, + redact_pii_policy=redact_pii_policy, + redact_pii_sub=redact_pii_sub, + webhook_url=webhook_url, + webhook_auth_header=webhook_auth_header, + llm_prompt=llm_prompt, + llm_interval=llm_interval, + model=model, + max_tokens=max_tokens, + config_kv=config_kv, + config_file=config_file, + output_field=output_field, + show_code=show_code, + ) + run_command( + ctx, + lambda state, json_mode: stream_exec.run_stream(opts, state, json_mode=json_mode), + json=json_out, + ) diff --git 
a/aai_cli/commands/transcribe.py b/aai_cli/commands/transcribe.py index 6989205f..21a0320b 100644 --- a/aai_cli/commands/transcribe.py +++ b/aai_cli/commands/transcribe.py @@ -5,23 +5,8 @@ import assemblyai as aai import typer -from aai_cli import ( - choices, - client, - code_gen, - config_builder, - help_panels, - llm, - options, - output, - transcribe_batch, - transcribe_exec, -) - -# The package attribute `code_gen.transcribe` is the wrapper function, so the module's -# render() (which also takes the -o output field) is imported from the submodule itself. -from aai_cli.code_gen.transcribe import render as render_transcribe_code -from aai_cli.context import AppState, run_command +from aai_cli import choices, help_panels, llm, options, transcribe_exec +from aai_cli.context import run_command from aai_cli.help_text import examples_epilog app = typer.Typer() @@ -361,130 +346,59 @@ def transcribe( Curated flags cover common features; --config KEY=VALUE and --config-file reach every other field. Analysis (summary, chapters, ...) renders in human mode. 
""" - - def body(state: AppState, json_mode: bool) -> None: - transcribe_exec.validate_language_flags( - language_code, language_detection=language_detection - ) - pii_policies = config_builder.split_csv(redact_pii_policy) - transcribe_exec.validate_pii_policies(pii_policies) - flags: dict[str, object] = { - "speech_model": config_builder.enum_value(speech_model), - "language_code": language_code, - "language_detection": language_detection, - "keyterms_prompt": list(keyterms_prompt) if keyterms_prompt else None, - "temperature": temperature, - "prompt": prompt, - "punctuate": punctuate, - "format_text": format_text, - "disfluencies": disfluencies, - "speaker_labels": speaker_labels or None, - "speakers_expected": speakers_expected, - "multichannel": multichannel, - "redact_pii": redact_pii, - "redact_pii_policies": pii_policies, - "redact_pii_sub": config_builder.enum_value(redact_pii_sub), - "redact_pii_audio": redact_pii_audio, - "filter_profanity": filter_profanity, - "content_safety": content_safety, - "content_safety_confidence": content_safety_confidence, - "speech_threshold": speech_threshold, - "summarization": summarization, - "summary_model": config_builder.enum_value(summary_model), - "summary_type": config_builder.enum_value(summary_type), - "auto_chapters": auto_chapters, - "sentiment_analysis": sentiment_analysis, - "entity_detection": entity_detection, - "auto_highlights": auto_highlights, - "iab_categories": topic_detection, - "word_boost": list(word_boost) if word_boost else None, - "custom_spelling": ( - config_builder.load_custom_spelling(custom_spelling_file) - if custom_spelling_file - else None - ), - "audio_start_from": audio_start, - "audio_end_at": audio_end, - "webhook_url": webhook_url, - "speech_understanding": ( - config_builder.translation_request(list(translate_to)) if translate_to else None - ), - } - flags.update(config_builder.auth_header_flags(webhook_auth_header)) - - transcribe_exec.validate_out_with_llm(out, llm_prompt) - 
transcribe_exec.validate_out_path(out) - transcribe_exec.validate_json_with_output(output_field, json_mode=json_mode) - - merged = config_builder.merge_transcribe_config( - flags=flags, overrides=config_kv, config_file=config_file - ) - - transcribe_exec.validate_speakers_expected(merged) - - sources = transcribe_batch.expand_sources(source, from_stdin=from_stdin, sample=sample) - if sources is not None: - transcribe_batch.reject_single_source_flags( - out=out, output_field=output_field, llm_prompt=llm_prompt, show_code=show_code - ) - transcribe_batch.run_batch( - state.resolve_api_key(), - sources, - transcription_config=config_builder.construct_transcription_config(merged), - concurrency=concurrency, - force=force, - json_mode=json_mode, - quiet=state.quiet, - ) - return - - if show_code: - # Print-only: build the equivalent script and exit without transcribing or - # authenticating (raw stdout, so `--show-code > script.py` runs). No - # source/--sample needed — fall back to a placeholder path for a pure snippet. - audio = ( - client.resolve_audio_source(source, sample=sample, check_local=False) - if source or sample - else "your-audio-file.mp3" - ) - gateway = code_gen.gateway_options(list(llm_prompt or []), model, max_tokens) - output.print_code( - render_transcribe_code( - merged, - audio, - llm_gateway=gateway, - output=output_field, - download_sections=list(download_sections or []), - ) - ) - return - - tc = config_builder.construct_transcription_config(merged) - - # A typo'd path must read as "file not found", not trigger a login. 
- transcribe_exec.check_source_exists(source, sample=sample) - transcribe_exec.warn_unrecognized_extension(source, json_mode=json_mode, quiet=state.quiet) - - api_key = state.resolve_api_key() - with output.status("Transcribing…", json_mode=json_mode, quiet=state.quiet): - transcript = transcribe_exec.run_transcription( - api_key, - source, - sample=sample, - transcription_config=tc, - download_sections=list(download_sections or []), - ) - - transcribe_exec.deliver_result( - transcript, - api_key=api_key, - out=out, - output_field=output_field, - transform=transcribe_exec.TransformOptions( - prompts=list(llm_prompt or []), model=model, max_tokens=max_tokens - ), - json_mode=json_mode, - quiet=state.quiet, - ) - - run_command(ctx, body, json=json_out) + opts = transcribe_exec.TranscribeOptions( + source=source, + sample=sample, + from_stdin=from_stdin, + concurrency=concurrency, + force=force, + speech_model=speech_model, + language_code=language_code, + language_detection=language_detection, + keyterms_prompt=keyterms_prompt, + temperature=temperature, + prompt=prompt, + punctuate=punctuate, + format_text=format_text, + disfluencies=disfluencies, + speaker_labels=speaker_labels, + speakers_expected=speakers_expected, + multichannel=multichannel, + redact_pii=redact_pii, + redact_pii_policy=redact_pii_policy, + redact_pii_sub=redact_pii_sub, + redact_pii_audio=redact_pii_audio, + filter_profanity=filter_profanity, + content_safety=content_safety, + content_safety_confidence=content_safety_confidence, + speech_threshold=speech_threshold, + summarization=summarization, + summary_model=summary_model, + summary_type=summary_type, + auto_chapters=auto_chapters, + sentiment_analysis=sentiment_analysis, + entity_detection=entity_detection, + auto_highlights=auto_highlights, + topic_detection=topic_detection, + word_boost=word_boost, + custom_spelling_file=custom_spelling_file, + audio_start=audio_start, + audio_end=audio_end, + download_sections=download_sections, + 
webhook_url=webhook_url, + webhook_auth_header=webhook_auth_header, + translate_to=translate_to, + config_kv=config_kv, + config_file=config_file, + llm_prompt=llm_prompt, + model=model, + max_tokens=max_tokens, + output_field=output_field, + out=out, + show_code=show_code, + ) + run_command( + ctx, + lambda state, json_mode: transcribe_exec.run_transcribe(opts, state, json_mode=json_mode), + json=json_out, + ) diff --git a/aai_cli/llm_exec.py b/aai_cli/llm_exec.py new file mode 100644 index 00000000..d8c500e6 --- /dev/null +++ b/aai_cli/llm_exec.py @@ -0,0 +1,159 @@ +"""Run logic for `assembly llm`: the options/run split (see AGENTS.md). + +The command module (aai_cli/commands/llm.py) only parses argv — it builds an +``LlmOptions`` and hands it to ``run_llm`` via ``context.run_command``, so tests can +drive one-shot and --follow behavior by constructing options directly, with no +CliRunner argv round-trip. (``aai_cli/llm.py`` is the gateway client itself and is +rich-free by architecture contract, so the rendering-aware run path lives here.) +""" + +from __future__ import annotations + +from dataclasses import dataclass + +from rich.markup import escape + +from aai_cli import choices, client, output, stdio +from aai_cli import llm as gateway +from aai_cli.context import AppState +from aai_cli.errors import UsageError +from aai_cli.follow import FollowRenderer + +_FOLLOW_STDIN_MESSAGE = ( + "--follow needs transcript text piped on stdin, e.g. " + '`assembly stream -o text | assembly llm -f "summarize action items as I talk"`.' +) + + +@dataclass(frozen=True) +class LlmOptions: + """Every `assembly llm` prompt flag as plain data. + + ``--list-models`` is excluded: it dispatches to its own auth-free body in the + command module. ``--json`` is excluded: run_command resolves it into the + ``json_mode`` argument. 
+ """ + + prompt: str | None + model: str + transcript_id: str | None + system: str | None + follow: bool + output_field: choices.TextOrJson | None + max_tokens: int + + +def _validate_follow_args( + prompt: str | None, output_field: str | None, transcript_id: str | None +) -> str: + """Reject flag combinations that don't apply to --follow's live-panel mode. + + Returns the validated (non-empty) prompt so the caller has a plain ``str``. + """ + if not prompt: + raise UsageError("Provide a prompt to run over the streamed transcript.") + if output_field is not None: + raise UsageError( + "--output applies to one-shot mode; --follow renders a live panel " + "(or NDJSON when piped)." + ) + if transcript_id: + raise UsageError( + "--follow runs over live transcript text piped on stdin; it can't be " + "combined with --transcript-id." + ) + if not stdio.stdin_is_piped(): + raise UsageError(_FOLLOW_STDIN_MESSAGE) + return prompt + + +def _stdin_transcript_text( + state: AppState, transcript_id: str | None, *, json_mode: bool +) -> str | None: + """Resolve the inline transcript text for one-shot mode. + + Text piped on stdin becomes the content the prompt operates on, unless an + explicit --transcript-id is given — that injects server-side and takes + priority, so piped text is ignored with a visible warning (suppressed by + --quiet, structured under --json). + """ + if transcript_id is None: + return stdio.piped_stdin_text() + # Same cheap local id check as `transcripts get`, before auth or network. 
+ client.validate_transcript_id(transcript_id) + if stdio.stdin_is_piped() and not state.quiet: + output.emit_warning( + "Ignoring piped stdin; --transcript-id takes priority.", json_mode=json_mode + ) + return None + + +def _run_follow(opts: LlmOptions, state: AppState, *, json_mode: bool) -> None: + prompt_text = _validate_follow_args(opts.prompt, opts.output_field, opts.transcript_id) + api_key = state.resolve_api_key() + + def ask(transcript_text: str) -> str: + messages = gateway.build_messages( + prompt_text, system=opts.system, transcript_text=transcript_text + ) + response = gateway.complete( + api_key, model=opts.model, messages=messages, max_tokens=opts.max_tokens + ) + return gateway.content_of(response) + + transcript: list[str] = [] + interrupted = False + with FollowRenderer(json_mode=json_mode) as render: + # Ctrl-C is the normal "stop watching" signal -> exit cleanly (code 0). + try: + for turn in stdio.iter_piped_stdin_lines(): + transcript.append(turn) + render(ask("\n".join(transcript)), len(transcript)) + except KeyboardInterrupt: + interrupted = True + if not transcript and not interrupted: + # An empty pipe (`assembly llm -f "…" </dev/null`) delivered no transcript text — report it as a usage error rather than exiting silently. + raise UsageError(_FOLLOW_STDIN_MESSAGE) + + +def _run_oneshot(opts: LlmOptions, state: AppState, *, json_mode: bool) -> None: + if not opts.prompt: + raise UsageError( + "Provide a prompt.", + suggestion="Or pass --list-models to see available models.", + ) + prompt_text = opts.prompt + stdin_text = _stdin_transcript_text(state, opts.transcript_id, json_mode=json_mode) + api_key = state.resolve_api_key() + messages = gateway.build_messages( + prompt_text, + system=opts.system, + transcript_id=opts.transcript_id, + transcript_text=stdin_text, + ) + response = gateway.complete( + api_key, + model=opts.model, + messages=messages, + max_tokens=opts.max_tokens, + transcript_id=opts.transcript_id, + ) + content = gateway.content_of(response) + if opts.output_field == "text": + # Just the answer, raw — so `… | assembly llm -o text "…" | next` composes cleanly. 
+ output.emit_text(content) + return + output.emit( + {"model": opts.model, "output": content, "usage": gateway.usage_of(response)}, + lambda d: escape(str(d["output"])), + json_mode=json_mode or opts.output_field == "json", + ) + + +def run_llm(opts: LlmOptions, state: AppState, *, json_mode: bool) -> None: + """Execute one `assembly llm` invocation (one-shot or --follow) from parsed flags.""" + if opts.follow: + _run_follow(opts, state, json_mode=json_mode) + else: + _run_oneshot(opts, state, json_mode=json_mode) diff --git a/aai_cli/speak_exec.py b/aai_cli/speak_exec.py new file mode 100644 index 00000000..96f9ca3d --- /dev/null +++ b/aai_cli/speak_exec.py @@ -0,0 +1,220 @@ +"""Run logic for `assembly speak`: the options/run split (see AGENTS.md). + +The command module (aai_cli/commands/speak.py) only parses argv — it builds a +``SpeakOptions`` and hands it to ``run_speak`` via ``context.run_command``, so tests +can drive text resolution, voice assignment, and synthesis wiring by constructing +options directly, with no CliRunner argv round-trip. +""" + +from __future__ import annotations + +import sys +from dataclasses import dataclass +from pathlib import Path + +from aai_cli import output +from aai_cli.context import AppState +from aai_cli.errors import CLIError, UsageError +from aai_cli.tts import audio, dialogue, session + +# The streaming-TTS reference client defaults to the PocketTTS "jane" voice and +# English, so the CLI sends the same and a bare `assembly speak` works out of the box. +# Override either with --voice/--language. 
+DEFAULT_VOICE = "jane" +DEFAULT_LANGUAGE = "English" + + +@dataclass(frozen=True) +class SpeakOptions: + """Every `assembly speak` flag as plain data (``--json`` excluded: run_command + resolves it into the ``json_mode`` argument).""" + + text: str | None + voice: list[str] + language: str + sample_rate: int | None + out: Path | None + + +def _read_text(text: str | None) -> str: + """The text to speak: the non-blank argument, or piped stdin when the argument + is omitted entirely. A *blank* argument (e.g. "") is a usage error, never a + silent fall-through to stdin — so `assembly speak "$MSG"` with an empty MSG fails + fast instead of consuming whatever happens to be on the pipe.""" + if text is not None and text.strip(): + return text + # `text is None` (argument omitted), not merely blank: see the docstring rationale. + if text is None and not sys.stdin.isatty(): + piped = sys.stdin.read().strip() + if piped: + return piped + raise UsageError( + "No text to speak.", + suggestion='Pass text as an argument: assembly speak "Hello" — or pipe it via stdin.', + ) + + +def _output_audio(result: session.SpeakResult, out: Path | None) -> None: + """Write a WAV when --out is given, else play through the speakers.""" + if out is not None: + audio.write_wav(out, result.pcm, result.sample_rate) + else: + audio.play_pcm(result.pcm, result.sample_rate) + + +def _disposition(out: Path | None) -> str: + return f"saved to {out}" if out is not None else "played" + + +def _emit_single( + result: session.SpeakResult, + cfg: session.SpeakConfig, + out: Path | None, + *, + json_mode: bool, +) -> None: + """Single-voice result: a JSON object on stdout, or a human note on stderr.""" + duration = round(result.audio_duration_seconds, 3) + if json_mode: + output.emit_ndjson( + { + "voice": cfg.voice, + "language": cfg.language, + "sample_rate": result.sample_rate, + "audio_duration_seconds": duration, + "bytes": len(result.pcm), + "out": str(out) if out is not None else None, + } + ) + 
return + output.error_console.print( + f"[aai.muted]Spoke {duration}s of audio ({_disposition(out)}).[/aai.muted]" + ) + + +def _emit_multi( + result: session.SpeakResult, + speakers: dict[str, str], + segment_count: int, + out: Path | None, + *, + json_mode: bool, +) -> None: + """Multi-voice result: a JSON object on stdout, or a human note on stderr.""" + duration = round(result.audio_duration_seconds, 3) + if json_mode: + output.emit_ndjson( + { + "mode": "multi", + "speakers": speakers, + "segments": segment_count, + "sample_rate": result.sample_rate, + "audio_duration_seconds": duration, + "bytes": len(result.pcm), + "out": str(out) if out is not None else None, + } + ) + return + voices = ", ".join(f"{spk}={voice}" for spk, voice in speakers.items()) + output.error_console.print( + f"[aai.muted]Spoke {duration}s across {len(speakers)} voices " + f"({voices}) ({_disposition(out)}).[/aai.muted]" + ) + + +def _speak_single( + api_key: str, + text: str, + voice: str, + opts: SpeakOptions, + *, + json_mode: bool, + quiet: bool, +) -> None: + cfg = session.SpeakConfig( + text=text, voice=voice, language=opts.language, sample_rate=opts.sample_rate + ) + with output.status("Synthesizing speech…", json_mode=json_mode, quiet=quiet): + result = session.synthesize( + api_key, cfg, on_warning=lambda m: output.emit_warning(m, json_mode=json_mode) + ) + _output_audio(result, opts.out) + _emit_single(result, cfg, opts.out, json_mode=json_mode) + + +def _speak_dialogue( + api_key: str, + text: str, + bare_voice: str | None, + overrides: dict[str, str], + opts: SpeakOptions, + *, + json_mode: bool, + quiet: bool, +) -> None: + segments = dialogue.parse_segments(text) + if not segments: + raise UsageError( + "No text to speak.", + suggestion="The input had speaker labels but no spoken text.", + ) + if bare_voice is not None: + output.emit_warning( + "Ignoring bare --voice in multi-speaker mode; " + "set a voice per speaker with --voice A=NAME.", + json_mode=json_mode, + ) + 
resolved, speakers = dialogue.assign_voices( + segments, dialogue.DEFAULT_VOICE_ROTATION, overrides + ) + with output.status("Synthesizing speech…", json_mode=json_mode, quiet=quiet): + result = session.synthesize_dialogue( + api_key, + resolved, + language=opts.language, + sample_rate=opts.sample_rate, + on_warning=lambda m: output.emit_warning(m, json_mode=json_mode), + ) + _output_audio(result, opts.out) + _emit_multi(result, speakers, len(resolved), opts.out, json_mode=json_mode) + + +def run_speak(opts: SpeakOptions, state: AppState, *, json_mode: bool) -> None: + """Execute one `assembly speak` invocation from already-parsed flags.""" + if not session.is_available(): + raise CLIError( + "assembly speak is only available in the sandbox.", + error_type="unsupported_environment", + exit_code=2, + suggestion="Re-run as: assembly --sandbox speak … " + "(--sandbox goes before the command; or use --env sandbox000).", + ) + spoken = _read_text(opts.text) + api_key = state.resolve_api_key() + bare_voice, overrides = dialogue.parse_voice_overrides(opts.voice) + if dialogue.looks_like_speaker_labeled(spoken): + _speak_dialogue( + api_key, + spoken, + bare_voice, + overrides, + opts, + json_mode=json_mode, + quiet=state.quiet, + ) + else: + if overrides: + # Mirror the inverse warning in _speak_dialogue: never drop a + # requested voice mapping silently. + output.emit_warning( + "Ignoring --voice SPEAKER=VOICE mappings; input has no speaker labels.", + json_mode=json_mode, + ) + _speak_single( + api_key, + spoken, + bare_voice or DEFAULT_VOICE, + opts, + json_mode=json_mode, + quiet=state.quiet, + ) diff --git a/aai_cli/stream_exec.py b/aai_cli/stream_exec.py new file mode 100644 index 00000000..f2bdf72e --- /dev/null +++ b/aai_cli/stream_exec.py @@ -0,0 +1,236 @@ +"""Run logic for `assembly stream`: a gh-style options/run split. 
+ +The command module (aai_cli/commands/stream.py) only parses argv — it builds a +``StreamOptions`` and hands it to ``run_stream`` via ``context.run_command``. Keeping +the run path a module-level function of plain data (instead of a closure over the +Typer locals) lets tests drive validation, --show-code, and session wiring by +constructing a ``StreamOptions`` directly, with no CliRunner argv round-trip. +""" + +from __future__ import annotations + +import tempfile +from dataclasses import dataclass +from pathlib import Path + +from assemblyai.streaming.v3 import Encoding, NoiseSuppressionModel, SpeechModel + +from aai_cli import choices, client, code_gen, config_builder, output, youtube +from aai_cli.context import AppState +from aai_cli.errors import UsageError +from aai_cli.follow import FollowRenderer +from aai_cli.microphone import MicrophoneSource +from aai_cli.streaming.macos import MacSystemAudioSource +from aai_cli.streaming.render import StreamRenderer +from aai_cli.streaming.session import ( + SourceOptions, + StreamSession, + validate_output_flags, + validate_sources, +) +from aai_cli.streaming.sources import TARGET_RATE, FileSource, StdinSource + + +@dataclass(frozen=True) +class StreamOptions: + """Every `assembly stream` flag as plain data. + + One field per CLI flag (``--json`` excluded: run_command resolves it into the + ``json_mode`` argument), so a test can describe an invocation without argv. 
+ """ + + source: str | None + sample: bool + sample_rate: int | None + device: int | None + system_audio: bool + system_audio_only: bool + speech_model: SpeechModel + encoding: Encoding | None + language_detection: bool | None + domain: str | None + prompt: str | None + keyterms_prompt: list[str] | None + end_of_turn_confidence_threshold: float | None + min_turn_silence: int | None + max_turn_silence: int | None + vad_threshold: float | None + format_turns: bool | None + include_partial_turns: bool | None + speaker_labels: bool | None + max_speakers: int | None + voice_focus: NoiseSuppressionModel | None + voice_focus_threshold: float | None + inactivity_timeout: int | None + filter_profanity: bool | None + redact_pii: bool | None + redact_pii_policy: str | None + redact_pii_sub: str | None + webhook_url: str | None + webhook_auth_header: str | None + llm_prompt: list[str] | None + llm_interval: float + model: str + max_tokens: int + config_kv: list[str] | None + config_file: Path | None + output_field: choices.TextOrJson | None + show_code: bool + + def source_options(self) -> SourceOptions: + """The audio-input subset, in the shape the validation/dispatch helpers read.""" + return SourceOptions( + source=self.source, + sample=self.sample, + sample_rate=self.sample_rate, + device=self.device, + system_audio=self.system_audio, + system_audio_only=self.system_audio_only, + ) + + def base_flags(self) -> dict[str, object]: + """Every streaming flag except sample_rate, which is set per source at stream time.""" + flags: dict[str, object] = { + "speech_model": config_builder.enum_value(self.speech_model), + "format_turns": self.format_turns if self.format_turns is not None else True, + "encoding": config_builder.enum_value(self.encoding), + "language_detection": self.language_detection, + "domain": self.domain, + "end_of_turn_confidence_threshold": self.end_of_turn_confidence_threshold, + "min_turn_silence": self.min_turn_silence, + "max_turn_silence": 
self.max_turn_silence, + "vad_threshold": self.vad_threshold, + "include_partial_turns": self.include_partial_turns, + "keyterms_prompt": list(self.keyterms_prompt) if self.keyterms_prompt else None, + "filter_profanity": self.filter_profanity, + "speaker_labels": self.speaker_labels, + "max_speakers": self.max_speakers, + "voice_focus": config_builder.enum_value(self.voice_focus), + "voice_focus_threshold": self.voice_focus_threshold, + "redact_pii": self.redact_pii, + "redact_pii_policies": config_builder.split_csv(self.redact_pii_policy), + "redact_pii_sub": self.redact_pii_sub, + "inactivity_timeout": self.inactivity_timeout, + "webhook_url": self.webhook_url, + "prompt": self.prompt, + } + flags.update(config_builder.auth_header_flags(self.webhook_auth_header)) + return flags + + +def _print_show_code( + opts: StreamOptions, + sources: SourceOptions, + base_flags: dict[str, object], + *, + text_mode: bool, +) -> None: + """Print the equivalent SDK script without opening audio or authenticating. + + Emits a script faithful to the requested source — mic (default), stdin (-), or a + file/URL — on raw stdout, so `--show-code > script.py` is runnable. Applies the + same source validation as a real run, so e.g. a file + --sample-rate conflict + errors here too instead of silently generating mic code. + """ + validate_sources(sources, has_llm=bool(opts.llm_prompt), text_mode=text_mode) + if sources.from_system_audio: + raise UsageError("--show-code does not support macOS system audio capture yet.") + if sources.source and youtube.is_downloadable_url(sources.source): + raise UsageError( + "--show-code does not support downloaded sources (YouTube, podcast pages) yet.", + suggestion="Download the audio first (e.g. yt-dlp) and pass the local file.", + ) + code_source: str | None = None + if sources.from_stdin: + code_source = "-" + elif sources.from_file: + # check_local=False: generating code for a file you don't have yet is fine. 
+ code_source = client.resolve_audio_source( + sources.source, sample=sources.sample, check_local=False + ) + merged = config_builder.merge_streaming_params( + # sample_rate precedence: --sample-rate (None is dropped by the merge) + # beats --config/--config-file, which beat the 16 kHz default below — + # so an explicit `--config sample_rate=…` is honored, not overridden. + flags=base_flags | {"sample_rate": sources.sample_rate}, + overrides=opts.config_kv, + config_file=opts.config_file, + ) + merged.setdefault("sample_rate", TARGET_RATE) + gateway = code_gen.gateway_options( + list(opts.llm_prompt or []), opts.model, opts.max_tokens, interval=opts.llm_interval + ) + output.print_code(code_gen.stream(merged, llm=gateway, source=code_source)) + + +def _dispatch(session: StreamSession, opts: SourceOptions) -> None: + """Open the right audio source(s) for the flags and stream them.""" + if opts.from_system_audio: + system = MacSystemAudioSource(on_open=session.on_open) + if opts.system_audio_only: + session.run(system, system.sample_rate, source_label="system") + else: + mic = MicrophoneSource( + target_rate=TARGET_RATE, + device=opts.device, + capture_rate=opts.sample_rate, + on_open=session.on_open, + ) + session.run_parallel( + [("system", system, system.sample_rate), ("you", mic, mic.sample_rate)] + ) + elif opts.from_stdin: + # Raw PCM16 mono piped on stdin (e.g. `ffmpeg … -f s16le - | assembly stream -`). + stdin_src = StdinSource(sample_rate=opts.sample_rate or TARGET_RATE) + session.run(stdin_src, stdin_src.sample_rate) + elif opts.source and youtube.is_downloadable_url(opts.source): + # Fetch the audio first, then stream the local file in real time. 
+ with tempfile.TemporaryDirectory(prefix="aai-yt-") as td: + local = youtube.download_audio(opts.source, Path(td)) + session.run(FileSource(str(local)), TARGET_RATE) + elif opts.from_file: + file_audio = FileSource(client.resolve_audio_source(opts.source, sample=opts.sample)) + session.run(file_audio, file_audio.sample_rate) + else: + # Capture at the device's native rate (or --sample-rate override) and tell the + # streaming API that rate, rather than forcing one the device may reject. + # "Listening…" is announced once the device is open (see StreamSession.on_open), + # not when the session opens — so early speech isn't lost in the gap. + mic = MicrophoneSource( + device=opts.device, capture_rate=opts.sample_rate, on_open=session.on_open + ) + session.run(mic, mic.sample_rate) + + +def run_stream(opts: StreamOptions, state: AppState, *, json_mode: bool) -> None: + """Execute one `assembly stream` invocation from already-parsed flags.""" + validate_output_flags(json_mode=json_mode, output_field=opts.output_field) + text_mode, json_mode = output.stream_output_modes(opts.output_field, json_mode=json_mode) + sources = opts.source_options() + base_flags = opts.base_flags() + + if opts.show_code: + _print_show_code(opts, sources, base_flags, text_mode=text_mode) + return + + # Validate the requested sources (including that a local file exists) before + # credentials, so a typo'd path reads as "file not found" — not as a login. 
+ validate_sources(sources, has_llm=bool(opts.llm_prompt), text_mode=text_mode) + if sources.from_file and not sources.from_stdin: + client.resolve_audio_source(sources.source, sample=sources.sample) + api_key = state.resolve_api_key() + + llm_prompts = list(opts.llm_prompt or []) + session = StreamSession( + api_key=api_key, + base_flags=base_flags, + overrides=opts.config_kv, + config_file=opts.config_file, + renderer=StreamRenderer(json_mode=json_mode, text_mode=text_mode), + follow=FollowRenderer(json_mode=json_mode) if llm_prompts else None, + llm_prompts=llm_prompts, + model=opts.model, + max_tokens=opts.max_tokens, + llm_interval=opts.llm_interval, + ) + _dispatch(session, sources) diff --git a/aai_cli/transcribe_exec.py b/aai_cli/transcribe_exec.py index d0693ef6..5c602e8a 100644 --- a/aai_cli/transcribe_exec.py +++ b/aai_cli/transcribe_exec.py @@ -10,13 +10,26 @@ import json import os import tempfile +from dataclasses import dataclass from pathlib import Path from typing import Any, NamedTuple import assemblyai as aai from rich.markup import escape -from aai_cli import choices, client, llm, output, stdio, transcribe_render, youtube +from aai_cli import ( + choices, + client, + code_gen, + config_builder, + llm, + output, + stdio, + transcribe_render, + youtube, +) +from aai_cli.code_gen.transcribe import render as render_transcribe_code +from aai_cli.context import AppState from aai_cli.errors import UsageError, mutually_exclusive # The PII policy strings the SDK accepts, validated client-side so a typo'd @@ -225,3 +238,207 @@ def deliver_result( output.emit(client.transcript_json_payload(transcript), lambda d: d, json_mode=True) else: transcribe_render.render_transcript_result(transcript, output.console) + + +@dataclass(frozen=True) +class TranscribeOptions: + """Every `assembly transcribe` flag as plain data (options/run split, see AGENTS.md). 
+ + One field per CLI flag (``--json`` excluded: run_command resolves it into the + ``json_mode`` argument), so a test can describe an invocation without argv. + """ + + source: str | None + sample: bool + from_stdin: bool + concurrency: int + force: bool + speech_model: aai.SpeechModel | None + language_code: str | None + language_detection: bool | None + keyterms_prompt: list[str] | None + temperature: float | None + prompt: str | None + punctuate: bool | None + format_text: bool | None + disfluencies: bool | None + speaker_labels: bool + speakers_expected: int | None + multichannel: bool | None + redact_pii: bool | None + redact_pii_policy: str | None + redact_pii_sub: aai.PIISubstitutionPolicy | None + redact_pii_audio: bool | None + filter_profanity: bool | None + content_safety: bool | None + content_safety_confidence: int | None + speech_threshold: float | None + summarization: bool | None + summary_model: aai.SummarizationModel | None + summary_type: aai.SummarizationType | None + auto_chapters: bool | None + sentiment_analysis: bool | None + entity_detection: bool | None + auto_highlights: bool | None + topic_detection: bool | None + word_boost: list[str] | None + custom_spelling_file: Path | None + audio_start: int | None + audio_end: int | None + download_sections: list[str] | None + webhook_url: str | None + webhook_auth_header: str | None + translate_to: list[str] | None + config_kv: list[str] | None + config_file: Path | None + llm_prompt: list[str] | None + model: str + max_tokens: int + output_field: choices.TranscriptOutput | None + out: Path | None + show_code: bool + + def flags(self, pii_policies: list[str] | None) -> dict[str, object]: + """The curated flags in TranscriptionConfig field names (None = unset).""" + flags: dict[str, object] = { + "speech_model": config_builder.enum_value(self.speech_model), + "language_code": self.language_code, + "language_detection": self.language_detection, + "keyterms_prompt": list(self.keyterms_prompt) if 
self.keyterms_prompt else None, + "temperature": self.temperature, + "prompt": self.prompt, + "punctuate": self.punctuate, + "format_text": self.format_text, + "disfluencies": self.disfluencies, + "speaker_labels": self.speaker_labels or None, + "speakers_expected": self.speakers_expected, + "multichannel": self.multichannel, + "redact_pii": self.redact_pii, + "redact_pii_policies": pii_policies, + "redact_pii_sub": config_builder.enum_value(self.redact_pii_sub), + "redact_pii_audio": self.redact_pii_audio, + "filter_profanity": self.filter_profanity, + "content_safety": self.content_safety, + "content_safety_confidence": self.content_safety_confidence, + "speech_threshold": self.speech_threshold, + "summarization": self.summarization, + "summary_model": config_builder.enum_value(self.summary_model), + "summary_type": config_builder.enum_value(self.summary_type), + "auto_chapters": self.auto_chapters, + "sentiment_analysis": self.sentiment_analysis, + "entity_detection": self.entity_detection, + "auto_highlights": self.auto_highlights, + "iab_categories": self.topic_detection, + "word_boost": list(self.word_boost) if self.word_boost else None, + "custom_spelling": ( + config_builder.load_custom_spelling(self.custom_spelling_file) + if self.custom_spelling_file + else None + ), + "audio_start_from": self.audio_start, + "audio_end_at": self.audio_end, + "webhook_url": self.webhook_url, + "speech_understanding": ( + config_builder.translation_request(list(self.translate_to)) + if self.translate_to + else None + ), + } + flags.update(config_builder.auth_header_flags(self.webhook_auth_header)) + return flags + + +def _print_show_code(opts: TranscribeOptions, merged: dict[str, object]) -> None: + """Print the equivalent SDK script and exit without transcribing or authenticating. + + Raw stdout, so `--show-code > script.py` runs. No source/--sample needed — fall + back to a placeholder path for a pure snippet. 
+ """ + audio = ( + client.resolve_audio_source(opts.source, sample=opts.sample, check_local=False) + if opts.source or opts.sample + else "your-audio-file.mp3" + ) + gateway = code_gen.gateway_options(list(opts.llm_prompt or []), opts.model, opts.max_tokens) + output.print_code( + render_transcribe_code( + merged, + audio, + llm_gateway=gateway, + output=opts.output_field, + download_sections=list(opts.download_sections or []), + ) + ) + + +def run_transcribe(opts: TranscribeOptions, state: AppState, *, json_mode: bool) -> None: + """Execute one `assembly transcribe` invocation from already-parsed flags.""" + # Module-load order: transcribe_batch imports this module, so import it lazily. + from aai_cli import transcribe_batch + + validate_language_flags(opts.language_code, language_detection=opts.language_detection) + pii_policies = config_builder.split_csv(opts.redact_pii_policy) + validate_pii_policies(pii_policies) + flags = opts.flags(pii_policies) + + validate_out_with_llm(opts.out, opts.llm_prompt) + validate_out_path(opts.out) + validate_json_with_output(opts.output_field, json_mode=json_mode) + + merged = config_builder.merge_transcribe_config( + flags=flags, overrides=opts.config_kv, config_file=opts.config_file + ) + validate_speakers_expected(merged) + + sources = transcribe_batch.expand_sources( + opts.source, from_stdin=opts.from_stdin, sample=opts.sample + ) + if sources is not None: + transcribe_batch.reject_single_source_flags( + out=opts.out, + output_field=opts.output_field, + llm_prompt=opts.llm_prompt, + show_code=opts.show_code, + ) + transcribe_batch.run_batch( + state.resolve_api_key(), + sources, + transcription_config=config_builder.construct_transcription_config(merged), + concurrency=opts.concurrency, + force=opts.force, + json_mode=json_mode, + quiet=state.quiet, + ) + return + + if opts.show_code: + _print_show_code(opts, merged) + return + + tc = config_builder.construct_transcription_config(merged) + + # A typo'd path must read as 
"file not found", not trigger a login. + check_source_exists(opts.source, sample=opts.sample) + warn_unrecognized_extension(opts.source, json_mode=json_mode, quiet=state.quiet) + + api_key = state.resolve_api_key() + with output.status("Transcribing…", json_mode=json_mode, quiet=state.quiet): + transcript = run_transcription( + api_key, + opts.source, + sample=opts.sample, + transcription_config=tc, + download_sections=list(opts.download_sections or []), + ) + + deliver_result( + transcript, + api_key=api_key, + out=opts.out, + output_field=opts.output_field, + transform=TransformOptions( + prompts=list(opts.llm_prompt or []), model=opts.model, max_tokens=opts.max_tokens + ), + json_mode=json_mode, + quiet=state.quiet, + ) diff --git a/tests/test_agent_command.py b/tests/test_agent_command.py index ae8482fc..c2d23b7b 100644 --- a/tests/test_agent_command.py +++ b/tests/test_agent_command.py @@ -33,7 +33,7 @@ def test_list_voices_prints_and_exits_without_connecting(monkeypatch): def fake_run_session(api_key, *, renderer, player, mic, config): called["ran"] = True - monkeypatch.setattr("aai_cli.commands.agent.run_session", fake_run_session) + monkeypatch.setattr("aai_cli.agent_exec.run_session", fake_run_session) result = runner.invoke(app, ["agent", "--list-voices"]) assert result.exit_code == 0 assert "ivy" in result.output @@ -44,7 +44,7 @@ def fake_run_session(api_key, *, renderer, player, mic, config): def test_list_voices_json_emits_machine_readable_array(monkeypatch): monkeypatch.setattr( - "aai_cli.commands.agent.run_session", + "aai_cli.agent_exec.run_session", lambda *a, **k: (_ for _ in ()).throw(AssertionError("must not connect")), ) result = runner.invoke(app, ["agent", "--list-voices", "--json"]) @@ -59,12 +59,12 @@ def test_list_voices_json_emits_machine_readable_array(monkeypatch): def test_agent_unauthenticated_runs_login(monkeypatch): monkeypatch.setattr("aai_cli.context._interactive_session", lambda: True) 
monkeypatch.setattr("aai_cli.context.run_login_flow", _login_result) - monkeypatch.setattr("aai_cli.commands.agent.FileSource", lambda src: f"filesrc:{src}") + monkeypatch.setattr("aai_cli.agent_exec.FileSource", lambda src: f"filesrc:{src}") def fake_run_session(api_key, *, renderer, player, mic, config): raise AssertionError(f"agent session should not run after auto-login: {api_key}") - monkeypatch.setattr("aai_cli.commands.agent.run_session", fake_run_session) + monkeypatch.setattr("aai_cli.agent_exec.run_session", fake_run_session) result = runner.invoke(app, ["agent", "--sample", "--json"]) assert result.exit_code == 4 assert config.get_api_key("default") == "sk_from_oauth" @@ -79,7 +79,7 @@ def fake_run_session(api_key, *, renderer, player, mic, config): renderer.user_final("hello agent") renderer.agent_transcript("hello human", interrupted=False) - monkeypatch.setattr("aai_cli.commands.agent.run_session", fake_run_session) + monkeypatch.setattr("aai_cli.agent_exec.run_session", fake_run_session) result = runner.invoke(app, ["agent", "--json"]) assert result.exit_code == 0 lines = [json.loads(x) for x in result.output.splitlines() if x.strip()] @@ -96,7 +96,7 @@ def fake_run_session(api_key, *, renderer, player, mic, config): seen["prompt"] = config.system_prompt seen["full_duplex"] = config.full_duplex - monkeypatch.setattr("aai_cli.commands.agent.run_session", fake_run_session) + monkeypatch.setattr("aai_cli.agent_exec.run_session", fake_run_session) prompt_file = tmp_path / "p.txt" prompt_file.write_text("be a pirate") result = runner.invoke( @@ -120,7 +120,7 @@ def fake_run_session(api_key, *, renderer, player, mic, config): def test_agent_headphones_notice_in_human_mode(monkeypatch): config.set_api_key("default", "sk_live") monkeypatch.setattr("aai_cli.output.resolve_json", lambda *, explicit: False) - monkeypatch.setattr("aai_cli.commands.agent.run_session", lambda *a, **k: None) + monkeypatch.setattr("aai_cli.agent_exec.run_session", lambda *a, **k: 
None) result = runner.invoke(app, ["agent"]) assert result.exit_code == 0 assert "headphones" in result.output.lower() # mic stays open -> warn to use headphones @@ -132,21 +132,21 @@ def test_agent_ctrl_c_exits_cleanly(monkeypatch): def raise_kbd(*a, **k): raise KeyboardInterrupt - monkeypatch.setattr("aai_cli.commands.agent.run_session", raise_kbd) + monkeypatch.setattr("aai_cli.agent_exec.run_session", raise_kbd) result = runner.invoke(app, ["agent"]) assert result.exit_code == 0 def test_agent_unknown_voice_exits_2(monkeypatch): config.set_api_key("default", "sk_live") - monkeypatch.setattr("aai_cli.commands.agent.run_session", lambda *a, **k: None) + monkeypatch.setattr("aai_cli.agent_exec.run_session", lambda *a, **k: None) result = runner.invoke(app, ["agent", "--voice", "not-a-voice"]) assert result.exit_code == 2 def test_agent_prompt_file_not_found_exits_2(monkeypatch): config.set_api_key("default", "sk_live") - monkeypatch.setattr("aai_cli.commands.agent.run_session", lambda *a, **k: None) + monkeypatch.setattr("aai_cli.agent_exec.run_session", lambda *a, **k: None) result = runner.invoke( app, ["agent", "--system-prompt-file", "/tmp/no_such_file_xyz_voiceagent.txt"] ) @@ -160,7 +160,7 @@ def _capture_run_session(monkeypatch): def fake_run_session(api_key, *, renderer, player, mic, config): seen.update(renderer=renderer, player=player, mic=mic, config=config) - monkeypatch.setattr("aai_cli.commands.agent.run_session", fake_run_session) + monkeypatch.setattr("aai_cli.agent_exec.run_session", fake_run_session) return seen @@ -169,7 +169,7 @@ def test_agent_file_source_streams_clip_and_exits_after_reply(monkeypatch, tmp_p wav = tmp_path / "say.wav" wav.write_bytes(b"RIFF") # FileSource is faked below; contents don't matter - monkeypatch.setattr("aai_cli.commands.agent.FileSource", lambda src: f"filesrc:{src}") + monkeypatch.setattr("aai_cli.agent_exec.FileSource", lambda src: f"filesrc:{src}") seen = _capture_run_session(monkeypatch) result = 
runner.invoke(app, ["agent", str(wav)]) @@ -192,7 +192,7 @@ def fake_file_source(src): captured["src"] = src return "filesrc" - monkeypatch.setattr("aai_cli.commands.agent.FileSource", fake_file_source) + monkeypatch.setattr("aai_cli.agent_exec.FileSource", fake_file_source) seen = _capture_run_session(monkeypatch) result = runner.invoke(app, ["agent", "--sample"]) @@ -203,7 +203,7 @@ def fake_file_source(src): def test_agent_file_source_with_device_exits_2(monkeypatch, tmp_path): config.set_api_key("default", "sk_live") - monkeypatch.setattr("aai_cli.commands.agent.run_session", lambda *a, **k: None) + monkeypatch.setattr("aai_cli.agent_exec.run_session", lambda *a, **k: None) wav = tmp_path / "say.wav" wav.write_bytes(b"RIFF") result = runner.invoke(app, ["agent", str(wav), "--device", "1"]) @@ -213,8 +213,8 @@ def test_agent_file_source_with_device_exits_2(monkeypatch, tmp_path): def test_agent_file_source_no_headphones_notice(monkeypatch, tmp_path): config.set_api_key("default", "sk_live") monkeypatch.setattr("aai_cli.output.resolve_json", lambda *, explicit: False) - monkeypatch.setattr("aai_cli.commands.agent.FileSource", lambda src: "filesrc") - monkeypatch.setattr("aai_cli.commands.agent.run_session", lambda *a, **k: None) + monkeypatch.setattr("aai_cli.agent_exec.FileSource", lambda src: "filesrc") + monkeypatch.setattr("aai_cli.agent_exec.run_session", lambda *a, **k: None) wav = tmp_path / "say.wav" wav.write_bytes(b"RIFF") result = runner.invoke(app, ["agent", str(wav)]) @@ -225,12 +225,12 @@ def test_agent_file_source_no_headphones_notice(monkeypatch, tmp_path): def test_agent_file_source_no_start_talking_notice(monkeypatch, tmp_path): config.set_api_key("default", "sk_live") monkeypatch.setattr("aai_cli.output.resolve_json", lambda *, explicit: False) - monkeypatch.setattr("aai_cli.commands.agent.FileSource", lambda src: "filesrc") + monkeypatch.setattr("aai_cli.agent_exec.FileSource", lambda src: "filesrc") def fake_run_session(api_key, *, renderer, 
player, mic, config): renderer.connected() # session.ready arrives even for a file-driven run - monkeypatch.setattr("aai_cli.commands.agent.run_session", fake_run_session) + monkeypatch.setattr("aai_cli.agent_exec.run_session", fake_run_session) wav = tmp_path / "say.wav" wav.write_bytes(b"RIFF") result = runner.invoke(app, ["agent", str(wav)]) @@ -255,12 +255,12 @@ def start(self): def close(self): pass - monkeypatch.setattr("aai_cli.commands.agent.DuplexAudio", FakeDuplex) + monkeypatch.setattr("aai_cli.agent_exec.DuplexAudio", FakeDuplex) def fake_run_session(api_key, *, renderer, player, mic, config): renderer.connected() - monkeypatch.setattr("aai_cli.commands.agent.run_session", fake_run_session) + monkeypatch.setattr("aai_cli.agent_exec.run_session", fake_run_session) result = runner.invoke(app, ["agent"]) assert result.exit_code == 0 assert "start talking" in result.output.lower() # live mic -> prompt the user to speak @@ -269,7 +269,7 @@ def fake_run_session(api_key, *, renderer, player, mic, config): def test_agent_show_code_prints_without_session(monkeypatch): # Print-only: emits the agent script, never starts a session or opens audio, no auth. 
called = [] - monkeypatch.setattr("aai_cli.commands.agent.run_session", lambda *a, **k: called.append(True)) + monkeypatch.setattr("aai_cli.agent_exec.run_session", lambda *a, **k: called.append(True)) result = runner.invoke(app, ["agent", "--voice", "ivy", "--show-code"]) assert result.exit_code == 0 assert called == [] # never ran a session @@ -284,7 +284,7 @@ def test_agent_show_code_file_source_warns_on_stderr(monkeypatch): def _boom(*a, **k): raise AssertionError("must not run a session") - monkeypatch.setattr("aai_cli.commands.agent.run_session", _boom) + monkeypatch.setattr("aai_cli.agent_exec.run_session", _boom) result = _invoke_split(["agent", "clip.wav", "--show-code"]) assert result.exit_code == 0 assert "uses the microphone" in result.stderr @@ -319,7 +319,7 @@ def test_agent_headphones_notice_routes_to_stderr(monkeypatch): # default human mode the notice goes to stderr, stdout stays transcript-only. config.set_api_key("default", "sk_live") monkeypatch.setattr("aai_cli.output.resolve_json", lambda *, explicit: False) - monkeypatch.setattr("aai_cli.commands.agent.run_session", lambda *a, **k: None) + monkeypatch.setattr("aai_cli.agent_exec.run_session", lambda *a, **k: None) result = _invoke_split(["agent"]) assert result.exit_code == 0 assert "headphones" in result.stderr.lower() @@ -331,7 +331,7 @@ def _boom(*a, **k): raise AssertionError("must not run a session") monkeypatch.setattr( - "aai_cli.commands.agent.run_session", + "aai_cli.agent_exec.run_session", _boom, ) result = runner.invoke(app, ["agent", "--voice", "ivy", "--show-code", "--json"]) @@ -342,13 +342,13 @@ def _boom(*a, **k): def test_agent_output_text_emits_plain_transcript(monkeypatch): # `-o text` -> plain you:/agent: lines on stdout (pipe into assembly llm). 
config.set_api_key("default", "sk_live") - monkeypatch.setattr("aai_cli.commands.agent.FileSource", lambda src: "filesrc") + monkeypatch.setattr("aai_cli.agent_exec.FileSource", lambda src: "filesrc") def fake_run_session(api_key, *, renderer, player, mic, config): renderer.user_final("hello there") renderer.agent_transcript("hi, how can I help?", interrupted=False) - monkeypatch.setattr("aai_cli.commands.agent.run_session", fake_run_session) + monkeypatch.setattr("aai_cli.agent_exec.run_session", fake_run_session) result = runner.invoke(app, ["agent", "--sample", "-o", "text"]) assert result.exit_code == 0 assert "you: hello there" in result.output @@ -370,11 +370,11 @@ def test_resolve_system_prompt_unreadable_file_raises_clierror(tmp_path): import pytest - from aai_cli.commands import agent + from aai_cli import agent_exec from aai_cli.errors import CLIError missing = Path(tmp_path) / "does-not-exist.txt" with pytest.raises(CLIError) as exc: - agent._resolve_system_prompt("fallback prompt", missing) + agent_exec._resolve_system_prompt("fallback prompt", missing) assert exc.value.exit_code == 2 assert "system-prompt-file" in exc.value.message diff --git a/tests/test_command_options_seam.py b/tests/test_command_options_seam.py new file mode 100644 index 00000000..1c81af21 --- /dev/null +++ b/tests/test_command_options_seam.py @@ -0,0 +1,210 @@ +"""Direct tests of the options/run seams (transcribe/agent/speak/llm exec modules). + +Each command module parses argv into a frozen Options dataclass; everything +after that is a module-level run function of plain data. These tests construct +options directly (dataclasses.replace off a defaults instance) instead of +round-tripping argv through CliRunner. The stream seam's tests live in +test_stream_exec.py. 
+""" + +from __future__ import annotations + +import dataclasses + +import pytest +import typer + +from aai_cli import agent_exec, choices, config, llm, llm_exec, speak_exec, transcribe_exec +from aai_cli.agent.session import DEFAULT_GREETING, DEFAULT_PROMPT +from aai_cli.agent.voices import DEFAULT_VOICE +from aai_cli.context import AppState +from aai_cli.errors import CLIError, UsageError +from aai_cli.options import DEFAULT_BATCH_CONCURRENCY + +# The CLI's flag defaults, as data. Tests override per-case with dataclasses.replace. +TRANSCRIBE_DEFAULTS = transcribe_exec.TranscribeOptions( + source=None, + sample=False, + from_stdin=False, + concurrency=DEFAULT_BATCH_CONCURRENCY, + force=False, + speech_model=None, + language_code=None, + language_detection=None, + keyterms_prompt=None, + temperature=None, + prompt=None, + punctuate=None, + format_text=None, + disfluencies=None, + speaker_labels=False, + speakers_expected=None, + multichannel=None, + redact_pii=None, + redact_pii_policy=None, + redact_pii_sub=None, + redact_pii_audio=None, + filter_profanity=None, + content_safety=None, + content_safety_confidence=None, + speech_threshold=None, + summarization=None, + summary_model=None, + summary_type=None, + auto_chapters=None, + sentiment_analysis=None, + entity_detection=None, + auto_highlights=None, + topic_detection=None, + word_boost=None, + custom_spelling_file=None, + audio_start=None, + audio_end=None, + download_sections=None, + webhook_url=None, + webhook_auth_header=None, + translate_to=None, + config_kv=None, + config_file=None, + llm_prompt=None, + model=llm.DEFAULT_MODEL, + max_tokens=llm.DEFAULT_MAX_TOKENS, + output_field=None, + out=None, + show_code=False, +) + +AGENT_DEFAULTS = agent_exec.AgentOptions( + source=None, + sample=False, + voice=DEFAULT_VOICE, + system_prompt=DEFAULT_PROMPT, + system_prompt_file=None, + greeting=DEFAULT_GREETING, + device=None, + output_field=None, + show_code=False, +) + +SPEAK_DEFAULTS = speak_exec.SpeakOptions( + 
text=None, + voice=[], + language=speak_exec.DEFAULT_LANGUAGE, + sample_rate=None, + out=None, +) + +LLM_DEFAULTS = llm_exec.LlmOptions( + prompt=None, + model=llm.DEFAULT_MODEL, + transcript_id=None, + system=None, + follow=False, + output_field=None, + max_tokens=llm.DEFAULT_MAX_TOKENS, +) + + +@pytest.mark.parametrize( + "defaults", + [TRANSCRIBE_DEFAULTS, AGENT_DEFAULTS, SPEAK_DEFAULTS, LLM_DEFAULTS], + ids=["transcribe", "agent", "speak", "llm"], +) +def test_options_are_immutable(defaults): + field_name = dataclasses.fields(defaults)[0].name + with pytest.raises(dataclasses.FrozenInstanceError): + setattr(defaults, field_name, None) + + +def test_run_transcribe_validates_flags_before_credentials(): + # No API key configured: a flag conflict surfaces as a usage error, not + # NotAuthenticated — validation runs before any credential resolution. + with pytest.raises(UsageError): + transcribe_exec.run_transcribe( + dataclasses.replace( + TRANSCRIBE_DEFAULTS, language_code="en_us", language_detection=True + ), + AppState(), + json_mode=False, + ) + + +def test_transcribe_flags_drop_unset_speaker_labels(): + # The boolean --speaker-labels flag maps to None when unset (so the request + # omits the field entirely), and True only when explicitly enabled. 
+ assert TRANSCRIBE_DEFAULTS.flags(None)["speaker_labels"] is None + enabled = dataclasses.replace(TRANSCRIBE_DEFAULTS, speaker_labels=True) + assert enabled.flags(None)["speaker_labels"] is True + + +def test_run_agent_session_config_without_cli(monkeypatch): + config.set_api_key("default", "sk_live") + seen = {} + + def fake_run_session(api_key, *, renderer, player, mic, config): + seen["api_key"] = api_key + seen["config"] = config + + monkeypatch.setattr(agent_exec, "run_session", fake_run_session) + monkeypatch.setattr(agent_exec, "DuplexAudio", _FakeDuplex) + + agent_exec.run_agent( + dataclasses.replace(AGENT_DEFAULTS, greeting="Ahoy"), AppState(), json_mode=True + ) + assert seen["api_key"] == "sk_live" + run_config = seen["config"] + assert run_config.voice == DEFAULT_VOICE + assert run_config.greeting == "Ahoy" + assert run_config.full_duplex is True + assert run_config.exit_after_reply is False + + +class _FakeDuplex: + def __init__(self, *, target_rate=None, device=None): + self.mic = object() + self.player = object() + + +def test_run_agent_ctrl_c_stops_cleanly(monkeypatch): + # Ctrl-C is the normal "user hung up" signal: the session ends without an error. + config.set_api_key("default", "sk_live") + + def raise_interrupt(api_key, *, renderer, player, mic, config): + raise KeyboardInterrupt + + monkeypatch.setattr(agent_exec, "run_session", raise_interrupt) + monkeypatch.setattr(agent_exec, "DuplexAudio", _FakeDuplex) + agent_exec.run_agent(AGENT_DEFAULTS, AppState(), json_mode=True) # no exception + + +def test_run_agent_broken_pipe_exits_zero(monkeypatch): + # A closed downstream pipe (`assembly agent | head`) is a clean stop, not a failure. 
+ config.set_api_key("default", "sk_live") + + def raise_broken_pipe(api_key, *, renderer, player, mic, config): + raise BrokenPipeError + + monkeypatch.setattr(agent_exec, "run_session", raise_broken_pipe) + monkeypatch.setattr(agent_exec, "DuplexAudio", _FakeDuplex) + with pytest.raises(typer.Exit) as exc: + agent_exec.run_agent(AGENT_DEFAULTS, AppState(), json_mode=True) + assert exc.value.exit_code == 0 + + +def test_run_speak_requires_sandbox(): + # The active environment defaults to production, which has no streaming-TTS host. + with pytest.raises(CLIError) as exc: + speak_exec.run_speak(SPEAK_DEFAULTS, AppState(), json_mode=False) + assert exc.value.exit_code == 2 + assert "--sandbox" in (exc.value.suggestion or "") + + +def test_run_llm_follow_rejects_output_field(): + with pytest.raises(UsageError): + llm_exec.run_llm( + dataclasses.replace( + LLM_DEFAULTS, follow=True, prompt="x", output_field=choices.TextOrJson.text + ), + AppState(), + json_mode=False, + ) diff --git a/tests/test_llm_command.py b/tests/test_llm_command.py index e89cb0c1..e66c4fd5 100644 --- a/tests/test_llm_command.py +++ b/tests/test_llm_command.py @@ -212,7 +212,7 @@ def test_llm_transcript_id_stdin_warning_suppressed_by_quiet(monkeypatch): def test_llm_transcript_id_no_warning_when_stdin_is_a_terminal(monkeypatch): _auth() - monkeypatch.setattr("aai_cli.commands.llm.stdio.stdin_is_piped", lambda: False) + monkeypatch.setattr("aai_cli.llm_exec.stdio.stdin_is_piped", lambda: False) monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", lambda *a, **k: _payload("s")) result = runner.invoke(app, ["llm", "summarize", "--transcript-id", "t_9"]) assert result.exit_code == 0 @@ -388,7 +388,7 @@ def test_llm_follow_requires_a_prompt(monkeypatch): def test_llm_follow_requires_piped_stdin(monkeypatch): # Interactively (no pipe) --follow would block forever; reject it with guidance. 
_auth() - monkeypatch.setattr("aai_cli.commands.llm.stdio.stdin_is_piped", lambda: False) + monkeypatch.setattr("aai_cli.llm_exec.stdio.stdin_is_piped", lambda: False) monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", lambda *a, **k: _payload()) result = runner.invoke(app, ["llm", "summarize", "--follow", "--json"]) assert result.exit_code == 2 @@ -424,9 +424,7 @@ def __iter__(self): def __next__(self): raise KeyboardInterrupt - monkeypatch.setattr( - "aai_cli.commands.llm.stdio.iter_piped_stdin_lines", lambda: _InterruptIter() - ) + monkeypatch.setattr("aai_cli.llm_exec.stdio.iter_piped_stdin_lines", lambda: _InterruptIter()) monkeypatch.setattr("aai_cli.commands.llm.gateway.complete", lambda *a, **k: _payload()) result = runner.invoke(app, ["llm", "summarize", "--follow", "--json"], input="") assert result.exit_code == 0 diff --git a/tests/test_replay_e2e.py b/tests/test_replay_e2e.py index db07670b..f1ef49c5 100644 --- a/tests/test_replay_e2e.py +++ b/tests/test_replay_e2e.py @@ -35,7 +35,7 @@ def test_transcribe_sample_renders_real_transcript(monkeypatch, mocker): _with_api_key() _human(monkeypatch) mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=rf.transcript("transcribe_sample"), ) diff --git a/tests/test_source_validation.py b/tests/test_source_validation.py index aac8277e..94b888e6 100644 --- a/tests/test_source_validation.py +++ b/tests/test_source_validation.py @@ -46,7 +46,7 @@ def test_resolve_audio_source_source_plus_sample_rejected_even_without_checks(): def test_transcribe_source_plus_sample_exits_2(mocker, tmp_path): # No key configured: the conflict must fail before credential resolution. 
- tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True) + tx = mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True) clip = tmp_path / "clip.mp3" clip.write_bytes(b"fake") result = runner.invoke(app, ["transcribe", str(clip), "--sample"]) @@ -69,7 +69,7 @@ def test_resolve_audio_source_rejects_directory(tmp_path): def test_transcribe_directory_source_fails_before_credentials(mocker, tmp_path): # No key configured: a directory is batch mode, and an empty one must read as # "no audio files", never trigger a login (or an upload attempt). - tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True) + tx = mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True) result = runner.invoke(app, ["transcribe", str(tmp_path)]) assert result.exit_code == 2 # Rich may wrap the long tmp path mid-token (even inside a word), so compare with @@ -122,7 +122,7 @@ def test_transcripts_get_rejects_path_traversal_id(): def test_transcribe_missing_file_fails_before_credentials(mocker): # No key is configured: the path check must fire first, so the user sees # "file not found" instead of a login prompt (or a keyring error). 
- tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True) + tx = mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True) result = runner.invoke(app, ["transcribe", "missing.wav"]) assert result.exit_code == 2 assert "File not found: missing.wav" in result.output @@ -145,7 +145,7 @@ def test_transcribe_empty_stdin_exits_2(): def test_stream_missing_file_fails_before_credentials(monkeypatch): called = {"stream": False} monkeypatch.setattr( - "aai_cli.commands.stream.client.stream_audio", + "aai_cli.stream_exec.client.stream_audio", lambda *a, **k: called.__setitem__("stream", True), ) result = runner.invoke(app, ["stream", "missing.wav"]) diff --git a/tests/test_speak.py b/tests/test_speak.py index 5f510c6e..b25cc92b 100644 --- a/tests/test_speak.py +++ b/tests/test_speak.py @@ -47,7 +47,7 @@ def test_production_env_is_rejected_with_sandbox_hint(): def test_plays_audio_by_default(monkeypatch, fake_synthesize): played: dict = {} monkeypatch.setattr( - "aai_cli.commands.speak.audio.play_pcm", + "aai_cli.speak_exec.audio.play_pcm", lambda pcm, rate, **_: played.update(pcm=pcm, rate=rate), ) result = runner.invoke(app, ["--sandbox", "speak", "Hello there"]) @@ -63,12 +63,12 @@ def test_plays_audio_by_default(monkeypatch, fake_synthesize): def test_out_writes_wav_and_does_not_play(monkeypatch, tmp_path, fake_synthesize): monkeypatch.setattr( - "aai_cli.commands.speak.audio.play_pcm", + "aai_cli.speak_exec.audio.play_pcm", lambda *a, **k: pytest.fail("should not play when --out is given"), ) written: dict = {} monkeypatch.setattr( - "aai_cli.commands.speak.audio.write_wav", + "aai_cli.speak_exec.audio.write_wav", lambda path, pcm, rate: written.update(path=path, pcm=pcm, rate=rate), ) out = tmp_path / "x.wav" @@ -82,7 +82,7 @@ def test_out_writes_wav_and_does_not_play(monkeypatch, tmp_path, fake_synthesize def test_reads_text_from_stdin_when_arg_omitted(monkeypatch, fake_synthesize): - 
monkeypatch.setattr("aai_cli.commands.speak.audio.play_pcm", lambda *a, **k: None) + monkeypatch.setattr("aai_cli.speak_exec.audio.play_pcm", lambda *a, **k: None) result = runner.invoke(app, ["--sandbox", "speak"], input="piped text\n") assert result.exit_code == 0 assert fake_synthesize["cfg"].text == "piped text" @@ -104,7 +104,7 @@ def test_blank_arg_does_not_fall_back_to_stdin(monkeypatch): def test_voice_and_language_flow_into_config(monkeypatch, fake_synthesize): - monkeypatch.setattr("aai_cli.commands.speak.audio.play_pcm", lambda *a, **k: None) + monkeypatch.setattr("aai_cli.speak_exec.audio.play_pcm", lambda *a, **k: None) result = runner.invoke( app, ["--sandbox", "speak", "Hi", "--voice", "jane", "--language", "English"] ) @@ -116,7 +116,7 @@ def test_voice_and_language_flow_into_config(monkeypatch, fake_synthesize): def test_json_mode_emits_metadata_object_on_stdout(monkeypatch, fake_synthesize): - monkeypatch.setattr("aai_cli.commands.speak.audio.play_pcm", lambda *a, **k: None) + monkeypatch.setattr("aai_cli.speak_exec.audio.play_pcm", lambda *a, **k: None) result = runner.invoke(app, ["--sandbox", "speak", "Hi", "--voice", "jane", "--json"]) assert result.exit_code == 0 # The behavioral split: --json yields a parseable object, not human prose. @@ -131,7 +131,7 @@ def test_json_mode_emits_metadata_object_on_stdout(monkeypatch, fake_synthesize) def test_human_mode_keeps_stdout_clean(monkeypatch, fake_synthesize): - monkeypatch.setattr("aai_cli.commands.speak.audio.play_pcm", lambda *a, **k: None) + monkeypatch.setattr("aai_cli.speak_exec.audio.play_pcm", lambda *a, **k: None) result = runner.invoke(app, ["--sandbox", "speak", "Hi"]) assert result.exit_code == 0 # Human summary goes to stderr; stdout stays empty (audio went to the speaker). 
@@ -150,7 +150,7 @@ def _fake(api_key, segments, *, language=None, sample_rate=None, connect=None, o ) monkeypatch.setattr(session, "synthesize_dialogue", _fake) - monkeypatch.setattr("aai_cli.commands.speak.audio.play_pcm", lambda *a, **k: None) + monkeypatch.setattr("aai_cli.speak_exec.audio.play_pcm", lambda *a, **k: None) return calls @@ -203,7 +203,7 @@ def test_dialogue_json_reports_speaker_voice_map(fake_dialogue): def test_dialogue_json_out_path_is_reported(fake_dialogue, monkeypatch, tmp_path): # With --out, the multi JSON reports the file path (not null) — pins the # `str(out) if out is not None else None` branch in _emit_multi. - monkeypatch.setattr("aai_cli.commands.speak.audio.write_wav", lambda *a, **k: None) + monkeypatch.setattr("aai_cli.speak_exec.audio.write_wav", lambda *a, **k: None) out = tmp_path / "dialogue.wav" text = "Speaker A: One.\nSpeaker B: Two." result = runner.invoke(app, ["--sandbox", "speak", "--out", str(out), "--json"], input=text) @@ -223,7 +223,7 @@ def test_empty_speaker_labels_raises_usage_error(): def test_unlabeled_text_still_uses_single_voice_path(fake_synthesize, monkeypatch): # A bare --voice still selects the single-voice voice for ordinary prose. - monkeypatch.setattr("aai_cli.commands.speak.audio.play_pcm", lambda *a, **k: None) + monkeypatch.setattr("aai_cli.speak_exec.audio.play_pcm", lambda *a, **k: None) result = runner.invoke(app, ["--sandbox", "speak", "Just prose.", "--voice", "mary"]) assert result.exit_code == 0 assert fake_synthesize["cfg"].voice == "mary" @@ -235,7 +235,7 @@ def test_unlabeled_text_still_uses_single_voice_path(fake_synthesize, monkeypatc def test_speaker_mappings_on_unlabeled_input_warn_not_silently_drop(fake_synthesize, monkeypatch): # The mirror of the bare-voice-in-dialogue note: SPEAKER=VOICE mappings can't # apply to plain prose, and the user is told instead of the flag vanishing. 
- monkeypatch.setattr("aai_cli.commands.speak.audio.play_pcm", lambda *a, **k: None) + monkeypatch.setattr("aai_cli.speak_exec.audio.play_pcm", lambda *a, **k: None) result = runner.invoke(app, ["--sandbox", "speak", "Just prose.", "--voice", "A=vera"]) assert result.exit_code == 0 assert "Ignoring --voice SPEAKER=VOICE mappings" in result.stderr @@ -245,7 +245,7 @@ def test_speaker_mappings_on_unlabeled_input_warn_not_silently_drop(fake_synthes def test_speaker_mappings_warning_is_structured_in_json_mode(fake_synthesize, monkeypatch): - monkeypatch.setattr("aai_cli.commands.speak.audio.play_pcm", lambda *a, **k: None) + monkeypatch.setattr("aai_cli.speak_exec.audio.play_pcm", lambda *a, **k: None) result = runner.invoke( app, ["--sandbox", "speak", "Just prose.", "--voice", "A=vera", "--json"] ) @@ -267,7 +267,7 @@ def test_sample_rate_must_be_positive(): def test_sample_rate_floor_accepts_one(fake_synthesize, monkeypatch): # min=1 exactly: 1 Hz is degenerate but valid (the server enforces its own floor). 
- monkeypatch.setattr("aai_cli.commands.speak.audio.play_pcm", lambda *a, **k: None) + monkeypatch.setattr("aai_cli.speak_exec.audio.play_pcm", lambda *a, **k: None) result = runner.invoke(app, ["--sandbox", "speak", "Hi", "--sample-rate", "1"]) assert result.exit_code == 0 assert fake_synthesize["cfg"].sample_rate == 1 diff --git a/tests/test_stream_command.py b/tests/test_stream_command.py index 897c309d..0f5f381f 100644 --- a/tests/test_stream_command.py +++ b/tests/test_stream_command.py @@ -43,7 +43,7 @@ def test_stream_help_lists_command(): def test_stream_mic_renders_turns(monkeypatch): config.set_api_key("default", "sk_live") - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", _drive_turns) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", _drive_turns) result = runner.invoke(app, ["stream", "--json"]) assert result.exit_code == 0 lines = [json.loads(x) for x in result.output.splitlines() if x.strip()] @@ -60,7 +60,7 @@ def fake_stream_audio( seen["source_type"] = type(source).__name__ seen["rate"] = params.sample_rate - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio) import wave p = tmp_path / "a.wav" @@ -90,7 +90,7 @@ def __iter__(self): captured["on_open"]() # the SDK iterating us == the mic is now live return iter([b"\x00\x00"]) - monkeypatch.setattr("aai_cli.commands.stream.MicrophoneSource", FakeMic) + monkeypatch.setattr("aai_cli.stream_exec.MicrophoneSource", FakeMic) order = [] @@ -103,7 +103,7 @@ def fake_stream_audio( list(source) # consume the mic -> on_open fires -> "Listening…" prints order.append("consumed") - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio) result = runner.invoke(app, ["stream"]) assert result.exit_code == 0 assert "Listening" in result.output # shown 
once the mic opened @@ -118,7 +118,7 @@ def fake(api_key, source, *, params, on_begin=None, on_turn=None, on_termination if on_begin: on_begin(types.SimpleNamespace(id="x")) - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake) import wave p = tmp_path / "a.wav" @@ -141,7 +141,7 @@ def fake_stream_audio( ): raise AssertionError(f"streaming should not start after auto-login: {api_key}") - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio) result = runner.invoke(app, ["stream", "--json"]) assert result.exit_code == 4 assert config.get_api_key("default") == "sk_from_oauth" @@ -162,7 +162,7 @@ def test_stream_sample_uses_hosted_clip(monkeypatch): config.set_api_key("default", "sk_live") monkeypatch.setattr("aai_cli.streaming.sources.shutil.which", lambda _n: "/usr/bin/ffmpeg") seen = {} - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", _capture_source(seen)) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", _capture_source(seen)) result = runner.invoke(app, ["stream", "--sample"]) assert result.exit_code == 0 assert type(seen["source"]).__name__ == "FileSource" @@ -174,7 +174,7 @@ def test_stream_url_source_uses_filesource(monkeypatch): config.set_api_key("default", "sk_live") monkeypatch.setattr("aai_cli.streaming.sources.shutil.which", lambda _n: "/usr/bin/ffmpeg") seen = {} - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", _capture_source(seen)) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", _capture_source(seen)) result = runner.invoke(app, ["stream", "https://example.com/clip.mp3"]) assert result.exit_code == 0 assert type(seen["source"]).__name__ == "FileSource" @@ -187,7 +187,7 @@ def test_stream_ctrl_c_exits_cleanly(monkeypatch): def raise_kbd(*a, **k): raise KeyboardInterrupt - 
monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", raise_kbd) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", raise_kbd) result = runner.invoke(app, ["stream"]) assert result.exit_code == 0 @@ -199,7 +199,7 @@ def test_stream_ctrl_c_human_mode_prints_stopped(monkeypatch): def raise_kbd(*a, **k): raise KeyboardInterrupt - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", raise_kbd) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", raise_kbd) result = runner.invoke(app, ["stream"]) assert result.exit_code == 0 assert "Stopped." in result.output @@ -211,7 +211,7 @@ def test_stream_broken_pipe_exits_zero(monkeypatch): def raise_broken_pipe(*a, **k): raise BrokenPipeError - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", raise_broken_pipe) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", raise_broken_pipe) result = runner.invoke(app, ["stream"]) assert result.exit_code == 0 @@ -232,7 +232,7 @@ def fake(api_key, source, *, params, on_begin=None, on_turn=None, on_termination if on_termination: on_termination(types.SimpleNamespace(audio_duration_seconds=2.0)) - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake) p = tmp_path / "a.wav" with wave.open(str(p), "wb") as w: w.setnchannels(1) @@ -254,7 +254,7 @@ def test_stream_prompt_biases_speech_model(monkeypatch): def fake(api_key, source, *, params, on_begin=None, on_turn=None, on_termination=None): seen["prompt"] = params.prompt - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake) result = runner.invoke(app, ["stream", "--prompt", "expect crypto jargon", "--json"]) assert result.exit_code == 0 # --prompt is the speech-model prompt, forwarded to the streaming session. 
@@ -271,14 +271,14 @@ def test_stream_youtube_url_downloads_then_streams(monkeypatch, tmp_path): w.setsampwidth(2) w.setframerate(16000) w.writeframes(b"\x00\x01" * 100) - monkeypatch.setattr("aai_cli.commands.stream.youtube.download_audio", lambda url, d: fake) + monkeypatch.setattr("aai_cli.stream_exec.youtube.download_audio", lambda url, d: fake) seen = {} def fake_stream(api_key, source, *, params, on_begin=None, on_turn=None, on_termination=None): seen["source_type"] = type(source).__name__ seen["src"] = getattr(source, "source", None) - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream) result = runner.invoke(app, ["stream", "https://youtu.be/abc"]) assert result.exit_code == 0 assert seen["source_type"] == "FileSource" # streamed the downloaded local file @@ -295,14 +295,14 @@ def test_stream_podcast_page_url_downloads_then_streams(monkeypatch, tmp_path): w.setsampwidth(2) w.setframerate(16000) w.writeframes(b"\x00\x01" * 100) - monkeypatch.setattr("aai_cli.commands.stream.youtube.download_audio", lambda url, d: fake) + monkeypatch.setattr("aai_cli.stream_exec.youtube.download_audio", lambda url, d: fake) seen = {} def fake_stream(api_key, source, *, params, on_begin=None, on_turn=None, on_termination=None): seen["source_type"] = type(source).__name__ seen["src"] = getattr(source, "source", None) - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream) result = runner.invoke(app, ["stream", "https://www.spreaker.com/episode/12345"]) assert result.exit_code == 0 assert seen["source_type"] == "FileSource" # streamed the downloaded local file @@ -317,11 +317,11 @@ def test_stream_downloadable_url_resolves_credentials_before_downloading(monkeyp monkeypatch.setattr("aai_cli.context._interactive_session", lambda: False) downloads = [] monkeypatch.setattr( - 
"aai_cli.commands.stream.youtube.download_audio", + "aai_cli.stream_exec.youtube.download_audio", lambda url, dest: downloads.append(url), ) monkeypatch.setattr( - "aai_cli.commands.stream.client.stream_audio", + "aai_cli.stream_exec.client.stream_audio", lambda *a, **k: pytest.fail("must not stream without credentials"), ) result = runner.invoke(app, ["stream", "https://youtu.be/abc"]) @@ -351,7 +351,7 @@ def fake_stream_audio( seen["rate"] = params.sample_rate b"".join(source) # drain the StdinSource - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio) result = runner.invoke(app, ["stream", "-", "--sample-rate", "1"], input=b"\x00\x00") assert result.exit_code == 0 assert seen["rate"] == 1 @@ -367,7 +367,7 @@ def fake_stream_audio( seen["rate"] = params.sample_rate seen["audio"] = b"".join(source) # consume the StdinSource - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio) result = runner.invoke(app, ["stream", "-"], input=b"\x01\x02" * 100) assert result.exit_code == 0 assert seen["rate"] == 16000 # default raw-PCM rate @@ -398,9 +398,9 @@ def fake_stream_audio( raise APIError("mic failed") time.sleep(0.2) - monkeypatch.setattr("aai_cli.commands.stream.MacSystemAudioSource", FakeSystemAudio) - monkeypatch.setattr("aai_cli.commands.stream.MicrophoneSource", FakeMic) - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio) + monkeypatch.setattr("aai_cli.stream_exec.MacSystemAudioSource", FakeSystemAudio) + monkeypatch.setattr("aai_cli.stream_exec.MicrophoneSource", FakeMic) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio) result = runner.invoke(app, ["stream", "--system-audio", "--json"]) assert result.exit_code == 1 assert "mic failed" in result.output @@ -417,7 
+417,7 @@ def fake_stream_audio( on_turn(types.SimpleNamespace(transcript="partial", end_of_turn=False)) on_turn(types.SimpleNamespace(transcript="hello world", end_of_turn=True)) - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio) result = runner.invoke(app, ["stream", "-", "-o", "text"], input=b"\x00\x00") assert result.exit_code == 0 # Final turn only, plain text; partials and JSON envelopes are not on stdout. diff --git a/tests/test_stream_command_flags.py b/tests/test_stream_command_flags.py index 35527c3c..d8144589 100644 --- a/tests/test_stream_command_flags.py +++ b/tests/test_stream_command_flags.py @@ -20,7 +20,7 @@ def fake_stream_audio( ): captured["params"] = params - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio) runner.invoke( app, @@ -43,7 +43,7 @@ def test_stream_config_escape_hatch(monkeypatch): config.set_api_key("default", "sk_live") captured = {} monkeypatch.setattr( - "aai_cli.commands.stream.client.stream_audio", + "aai_cli.stream_exec.client.stream_audio", lambda api_key, source, *, params, **kw: captured.update(params=params), ) @@ -55,7 +55,7 @@ def test_stream_maps_webhook_auth_header(monkeypatch): config.set_api_key("default", "sk_live") captured = {} monkeypatch.setattr( - "aai_cli.commands.stream.client.stream_audio", + "aai_cli.stream_exec.client.stream_audio", lambda api_key, source, *, params, **kw: captured.update(params=params), ) @@ -79,7 +79,7 @@ def test_stream_format_turns_tristate(monkeypatch): config.set_api_key("default", "sk_live") captured = {} monkeypatch.setattr( - "aai_cli.commands.stream.client.stream_audio", + "aai_cli.stream_exec.client.stream_audio", lambda api_key, source, *, params, **kw: captured.update(params=params), ) @@ -134,7 +134,7 @@ def 
test_stream_file_source_with_sample_rejected(monkeypatch, tmp_path): def _boom(*a, **k): raise AssertionError("must not stream a conflicting source") - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", _boom) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", _boom) wav = tmp_path / "a.wav" wav.write_bytes(b"RIFF") result = runner.invoke(app, ["stream", str(wav), "--sample"]) diff --git a/tests/test_stream_exec.py b/tests/test_stream_exec.py new file mode 100644 index 00000000..2e945522 --- /dev/null +++ b/tests/test_stream_exec.py @@ -0,0 +1,117 @@ +"""Direct tests of the `assembly stream` options/run seam (aai_cli.stream_exec). + +The command module only parses argv into a StreamOptions; everything after that is +run_stream, a plain function of data. These tests drive validation, flag mapping, +and session wiring by constructing options directly — no CliRunner argv round-trip, +no merged-stream output parsing. +""" + +from __future__ import annotations + +import dataclasses + +import pytest + +from aai_cli import config, llm, stream_exec +from aai_cli.commands.stream import DEFAULT_SPEECH_MODEL +from aai_cli.context import AppState +from aai_cli.errors import UsageError + +# The CLI's flag defaults, as data. Tests override per-case with dataclasses.replace. 
+DEFAULTS = stream_exec.StreamOptions( + source=None, + sample=False, + sample_rate=None, + device=None, + system_audio=False, + system_audio_only=False, + speech_model=DEFAULT_SPEECH_MODEL, + encoding=None, + language_detection=None, + domain=None, + prompt=None, + keyterms_prompt=None, + end_of_turn_confidence_threshold=None, + min_turn_silence=None, + max_turn_silence=None, + vad_threshold=None, + format_turns=None, + include_partial_turns=None, + speaker_labels=None, + max_speakers=None, + voice_focus=None, + voice_focus_threshold=None, + inactivity_timeout=None, + filter_profanity=None, + redact_pii=None, + redact_pii_policy=None, + redact_pii_sub=None, + webhook_url=None, + webhook_auth_header=None, + llm_prompt=None, + llm_interval=10.0, + model=llm.DEFAULT_MODEL, + max_tokens=llm.DEFAULT_MAX_TOKENS, + config_kv=None, + config_file=None, + output_field=None, + show_code=False, +) + + +class FakeMic: + """Mirrors MicrophoneSource's keyword signature (see microphone.py).""" + + def __init__(self, *, target_rate=None, device=None, capture_rate=None, on_open=None): + self.sample_rate = capture_rate or 16000 + self.device = device + + def __iter__(self): + return iter([b"\x00\x00"]) + + +def test_run_stream_maps_flags_to_params_without_cli(monkeypatch): + # The seam's payoff: assert the flag->StreamingParameters mapping by constructing + # options directly, instead of threading a giant argv through CliRunner. 
+ config.set_api_key("default", "sk_live") + seen = {} + + def fake_stream_audio(api_key, source, *, params, **_kwargs): + seen["api_key"] = api_key + seen["params"] = params + + monkeypatch.setattr(stream_exec.client, "stream_audio", fake_stream_audio) + monkeypatch.setattr(stream_exec, "MicrophoneSource", FakeMic) + + stream_exec.run_stream( + dataclasses.replace( + DEFAULTS, + domain="medical-v1", + prompt="expect drug names", + keyterms_prompt=["AssemblyAI"], + ), + AppState(), + json_mode=True, + ) + assert seen["api_key"] == "sk_live" + params = seen["params"] + assert params.domain == "medical-v1" + assert params.prompt == "expect drug names" + assert params.keyterms_prompt == ["AssemblyAI"] + + +def test_run_stream_validates_before_resolving_credentials(): + # No API key is configured: a flag conflict must surface as a usage error, not + # as NotAuthenticated — validation runs before any credential resolution. + with pytest.raises(UsageError): + stream_exec.run_stream( + dataclasses.replace(DEFAULTS, system_audio=True, system_audio_only=True), + AppState(), + json_mode=False, + ) + + +def test_stream_options_are_immutable(): + field_name = "sample" + with pytest.raises(dataclasses.FrozenInstanceError): + setattr(DEFAULTS, field_name, True) diff --git a/tests/test_stream_llm.py b/tests/test_stream_llm.py index 9806b143..503d23a4 100644 --- a/tests/test_stream_llm.py +++ b/tests/test_stream_llm.py @@ -27,8 +27,8 @@ def fake_run_chain(api_key, prompts, *, transcript_text, model, max_tokens): seen["max_tokens"] = max_tokens return f"answer:{transcript_text}" - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake) - monkeypatch.setattr("aai_cli.commands.stream.llm.run_chain", fake_run_chain) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake) + monkeypatch.setattr("aai_cli.llm.run_chain", fake_run_chain) result = runner.invoke( app, [ @@ -67,8 +67,8 @@ def fake_run_chain(api_key, prompts, *, transcript_text, model, 
max_tokens): seen["prompts"] = prompts return "done" - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake) - monkeypatch.setattr("aai_cli.commands.stream.llm.run_chain", fake_run_chain) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake) + monkeypatch.setattr("aai_cli.llm.run_chain", fake_run_chain) result = runner.invoke( app, ["stream", "--llm", "summarize", "--llm", "translate to french", "--json"] ) @@ -79,7 +79,7 @@ def fake_run_chain(api_key, prompts, *, transcript_text, model, max_tokens): def test_stream_llm_rejects_output_text(monkeypatch): config.set_api_key("default", "sk_live") monkeypatch.setattr( - "aai_cli.commands.stream.client.stream_audio", + "aai_cli.stream_exec.client.stream_audio", lambda *a, **k: (_ for _ in ()).throw(AssertionError("must not stream")), ) result = runner.invoke(app, ["stream", "--llm", "summarize", "-o", "text"]) @@ -100,8 +100,8 @@ def fake_run_chain(api_key, prompts, *, transcript_text, model, max_tokens): called["ran"] = True return "x" - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake) - monkeypatch.setattr("aai_cli.commands.stream.llm.run_chain", fake_run_chain) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake) + monkeypatch.setattr("aai_cli.llm.run_chain", fake_run_chain) result = runner.invoke(app, ["stream", "--json"]) assert result.exit_code == 0 assert called["ran"] is False # no --llm -> no gateway call @@ -111,7 +111,7 @@ def test_stream_show_code_with_llm_emits_follow_loop(monkeypatch): def _boom(*a, **k): raise AssertionError("must not stream") - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", _boom) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", _boom) result = runner.invoke(app, ["stream", "--llm", "summarize", "--show-code"]) assert result.exit_code == 0 assert "from openai import OpenAI" in result.output @@ -127,9 +127,9 @@ def _eot_turn(text): def _llm_session(*, interval, clock, 
monkeypatch, emitted): import io - from aai_cli.commands.stream import StreamSession from aai_cli.follow import FollowRenderer from aai_cli.streaming.render import StreamRenderer + from aai_cli.streaming.session import StreamSession # Capture each follow refresh (json mode emits one NDJSON object per refresh) and # make run_chain echo the transcript it summarized so assertions read the cadence. @@ -218,8 +218,8 @@ def test_maybe_summarize_is_noop_without_follow(): # is never run (no gateway call) regardless of transcript content. import io - from aai_cli.commands.stream import StreamSession from aai_cli.streaming.render import StreamRenderer + from aai_cli.streaming.session import StreamSession session = StreamSession( api_key="sk", diff --git a/tests/test_stream_session.py b/tests/test_stream_session.py index 2bb4d830..73226d84 100644 --- a/tests/test_stream_session.py +++ b/tests/test_stream_session.py @@ -24,8 +24,8 @@ def test_stream_session_listening_notice_latches(monkeypatch): # callback fires repeatedly (pins the `self._listening_started = True` latch). 
import io - from aai_cli.commands.stream import StreamSession from aai_cli.streaming.render import StreamRenderer + from aai_cli.streaming.session import StreamSession renderer = StreamRenderer(json_mode=False, out=io.StringIO()) calls = {"n": 0} @@ -53,9 +53,9 @@ def test_stream_session_closes_renderer_on_error(monkeypatch): import pytest - from aai_cli.commands.stream import StreamSession from aai_cli.errors import CLIError from aai_cli.streaming.render import StreamRenderer + from aai_cli.streaming.session import StreamSession renderer = StreamRenderer(json_mode=False, out=io.StringIO()) closed = {"n": 0} @@ -64,7 +64,7 @@ def test_stream_session_closes_renderer_on_error(monkeypatch): def boom(*_args, **_kwargs): raise CLIError("stream blew up") - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", boom) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", boom) session = StreamSession( api_key="sk", base_flags={}, @@ -122,9 +122,9 @@ def fake_stream_audio( if on_turn: on_turn(types.SimpleNamespace(transcript=source_type, end_of_turn=True)) - monkeypatch.setattr("aai_cli.commands.stream.MacSystemAudioSource", FakeSystemAudio) - monkeypatch.setattr("aai_cli.commands.stream.MicrophoneSource", FakeMic) - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio) + monkeypatch.setattr("aai_cli.stream_exec.MacSystemAudioSource", FakeSystemAudio) + monkeypatch.setattr("aai_cli.stream_exec.MicrophoneSource", FakeMic) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio) result = runner.invoke(app, ["stream", "--system-audio", "--json"]) assert result.exit_code == 0 assert set(source_types) == {"FakeSystemAudio", "FakeMic"} @@ -154,9 +154,9 @@ def __iter__(self): def fail_mic(**_kwargs): raise AssertionError("system-audio-only must not open the microphone") - monkeypatch.setattr("aai_cli.commands.stream.MacSystemAudioSource", FakeSystemAudio) - 
monkeypatch.setattr("aai_cli.commands.stream.MicrophoneSource", fail_mic) - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", _capture_source(seen)) + monkeypatch.setattr("aai_cli.stream_exec.MacSystemAudioSource", FakeSystemAudio) + monkeypatch.setattr("aai_cli.stream_exec.MicrophoneSource", fail_mic) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", _capture_source(seen)) result = runner.invoke(app, ["stream", "--system-audio-only", "--json"]) assert result.exit_code == 0 assert type(seen["source"]).__name__ == "FakeSystemAudio" @@ -195,9 +195,9 @@ def fake_stream_audio( ): list(source) - monkeypatch.setattr("aai_cli.commands.stream.MacSystemAudioSource", FakeSystemAudio) - monkeypatch.setattr("aai_cli.commands.stream.MicrophoneSource", FakeMic) - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio) + monkeypatch.setattr("aai_cli.stream_exec.MacSystemAudioSource", FakeSystemAudio) + monkeypatch.setattr("aai_cli.stream_exec.MicrophoneSource", FakeMic) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio) result = runner.invoke( app, ["stream", "--system-audio", "--device", "2", "--sample-rate", "44100", "--json"], @@ -235,10 +235,10 @@ def fake_run_chain(api_key, prompts, *, transcript_text, model, max_tokens): transcript_inputs.append(transcript_text) return "summary" - monkeypatch.setattr("aai_cli.commands.stream.MacSystemAudioSource", FakeSystemAudio) - monkeypatch.setattr("aai_cli.commands.stream.MicrophoneSource", FakeMic) - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio) - monkeypatch.setattr("aai_cli.commands.stream.llm.run_chain", fake_run_chain) + monkeypatch.setattr("aai_cli.stream_exec.MacSystemAudioSource", FakeSystemAudio) + monkeypatch.setattr("aai_cli.stream_exec.MicrophoneSource", FakeMic) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio) + monkeypatch.setattr("aai_cli.llm.run_chain", 
fake_run_chain) result = runner.invoke(app, ["stream", "--system-audio", "--llm", "summarize", "--json"]) assert result.exit_code == 0 assert any("System: FakeSystemAudio" in value for value in transcript_inputs) @@ -271,9 +271,9 @@ def fake_stream_audio( chunk = next(iter(source)) speaker_labels_by_chunk[chunk] = params.speaker_labels - monkeypatch.setattr("aai_cli.commands.stream.MacSystemAudioSource", FakeSystemAudio) - monkeypatch.setattr("aai_cli.commands.stream.MicrophoneSource", FakeMic) - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio) + monkeypatch.setattr("aai_cli.stream_exec.MacSystemAudioSource", FakeSystemAudio) + monkeypatch.setattr("aai_cli.stream_exec.MicrophoneSource", FakeMic) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio) result = runner.invoke(app, ["stream", "--system-audio", "--speaker-labels", "--json"]) assert result.exit_code == 0 assert speaker_labels_by_chunk[b"system"] is True @@ -319,9 +319,9 @@ def fake_stream_audio( ): raise APIError(f"{type(source).__name__} failed") - monkeypatch.setattr("aai_cli.commands.stream.MacSystemAudioSource", FakeSystemAudio) - monkeypatch.setattr("aai_cli.commands.stream.MicrophoneSource", FakeMic) - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio) + monkeypatch.setattr("aai_cli.stream_exec.MacSystemAudioSource", FakeSystemAudio) + monkeypatch.setattr("aai_cli.stream_exec.MicrophoneSource", FakeMic) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio) monkeypatch.setattr("aai_cli.streaming.session.threading.Thread", ImmediateThread) result = runner.invoke(app, ["stream", "--system-audio", "--json"]) assert result.exit_code == 1 @@ -367,9 +367,9 @@ def join(self, timeout=None): def fake_stream_audio(api_key, source, *, params, **_kwargs): raise RuntimeError("event parsing blew up") - monkeypatch.setattr("aai_cli.commands.stream.MacSystemAudioSource", FakeSystemAudio) 
- monkeypatch.setattr("aai_cli.commands.stream.MicrophoneSource", FakeMic) - monkeypatch.setattr("aai_cli.commands.stream.client.stream_audio", fake_stream_audio) + monkeypatch.setattr("aai_cli.stream_exec.MacSystemAudioSource", FakeSystemAudio) + monkeypatch.setattr("aai_cli.stream_exec.MicrophoneSource", FakeMic) + monkeypatch.setattr("aai_cli.stream_exec.client.stream_audio", fake_stream_audio) monkeypatch.setattr("aai_cli.streaming.session.threading.Thread", ImmediateThread) result = runner.invoke(app, ["stream", "--system-audio", "--json"]) assert result.exit_code == 1 @@ -398,8 +398,8 @@ def __init__(self, *, target, args, daemon): def start(self): raise KeyboardInterrupt - monkeypatch.setattr("aai_cli.commands.stream.MacSystemAudioSource", FakeSystemAudio) - monkeypatch.setattr("aai_cli.commands.stream.MicrophoneSource", FakeMic) + monkeypatch.setattr("aai_cli.stream_exec.MacSystemAudioSource", FakeSystemAudio) + monkeypatch.setattr("aai_cli.stream_exec.MicrophoneSource", FakeMic) monkeypatch.setattr("aai_cli.streaming.session.threading.Thread", InterruptingThread) result = runner.invoke(app, ["stream", "--system-audio"]) assert result.exit_code == 0 @@ -424,8 +424,8 @@ def __init__(self, *, target, args, daemon): def start(self): raise BrokenPipeError - monkeypatch.setattr("aai_cli.commands.stream.MacSystemAudioSource", FakeSystemAudio) - monkeypatch.setattr("aai_cli.commands.stream.MicrophoneSource", FakeMic) + monkeypatch.setattr("aai_cli.stream_exec.MacSystemAudioSource", FakeSystemAudio) + monkeypatch.setattr("aai_cli.stream_exec.MicrophoneSource", FakeMic) monkeypatch.setattr("aai_cli.streaming.session.threading.Thread", BrokenPipeThread) result = runner.invoke(app, ["stream", "--system-audio"]) assert result.exit_code == 0 diff --git a/tests/test_stream_show_code.py b/tests/test_stream_show_code.py index c2f2f675..79e1589d 100644 --- a/tests/test_stream_show_code.py +++ b/tests/test_stream_show_code.py @@ -15,7 +15,7 @@ def 
test_stream_show_code_prints_without_streaming(monkeypatch): # Print-only: emits the mic-streaming script, never opens audio or streams, no auth. called = [] monkeypatch.setattr( - "aai_cli.commands.stream.client.stream_audio", + "aai_cli.stream_exec.client.stream_audio", lambda *a, **k: called.append(True), ) result = runner.invoke(app, ["stream", "--show-code"]) @@ -101,7 +101,7 @@ def _boom(*a, **k): raise AssertionError("must not stream") monkeypatch.setattr( - "aai_cli.commands.stream.client.stream_audio", + "aai_cli.stream_exec.client.stream_audio", _boom, ) result = runner.invoke(app, ["stream", "--show-code", "--json"]) diff --git a/tests/test_transcribe.py b/tests/test_transcribe.py index fb7bea2a..a847c0ad 100644 --- a/tests/test_transcribe.py +++ b/tests/test_transcribe.py @@ -57,7 +57,7 @@ def _fake_transcript(mocker): def test_transcribe_sample_prints_text(mocker): _auth() tx = mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -71,7 +71,7 @@ def test_transcribe_sample_prints_text(mocker): def test_transcribe_json_output(mocker): _auth() mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -83,7 +83,7 @@ def test_transcribe_unauthenticated_runs_login_then_transcribes(monkeypatch, moc monkeypatch.setattr("aai_cli.context._interactive_session", lambda: True) monkeypatch.setattr("aai_cli.context.run_login_flow", _login_result) tx = mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -97,7 +97,7 @@ def test_transcribe_unauthenticated_runs_login_then_transcribes(monkeypatch, moc def test_transcribe_output_text_field(mocker): _auth() mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + 
"aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -109,7 +109,7 @@ def test_transcribe_output_text_field(mocker): def test_transcribe_output_id_field(mocker): _auth() mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -122,7 +122,7 @@ def test_transcribe_output_srt_field(mocker): _auth() t = _fake_transcript(mocker) t.export_subtitles_srt.return_value = "1\n00:00:00,000 --> 00:00:02,000\nhello world\n" - mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True, return_value=t) + mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=t) result = runner.invoke(app, ["transcribe", "audio.mp3", "-o", "srt"]) assert result.exit_code == 0 assert "00:00:00,000 --> 00:00:02,000" in result.output # SRT body, pipe-friendly @@ -132,7 +132,7 @@ def test_transcribe_output_srt_field(mocker): def test_transcribe_output_invalid_exits_2(mocker): _auth() mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -151,7 +151,7 @@ def fake_transcribe(api_key, audio, *, config): seen["bytes"] = pathlib.Path(audio).read_bytes() return _fake_transcript(mocker) - monkeypatch.setattr("aai_cli.commands.transcribe.client.transcribe", fake_transcribe) + monkeypatch.setattr("aai_cli.transcribe_exec.client.transcribe", fake_transcribe) result = runner.invoke(app, ["transcribe", "-", "-o", "text"], input=b"RIFFfake-wav-bytes") assert result.exit_code == 0 assert result.output.strip() == "hello world" @@ -165,7 +165,7 @@ def test_transcribe_status_renders_enum_value(mocker): t = _fake_transcript(mocker) t.status = aai.TranscriptStatus.completed t.json_response = None - mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True, return_value=t) + 
mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=t) result = runner.invoke(app, ["transcribe", "audio.mp3", "--json"]) assert result.exit_code == 0 assert '"status": "completed"' in result.output @@ -182,7 +182,7 @@ def fake_transform(api_key, *, prompt, model, transcript_id, max_tokens, transcr return "a short summary" mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -211,7 +211,7 @@ def fake_transform( return f"out({prompt})" mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -243,7 +243,7 @@ def test_transcribe_prompt_human_shows_only_transform(monkeypatch, mocker): _auth() monkeypatch.setattr("aai_cli.output.resolve_json", lambda *, explicit: False) mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -263,7 +263,7 @@ def test_transcribe_chained_prompts_human_labels_each_step(monkeypatch, mocker): _auth() monkeypatch.setattr("aai_cli.output.resolve_json", lambda *, explicit: False) mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -290,7 +290,7 @@ def test_transcribe_youtube_url_downloads_then_transcribes(monkeypatch, mocker, lambda url, d, *, download_sections=None: fake, ) tx = mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -312,7 +312,7 @@ def _capture(url, d, *, download_sections=None): monkeypatch.setattr("aai_cli.transcribe_exec.youtube.download_audio", _capture) mocker.patch( - 
"aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -335,7 +335,7 @@ def test_transcribe_podcast_page_url_downloads_then_transcribes(monkeypatch, moc lambda url, d, *, download_sections=None: fake, ) tx = mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -354,7 +354,7 @@ def _no_download(url, d, *, download_sections=None): monkeypatch.setattr("aai_cli.transcribe_exec.youtube.download_audio", _no_download) tx = mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -369,7 +369,7 @@ def test_transcribe_renders_summary_human(monkeypatch, mocker): t = _fake_transcript(mocker) t.summary = "three bullet summary" t.chapters = [] - mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True, return_value=t) + mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=t) result = runner.invoke(app, ["transcribe", "audio.mp3", "--summarization"]) assert result.exit_code == 0 assert "Summary:" in result.output diff --git a/tests/test_transcribe_batch.py b/tests/test_transcribe_batch.py index 69705071..3cc2e367 100644 --- a/tests/test_transcribe_batch.py +++ b/tests/test_transcribe_batch.py @@ -17,7 +17,7 @@ runner = CliRunner() -_TRANSCRIBE = "aai_cli.commands.transcribe.client.transcribe" +_TRANSCRIBE = "aai_cli.transcribe_exec.client.transcribe" @pytest.fixture(autouse=True) diff --git a/tests/test_transcribe_batch_sources.py b/tests/test_transcribe_batch_sources.py index e6836d21..5553e993 100644 --- a/tests/test_transcribe_batch_sources.py +++ b/tests/test_transcribe_batch_sources.py @@ -16,7 +16,7 @@ runner = CliRunner() -_TRANSCRIBE = "aai_cli.commands.transcribe.client.transcribe" +_TRANSCRIBE 
= "aai_cli.transcribe_exec.client.transcribe" @pytest.fixture(autouse=True) diff --git a/tests/test_transcribe_flags.py b/tests/test_transcribe_flags.py index 5087ea02..4fec6677 100644 --- a/tests/test_transcribe_flags.py +++ b/tests/test_transcribe_flags.py @@ -52,7 +52,7 @@ def _enum_or_str(value): def test_transcribe_passes_speaker_labels(mocker): _auth() tx = mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -63,7 +63,7 @@ def test_transcribe_passes_speaker_labels(mocker): def test_transcribe_prompt_biases_speech_model(mocker): _auth() tx = mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -76,7 +76,7 @@ def test_transcribe_prompt_biases_speech_model(mocker): def test_transcribe_maps_analysis_flags(mocker): _auth() tx = mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -102,7 +102,7 @@ def test_transcribe_maps_analysis_flags(mocker): def test_transcribe_redact_pii_policy_csv(mocker): _auth() tx = mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -127,7 +127,7 @@ def test_transcribe_redact_pii_policy_csv(mocker): def test_transcribe_config_escape_hatch(mocker): _auth() tx = mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -138,7 +138,7 @@ def test_transcribe_config_escape_hatch(mocker): def test_transcribe_unknown_config_field_exits_2(mocker): _auth() mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", 
autospec=True, return_value=_fake_transcript(mocker), ) @@ -150,7 +150,7 @@ def test_transcribe_unknown_config_field_exits_2(mocker): def test_transcribe_webhook_auth_header(mocker): _auth() tx = mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -173,7 +173,7 @@ def test_transcribe_webhook_auth_header(mocker): def test_transcribe_negative_audio_start_exits_2(mocker): _auth() - tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True) + tx = mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True) result = runner.invoke(app, ["transcribe", "audio.mp3", "--audio-start", "-100"]) assert result.exit_code == 2 tx.assert_not_called() @@ -181,7 +181,7 @@ def test_transcribe_negative_audio_start_exits_2(mocker): def test_transcribe_language_code_with_detection_exits_2(mocker): _auth() - tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True) + tx = mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True) result = runner.invoke( app, ["transcribe", "audio.mp3", "--language-code", "en_us", "--language-detection"], @@ -196,7 +196,7 @@ def test_transcribe_language_flags_alone_are_accepted(mocker): # Only the combination is contradictory; each flag works on its own. 
_auth() tx = mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -210,7 +210,7 @@ def test_transcribe_language_flags_alone_are_accepted(mocker): def test_transcribe_speakers_expected_without_labels_exits_2(mocker): _auth() - tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True) + tx = mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True) result = runner.invoke(app, ["transcribe", "audio.mp3", "--speakers-expected", "2"]) assert result.exit_code == 2 assert "--speakers-expected only applies when diarization is enabled." in result.output @@ -221,7 +221,7 @@ def test_transcribe_speakers_expected_without_labels_exits_2(mocker): def test_transcribe_speakers_expected_with_labels_is_accepted(mocker): _auth() tx = mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -237,7 +237,7 @@ def test_transcribe_speakers_expected_with_config_speaker_labels_is_accepted(moc # runs on the merged config, not just the curated flag. _auth() tx = mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -254,7 +254,7 @@ def test_transcribe_temperature_out_of_range_exits_2(mocker, value): # The API documents temperature as 0 (most deterministic) to 1 (least); reject # out-of-range values client-side instead of letting them flow to the request. 
_auth() - tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True) + tx = mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True) result = runner.invoke(app, ["transcribe", "audio.mp3", "--temperature", value]) assert result.exit_code == 2 tx.assert_not_called() @@ -264,7 +264,7 @@ def test_transcribe_temperature_out_of_range_exits_2(mocker, value): def test_transcribe_temperature_bounds_are_inclusive(mocker, value): _auth() tx = mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -275,7 +275,7 @@ def test_transcribe_temperature_bounds_are_inclusive(mocker, value): def test_transcribe_negative_audio_end_exits_2(mocker): _auth() - tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True) + tx = mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True) result = runner.invoke(app, ["transcribe", "audio.mp3", "--audio-end", "-100"]) assert result.exit_code == 2 tx.assert_not_called() @@ -284,7 +284,7 @@ def test_transcribe_negative_audio_end_exits_2(mocker): def test_transcribe_audio_end_zero_is_accepted(mocker): _auth() tx = mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -297,7 +297,7 @@ def test_transcribe_json_with_non_json_output_field_exits_2(mocker): # --json means "the full JSON payload" (same as -o json); -o text contradicts it # and must not silently win. _auth() - tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True) + tx = mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True) result = runner.invoke(app, ["transcribe", "audio.mp3", "-o", "text", "--json"]) assert result.exit_code == 2 assert "--json and -o text can't be combined." 
in result.output @@ -310,7 +310,7 @@ def test_transcribe_json_with_o_json_is_accepted(mocker): _auth() mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -323,7 +323,7 @@ def test_transcribe_warns_on_non_audio_extension(mocker, tmp_path): _auth() (tmp_path / "notes.txt").write_bytes(b"fake") mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -337,7 +337,7 @@ def test_transcribe_non_audio_warning_suppressed_by_quiet(mocker, tmp_path): _auth() (tmp_path / "notes.txt").write_bytes(b"fake") mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -352,7 +352,7 @@ def test_transcribe_non_audio_warning_is_structured_under_json(mocker, tmp_path) _auth() (tmp_path / "notes.txt").write_bytes(b"fake") mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -370,7 +370,7 @@ def test_transcribe_no_warning_for_audio_or_extensionless_files(mocker, tmp_path _auth() (tmp_path / name).write_bytes(b"fake") mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -384,7 +384,7 @@ def test_transcribe_no_warning_for_urls_or_sample(mocker, argv): # Remote sources aren't local files; the extension heuristic doesn't apply. 
_auth() mocker.patch( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=_fake_transcript(mocker), ) @@ -395,7 +395,7 @@ def test_transcribe_no_warning_for_urls_or_sample(mocker, argv): def test_transcribe_unknown_pii_policy_exits_2_and_lists_valid(mocker): _auth() - tx = mocker.patch("aai_cli.commands.transcribe.client.transcribe", autospec=True) + tx = mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True) result = runner.invoke( app, ["transcribe", "audio.mp3", "--redact-pii", "--redact-pii-policy", "not_a_policy"], diff --git a/tests/test_transcribe_out.py b/tests/test_transcribe_out.py index 25591081..69abb69a 100644 --- a/tests/test_transcribe_out.py +++ b/tests/test_transcribe_out.py @@ -18,7 +18,7 @@ def audio_file(tmp_path, monkeypatch): (tmp_path / "audio.mp3").write_bytes(b"fake-audio") -_TRANSCRIBE = "aai_cli.commands.transcribe.client.transcribe" +_TRANSCRIBE = "aai_cli.transcribe_exec.client.transcribe" def _auth(): diff --git a/tests/test_transcribe_show_code.py b/tests/test_transcribe_show_code.py index d28e1aaf..b5fe3032 100644 --- a/tests/test_transcribe_show_code.py +++ b/tests/test_transcribe_show_code.py @@ -15,7 +15,7 @@ def test_transcribe_show_code_prints_without_transcribing(monkeypatch): # Print-only: emits code, never calls the API, needs no auth. 
called = [] monkeypatch.setattr( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", lambda *a, **k: called.append(True), ) result = runner.invoke(app, ["transcribe", "--sample", "--speaker-labels", "--show-code"]) @@ -34,7 +34,7 @@ def test_transcribe_show_code_includes_download_sections(monkeypatch): def _boom(*a, **k): raise AssertionError("must not transcribe") - monkeypatch.setattr("aai_cli.commands.transcribe.client.transcribe", _boom) + monkeypatch.setattr("aai_cli.transcribe_exec.client.transcribe", _boom) result = runner.invoke( app, ["transcribe", "https://youtu.be/abc", "--download-sections", "*0:00-5:00", "--show-code"], @@ -50,7 +50,7 @@ def test_transcribe_show_code_without_source_uses_placeholder(monkeypatch): def _boom(*a, **k): raise AssertionError("must not transcribe") - monkeypatch.setattr("aai_cli.commands.transcribe.client.transcribe", _boom) + monkeypatch.setattr("aai_cli.transcribe_exec.client.transcribe", _boom) result = runner.invoke(app, ["transcribe", "--show-code"]) assert result.exit_code == 0 assert "import assemblyai as aai" in result.output @@ -63,7 +63,7 @@ def _boom(*a, **k): raise AssertionError("must not transcribe") monkeypatch.setattr( - "aai_cli.commands.transcribe.client.transcribe", + "aai_cli.transcribe_exec.client.transcribe", _boom, ) result = runner.invoke(app, ["transcribe", "--sample", "--show-code", "--json"]) @@ -77,7 +77,7 @@ def test_transcribe_show_code_includes_llm_gateway_without_running(monkeypatch): def _boom(*a, **k): raise AssertionError("must not call the API") - monkeypatch.setattr("aai_cli.commands.transcribe.client.transcribe", _boom) + monkeypatch.setattr("aai_cli.transcribe_exec.client.transcribe", _boom) monkeypatch.setattr("aai_cli.commands.transcribe.llm.transform_transcript", _boom) result = runner.invoke( app, @@ -94,7 +94,7 @@ def test_transcribe_show_code_output_srt_generates_export(monkeypatch): def _boom(*a, **k): raise AssertionError("must not 
transcribe") - monkeypatch.setattr("aai_cli.commands.transcribe.client.transcribe", _boom) + monkeypatch.setattr("aai_cli.transcribe_exec.client.transcribe", _boom) result = runner.invoke(app, ["transcribe", "--sample", "-o", "srt", "--show-code"]) assert result.exit_code == 0 compile(result.output, "", "exec") # the emitted script is runnable @@ -106,7 +106,7 @@ def test_transcribe_show_code_output_utterances_generates_loop(monkeypatch): def _boom(*a, **k): raise AssertionError("must not transcribe") - monkeypatch.setattr("aai_cli.commands.transcribe.client.transcribe", _boom) + monkeypatch.setattr("aai_cli.transcribe_exec.client.transcribe", _boom) result = runner.invoke(app, ["transcribe", "--sample", "-o", "utterances", "--show-code"]) assert result.exit_code == 0 compile(result.output, "", "exec")