diff --git a/aai_cli/choices.py b/aai_cli/choices.py index 52915949..55e58286 100644 --- a/aai_cli/choices.py +++ b/aai_cli/choices.py @@ -17,6 +17,7 @@ class TranscriptOutput(enum.StrEnum): status = "status" utterances = "utterances" srt = "srt" + vtt = "vtt" json = "json" diff --git a/aai_cli/client.py b/aai_cli/client.py index fa45b984..bea44769 100644 --- a/aai_cli/client.py +++ b/aai_cli/client.py @@ -3,6 +3,7 @@ import contextlib import json import re +from abc import abstractmethod from collections.abc import Callable, Generator, Iterable from pathlib import Path from typing import Any, Literal, Protocol @@ -222,24 +223,61 @@ def _render_utterances(transcript: Any) -> str: ) -def _export_srt(transcript: Any) -> str: +class _SubtitleTranscript(Protocol): + """The slice of ``aai.Transcript`` the subtitle renderers touch.""" + + @abstractmethod + def export_subtitles_srt(self, chars_per_caption: int | None) -> str: + """Fetch the transcript's SRT captions.""" + + @abstractmethod + def export_subtitles_vtt(self, chars_per_caption: int | None) -> str: + """Fetch the transcript's VTT captions.""" + + +def _export_srt(transcript: _SubtitleTranscript, chars_per_caption: int | None) -> str: # The SDK fetches SRT from the `/srt` export endpoint, so this hits the network. with _sdk_errors("Could not export SRT subtitles"): - return str(transcript.export_subtitles_srt()) + return str(transcript.export_subtitles_srt(chars_per_caption=chars_per_caption)) + + +def _export_vtt(transcript: _SubtitleTranscript, chars_per_caption: int | None) -> str: + # The SDK fetches VTT from the `/vtt` export endpoint, so this hits the network. + with _sdk_errors("Could not export VTT subtitles"): + return str(transcript.export_subtitles_vtt(chars_per_caption=chars_per_caption)) + +# Subtitle fields hit an export endpoint and take the --chars-per-caption knob. +_SUBTITLE_RENDERERS: dict[str, Callable[[_SubtitleTranscript, int | None], str]] = { + "srt": _export_srt, + "vtt": _export_vtt, +} # Output field -> renderer. Fields absent here fall back to the plain transcript text. _FIELD_RENDERERS: dict[str, Callable[[Any], str]] = { "id": lambda t: str(getattr(t, "id", "") or ""), "status": status_str, "utterances": _render_utterances, - "srt": _export_srt, "json": lambda t: json.dumps(transcript_json_payload(t), default=str), } -def select_transcript_field(transcript: Any, field: str) -> str: +def validate_chars_per_caption(chars_per_caption: int | None, field: str | None) -> None: + """``--chars-per-caption`` only shapes subtitle exports; any other ``-o`` contradicts it.""" + if chars_per_caption is not None and field not in _SUBTITLE_RENDERERS: + raise UsageError( + "--chars-per-caption only applies to subtitle output.", + suggestion="Add -o srt or -o vtt.", + ) + + +def select_transcript_field( + transcript: Any, field: str, *, chars_per_caption: int | None = None +) -> str: """Render a single transcript field for ``-o/--output``.""" + subtitles = _SUBTITLE_RENDERERS.get(field) + if subtitles is not None: + return subtitles(transcript, chars_per_caption) return _FIELD_RENDERERS.get(field, _transcript_text)(transcript) diff --git a/aai_cli/code_gen/transcribe.py b/aai_cli/code_gen/transcribe.py index 403a53e8..8e49206b 100644 --- a/aai_cli/code_gen/transcribe.py +++ b/aai_cli/code_gen/transcribe.py @@ -7,7 +7,7 @@ # ``-o/--output`` choice -> printed-result code, mirroring the run path's # ``client._FIELD_RENDERERS`` semantics: plain fields, the speaker-labeled -# utterances loop, the SRT export endpoint, and the raw ``json_response`` payload. +# utterances loop, the SRT/VTT export endpoints, and the raw ``json_response`` payload. _OUTPUT_SNIPPETS: dict[str, str] = { "text": "print(transcript.text)", "id": "print(transcript.id)", @@ -16,9 +16,13 @@ 'for utt in transcript.utterances or []:\n print(f"Speaker {utt.speaker}: {utt.text}")' ), "srt": "print(transcript.export_subtitles_srt())", + "vtt": "print(transcript.export_subtitles_vtt())", "json": "print(json.dumps(transcript.json_response, default=str))", } +# The subtitle exports take the --chars-per-caption knob as a kwarg. +_SUBTITLE_FORMATS = ("srt", "vtt") + def render( merged: dict[str, object], @@ -26,6 +30,7 @@ def render( *, llm_gateway: dict[str, object] | None = None, output: str | None = None, + chars_per_caption: int | None = None, download_sections: list[str] | None = None, ) -> str: """Generate a runnable transcribe script reproducing this CLI invocation. @@ -37,7 +42,7 @@ def render( When `output` (a ``-o/--output`` field name) is given, the script prints that one field instead — and, as in the real command, it takes precedence over the LLM chain - and the analysis sections. + and the analysis sections. `chars_per_caption` shapes the srt/vtt export calls. When `download_sections` (yt-dlp ``--download-sections`` specs) is given for a downloadable URL, the generated yt-dlp call fetches only those parts of the source. @@ -57,7 +62,7 @@ def render( has_sections=ranges_expr is not None, ) + _transcribe_block(merged, source, needs_download=needs_download, ranges_expr=ranges_expr) - + _result_block(merged, llm_gateway, output) + + _result_block(merged, llm_gateway, output, chars_per_caption) ) parts.append("") return "\n".join(parts) @@ -183,10 +188,18 @@ def _transcribe_block( def _result_block( - merged: dict[str, object], llm_gateway: dict[str, object] | None, output: str | None + merged: dict[str, object], + llm_gateway: dict[str, object] | None, + output: str | None, + chars_per_caption: int | None, ) -> list[str]: """The printed-result lines: one ``-o`` field, the LLM chain, or the analysis sections.""" if output is not None: + if output in _SUBTITLE_FORMATS and chars_per_caption is not None: + return [ + f"print(transcript.export_subtitles_{output}" + f"(chars_per_caption={chars_per_caption}))" + ] # Unknown names fall back to the plain text, like select_transcript_field does. return [_OUTPUT_SNIPPETS.get(output, _OUTPUT_SNIPPETS["text"])] if llm_gateway: diff --git a/aai_cli/commands/transcribe.py b/aai_cli/commands/transcribe.py index 5e6873f0..851e2d1f 100644 --- a/aai_cli/commands/transcribe.py +++ b/aai_cli/commands/transcribe.py @@ -320,8 +320,9 @@ def transcribe( None, "-o", "--output", - help="Print one field: text, id, status, utterances, srt (captions), or json.", + help="Print one field: text, id, status, utterances, srt or vtt (captions), or json.", ), + chars_per_caption: int | None = options.chars_per_caption_option(), out: Path | None = typer.Option( None, "--out", @@ -397,6 +398,7 @@ def transcribe( model=model, max_tokens=max_tokens, output_field=output_field, + chars_per_caption=chars_per_caption, out=out, show_code=show_code, ) diff --git a/aai_cli/commands/transcripts.py b/aai_cli/commands/transcripts.py index ba8b36f7..19b2da38 100644 --- a/aai_cli/commands/transcripts.py +++ b/aai_cli/commands/transcripts.py @@ -62,6 +62,7 @@ def render(data: list[dict[str, object]]) -> object: ("Fetch a transcript's text by id", "assembly transcripts get 5551234-abcd"), ("Speaker-labeled turns", "assembly transcripts get 5551234-abcd -o utterances"), ("Save SRT subtitles", "assembly transcripts get 5551234-abcd -o srt > captions.srt"), + ("Save VTT subtitles", "assembly transcripts get 5551234-abcd -o vtt > captions.vtt"), ("Get the raw JSON", "assembly transcripts get 5551234-abcd --json"), ] ) @@ -75,14 +76,16 @@ def get( "--output", help="Print one field of the result.", ), + chars_per_caption: int | None = options.chars_per_caption_option(), json_out: bool = options.json_option(), ) -> None: """Fetch a past transcript by id and print its text.""" def body(state: AppState, json_mode: bool) -> None: - # Cheap local id validation first: a malformed id is a usage error whether - # or not the user is signed in, so it must not trigger auth/login first. + # Cheap local validation first: a malformed id or flag conflict is a usage + # error whether or not the user is signed in, so it must not trigger auth. client.validate_transcript_id(transcript_id) + client.validate_chars_per_caption(chars_per_caption, output_field) api_key = state.resolve_api_key() transcript = client.get_transcript(api_key, transcript_id) if client.status_str(transcript) == "error": @@ -92,7 +95,11 @@ def body(state: AppState, json_mode: bool) -> None: ) if output_field is not None: # Raw single-field output for pipelines (overrides --json), matching `transcribe`. - output.emit_text(client.select_transcript_field(transcript, output_field)) + output.emit_text( + client.select_transcript_field( + transcript, output_field, chars_per_caption=chars_per_caption + ) + ) return if json_mode: # The full SDK payload, identical to `assembly transcribe … --json`, so the diff --git a/aai_cli/options.py b/aai_cli/options.py index e1c16bfc..064467c4 100644 --- a/aai_cli/options.py +++ b/aai_cli/options.py @@ -19,6 +19,17 @@ def json_option(help_text: str = "Output raw JSON.") -> bool: return flag +def chars_per_caption_option() -> int | None: + """The ``--chars-per-caption`` knob for the ``-o srt``/``-o vtt`` subtitle exports.""" + value: int | None = typer.Option( + None, + "--chars-per-caption", + min=1, + help="Max characters per caption line (only with -o srt or -o vtt).", + ) + return value + + # Batch-mode flags for `transcribe` (see transcribe_batch.py). Defined here because # this module owns the FBT003 carve-out for Typer's boolean positional defaults. diff --git a/aai_cli/skills/aai-cli/references/history.md b/aai_cli/skills/aai-cli/references/history.md index 3ca84a71..6f5e6afa 100644 --- a/aai_cli/skills/aai-cli/references/history.md +++ b/aai_cli/skills/aai-cli/references/history.md @@ -32,8 +32,9 @@ Fetch a past transcript by id and print its text. Key options: -- `-o/--output text|id|status|utterances|srt|json` — print one field; omit for - the default human view. +- `-o/--output text|id|status|utterances|srt|vtt|json` — print one field; omit + for the default human view. `--chars-per-caption N` caps caption line length + for the srt/vtt exports. - `--json` — full raw JSON. Examples: diff --git a/aai_cli/skills/aai-cli/references/transcription.md b/aai_cli/skills/aai-cli/references/transcription.md index 12ef2994..3038b7e4 100644 --- a/aai_cli/skills/aai-cli/references/transcription.md +++ b/aai_cli/skills/aai-cli/references/transcription.md @@ -25,7 +25,8 @@ High-value flags (run `assembly transcribe --help` for the full set): `--config-file config.json`. - Post-process: `--llm "PROMPT"` (repeatable; chains over the transcript via LLM Gateway), `--translate-to es` (repeatable). -- Output: `-o text|id|status|utterances|srt|json`, `--json`, `--show-code`. +- Output: `-o text|id|status|utterances|srt|vtt|json`, `--chars-per-caption N` + (caption line length, with `-o srt`/`-o vtt`), `--json`, `--show-code`. Examples: diff --git a/aai_cli/transcribe_exec.py b/aai_cli/transcribe_exec.py index 712d1cc3..729c25f2 100644 --- a/aai_cli/transcribe_exec.py +++ b/aai_cli/transcribe_exec.py @@ -135,12 +135,15 @@ def out_payload( transcript: aai.Transcript, output_field: choices.TranscriptOutput | None, *, + chars_per_caption: int | None, json_mode: bool, ) -> str: """The text to write for ``--out``: the chosen ``-o`` field, the ``--json`` payload, or the plain transcript text — the same content stdout would get, as a file artifact.""" if output_field is not None: - return client.select_transcript_field(transcript, output_field) + return client.select_transcript_field( + transcript, output_field, chars_per_caption=chars_per_caption + ) if json_mode: return json.dumps(client.transcript_json_payload(transcript), default=str) return client.select_transcript_field(transcript, choices.TranscriptOutput.text) @@ -197,6 +200,7 @@ def deliver_result( api_key: str, out: Path | None, output_field: choices.TranscriptOutput | None, + chars_per_caption: int | None, transform: TransformOptions, json_mode: bool, quiet: bool, @@ -206,14 +210,23 @@ def deliver_result( if out is not None: # Write a clean file artifact and confirm on stderr; stdout stays empty. # The path itself was validated up front by validate_out_path. - out.write_text(out_payload(transcript, output_field, json_mode=json_mode) + "\n") + out.write_text( + out_payload( + transcript, output_field, chars_per_caption=chars_per_caption, json_mode=json_mode + ) + + "\n" + ) if not quiet: output.error_console.print(output.success(f"Saved to {escape(str(out))}")) return if output_field is not None: # Raw single-field output for pipelines (overrides --json and analysis render). - output.emit_text(client.select_transcript_field(transcript, output_field)) + output.emit_text( + client.select_transcript_field( + transcript, output_field, chars_per_caption=chars_per_caption + ) + ) return if transform.prompts: @@ -295,6 +308,7 @@ class TranscribeOptions: model: str max_tokens: int output_field: choices.TranscriptOutput | None + chars_per_caption: int | None out: Path | None show_code: bool @@ -366,6 +380,7 @@ def _print_show_code(opts: TranscribeOptions, merged: dict[str, object]) -> None audio, llm_gateway=gateway, output=opts.output_field, + chars_per_caption=opts.chars_per_caption, download_sections=list(opts.download_sections or []), ) ) @@ -384,6 +399,7 @@ def run_transcribe(opts: TranscribeOptions, state: AppState, *, json_mode: bool) validate_out_with_llm(opts.out, opts.llm_prompt) validate_out_path(opts.out) validate_json_with_output(opts.output_field, json_mode=json_mode) + client.validate_chars_per_caption(opts.chars_per_caption, opts.output_field) merged = config_builder.merge_transcribe_config( flags=flags, overrides=opts.config_kv, config_file=opts.config_file @@ -438,6 +454,7 @@ def run_transcribe(opts: TranscribeOptions, state: AppState, *, json_mode: bool) api_key=api_key, out=opts.out, output_field=opts.output_field, + chars_per_caption=opts.chars_per_caption, transform=TransformOptions( prompts=list(opts.llm_prompt or []), model=opts.model, max_tokens=opts.max_tokens ), diff --git a/tests/__snapshots__/test_snapshots_help_history.ambr b/tests/__snapshots__/test_snapshots_help_history.ambr index 58681abf..2cf6fce1 100644 --- a/tests/__snapshots__/test_snapshots_help_history.ambr +++ b/tests/__snapshots__/test_snapshots_help_history.ambr @@ -69,10 +69,14 @@ │ * transcript_id TEXT Transcript id. [required] │ ╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ Options ────────────────────────────────────────────────────────────────────╮ - │ --output -o [text|id|status|utterances|s Print one field of the │ - │ rt|json] result. │ - │ --json -j Output raw JSON. │ - │ --help Show this message and exit. │ + │ --output -o [text|id|status|uttera Print one field of the │ + │ nces|srt|vtt|json] result. │ + │ --chars-per-caption INTEGER RANGE [x>=1] Max characters per │ + │ caption line (only with │ + │ -o srt or -o vtt). │ + │ --json -j Output raw JSON. │ + │ --help Show this message and │ + │ exit. │ ╰──────────────────────────────────────────────────────────────────────────────╯ Examples @@ -82,6 +86,8 @@ $ assembly transcripts get 5551234-abcd -o utterances Save SRT subtitles $ assembly transcripts get 5551234-abcd -o srt > captions.srt + Save VTT subtitles + $ assembly transcripts get 5551234-abcd -o vtt > captions.vtt Get the raw JSON $ assembly transcripts get 5551234-abcd --json diff --git a/tests/__snapshots__/test_snapshots_help_run.ambr b/tests/__snapshots__/test_snapshots_help_run.ambr index 7cf3763d..29d7c2f1 100644 --- a/tests/__snapshots__/test_snapshots_help_run.ambr +++ b/tests/__snapshots__/test_snapshots_help_run.ambr @@ -423,24 +423,30 @@ │ directory/glob (batch mode). │ ╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ Options ────────────────────────────────────────────────────────────────────╮ - │ --sample Use the hosted │ - │ wildfires.mp3 sample. │ - │ --json -j Output the full result as │ - │ JSON. Text stays the │ - │ default even when piped; │ - │ opt in here (same as -o │ - │ json). │ - │ --output -o [text|id|status|utterances Print one field: text, id, │ - │ |srt|json] status, utterances, srt │ - │ (captions), or json. │ - │ --out FILE Save the result to a file │ - │ instead of printing it │ - │ (clean text; pairs with │ - │ -o). │ - │ --show-code Print the equivalent Python │ - │ SDK code and exit (does not │ - │ transcribe). │ - │ --help Show this message and exit. │ + │ --sample Use the hosted │ + │ wildfires.mp3 sample. │ + │ --json -j Output the full result │ + │ as JSON. Text stays the │ + │ default even when │ + │ piped; opt in here │ + │ (same as -o json). │ + │ --output -o [text|id|status|uttera Print one field: text, │ + │ nces|srt|vtt|json] id, status, utterances, │ + │ srt or vtt (captions), │ + │ or json. │ + │ --chars-per-caption INTEGER RANGE [x>=1] Max characters per │ + │ caption line (only with │ + │ -o srt or -o vtt). │ + │ --out FILE Save the result to a │ + │ file instead of │ + │ printing it (clean │ + │ text; pairs with -o). │ + │ --show-code Print the equivalent │ + │ Python SDK code and │ + │ exit (does not │ + │ transcribe). │ + │ --help Show this message and │ + │ exit. │ ╰──────────────────────────────────────────────────────────────────────────────╯ ╭─ Batch ──────────────────────────────────────────────────────────────────────╮ │ --from-stdin Batch mode: read audio paths/URLs │ diff --git a/tests/test_client.py b/tests/test_client.py index aaf4527b..06bd2e6c 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -254,14 +254,39 @@ def test_select_transcript_field_srt_uses_sdk(mocker): assert client.select_transcript_field(t, "srt") == ( "1\n00:00:00,000 --> 00:00:02,000\nhello world\n" ) - t.export_subtitles_srt.assert_called_once_with() + t.export_subtitles_srt.assert_called_once_with(chars_per_caption=None) + + +def test_select_transcript_field_vtt_uses_sdk(mocker): + t = mocker.MagicMock() + t.export_subtitles_vtt.return_value = "WEBVTT\n\n00:00:00.000 --> 00:00:02.000\nhello world\n" + assert client.select_transcript_field(t, "vtt") == ( + "WEBVTT\n\n00:00:00.000 --> 00:00:02.000\nhello world\n" + ) + t.export_subtitles_vtt.assert_called_once_with(chars_per_caption=None) + + +@pytest.mark.parametrize("field", ["srt", "vtt"]) +def test_select_transcript_field_subtitles_forward_chars_per_caption(mocker, field): + t = mocker.MagicMock() + client.select_transcript_field(t, field, chars_per_caption=42) + getattr(t, f"export_subtitles_{field}").assert_called_once_with(chars_per_caption=42) def test_select_transcript_field_srt_network_error_becomes_apierror(mocker): t = mocker.MagicMock() t.export_subtitles_srt.side_effect = RuntimeError("connection reset") - with pytest.raises(APIError): + with pytest.raises(APIError) as exc: client.select_transcript_field(t, "srt") + assert "Could not export SRT subtitles" in exc.value.message + + +def test_select_transcript_field_vtt_network_error_becomes_apierror(mocker): + t = mocker.MagicMock() + t.export_subtitles_vtt.side_effect = RuntimeError("connection reset") + with pytest.raises(APIError) as exc: + client.select_transcript_field(t, "vtt") + assert "Could not export VTT subtitles" in exc.value.message def test_select_transcript_field_srt_auth_error_becomes_not_authenticated(mocker): @@ -273,6 +298,34 @@ def test_select_transcript_field_srt_auth_error_becomes_not_authenticated(mocker client.select_transcript_field(t, "srt") +def test_select_transcript_field_vtt_auth_error_becomes_not_authenticated(mocker): + from aai_cli.errors import NotAuthenticated + + t = mocker.MagicMock() + t.export_subtitles_vtt.side_effect = RuntimeError("HTTP 401 Unauthorized") + with pytest.raises(NotAuthenticated): + client.select_transcript_field(t, "vtt") + + +@pytest.mark.parametrize("field", ["srt", "vtt"]) +def test_validate_chars_per_caption_allows_subtitle_fields(field): + client.validate_chars_per_caption(40, field) # no exception + + +def test_validate_chars_per_caption_allows_unset_value(): + client.validate_chars_per_caption(None, "text") # no exception + + +@pytest.mark.parametrize("field", [None, "text", "json"]) +def test_validate_chars_per_caption_rejects_non_subtitle_fields(field): + from aai_cli.errors import UsageError + + with pytest.raises(UsageError) as exc: + client.validate_chars_per_caption(40, field) + assert "--chars-per-caption only applies to subtitle output" in exc.value.message + assert "-o srt or -o vtt" in (exc.value.suggestion or "") + + def test_get_transcript_calls_sdk(mocker): fake = mocker.MagicMock() g = mocker.patch.object(client.aai.Transcript, "get_by_id", return_value=fake) diff --git a/tests/test_code_gen.py b/tests/test_code_gen.py index 27147112..3ffbc3ca 100644 --- a/tests/test_code_gen.py +++ b/tests/test_code_gen.py @@ -238,6 +238,7 @@ def test_every_snippet_execs_against_a_realistic_transcript() -> None: ("status", "print(transcript.status.value)"), ("utterances", 'print(f"Speaker {utt.speaker}: {utt.text}")'), ("srt", "print(transcript.export_subtitles_srt())"), + ("vtt", "print(transcript.export_subtitles_vtt())"), ("json", "print(json.dumps(transcript.json_response, default=str))"), ], ) @@ -259,8 +260,10 @@ def test_output_field_maps_cover_every_transcript_output_choice(): values = {member.value for member in TranscriptOutput} assert set(_OUTPUT_SNIPPETS) == values - # `text` is the run path's documented fallback; every other choice is explicit. - assert set(client._FIELD_RENDERERS) == values - {"text"} + # `text` is the run path's documented fallback; every other choice is explicit, + # split between the plain renderers and the subtitle exports. + assert set(client._FIELD_RENDERERS) | set(client._SUBTITLE_RENDERERS) == values - {"text"} + assert set(client._SUBTITLE_RENDERERS) == {"srt", "vtt"} def test_transcribe_render_output_json_imports_json_only_when_needed(): @@ -298,6 +301,14 @@ def test_transcribe_render_unknown_output_falls_back_to_text(): assert "print(transcript.text)" in code +@pytest.mark.parametrize("fmt", ["srt", "vtt"]) +def test_transcribe_render_chars_per_caption_shapes_subtitle_export(fmt): + # --chars-per-caption must land in the export call, not be silently dropped. + code = render_transcribe_code({}, "audio.mp3", output=fmt, chars_per_caption=42) + _compiles(code) + assert f"print(transcript.export_subtitles_{fmt}(chars_per_caption=42))" in code + + def test_transcribe_show_code_includes_llm_gateway_transform(): code = code_gen.transcribe( {"speaker_labels": True}, diff --git a/tests/test_code_gen_fuzz.py b/tests/test_code_gen_fuzz.py index f6f1a8e2..5d3c7784 100644 --- a/tests/test_code_gen_fuzz.py +++ b/tests/test_code_gen_fuzz.py @@ -152,7 +152,12 @@ def test_fuzz_result_handling_always_execs(merged): @given( merged=merged_strategy(config_builder.TRANSCRIBE_COERCE), - field=st.sampled_from(["text", "id", "status", "utterances", "srt", "json"]), + field=st.sampled_from(["text", "id", "status", "utterances", "srt", "vtt", "json"]), + chars_per_caption=st.one_of(st.none(), st.integers(min_value=1, max_value=500)), ) -def test_fuzz_transcribe_output_fields_always_compile(merged, field): - _compiles(render_transcribe_code(merged, "audio.mp3", output=field)) +def test_fuzz_transcribe_output_fields_always_compile(merged, field, chars_per_caption): + _compiles( + render_transcribe_code( + merged, "audio.mp3", output=field, chars_per_caption=chars_per_caption + ) + ) diff --git a/tests/test_command_options_seam.py b/tests/test_command_options_seam.py index 1c81af21..1ff4ed65 100644 --- a/tests/test_command_options_seam.py +++ b/tests/test_command_options_seam.py @@ -70,6 +70,7 @@ model=llm.DEFAULT_MODEL, max_tokens=llm.DEFAULT_MAX_TOKENS, output_field=None, + chars_per_caption=None, out=None, show_code=False, ) diff --git a/tests/test_transcribe.py b/tests/test_transcribe.py index cb5719ee..7e6cf694 100644 --- a/tests/test_transcribe.py +++ b/tests/test_transcribe.py @@ -126,7 +126,55 @@ def test_transcribe_output_srt_field(mocker): result = runner.invoke(app, ["transcribe", "audio.mp3", "-o", "srt"]) assert result.exit_code == 0 assert "00:00:00,000 --> 00:00:02,000" in result.output # SRT body, pipe-friendly - t.export_subtitles_srt.assert_called_once() + t.export_subtitles_srt.assert_called_once_with(chars_per_caption=None) + + +def test_transcribe_output_vtt_field(mocker): + _auth() + t = _fake_transcript(mocker) + t.export_subtitles_vtt.return_value = "WEBVTT\n\n00:00:00.000 --> 00:00:02.000\nhello world\n" + mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=t) + result = runner.invoke(app, ["transcribe", "audio.mp3", "-o", "vtt"]) + assert result.exit_code == 0 + assert "WEBVTT" in result.output # VTT body, pipe-friendly + t.export_subtitles_vtt.assert_called_once_with(chars_per_caption=None) + + +def test_transcribe_chars_per_caption_forwarded_to_export(mocker): + _auth() + t = _fake_transcript(mocker) + t.export_subtitles_srt.return_value = "1\n00:00:00,000 --> 00:00:02,000\nhello\nworld\n" + mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=t) + result = runner.invoke( + app, ["transcribe", "audio.mp3", "-o", "srt", "--chars-per-caption", "42"] + ) + assert result.exit_code == 0 + t.export_subtitles_srt.assert_called_once_with(chars_per_caption=42) + + +def test_transcribe_chars_per_caption_forwarded_through_out_file(tmp_path, mocker): + # --out routes through out_payload, a separate path from the stdout emit. + _auth() + t = _fake_transcript(mocker) + t.export_subtitles_vtt.return_value = "WEBVTT\n\n00:00:00.000 --> 00:00:02.000\nhello\n" + mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True, return_value=t) + out = tmp_path / "captions.vtt" + result = runner.invoke( + app, + ["transcribe", "audio.mp3", "-o", "vtt", "--chars-per-caption", "42", "--out", str(out)], + ) + assert result.exit_code == 0 + assert out.read_text().startswith("WEBVTT") + t.export_subtitles_vtt.assert_called_once_with(chars_per_caption=42) + + +def test_transcribe_chars_per_caption_requires_subtitle_output(mocker): + _auth() + tx = mocker.patch("aai_cli.transcribe_exec.client.transcribe", autospec=True) + result = runner.invoke(app, ["transcribe", "audio.mp3", "--chars-per-caption", "42"]) + assert result.exit_code == 2 + assert "--chars-per-caption only applies to subtitle output" in result.output + tx.assert_not_called() # rejected before any upload def test_transcribe_output_invalid_exits_2(mocker): diff --git a/tests/test_transcribe_show_code.py b/tests/test_transcribe_show_code.py index 0ba42de7..d3e87d07 100644 --- a/tests/test_transcribe_show_code.py +++ b/tests/test_transcribe_show_code.py @@ -102,6 +102,21 @@ def _boom(*a, **k): assert "print(transcript.text)" not in result.output +def test_transcribe_show_code_output_vtt_with_chars_per_caption(monkeypatch): + # The caption-length knob must be reflected in the generated export call. + def _boom(*a, **k): + raise AssertionError("must not transcribe") + + monkeypatch.setattr("aai_cli.transcribe_exec.client.transcribe", _boom) + result = runner.invoke( + app, + ["transcribe", "--sample", "-o", "vtt", "--chars-per-caption", "42", "--show-code"], + ) + assert result.exit_code == 0 + compile(result.output, "", "exec") + assert "print(transcript.export_subtitles_vtt(chars_per_caption=42))" in result.output + + def test_transcribe_show_code_output_utterances_generates_loop(monkeypatch): def _boom(*a, **k): raise AssertionError("must not transcribe") diff --git a/tests/test_transcripts.py b/tests/test_transcripts.py index ad6b942e..f7da4559 100644 --- a/tests/test_transcripts.py +++ b/tests/test_transcripts.py @@ -68,6 +68,32 @@ def test_get_output_id_prints_id(mocker): assert result.output.strip() == "t_42" +def test_get_output_vtt_forwards_chars_per_caption(mocker): + config.set_api_key("default", "sk_live") + fake = mocker.MagicMock() + fake.id = "t_42" + fake.status = "completed" + fake.export_subtitles_vtt.return_value = "WEBVTT\n\n00:00:00.000 --> 00:00:02.000\nhi\n" + mocker.patch( + "aai_cli.commands.transcripts.client.get_transcript", autospec=True, return_value=fake + ) + result = runner.invoke( + app, ["transcripts", "get", "t_42", "-o", "vtt", "--chars-per-caption", "42"] + ) + assert result.exit_code == 0 + assert "WEBVTT" in result.output + fake.export_subtitles_vtt.assert_called_once_with(chars_per_caption=42) + + +def test_get_chars_per_caption_requires_subtitle_output(mocker): + config.set_api_key("default", "sk_live") + get = mocker.patch("aai_cli.commands.transcripts.client.get_transcript", autospec=True) + result = runner.invoke(app, ["transcripts", "get", "t_42", "--chars-per-caption", "42"]) + assert result.exit_code == 2 + assert "--chars-per-caption only applies to subtitle output" in result.output + get.assert_not_called() # rejected before any fetch + + def test_get_json_emits_full_payload(mocker): config.set_api_key("default", "sk_live") fake = mocker.MagicMock()