Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions aai_cli/choices.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class TranscriptOutput(enum.StrEnum):
status = "status"
utterances = "utterances"
srt = "srt"
vtt = "vtt"
json = "json"


Expand Down
46 changes: 42 additions & 4 deletions aai_cli/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import contextlib
import json
import re
from abc import abstractmethod
from collections.abc import Callable, Generator, Iterable
from pathlib import Path
from typing import Any, Literal, Protocol
Expand Down Expand Up @@ -222,24 +223,61 @@ def _render_utterances(transcript: Any) -> str:
)


def _export_srt(transcript: Any) -> str:
class _SubtitleTranscript(Protocol):
"""The slice of ``aai.Transcript`` the subtitle renderers touch."""

@abstractmethod
def export_subtitles_srt(self, chars_per_caption: int | None) -> str:
"""Fetch the transcript's SRT captions."""

@abstractmethod
def export_subtitles_vtt(self, chars_per_caption: int | None) -> str:
"""Fetch the transcript's VTT captions."""


def _export_srt(transcript: _SubtitleTranscript, chars_per_caption: int | None) -> str:
    """Export ``transcript`` as SRT captions and return them as a string.

    ``chars_per_caption`` is forwarded to the SDK export call unchanged
    (``None`` included). The call runs inside ``_sdk_errors`` with the message
    "Could not export SRT subtitles".
    NOTE(review): presumably ``_sdk_errors`` converts SDK failures into CLI
    errors carrying that message — confirm against its definition.
    """
    # The SDK fetches SRT from the `/srt` export endpoint, so this hits the network.
    with _sdk_errors("Could not export SRT subtitles"):
        # Single return: a leftover argument-less export call ahead of this one
        # would ignore chars_per_caption and make this line unreachable.
        return str(transcript.export_subtitles_srt(chars_per_caption=chars_per_caption))


def _export_vtt(transcript: _SubtitleTranscript, chars_per_caption: int | None) -> str:
    """Export ``transcript`` as VTT captions and return them as a string.

    ``chars_per_caption`` is forwarded to the SDK export call unchanged. The
    call runs inside ``_sdk_errors`` with the message
    "Could not export VTT subtitles".
    """
    with _sdk_errors("Could not export VTT subtitles"):
        # The SDK fetches VTT from the `/vtt` export endpoint, so this hits the network.
        captions = transcript.export_subtitles_vtt(chars_per_caption=chars_per_caption)
        return str(captions)


# Subtitle output field -> renderer. Unlike the plain field renderers, these
# take a second argument (the --chars-per-caption value, possibly None) and
# each one calls an SDK export method, so rendering hits an export endpoint.
# The keys double as the set of fields for which --chars-per-caption is valid.
_SUBTITLE_RENDERERS: dict[str, Callable[[_SubtitleTranscript, int | None], str]] = {
    "srt": _export_srt,
    "vtt": _export_vtt,
}

# Output field -> renderer. Fields absent here fall back to the plain transcript text.
# Every renderer takes just the transcript. The subtitle formats (srt/vtt) are
# deliberately NOT listed: their renderers need the extra chars_per_caption
# argument and are dispatched from _SUBTITLE_RENDERERS before this table is
# consulted, so an "srt" entry here would be dead and would not match the
# one-argument callable type.
_FIELD_RENDERERS: dict[str, Callable[[Any], str]] = {
    "id": lambda t: str(getattr(t, "id", "") or ""),
    "status": status_str,
    "utterances": _render_utterances,
    "json": lambda t: json.dumps(transcript_json_payload(t), default=str),
}


def select_transcript_field(transcript: Any, field: str) -> str:
def validate_chars_per_caption(chars_per_caption: int | None, field: str | None) -> None:
"""``--chars-per-caption`` only shapes subtitle exports; any other ``-o`` contradicts it."""
if chars_per_caption is not None and field not in _SUBTITLE_RENDERERS:
raise UsageError(
"--chars-per-caption only applies to subtitle output.",
suggestion="Add -o srt or -o vtt.",
)


def select_transcript_field(
    transcript: Any, field: str, *, chars_per_caption: int | None = None
) -> str:
    """Render one transcript field for ``-o/--output``.

    Subtitle fields (keys of ``_SUBTITLE_RENDERERS``) are rendered with
    ``chars_per_caption``. Every other field goes through ``_FIELD_RENDERERS``,
    and names missing from both tables fall back to the plain transcript text.
    """
    if field in _SUBTITLE_RENDERERS:
        export = _SUBTITLE_RENDERERS[field]
        return export(transcript, chars_per_caption)
    render = _FIELD_RENDERERS.get(field, _transcript_text)
    return render(transcript)


Expand Down
21 changes: 17 additions & 4 deletions aai_cli/code_gen/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

# ``-o/--output`` choice -> printed-result code, mirroring the run path's
# ``client._FIELD_RENDERERS`` semantics: plain fields, the speaker-labeled
# utterances loop, the SRT export endpoint, and the raw ``json_response`` payload.
# utterances loop, the SRT/VTT export endpoints, and the raw ``json_response`` payload.
_OUTPUT_SNIPPETS: dict[str, str] = {
"text": "print(transcript.text)",
"id": "print(transcript.id)",
Expand All @@ -16,16 +16,21 @@
'for utt in transcript.utterances or []:\n print(f"Speaker {utt.speaker}: {utt.text}")'
),
"srt": "print(transcript.export_subtitles_srt())",
"vtt": "print(transcript.export_subtitles_vtt())",
"json": "print(json.dumps(transcript.json_response, default=str))",
}

# The subtitle exports take the --chars-per-caption knob as a kwarg: for these
# ``-o`` values the generated export call gets ``chars_per_caption=N`` when the
# flag was supplied (see ``_result_block``).
_SUBTITLE_FORMATS = ("srt", "vtt")


def render(
merged: dict[str, object],
source: str,
*,
llm_gateway: dict[str, object] | None = None,
output: str | None = None,
chars_per_caption: int | None = None,
download_sections: list[str] | None = None,
) -> str:
"""Generate a runnable transcribe script reproducing this CLI invocation.
Expand All @@ -37,7 +42,7 @@ def render(

When `output` (a ``-o/--output`` field name) is given, the script prints that one
field instead — and, as in the real command, it takes precedence over the LLM chain
and the analysis sections.
and the analysis sections. `chars_per_caption` shapes the srt/vtt export calls.

When `download_sections` (yt-dlp ``--download-sections`` specs) is given for a
downloadable URL, the generated yt-dlp call fetches only those parts of the source.
Expand All @@ -57,7 +62,7 @@ def render(
has_sections=ranges_expr is not None,
)
+ _transcribe_block(merged, source, needs_download=needs_download, ranges_expr=ranges_expr)
+ _result_block(merged, llm_gateway, output)
+ _result_block(merged, llm_gateway, output, chars_per_caption)
)
parts.append("")
return "\n".join(parts)
Expand Down Expand Up @@ -183,10 +188,18 @@ def _transcribe_block(


def _result_block(
merged: dict[str, object], llm_gateway: dict[str, object] | None, output: str | None
merged: dict[str, object],
llm_gateway: dict[str, object] | None,
output: str | None,
chars_per_caption: int | None,
) -> list[str]:
"""The printed-result lines: one ``-o`` field, the LLM chain, or the analysis sections."""
if output is not None:
if output in _SUBTITLE_FORMATS and chars_per_caption is not None:
return [
f"print(transcript.export_subtitles_{output}"
f"(chars_per_caption={chars_per_caption}))"
]
# Unknown names fall back to the plain text, like select_transcript_field does.
return [_OUTPUT_SNIPPETS.get(output, _OUTPUT_SNIPPETS["text"])]
if llm_gateway:
Expand Down
4 changes: 3 additions & 1 deletion aai_cli/commands/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,8 +320,9 @@ def transcribe(
None,
"-o",
"--output",
help="Print one field: text, id, status, utterances, srt (captions), or json.",
help="Print one field: text, id, status, utterances, srt or vtt (captions), or json.",
),
chars_per_caption: int | None = options.chars_per_caption_option(),
out: Path | None = typer.Option(
None,
"--out",
Expand Down Expand Up @@ -397,6 +398,7 @@ def transcribe(
model=model,
max_tokens=max_tokens,
output_field=output_field,
chars_per_caption=chars_per_caption,
out=out,
show_code=show_code,
)
Expand Down
13 changes: 10 additions & 3 deletions aai_cli/commands/transcripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def render(data: list[dict[str, object]]) -> object:
("Fetch a transcript's text by id", "assembly transcripts get 5551234-abcd"),
("Speaker-labeled turns", "assembly transcripts get 5551234-abcd -o utterances"),
("Save SRT subtitles", "assembly transcripts get 5551234-abcd -o srt > captions.srt"),
("Save VTT subtitles", "assembly transcripts get 5551234-abcd -o vtt > captions.vtt"),
("Get the raw JSON", "assembly transcripts get 5551234-abcd --json"),
]
)
Expand All @@ -75,14 +76,16 @@ def get(
"--output",
help="Print one field of the result.",
),
chars_per_caption: int | None = options.chars_per_caption_option(),
json_out: bool = options.json_option(),
) -> None:
"""Fetch a past transcript by id and print its text."""

def body(state: AppState, json_mode: bool) -> None:
# Cheap local id validation first: a malformed id is a usage error whether
# or not the user is signed in, so it must not trigger auth/login first.
# Cheap local validation first: a malformed id or flag conflict is a usage
# error whether or not the user is signed in, so it must not trigger auth.
client.validate_transcript_id(transcript_id)
client.validate_chars_per_caption(chars_per_caption, output_field)
api_key = state.resolve_api_key()
transcript = client.get_transcript(api_key, transcript_id)
if client.status_str(transcript) == "error":
Expand All @@ -92,7 +95,11 @@ def body(state: AppState, json_mode: bool) -> None:
)
if output_field is not None:
# Raw single-field output for pipelines (overrides --json), matching `transcribe`.
output.emit_text(client.select_transcript_field(transcript, output_field))
output.emit_text(
client.select_transcript_field(
transcript, output_field, chars_per_caption=chars_per_caption
)
)
return
if json_mode:
# The full SDK payload, identical to `assembly transcribe … --json`, so the
Expand Down
11 changes: 11 additions & 0 deletions aai_cli/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,17 @@ def json_option(help_text: str = "Output raw JSON.") -> bool:
return flag


def chars_per_caption_option() -> int | None:
    """Build the shared ``--chars-per-caption`` option for the ``-o srt``/``-o vtt`` exports.

    The option defaults to ``None`` (flag not given) and is declared with ``min=1``.
    """
    # NOTE(review): the annotated local mirrors json_option's `return flag`
    # pattern — presumably so the declared return type holds for type
    # checkers; confirm before collapsing it into a direct return.
    option: int | None = typer.Option(
        None,
        "--chars-per-caption",
        min=1,
        help="Max characters per caption line (only with -o srt or -o vtt).",
    )
    return option


# Batch-mode flags for `transcribe` (see transcribe_batch.py). Defined here because
# this module owns the FBT003 carve-out for Typer's boolean positional defaults.

Expand Down
5 changes: 3 additions & 2 deletions aai_cli/skills/aai-cli/references/history.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@ Fetch a past transcript by id and print its text.

Key options:

- `-o/--output text|id|status|utterances|srt|json` — print one field; omit for
the default human view.
- `-o/--output text|id|status|utterances|srt|vtt|json` — print one field; omit
for the default human view. `--chars-per-caption N` caps caption line length
for the srt/vtt exports.
- `--json` — full raw JSON.

Examples:
Expand Down
3 changes: 2 additions & 1 deletion aai_cli/skills/aai-cli/references/transcription.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ High-value flags (run `assembly transcribe --help` for the full set):
`--config-file config.json`.
- Post-process: `--llm "PROMPT"` (repeatable; chains over the transcript via LLM
Gateway), `--translate-to es` (repeatable).
- Output: `-o text|id|status|utterances|srt|json`, `--json`, `--show-code`.
- Output: `-o text|id|status|utterances|srt|vtt|json`, `--chars-per-caption N`
(caption line length, with `-o srt`/`-o vtt`), `--json`, `--show-code`.

Examples:

Expand Down
23 changes: 20 additions & 3 deletions aai_cli/transcribe_exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,12 +135,15 @@ def out_payload(
transcript: aai.Transcript,
output_field: choices.TranscriptOutput | None,
*,
chars_per_caption: int | None,
json_mode: bool,
) -> str:
"""The text to write for ``--out``: the chosen ``-o`` field, the ``--json`` payload,
or the plain transcript text — the same content stdout would get, as a file artifact."""
if output_field is not None:
return client.select_transcript_field(transcript, output_field)
return client.select_transcript_field(
transcript, output_field, chars_per_caption=chars_per_caption
)
if json_mode:
return json.dumps(client.transcript_json_payload(transcript), default=str)
return client.select_transcript_field(transcript, choices.TranscriptOutput.text)
Expand Down Expand Up @@ -197,6 +200,7 @@ def deliver_result(
api_key: str,
out: Path | None,
output_field: choices.TranscriptOutput | None,
chars_per_caption: int | None,
transform: TransformOptions,
json_mode: bool,
quiet: bool,
Expand All @@ -206,14 +210,23 @@ def deliver_result(
if out is not None:
# Write a clean file artifact and confirm on stderr; stdout stays empty.
# The path itself was validated up front by validate_out_path.
out.write_text(out_payload(transcript, output_field, json_mode=json_mode) + "\n")
out.write_text(
out_payload(
transcript, output_field, chars_per_caption=chars_per_caption, json_mode=json_mode
)
+ "\n"
)
if not quiet:
output.error_console.print(output.success(f"Saved to {escape(str(out))}"))
return

if output_field is not None:
# Raw single-field output for pipelines (overrides --json and analysis render).
output.emit_text(client.select_transcript_field(transcript, output_field))
output.emit_text(
client.select_transcript_field(
transcript, output_field, chars_per_caption=chars_per_caption
)
)
return

if transform.prompts:
Expand Down Expand Up @@ -295,6 +308,7 @@ class TranscribeOptions:
model: str
max_tokens: int
output_field: choices.TranscriptOutput | None
chars_per_caption: int | None
out: Path | None
show_code: bool

Expand Down Expand Up @@ -366,6 +380,7 @@ def _print_show_code(opts: TranscribeOptions, merged: dict[str, object]) -> None
audio,
llm_gateway=gateway,
output=opts.output_field,
chars_per_caption=opts.chars_per_caption,
download_sections=list(opts.download_sections or []),
)
)
Expand All @@ -384,6 +399,7 @@ def run_transcribe(opts: TranscribeOptions, state: AppState, *, json_mode: bool)
validate_out_with_llm(opts.out, opts.llm_prompt)
validate_out_path(opts.out)
validate_json_with_output(opts.output_field, json_mode=json_mode)
client.validate_chars_per_caption(opts.chars_per_caption, opts.output_field)

merged = config_builder.merge_transcribe_config(
flags=flags, overrides=opts.config_kv, config_file=opts.config_file
Expand Down Expand Up @@ -438,6 +454,7 @@ def run_transcribe(opts: TranscribeOptions, state: AppState, *, json_mode: bool)
api_key=api_key,
out=opts.out,
output_field=opts.output_field,
chars_per_caption=opts.chars_per_caption,
transform=TransformOptions(
prompts=list(opts.llm_prompt or []), model=opts.model, max_tokens=opts.max_tokens
),
Expand Down
14 changes: 10 additions & 4 deletions tests/__snapshots__/test_snapshots_help_history.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,14 @@
│ * transcript_id TEXT Transcript id. [required] │
╰──────────────────────────────────────────────────────────────────────────────╯
╭─ Options ────────────────────────────────────────────────────────────────────╮
│ --output -o [text|id|status|utterances|s Print one field of the │
│ rt|json] result. │
│ --json -j Output raw JSON. │
│ --help Show this message and exit. │
│ --output -o [text|id|status|uttera Print one field of the │
│ nces|srt|vtt|json] result. │
│ --chars-per-caption INTEGER RANGE [x>=1] Max characters per │
│ caption line (only with │
│ -o srt or -o vtt). │
│ --json -j Output raw JSON. │
│ --help Show this message and │
│ exit. │
╰──────────────────────────────────────────────────────────────────────────────╯

Examples
Expand All @@ -82,6 +86,8 @@
$ assembly transcripts get 5551234-abcd -o utterances
Save SRT subtitles
$ assembly transcripts get 5551234-abcd -o srt > captions.srt
Save VTT subtitles
$ assembly transcripts get 5551234-abcd -o vtt > captions.vtt
Get the raw JSON
$ assembly transcripts get 5551234-abcd --json

Expand Down
Loading
Loading