Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions REFERENCE.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,26 @@ object per dataset (not NDJSON; a single dataset is therefore one object):
the row's `llm` key (the WER score still uses the raw transcript), and
`--llm-reduce` runs one prompt over every item's result and adds a top-level
`reduce` (`{"model","prompts","output"}`) to the object.

## Recording streams to disk

`assembly stream --save-dir DIR` auto-names a capture under `DIR/YYYY-MM-DD/`
with a timestamped stem (`YYYY-MM-DD-HHMMSS[-slug]`) shared across every file it
writes:

- `<stem>.txt` — the transcript, one finalized turn per line (flushed live).
- `<stem>.wav` — the recorded audio, 16-bit mono PCM. Suppress it with
`--no-save-audio` to keep only the text. Under `--system-audio` the two channels
can't share a file, so each gets its own `<stem>-you.wav` / `<stem>-system.wav`.
- `<stem>.md` — written when `--llm "…"` is also passed: the final answer of the
live prompt chain, captured as a note next to the transcript.
- `<stem>.aai.json` — a metadata sidecar so a list/browse UI needs no transcript
parsing: `{"title", "date", "duration_seconds", "speakers", "turns",
"transcript", "audio", "note"}`. `audio` is the list of WAV file names (empty
under `--no-save-audio`, two entries under `--system-audio`); `note` is `null`
when no `--llm` note was written.

`--name "Title"` slugs an explicit title into the stem; `--auto-name` instead
derives that title from the transcript via the LLM Gateway once the stream ends,
renaming the files to match (the timestamp stem is kept if the title is empty).
The two are mutually exclusive.
2 changes: 1 addition & 1 deletion aai_cli/commands/agent/_exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
from aai_cli.app.context import AppState
from aai_cli.core import choices, client, errors, signals
from aai_cli.core.errors import UsageError
from aai_cli.streaming.session import resolve_output_modes
from aai_cli.streaming.sources import FileSource
from aai_cli.streaming.validate import resolve_output_modes
from aai_cli.ui import output


Expand Down
2 changes: 1 addition & 1 deletion aai_cli/commands/agent_cascade/_exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
from aai_cli.core import choices, client, config_builder, errors, llm, signals
from aai_cli.core.errors import UsageError
from aai_cli.streaming import turn_presets
from aai_cli.streaming.session import resolve_output_modes
from aai_cli.streaming.sources import FileSource
from aai_cli.streaming.validate import resolve_output_modes
from aai_cli.tts import session as tts_session
from aai_cli.ui import output

Expand Down
2 changes: 1 addition & 1 deletion aai_cli/commands/dictate/_exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from aai_cli.core.config_builder import split_csv
from aai_cli.core.hotkey import CTRL_C, CTRL_D, ESC, TerminalKeys
from aai_cli.core.microphone import MicrophoneSource
from aai_cli.streaming.session import resolve_output_modes
from aai_cli.streaming.validate import resolve_output_modes
from aai_cli.ui import output

# Capture is resampled to one rate the Sync API accepts; 16 kHz mono PCM16 keeps
Expand Down
18 changes: 18 additions & 0 deletions aai_cli/commands/stream/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@
"Auto-name the transcript + WAV under a dir",
'assembly stream --save-dir ~/recordings --name "Standup"',
),
(
"Name from content + save a summary note",
'assembly stream --save-dir ~/recordings --auto-name --llm "summarize as a note"',
),
(
"Boost domain terms with keyterm prompts",
'assembly stream --keyterms-prompt "AssemblyAI" --keyterms-prompt "Claude"',
Expand Down Expand Up @@ -121,6 +125,18 @@ def stream(
help="Title to slug into the --save-dir filename (e.g. a meeting title)",
rich_help_panel=help_panels.OPT_SAVING,
),
auto_name: bool = typer.Option(
False,
"--auto-name",
help="With --save-dir, derive the filename from the transcript via the LLM",
rich_help_panel=help_panels.OPT_SAVING,
),
no_save_audio: bool = typer.Option(
False,
"--no-save-audio",
help="With --save-dir, skip the WAV and save only the transcript",
rich_help_panel=help_panels.OPT_SAVING,
),
# model & input
speech_model: SpeechModel = typer.Option(
DEFAULT_SPEECH_MODEL,
Expand Down Expand Up @@ -398,5 +414,7 @@ def stream(
save_transcript=save_transcript,
save_dir=save_dir,
name=name,
auto_name=auto_name,
no_save_audio=no_save_audio,
)
run_with_options(ctx, stream_exec.run_stream, opts, json=json_out)
123 changes: 82 additions & 41 deletions aai_cli/commands/stream/_exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,14 @@
from aai_cli.core import choices, client, config_builder, signals, stdio, youtube
from aai_cli.core.errors import UsageError, mutually_exclusive
from aai_cli.core.microphone import MicrophoneSource
from aai_cli.streaming import naming, record, transcript, turn_presets
from aai_cli.streaming import naming, record, savedir, transcript, turn_presets
from aai_cli.streaming.batch import stream_batch_sources
from aai_cli.streaming.macos import MacSystemAudioSource
from aai_cli.streaming.render import StreamRenderer
from aai_cli.streaming.session import (
SourceOptions,
StreamSession,
resolve_output_modes,
stream_batch_sources,
validate_sources,
)
from aai_cli.streaming.session import StreamSession
from aai_cli.streaming.sources import TARGET_RATE, FileSource, StdinSource
from aai_cli.streaming.turn_presets import TurnDetectionPreset
from aai_cli.streaming.validate import SourceOptions, resolve_output_modes, validate_sources
from aai_cli.ui import output
from aai_cli.ui.follow import FollowRenderer

Expand Down Expand Up @@ -90,6 +86,8 @@ class StreamOptions:
save_transcript: Path | None
save_dir: Path | None
name: str | None
auto_name: bool
no_save_audio: bool

def source_options(self) -> SourceOptions:
"""The audio-input subset, in the shape the validation/dispatch helpers read."""
Expand Down Expand Up @@ -205,57 +203,97 @@ class SaveTargets:
``audio`` tees a single source to one WAV; ``audio_by_label`` instead maps each
parallel ``--system-audio`` channel ("you", "system") to its own WAV when the two
streams can't share a file. At most one of the two is set; ``transcript`` is the
single shared transcript either way.
single shared transcript either way. ``plan`` is set only under ``--save-dir`` and
carries the post-stream finalization (auto-name rename, ``--llm`` note, sidecar).
"""

transcript: Path | None = None
audio: Path | None = None
audio_by_label: dict[str, Path] | None = None
plan: savedir.SaveDirPlan | None = None


def _save_dir_targets(opts: StreamOptions, sources: SourceOptions, save_dir: Path) -> SaveTargets:
    """Turn ``--save-dir`` into auto-named save targets and their finalization plan.

    Because ``--save-dir`` assembles the filenames itself, combining it with an
    explicit ``--save-audio``/``--save-transcript`` path is rejected, and so is passing
    both ``--name`` and ``--auto-name`` (each one sets the title). The dated directory
    is created before returning. Audio destinations are then picked by source:

    - ``--no-save-audio``: transcript only, no WAV at all.
    - ``--system-audio``: two parallel streams can't share a WAV, so each channel is
      given its own file via ``naming.channel_audio`` ("you" and "system").
    - ``--system-audio-only``: one WAV, labelled "system" so it reads like the pair.
    - anything else: the plan's single audio path.

    Every returned ``SaveTargets`` carries the shared transcript path and the plan.
    """
    mutually_exclusive(
        ("--save-dir", True),
        ("--save-audio", opts.save_audio is not None),
        ("--save-transcript", opts.save_transcript is not None),
        suggestion="--save-dir names the files for you; drop the explicit path.",
    )
    mutually_exclusive(
        ("--name", opts.name is not None),
        ("--auto-name", opts.auto_name),
        suggestion="Both set the title — pass --name for an explicit one or "
        "--auto-name to derive it from the transcript.",
    )
    plan = savedir.SaveDirPlan(
        save_dir=save_dir,
        # Local wall-clock time (what a meeting filename wants); going through UTC
        # first keeps the now() call timezone-aware for the linter.
        now=datetime.now(UTC).astimezone(),
        name=opts.name,
        auto_name=opts.auto_name,
        write_note=bool(opts.llm_prompt),
    )
    paths = plan.paths
    naming.ensure_dir(paths.directory)
    transcript_path = paths.transcript

    if opts.no_save_audio:
        # Transcript + sidecar (+ note) only; nothing is teed to a WAV.
        return SaveTargets(transcript=transcript_path, plan=plan)

    if sources.system_audio:
        # Parallel mic + system capture: one WAV per channel beside the transcript.
        per_channel = {
            label: naming.channel_audio(paths.audio, label) for label in ("you", "system")
        }
        return SaveTargets(transcript=transcript_path, audio_by_label=per_channel, plan=plan)

    # Single stream: a lone system-audio capture still gets the "system" label.
    if sources.system_audio_only:
        wav_path = naming.channel_audio(paths.audio, "system")
    else:
        wav_path = paths.audio
    return SaveTargets(transcript=transcript_path, audio=wav_path, plan=plan)


def _resolve_save_targets(opts: StreamOptions, sources: SourceOptions) -> SaveTargets:
"""Resolve the save flags into the destinations the session writes.

``--save-dir`` owns filename assembly — it auto-names the transcript and a matching
WAV under ``DIR/YYYY-MM-DD/`` — so it can't be combined with the explicit
``--save-audio``/``--save-transcript`` paths, and ``--name`` only feeds that assembly.
Two parallel ``--system-audio`` streams can't tee to one WAV, so under ``--save-dir``
each channel gets its own ``<stem>-{you,system}.wav`` (one shared transcript), and the
explicit single-path ``--save-audio`` is rejected outright.
``--save-dir`` owns filename assembly (see ``_save_dir_targets``); the explicit
``--save-audio``/``--save-transcript`` paths are the fallback, with the save-dir-only
``--name``/``--auto-name``/``--no-save-audio`` flags rejected outside it.
"""
if opts.save_dir is not None:
mutually_exclusive(
("--save-dir", True),
("--save-audio", opts.save_audio is not None),
("--save-transcript", opts.save_transcript is not None),
suggestion="--save-dir names the files for you; drop the explicit path.",
)
# Local wall-clock time (what a meeting filename wants); the explicit utc-then-
# astimezone keeps the now() call timezone-aware for the linter.
now = datetime.now(UTC).astimezone()
paths = naming.resolve(opts.save_dir, opts.name, now=now)
naming.ensure_dir(paths.transcript.parent)
if sources.system_audio:
# Parallel mic + system: one WAV per channel beside the shared transcript.
return SaveTargets(
transcript=paths.transcript,
audio_by_label={
"you": naming.channel_audio(paths.audio, "you"),
"system": naming.channel_audio(paths.audio, "system"),
},
)
if sources.system_audio_only:
# A lone system-audio stream; label its single WAV so it reads like the pair.
return SaveTargets(
transcript=paths.transcript, audio=naming.channel_audio(paths.audio, "system")
)
return SaveTargets(transcript=paths.transcript, audio=paths.audio)
return _save_dir_targets(opts, sources, opts.save_dir)
if opts.name is not None:
raise UsageError(
"--name applies only with --save-dir.",
suggestion="Pass --save-dir DIR to auto-name the files, "
"or --save-transcript PATH for an explicit path.",
)
if opts.auto_name:
raise UsageError(
"--auto-name applies only with --save-dir.",
suggestion="Pass --save-dir DIR so there's an auto-named file to title.",
)
if opts.no_save_audio:
raise UsageError(
"--no-save-audio applies only with --save-dir.",
suggestion="Omit --save-audio to skip the WAV, or pass --save-dir DIR.",
)
if opts.save_audio is not None:
if sources.system_audio:
raise UsageError(
Expand Down Expand Up @@ -343,6 +381,8 @@ def _collect_batch_sources(opts: StreamOptions, *, text_mode: bool) -> list[str]
("--save-transcript", opts.save_transcript is not None),
("--save-dir", opts.save_dir is not None),
("--name", opts.name is not None),
("--auto-name", opts.auto_name),
("--no-save-audio", opts.no_save_audio),
suggestion="--from-stdin streams many sources; saving applies to a single run.",
)
mutually_exclusive(
Expand Down Expand Up @@ -434,6 +474,7 @@ def run_stream(opts: StreamOptions, state: AppState, *, json_mode: bool) -> None
save_audio=targets.audio,
save_audio_by_label=targets.audio_by_label,
save_transcript=targets.transcript,
save_plan=targets.plan,
llm_interval=opts.llm_interval,
)
with signals.terminate_as_interrupt():
Expand Down
107 changes: 107 additions & 0 deletions aai_cli/streaming/batch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
"""Drive an ``assembly stream --from-stdin`` list of sources, one realtime session each.

The realtime API is one session at a time, so a list of files/URLs (read on stdin,
one per line) streams sequentially. This lives beside ``StreamSession`` rather than
inside it: a session owns *one* run, while this owns the sequence — fresh session per
source, per-source failure accounting, and the batch-wide Ctrl-C/pipe handling.
"""

from __future__ import annotations

from collections.abc import Callable, Iterable

import typer

from aai_cli.core.errors import CANCELLED_EXIT_CODE, CLIError, NotAuthenticated
from aai_cli.streaming.render import StreamRenderer
from aai_cli.streaming.session import StreamSession
from aai_cli.ui import output

# A batch source string resolved to its real-time audio chunks and declared rate.
_OpenedSource = tuple[Iterable[bytes], int]


def _stream_source(
    source: str,
    *,
    index: int,
    total: int,
    make_session: Callable[[], StreamSession],
    open_source: Callable[[str], _OpenedSource],
    renderer: StreamRenderer,
    json_mode: bool,
) -> bool:
    """Run a single batch source through a fresh session; report whether it failed.

    Returns ``False`` when the source streamed without a ``CLIError`` and ``True`` when
    one was raised (bad path, missing ffmpeg, decode failure) — the error is emitted as
    a warning so the caller can move on to the next source. ``NotAuthenticated`` is the
    exception to that rule: it propagates so the whole batch aborts, since one rejected
    key fails every source identically and auto-login should trigger once.
    """
    renderer.source(source, index=index, total=total)
    try:
        chunks, sample_rate = open_source(source)
        session = make_session()
        # handle_interrupt=False: a Ctrl-C/pipe close should bubble up to the batch loop
        # so one interrupt stops the whole sequence. Passing True would be
        # behavior-equivalent here (the session would turn the same interrupt into the
        # same Exit(130)/Exit(0)), so no test can tell the two apart.
        session.run(chunks, sample_rate, handle_interrupt=False)  # pragma: no mutate
    except NotAuthenticated:
        raise
    except CLIError as err:
        # Map CR and LF to spaces so a crafted path/URL can't inject extra log lines.
        one_line = f"{source}: {err.message}".translate(str.maketrans("\n\r", "  "))
        output.emit_warning(one_line, json_mode=json_mode)
        return True
    return False


def stream_batch_sources(
    sources: list[str],
    *,
    make_session: Callable[[], StreamSession],
    open_source: Callable[[str], _OpenedSource],
    renderer: StreamRenderer,
    json_mode: bool,
) -> None:
    """Run the ``assembly stream --from-stdin`` batch: stream every source, in order.

    The realtime API handles one session at a time, so the files/URLs are streamed
    sequentially. ``_stream_source`` drives each one with a fresh ``StreamSession``
    obtained from ``make_session`` (its own transcript and ``--llm`` chain state).

    Exit behavior:

    - Ctrl-C stops the batch and exits with the cancel code (130).
    - A closed downstream pipe stops the batch quietly (exit 0).
    - If any source failed, a ``CLIError`` is raised once the loop is done, so a
      script can trust the exit code.

    The renderer is closed in every case.
    """
    total = len(sources)
    failed = 0
    try:
        for position, item in enumerate(sources, start=1):
            source_failed = _stream_source(
                item,
                index=position,
                total=total,
                make_session=make_session,
                open_source=open_source,
                renderer=renderer,
                json_mode=json_mode,
            )
            if source_failed:
                failed += 1
    except KeyboardInterrupt:
        # A single Ctrl-C ends the entire batch, not only the current source. Exit
        # 130 (cancel) so it can't be mistaken for a clean run of every source.
        renderer.stopped()
        raise typer.Exit(code=CANCELLED_EXIT_CODE) from None
    except BrokenPipeError:
        # The downstream consumer (e.g. `| head`) went away; stop without noise.
        raise typer.Exit(code=0) from None
    finally:
        renderer.close()

    if not failed:
        return
    raise CLIError(
        f"{failed} of {total} sources failed.",
        error_type="batch_failed",
        suggestion="Check each failed path or URL, then re-run.",
    )
Loading
Loading