Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion aai_cli/commands/stream/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def stream(
None,
"--save-dir",
help="Auto-name the transcript and a matching WAV under DIR/YYYY-MM-DD/ "
"with a timestamped file",
"with a timestamped file; --system-audio saves one WAV per channel",
rich_help_panel=help_panels.OPT_SAVING,
file_okay=False,
),
Expand Down
66 changes: 45 additions & 21 deletions aai_cli/commands/stream/_exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,16 +198,30 @@ def _reject_save_with_show_code(opts: StreamOptions) -> None:
)


def _resolve_save_targets(
opts: StreamOptions, sources: SourceOptions
) -> tuple[Path | None, Path | None]:
"""Resolve the save flags into the (audio, transcript) paths the session writes.
@dataclass(frozen=True)
class SaveTargets:
"""Resolved save destinations for one streaming run.

``audio`` tees a single source to one WAV; ``audio_by_label`` instead maps each
parallel ``--system-audio`` channel ("you", "system") to its own WAV when the two
streams can't share a file. At most one of the two is set; ``transcript`` is the
single shared transcript either way.
"""

transcript: Path | None = None
audio: Path | None = None
audio_by_label: dict[str, Path] | None = None


``--save-dir`` owns filename assembly — it auto-names both the transcript and a
matching WAV under ``DIR/YYYY-MM-DD/`` — so it can't be combined with the explicit
def _resolve_save_targets(opts: StreamOptions, sources: SourceOptions) -> SaveTargets:
"""Resolve the save flags into the destinations the session writes.

``--save-dir`` owns filename assembly — it auto-names the transcript and a matching
WAV under ``DIR/YYYY-MM-DD/`` — so it can't be combined with the explicit
``--save-audio``/``--save-transcript`` paths, and ``--name`` only feeds that assembly.
Audio can't tee to a single WAV under ``--system-audio`` (two streams), which rejects
both the explicit ``--save-audio`` and ``--save-dir``'s audio leg.
Two parallel ``--system-audio`` streams can't tee to one WAV, so under ``--save-dir``
each channel gets its own ``<stem>-{you,system}.wav`` (one shared transcript), and the
explicit single-path ``--save-audio`` is rejected outright.
"""
if opts.save_dir is not None:
mutually_exclusive(
Expand All @@ -216,35 +230,44 @@ def _resolve_save_targets(
("--save-transcript", opts.save_transcript is not None),
suggestion="--save-dir names the files for you; drop the explicit path.",
)
if sources.from_system_audio:
raise UsageError(
"--save-dir cannot be combined with --system-audio; the mic and system "
"streams can't share one recording.",
suggestion="Record a single source (mic, file, URL, or - on stdin).",
)
# Local wall-clock time (what a meeting filename wants); the explicit utc-then-
# astimezone keeps the now() call timezone-aware for the linter.
now = datetime.now(UTC).astimezone()
paths = naming.resolve(opts.save_dir, opts.name, now=now)
naming.ensure_dir(paths.transcript.parent)
return paths.audio, paths.transcript
if sources.system_audio:
# Parallel mic + system: one WAV per channel beside the shared transcript.
return SaveTargets(
transcript=paths.transcript,
audio_by_label={
"you": naming.channel_audio(paths.audio, "you"),
"system": naming.channel_audio(paths.audio, "system"),
},
)
if sources.system_audio_only:
# A lone system-audio stream; label its single WAV so it reads like the pair.
return SaveTargets(
transcript=paths.transcript, audio=naming.channel_audio(paths.audio, "system")
)
return SaveTargets(transcript=paths.transcript, audio=paths.audio)
if opts.name is not None:
raise UsageError(
"--name applies only with --save-dir.",
suggestion="Pass --save-dir DIR to auto-name the files, "
"or --save-transcript PATH for an explicit path.",
)
if opts.save_audio is not None:
if sources.from_system_audio:
if sources.system_audio:
raise UsageError(
"--save-audio cannot be combined with --system-audio; the mic and system "
"streams can't share one file.",
suggestion="Record a single source (mic, file, URL, or - on stdin).",
suggestion="Pass --save-dir DIR to save one WAV per channel, "
"or record a single source.",
)
record.validate_target(opts.save_audio)
if opts.save_transcript is not None:
transcript.validate_target(opts.save_transcript)
return opts.save_audio, opts.save_transcript
return SaveTargets(transcript=opts.save_transcript, audio=opts.save_audio)


def _dispatch(session: StreamSession, opts: SourceOptions) -> None:
Expand Down Expand Up @@ -389,7 +412,7 @@ def run_stream(opts: StreamOptions, state: AppState, *, json_mode: bool) -> None
# Validate the requested sources (including that a local file exists) before
# credentials, so a typo'd path reads as "file not found" — not as a login.
validate_sources(sources, has_llm=bool(opts.llm_prompt), text_mode=text_mode)
save_audio, save_transcript = _resolve_save_targets(opts, sources)
targets = _resolve_save_targets(opts, sources)
if sources.from_file and not sources.from_stdin:
client.resolve_audio_source(sources.source, sample=sources.sample)
api_key = state.resolve_api_key()
Expand All @@ -405,8 +428,9 @@ def run_stream(opts: StreamOptions, state: AppState, *, json_mode: bool) -> None
llm_prompts=llm_prompts,
model=opts.model,
max_tokens=opts.max_tokens,
save_audio=save_audio,
save_transcript=save_transcript,
save_audio=targets.audio,
save_audio_by_label=targets.audio_by_label,
save_transcript=targets.transcript,
llm_interval=opts.llm_interval,
)
_dispatch(session, sources)
3 changes: 2 additions & 1 deletion aai_cli/skills/aai-cli/references/transcription.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ assembly transcribe call.mp3 --show-code

Omit `SOURCE` to use the microphone; pass a file/URL/media page to stream that, or
`--sample`. macOS can capture system audio with `--system-audio` (mic + system)
or `--system-audio-only`.
or `--system-audio-only`. With `--save-dir`, `--system-audio` writes one WAV per
channel (`<stem>-you.wav`, `<stem>-system.wav`) beside the shared transcript.

High-value flags (run `assembly stream --help` for the full set):

Expand Down
10 changes: 10 additions & 0 deletions aai_cli/streaming/naming.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,16 @@ def resolve(save_dir: Path, name: str | None, *, now: datetime) -> SavePaths:
return SavePaths(transcript=bucket / f"{stem}.txt", audio=bucket / f"{stem}.wav")


def channel_audio(audio: Path, channel: str) -> Path:
    """Derive the per-channel WAV path for one ``--system-audio`` stream.

    The two parallel channels ("you", "system") can't be written into a single WAV,
    so each is saved next to the shared transcript under its own name: the base
    ``DIR/.../<stem>.wav`` turns into ``DIR/.../<stem>-<channel>.wav``.
    """
    labeled_stem = f"{audio.stem}-{channel}"
    # with_stem keeps the directory and the extension, swapping only the stem.
    return audio.with_stem(labeled_stem)


def ensure_dir(path: Path) -> None:
"""Create ``path`` (and parents) for the auto-named files, as a clean CLIError on failure."""
try:
Expand Down
25 changes: 20 additions & 5 deletions aai_cli/streaming/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,11 +154,15 @@ class StreamSession:
llm_prompts: list[str]
model: str
max_tokens: int
# When set, tee the streamed PCM to this path as a WAV (see record.tee_wav). Only
# the single-source path sets it — the parallel/batch callers reject --save-audio.
# When set, tee the streamed PCM to this path as a WAV (see record.tee_wav). The
# single-source path sets it; the batch (--from-stdin) caller rejects --save-audio.
save_audio: Path | None = None
# --system-audio runs two parallel streams that can't share one WAV, so each is teed
# to its own file keyed by source label ("you", "system"). Set instead of save_audio
# for that path; a label with no entry (or no save flag) tees nowhere.
save_audio_by_label: dict[str, Path] | None = None
# When set, write each finalized turn to this path as plain text (see TranscriptWriter).
# Like save_audio, only the single-source path sets it; batch rejects --save-transcript.
# One shared transcript even under --system-audio (both channels); batch rejects it.
save_transcript: Path | None = None
# Seconds between --llm summary refreshes; <=0 re-runs the chain on every turn.
llm_interval: float = 0.0
Expand Down Expand Up @@ -275,12 +279,23 @@ def _maybe_summarize(self, *, final: bool = False) -> None:
return
follow(answer, turns)

def _audio_target(self, source_label: str | None) -> Path | None:
"""The WAV path this source tees to, if any.

``--system-audio`` records two channels to two files (``save_audio_by_label``);
every other run tees its single source to ``save_audio``.
"""
if self.save_audio_by_label is not None:
return self.save_audio_by_label.get(source_label or "")
return self.save_audio

def stream_one(
self, audio: Iterable[bytes], rate: int, *, source_label: str | None = None
) -> None:
if self.save_audio is not None:
target = self._audio_target(source_label)
if target is not None:
# Tee verbatim to disk at the source's true rate before it hits the wire.
audio = record.tee_wav(audio, self.save_audio, rate=rate)
audio = record.tee_wav(audio, target, rate=rate)
flags = self.base_flags | {"sample_rate": rate}
if source_label == "you":
# The microphone captures you alone, so never diarize it into separate
Expand Down
3 changes: 2 additions & 1 deletion tests/__snapshots__/test_snapshots_help_run.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -758,7 +758,8 @@
│ one turn per line │
│ --save-dir DIRECTORY Auto-name the transcript and a matching │
│ WAV under DIR/YYYY-MM-DD/ with a │
│ timestamped file │
│ timestamped file; --system-audio saves │
│ one WAV per channel │
│ --name TEXT Title to slug into the --save-dir │
│ filename (e.g. a meeting title) │
╰──────────────────────────────────────────────────────────────────────────────╯
Expand Down
38 changes: 37 additions & 1 deletion tests/test_stream_exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,14 @@ def test_stream_options_are_immutable():
setattr(DEFAULTS, field_name, True)


def test_save_targets_are_immutable():
    # SaveTargets is a frozen carrier (same as StreamOptions): once resolved, no later
    # step may silently point a save at a different file.
    targets = stream_exec.SaveTargets()
    attr = "audio"
    with pytest.raises(dataclasses.FrozenInstanceError):
        setattr(targets, attr, Path("x.wav"))


# --- batch streaming (--from-stdin) validation -----------------------------
# Each conflict is rejected before stdin is read, so these raise without a pipe.
@pytest.mark.parametrize(
Expand Down Expand Up @@ -294,6 +302,35 @@ def test_save_audio_rejects_system_audio():
)


def test_save_audio_allows_system_audio_only(monkeypatch, tmp_path):
    # Only the two-stream --system-audio conflicts with --save-audio. The lone
    # --system-audio-only stream tees into the one explicit WAV like any single source.
    config.set_api_key("default", "sk_live")
    wav_path = tmp_path / "rec.wav"

    class StubSystemAudio:
        def __init__(self, *, on_open=None):
            self.sample_rate = 16000

        def __iter__(self):
            return iter([RecordingMic.PCM])

    def stub_stream_audio(api_key, source, *, params, **_kwargs):
        drained = b"".join(source)
        assert drained == RecordingMic.PCM

    monkeypatch.setattr(stream_exec, "MacSystemAudioSource", StubSystemAudio)
    monkeypatch.setattr(stream_exec.client, "stream_audio", stub_stream_audio)

    options = dataclasses.replace(DEFAULTS, save_audio=wav_path, system_audio_only=True)
    stream_exec.run_stream(options, AppState(), json_mode=True)

    with wave.open(str(wav_path), "rb") as reader:
        saved = reader.readframes(reader.getnframes())
    assert saved == RecordingMic.PCM


def test_save_audio_rejects_show_code():
# --show-code emits SDK code that doesn't tee audio, so the combo is rejected.
with pytest.raises(UsageError):
Expand Down Expand Up @@ -425,7 +462,6 @@ def test_save_dir_auto_names_transcript_and_matching_wav(monkeypatch, tmp_path):
[
{"save_dir": Path("rec"), "save_audio": Path("a.wav")}, # save-dir owns the audio name
{"save_dir": Path("rec"), "save_transcript": Path("a.txt")}, # ...and the transcript
{"save_dir": Path("rec"), "system_audio": True}, # two streams can't share one wav
{"name": "Standup"}, # --name without --save-dir is meaningless
],
)
Expand Down
92 changes: 92 additions & 0 deletions tests/test_stream_system_audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@

import json
import types
import wave
from collections.abc import Callable
from datetime import datetime

from typer.testing import CliRunner

Expand All @@ -18,6 +20,21 @@
runner = CliRunner()


class _FixedDatetime:
"""Freezes datetime.now() so an auto-assembled --save-dir filename is deterministic."""

@staticmethod
def now(*_args, **_kwargs):
# Naive local wall-clock; _exec's .astimezone() keeps the same 14:30:05.
return datetime(2026, 6, 16, 14, 30, 5)


def _wav_frames(path):
"""The raw PCM frames written to a tee'd WAV, for asserting per-channel contents."""
with wave.open(str(path), "rb") as w:
return w.readframes(w.getnframes())


def _capture_source(seen):
def fake(api_key, source, *, params, on_begin=None, on_turn=None, on_termination=None):
seen["source"] = source
Expand Down Expand Up @@ -397,3 +414,78 @@ def test_stream_show_code_rejects_system_audio():
result = runner.invoke(app, ["stream", "--system-audio", "--show-code"])
assert result.exit_code == 2
assert "--show-code" in result.output


def test_stream_system_audio_save_dir_writes_one_wav_per_channel(monkeypatch, tmp_path):
    # Two parallel streams can't be teed into a single WAV, so --save-dir with
    # --system-audio must produce <stem>-you.wav and <stem>-system.wav next to the one
    # transcript both channels share.
    config.set_api_key("default", "sk_live")
    monkeypatch.setattr("aai_cli.commands.stream._exec.datetime", _FixedDatetime)

    system_pcm = b"\x10\x11\x12\x13"
    mic_pcm = b"\x20\x21\x22\x23"

    class StubSystemAudio:
        def __init__(self, *, on_open=None):
            self.sample_rate = 16000

        def __iter__(self):
            return iter([system_pcm])

    class StubMic:
        def __init__(self, *, target_rate=None, device=None, capture_rate=None, on_open=None):
            self.sample_rate = 16000

        def __iter__(self):
            return iter([mic_pcm])

    def stub_stream_audio(api_key, source, *, params, on_turn=None, **_kwargs):
        # Exhausting the tee'd generator is the step that writes the channel WAV.
        b"".join(source)
        if not on_turn:
            return
        turn = types.SimpleNamespace(transcript="hi", end_of_turn=True, speaker_label=None)
        on_turn(turn)

    monkeypatch.setattr("aai_cli.commands.stream._exec.MacSystemAudioSource", StubSystemAudio)
    monkeypatch.setattr("aai_cli.commands.stream._exec.MicrophoneSource", StubMic)
    monkeypatch.setattr("aai_cli.commands.stream._exec.client.stream_audio", stub_stream_audio)

    save_root = tmp_path / "rec"
    cli_args = ["stream", "--system-audio", "--save-dir", str(save_root), "--name", "Irma", "-j"]
    result = runner.invoke(app, cli_args)
    assert result.exit_code == 0

    day_dir = save_root / "2026-06-16"
    assert _wav_frames(day_dir / "2026-06-16-143005-irma-you.wav") == mic_pcm
    assert _wav_frames(day_dir / "2026-06-16-143005-irma-system.wav") == system_pcm
    # The single shared transcript holds both channels' turns, each with its source prefix.
    saved_text = (day_dir / "2026-06-16-143005-irma.txt").read_text(encoding="utf-8")
    assert "You: hi" in saved_text
    assert "System: hi" in saved_text


def test_stream_system_audio_only_save_dir_writes_one_labeled_wav(monkeypatch, tmp_path):
    # With --system-audio-only there is one stream, saved to a single WAV that carries
    # the channel label (not the bare <stem>.wav a mic recording gets); the microphone
    # must stay closed throughout.
    config.set_api_key("default", "sk_live")
    monkeypatch.setattr("aai_cli.commands.stream._exec.datetime", _FixedDatetime)

    system_pcm = b"\x30\x31\x32\x33"

    class StubSystemAudio:
        def __init__(self, *, on_open=None):
            self.sample_rate = 16000

        def __iter__(self):
            return iter([system_pcm])

    def forbidden_mic(**_kwargs):
        raise AssertionError("system-audio-only must not open the microphone")

    def stub_stream_audio(api_key, source, *, params, **_kwargs):
        # Drain the source so the tee writes the WAV.
        b"".join(source)

    monkeypatch.setattr("aai_cli.commands.stream._exec.MacSystemAudioSource", StubSystemAudio)
    monkeypatch.setattr("aai_cli.commands.stream._exec.MicrophoneSource", forbidden_mic)
    monkeypatch.setattr("aai_cli.commands.stream._exec.client.stream_audio", stub_stream_audio)

    save_root = tmp_path / "rec"
    cli_args = ["stream", "--system-audio-only", "--save-dir", str(save_root), "-j"]
    result = runner.invoke(app, cli_args)
    assert result.exit_code == 0

    day_dir = save_root / "2026-06-16"
    assert _wav_frames(day_dir / "2026-06-16-143005-system.wav") == system_pcm
    unlabeled = day_dir / "2026-06-16-143005.wav"
    assert not unlabeled.exists()
9 changes: 9 additions & 0 deletions tests/test_streaming_naming.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,15 @@ def test_resolve_without_name_is_just_the_timestamp():
assert naming.resolve(Path("rec"), "!!!", now=NOW).transcript.name == "2026-06-16-143005.txt"


def test_channel_audio_inserts_channel_suffix_before_extension():
    # Under --system-audio the auto-named WAV becomes one file per channel: the channel
    # name is appended to the stem, while the date bucket and the .wav extension stay.
    base = naming.resolve(Path("rec"), "Sync", now=NOW).audio
    you_wav = naming.channel_audio(base, "you")
    system_wav = naming.channel_audio(base, "system")
    assert you_wav.name == "2026-06-16-143005-sync-you.wav"
    assert system_wav.name == "2026-06-16-143005-sync-system.wav"
    assert naming.channel_audio(base, "you").parent == base.parent  # same date bucket


def test_ensure_dir_creates_nested_dirs(tmp_path):
target = tmp_path / "rec" / "2026-06-16"
naming.ensure_dir(target)
Expand Down
Loading