From 35cc52e557153154e454cf56d53268d831c44650 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 16 Jun 2026 22:28:31 +0000 Subject: [PATCH] Save one WAV per channel for stream --save-dir --system-audio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --save-dir --system-audio previously errored ("the mic and system streams can't share one recording") because the parallel mic + system streams can't tee to a single WAV. Instead of refusing, --save-dir now writes one file per channel — <stem>-you.wav and <stem>-system.wav — beside the single shared transcript, so a meeting recorder gets both sides captured. --system-audio-only saves its lone stream to <stem>-system.wav, and explicit --save-audio is now also allowed with --system-audio-only (a single stream, one file) while still rejected for the two-stream --system-audio. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01DKJdMMr8hdLEiBnZWS9A3R --- aai_cli/commands/stream/__init__.py | 2 +- aai_cli/commands/stream/_exec.py | 66 ++++++++----- .../aai-cli/references/transcription.md | 3 +- aai_cli/streaming/naming.py | 10 ++ aai_cli/streaming/session.py | 25 ++++- .../test_snapshots_help_run.ambr | 3 +- tests/test_stream_exec.py | 38 +++++++- tests/test_stream_system_audio.py | 92 +++++++++++++++++++ tests/test_streaming_naming.py | 9 ++ 9 files changed, 218 insertions(+), 30 deletions(-) diff --git a/aai_cli/commands/stream/__init__.py b/aai_cli/commands/stream/__init__.py index 7fb00569..fcfeb5d4 100644 --- a/aai_cli/commands/stream/__init__.py +++ b/aai_cli/commands/stream/__init__.py @@ -111,7 +111,7 @@ def stream( None, "--save-dir", help="Auto-name the transcript and a matching WAV under DIR/YYYY-MM-DD/ " - "with a timestamped file", + "with a timestamped file; --system-audio saves one WAV per channel", rich_help_panel=help_panels.OPT_SAVING, file_okay=False, ), diff --git a/aai_cli/commands/stream/_exec.py b/aai_cli/commands/stream/_exec.py index 
c0bc8b43..fc5c2743 100644 --- a/aai_cli/commands/stream/_exec.py +++ b/aai_cli/commands/stream/_exec.py @@ -198,16 +198,30 @@ def _reject_save_with_show_code(opts: StreamOptions) -> None: ) -def _resolve_save_targets( - opts: StreamOptions, sources: SourceOptions -) -> tuple[Path | None, Path | None]: - """Resolve the save flags into the (audio, transcript) paths the session writes. +@dataclass(frozen=True) +class SaveTargets: + """Resolved save destinations for one streaming run. + + ``audio`` tees a single source to one WAV; ``audio_by_label`` instead maps each + parallel ``--system-audio`` channel ("you", "system") to its own WAV when the two + streams can't share a file. At most one of the two is set; ``transcript`` is the + single shared transcript either way. + """ + + transcript: Path | None = None + audio: Path | None = None + audio_by_label: dict[str, Path] | None = None + - ``--save-dir`` owns filename assembly — it auto-names both the transcript and a - matching WAV under ``DIR/YYYY-MM-DD/`` — so it can't be combined with the explicit +def _resolve_save_targets(opts: StreamOptions, sources: SourceOptions) -> SaveTargets: + """Resolve the save flags into the destinations the session writes. + + ``--save-dir`` owns filename assembly — it auto-names the transcript and a matching + WAV under ``DIR/YYYY-MM-DD/`` — so it can't be combined with the explicit ``--save-audio``/``--save-transcript`` paths, and ``--name`` only feeds that assembly. - Audio can't tee to a single WAV under ``--system-audio`` (two streams), which rejects - both the explicit ``--save-audio`` and ``--save-dir``'s audio leg. + Two parallel ``--system-audio`` streams can't tee to one WAV, so under ``--save-dir`` + each channel gets its own ``-{you,system}.wav`` (one shared transcript), and the + explicit single-path ``--save-audio`` is rejected outright. 
""" if opts.save_dir is not None: mutually_exclusive( @@ -216,18 +230,26 @@ def _resolve_save_targets( ("--save-transcript", opts.save_transcript is not None), suggestion="--save-dir names the files for you; drop the explicit path.", ) - if sources.from_system_audio: - raise UsageError( - "--save-dir cannot be combined with --system-audio; the mic and system " - "streams can't share one recording.", - suggestion="Record a single source (mic, file, URL, or - on stdin).", - ) # Local wall-clock time (what a meeting filename wants); the explicit utc-then- # astimezone keeps the now() call timezone-aware for the linter. now = datetime.now(UTC).astimezone() paths = naming.resolve(opts.save_dir, opts.name, now=now) naming.ensure_dir(paths.transcript.parent) - return paths.audio, paths.transcript + if sources.system_audio: + # Parallel mic + system: one WAV per channel beside the shared transcript. + return SaveTargets( + transcript=paths.transcript, + audio_by_label={ + "you": naming.channel_audio(paths.audio, "you"), + "system": naming.channel_audio(paths.audio, "system"), + }, + ) + if sources.system_audio_only: + # A lone system-audio stream; label its single WAV so it reads like the pair. 
+ return SaveTargets( + transcript=paths.transcript, audio=naming.channel_audio(paths.audio, "system") + ) + return SaveTargets(transcript=paths.transcript, audio=paths.audio) if opts.name is not None: raise UsageError( "--name applies only with --save-dir.", @@ -235,16 +257,17 @@ def _resolve_save_targets( "or --save-transcript PATH for an explicit path.", ) if opts.save_audio is not None: - if sources.from_system_audio: + if sources.system_audio: raise UsageError( "--save-audio cannot be combined with --system-audio; the mic and system " "streams can't share one file.", - suggestion="Record a single source (mic, file, URL, or - on stdin).", + suggestion="Pass --save-dir DIR to save one WAV per channel, " + "or record a single source.", ) record.validate_target(opts.save_audio) if opts.save_transcript is not None: transcript.validate_target(opts.save_transcript) - return opts.save_audio, opts.save_transcript + return SaveTargets(transcript=opts.save_transcript, audio=opts.save_audio) def _dispatch(session: StreamSession, opts: SourceOptions) -> None: @@ -389,7 +412,7 @@ def run_stream(opts: StreamOptions, state: AppState, *, json_mode: bool) -> None # Validate the requested sources (including that a local file exists) before # credentials, so a typo'd path reads as "file not found" — not as a login. 
validate_sources(sources, has_llm=bool(opts.llm_prompt), text_mode=text_mode) - save_audio, save_transcript = _resolve_save_targets(opts, sources) + targets = _resolve_save_targets(opts, sources) if sources.from_file and not sources.from_stdin: client.resolve_audio_source(sources.source, sample=sources.sample) api_key = state.resolve_api_key() @@ -405,8 +428,9 @@ def run_stream(opts: StreamOptions, state: AppState, *, json_mode: bool) -> None llm_prompts=llm_prompts, model=opts.model, max_tokens=opts.max_tokens, - save_audio=save_audio, - save_transcript=save_transcript, + save_audio=targets.audio, + save_audio_by_label=targets.audio_by_label, + save_transcript=targets.transcript, llm_interval=opts.llm_interval, ) _dispatch(session, sources) diff --git a/aai_cli/skills/aai-cli/references/transcription.md b/aai_cli/skills/aai-cli/references/transcription.md index f0c2ec56..0b107432 100644 --- a/aai_cli/skills/aai-cli/references/transcription.md +++ b/aai_cli/skills/aai-cli/references/transcription.md @@ -43,7 +43,8 @@ assembly transcribe call.mp3 --show-code Omit `SOURCE` to use the microphone; pass a file/URL/media page to stream that, or `--sample`. macOS can capture system audio with `--system-audio` (mic + system) -or `--system-audio-only`. +or `--system-audio-only`. With `--save-dir`, `--system-audio` writes one WAV per +channel (`<stem>-you.wav`, `<stem>-system.wav`) beside the shared transcript. High-value flags (run `assembly stream --help` for the full set): diff --git a/aai_cli/streaming/naming.py b/aai_cli/streaming/naming.py index 24a4d4d7..c1d2fc13 100644 --- a/aai_cli/streaming/naming.py +++ b/aai_cli/streaming/naming.py @@ -61,6 +61,16 @@ def resolve(save_dir: Path, name: str | None, *, now: datetime) -> SavePaths: return SavePaths(transcript=bucket / f"{stem}.txt", audio=bucket / f"{stem}.wav") +def channel_audio(audio: Path, channel: str) -> Path: + """Insert a per-channel suffix into an auto-named WAV path. 
+ + ``--system-audio`` records two parallel streams that can't share one WAV, so each + channel ("you", "system") gets its own file beside the shared transcript: the base + ``DIR/.../<stem>.wav`` becomes ``DIR/.../<stem>-<channel>.wav``. + """ + return audio.with_name(f"{audio.stem}-{channel}{audio.suffix}") + + def ensure_dir(path: Path) -> None: """Create ``path`` (and parents) for the auto-named files, as a clean CLIError on failure.""" try: diff --git a/aai_cli/streaming/session.py b/aai_cli/streaming/session.py index 71d0ce67..2513b5e4 100644 --- a/aai_cli/streaming/session.py +++ b/aai_cli/streaming/session.py @@ -154,11 +154,15 @@ class StreamSession: llm_prompts: list[str] model: str max_tokens: int - # When set, tee the streamed PCM to this path as a WAV (see record.tee_wav). Only - # the single-source path sets it — the parallel/batch callers reject --save-audio. + # When set, tee the streamed PCM to this path as a WAV (see record.tee_wav). The + # single-source path sets it; the batch (--from-stdin) caller rejects --save-audio. save_audio: Path | None = None + # --system-audio runs two parallel streams that can't share one WAV, so each is teed + # to its own file keyed by source label ("you", "system"). Set instead of save_audio + # for that path; a label with no entry (or no save flag) tees nowhere. + save_audio_by_label: dict[str, Path] | None = None # When set, write each finalized turn to this path as plain text (see TranscriptWriter). - # Like save_audio, only the single-source path sets it; batch rejects --save-transcript. + # One shared transcript even under --system-audio (both channels); batch rejects it. save_transcript: Path | None = None # Seconds between --llm summary refreshes; <=0 re-runs the chain on every turn. llm_interval: float = 0.0 @@ -275,12 +279,23 @@ def _maybe_summarize(self, *, final: bool = False) -> None: return follow(answer, turns) + def _audio_target(self, source_label: str | None) -> Path | None: + """The WAV path this source tees to, if any. 
+ + ``--system-audio`` records two channels to two files (``save_audio_by_label``); + every other run tees its single source to ``save_audio``. + """ + if self.save_audio_by_label is not None: + return self.save_audio_by_label.get(source_label or "") + return self.save_audio + def stream_one( self, audio: Iterable[bytes], rate: int, *, source_label: str | None = None ) -> None: - if self.save_audio is not None: + target = self._audio_target(source_label) + if target is not None: # Tee verbatim to disk at the source's true rate before it hits the wire. - audio = record.tee_wav(audio, self.save_audio, rate=rate) + audio = record.tee_wav(audio, target, rate=rate) flags = self.base_flags | {"sample_rate": rate} if source_label == "you": # The microphone captures you alone, so never diarize it into separate diff --git a/tests/__snapshots__/test_snapshots_help_run.ambr b/tests/__snapshots__/test_snapshots_help_run.ambr index ffc0ae1a..7534b1f8 100644 --- a/tests/__snapshots__/test_snapshots_help_run.ambr +++ b/tests/__snapshots__/test_snapshots_help_run.ambr @@ -758,7 +758,8 @@ │ one turn per line │ │ --save-dir DIRECTORY Auto-name the transcript and a matching │ │ WAV under DIR/YYYY-MM-DD/ with a │ - │ timestamped file │ + │ timestamped file; --system-audio saves │ + │ one WAV per channel │ │ --name TEXT Title to slug into the --save-dir │ │ filename (e.g. a meeting title) │ ╰──────────────────────────────────────────────────────────────────────────────╯ diff --git a/tests/test_stream_exec.py b/tests/test_stream_exec.py index 47af253a..d4cf5bb4 100644 --- a/tests/test_stream_exec.py +++ b/tests/test_stream_exec.py @@ -165,6 +165,14 @@ def test_stream_options_are_immutable(): setattr(DEFAULTS, field_name, True) +def test_save_targets_are_immutable(): + # The resolved save destinations are a frozen carrier (like StreamOptions), so a + # later step can't quietly retarget a file mid-run. 
+ field_name = "audio" + with pytest.raises(dataclasses.FrozenInstanceError): + setattr(stream_exec.SaveTargets(), field_name, Path("x.wav")) + + # --- batch streaming (--from-stdin) validation ----------------------------- # Each conflict is rejected before stdin is read, so these raise without a pipe. @pytest.mark.parametrize( @@ -294,6 +302,35 @@ def test_save_audio_rejects_system_audio(): ) +def test_save_audio_allows_system_audio_only(monkeypatch, tmp_path): + # --save-audio is rejected for the two-stream --system-audio, but --system-audio-only + # is a single stream, so it tees to the one explicit WAV like any other source. + config.set_api_key("default", "sk_live") + out = tmp_path / "rec.wav" + + class FakeSystemAudio: + def __init__(self, *, on_open=None): + self.sample_rate = 16000 + + def __iter__(self): + return iter([RecordingMic.PCM]) + + def fake_stream_audio(api_key, source, *, params, **_kwargs): + assert b"".join(source) == RecordingMic.PCM + + monkeypatch.setattr(stream_exec, "MacSystemAudioSource", FakeSystemAudio) + monkeypatch.setattr(stream_exec.client, "stream_audio", fake_stream_audio) + + stream_exec.run_stream( + dataclasses.replace(DEFAULTS, save_audio=out, system_audio_only=True), + AppState(), + json_mode=True, + ) + + with wave.open(str(out), "rb") as w: + assert w.readframes(w.getnframes()) == RecordingMic.PCM + + def test_save_audio_rejects_show_code(): # --show-code emits SDK code that doesn't tee audio, so the combo is rejected. 
with pytest.raises(UsageError): @@ -425,7 +462,6 @@ def test_save_dir_auto_names_transcript_and_matching_wav(monkeypatch, tmp_path): [ {"save_dir": Path("rec"), "save_audio": Path("a.wav")}, # save-dir owns the audio name {"save_dir": Path("rec"), "save_transcript": Path("a.txt")}, # ...and the transcript - {"save_dir": Path("rec"), "system_audio": True}, # two streams can't share one wav {"name": "Standup"}, # --name without --save-dir is meaningless ], ) diff --git a/tests/test_stream_system_audio.py b/tests/test_stream_system_audio.py index d47bdc6e..8ffecfa6 100644 --- a/tests/test_stream_system_audio.py +++ b/tests/test_stream_system_audio.py @@ -7,7 +7,9 @@ import json import types +import wave from collections.abc import Callable +from datetime import datetime from typer.testing import CliRunner @@ -18,6 +20,21 @@ runner = CliRunner() +class _FixedDatetime: + """Freezes datetime.now() so an auto-assembled --save-dir filename is deterministic.""" + + @staticmethod + def now(*_args, **_kwargs): + # Naive local wall-clock; _exec's .astimezone() keeps the same 14:30:05. + return datetime(2026, 6, 16, 14, 30, 5) + + +def _wav_frames(path): + """The raw PCM frames written to a tee'd WAV, for asserting per-channel contents.""" + with wave.open(str(path), "rb") as w: + return w.readframes(w.getnframes()) + + def _capture_source(seen): def fake(api_key, source, *, params, on_begin=None, on_turn=None, on_termination=None): seen["source"] = source @@ -397,3 +414,78 @@ def test_stream_show_code_rejects_system_audio(): result = runner.invoke(app, ["stream", "--system-audio", "--show-code"]) assert result.exit_code == 2 assert "--show-code" in result.output + + +def test_stream_system_audio_save_dir_writes_one_wav_per_channel(monkeypatch, tmp_path): + # --save-dir + --system-audio can't tee two streams into one WAV, so each channel + # gets its own -{you,system}.wav beside the single shared transcript. 
+ config.set_api_key("default", "sk_live") + monkeypatch.setattr("aai_cli.commands.stream._exec.datetime", _FixedDatetime) + + class FakeSystemAudio: + def __init__(self, *, on_open=None): + self.sample_rate = 16000 + + def __iter__(self): + return iter([b"\x10\x11\x12\x13"]) + + class FakeMic: + def __init__(self, *, target_rate=None, device=None, capture_rate=None, on_open=None): + self.sample_rate = 16000 + + def __iter__(self): + return iter([b"\x20\x21\x22\x23"]) + + def fake_stream_audio(api_key, source, *, params, on_turn=None, **_kwargs): + b"".join(source) # draining the tee'd generator is what writes the channel WAV + if on_turn: + on_turn(types.SimpleNamespace(transcript="hi", end_of_turn=True, speaker_label=None)) + + monkeypatch.setattr("aai_cli.commands.stream._exec.MacSystemAudioSource", FakeSystemAudio) + monkeypatch.setattr("aai_cli.commands.stream._exec.MicrophoneSource", FakeMic) + monkeypatch.setattr("aai_cli.commands.stream._exec.client.stream_audio", fake_stream_audio) + + result = runner.invoke( + app, + ["stream", "--system-audio", "--save-dir", str(tmp_path / "rec"), "--name", "Irma", "-j"], + ) + assert result.exit_code == 0 + bucket = tmp_path / "rec" / "2026-06-16" + assert _wav_frames(bucket / "2026-06-16-143005-irma-you.wav") == b"\x20\x21\x22\x23" + assert _wav_frames(bucket / "2026-06-16-143005-irma-system.wav") == b"\x10\x11\x12\x13" + # One shared transcript carries both channels' turns, each with its source prefix. + transcript = (bucket / "2026-06-16-143005-irma.txt").read_text(encoding="utf-8") + assert "You: hi" in transcript + assert "System: hi" in transcript + + +def test_stream_system_audio_only_save_dir_writes_one_labeled_wav(monkeypatch, tmp_path): + # A lone --system-audio-only stream saves to a single channel-labeled WAV (never the + # bare .wav a mic recording uses) and still never opens the microphone. 
+ config.set_api_key("default", "sk_live") + monkeypatch.setattr("aai_cli.commands.stream._exec.datetime", _FixedDatetime) + + class FakeSystemAudio: + def __init__(self, *, on_open=None): + self.sample_rate = 16000 + + def __iter__(self): + return iter([b"\x30\x31\x32\x33"]) + + def fail_mic(**_kwargs): + raise AssertionError("system-audio-only must not open the microphone") + + def fake_stream_audio(api_key, source, *, params, **_kwargs): + b"".join(source) + + monkeypatch.setattr("aai_cli.commands.stream._exec.MacSystemAudioSource", FakeSystemAudio) + monkeypatch.setattr("aai_cli.commands.stream._exec.MicrophoneSource", fail_mic) + monkeypatch.setattr("aai_cli.commands.stream._exec.client.stream_audio", fake_stream_audio) + + result = runner.invoke( + app, ["stream", "--system-audio-only", "--save-dir", str(tmp_path / "rec"), "-j"] + ) + assert result.exit_code == 0 + bucket = tmp_path / "rec" / "2026-06-16" + assert _wav_frames(bucket / "2026-06-16-143005-system.wav") == b"\x30\x31\x32\x33" + assert not (bucket / "2026-06-16-143005.wav").exists() diff --git a/tests/test_streaming_naming.py b/tests/test_streaming_naming.py index a2d5bf48..7f135a25 100644 --- a/tests/test_streaming_naming.py +++ b/tests/test_streaming_naming.py @@ -46,6 +46,15 @@ def test_resolve_without_name_is_just_the_timestamp(): assert naming.resolve(Path("rec"), "!!!", now=NOW).transcript.name == "2026-06-16-143005.txt" +def test_channel_audio_inserts_channel_suffix_before_extension(): + # --system-audio splits the auto-named WAV into one file per channel, keeping the + # date-bucketed stem and .wav extension and only appending the channel name. 
+ audio = naming.resolve(Path("rec"), "Sync", now=NOW).audio + assert naming.channel_audio(audio, "you").name == "2026-06-16-143005-sync-you.wav" + assert naming.channel_audio(audio, "system").name == "2026-06-16-143005-sync-system.wav" + assert naming.channel_audio(audio, "you").parent == audio.parent # same date bucket + + def test_ensure_dir_creates_nested_dirs(tmp_path): target = tmp_path / "rec" / "2026-06-16" naming.ensure_dir(target)