Skip to content

Commit 95effaf

Browse files
alexkroman and claude authored
Support piped stdout in dictate: auto-start single utterance (#193)
Enable `assembly dictate` to work in pipelines by detecting when stdout is not a TTY and automatically recording a single utterance without requiring a toggle keystroke.

## Summary

When `assembly dictate` is piped to another command (e.g., `assembly dictate | assembly llm "…"`), the downstream consumer blocks waiting for input while dictate idles in its interactive loop. This change detects piped stdout and switches to single-shot mode, which auto-starts recording and exits after one utterance so the transcript flows to the next stage.

## Key Changes

- **Extract `_capture_and_transcribe()` helper**: Moves the record-and-transcribe logic that was previously inlined in the session loop into one function, so the interactive and single-shot modes share it instead of duplicating it.
- **Add `single` parameter to `_session()`**: Controls whether to auto-start one utterance (piped or `--once`) or enter the interactive idle-toggle loop. The docstring clarifies the two modes and their use cases.
- **Detect piped stdout in `run_dictate()`**: Import the `stdio` module and call `stdio.stdout_is_tty()` to determine whether stdout is a pipe. Set `single = opts.once or not stdio.stdout_is_tty()` to enable single-shot mode for both the `--once` flag and piped scenarios.
- **Conditional start prompt**: Only show the interactive "Press Enter to start recording…" prompt when in interactive mode (`not single`), since single-shot mode announces "● Recording" when the mic opens.
- **Update help text and examples**:
  - Clarify `--once` help: "Record one utterance immediately, then exit"
  - Expand the command docstring to document piped and `--once` behavior
  - Add example: `assembly dictate | assembly llm "write a conventional commit"`
- **Test coverage**: Add `test_piped_stdout_auto_starts_one_utterance_then_exits()` to verify that piped stdout triggers single-shot mode, auto-starts recording, and exits after one utterance. Mock `stdio.stdout_is_tty()` to return `False` and verify that the session performs no blocking idle key read (only the zero-timeout in-recording poll).

## Implementation Details

- The `stdio` module is imported alongside `sync_stt` in `_exec.py` to check TTY status.
- Test seams mock `stdio.stdout_is_tty()` to default to `True` (interactive), preventing capsys from forcing single-utterance mode in unrelated tests.
- The single-shot path calls `_capture_and_transcribe()` once and returns, while the interactive path loops until a quit key is pressed (`--once` now takes the single-shot path rather than being checked inside the loop).

https://claude.ai/code/session_01KchiKPHFyhKBpQf6QkeyfT

Co-authored-by: Claude <noreply@anthropic.com>
1 parent 3e18379 commit 95effaf

4 files changed

Lines changed: 88 additions & 24 deletions

File tree

aai_cli/commands/dictate/__init__.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@
2424
[
2525
("Dictate: Enter starts a recording, Enter transcribes it", "assembly dictate"),
2626
("One utterance, then exit", "assembly dictate --once"),
27+
(
28+
"Pipe one utterance into another command",
29+
'assembly dictate | assembly llm "write a conventional commit"',
30+
),
2731
("Dictate in Spanish", "assembly dictate --language es"),
2832
(
2933
"Bias recognition toward tricky terms",
@@ -51,7 +55,7 @@ def dictate(
5155
None, "--word-boost", help="Bias recognition toward a term (repeatable)"
5256
),
5357
device: int | None = typer.Option(None, "--device", help="Microphone device index"),
54-
once: bool = typer.Option(False, "--once", help="Transcribe one utterance, then exit"),
58+
once: bool = typer.Option(False, "--once", help="Record one utterance immediately, then exit"),
5559
max_seconds: float = typer.Option(
5660
float(MAX_AUDIO_SECONDS),
5761
"--max-seconds",
@@ -72,7 +76,9 @@ def dictate(
7276
Press Enter (or Space) to start recording and press it again to stop; the
7377
utterance is sent to the AssemblyAI Sync API and the transcript prints
7478
immediately — no polling. Press q (or Esc/Ctrl-C) to finish. Each utterance
75-
can be up to 120 seconds long.
79+
can be up to 120 seconds long. With --once, or when stdout is piped,
80+
recording starts immediately and dictate exits after one utterance so the
81+
transcript flows to the next command.
7682
"""
7783
opts = dictate_exec.DictateOptions(
7884
language=language,

aai_cli/commands/dictate/_exec.py

Lines changed: 47 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from dataclasses import dataclass
1414

1515
from aai_cli.app.context import AppState
16-
from aai_cli.core import choices, sync_stt
16+
from aai_cli.core import choices, stdio, sync_stt
1717
from aai_cli.core.config_builder import split_csv
1818
from aai_cli.core.hotkey import CTRL_C, CTRL_D, ESC, TerminalKeys
1919
from aai_cli.core.microphone import MicrophoneSource
@@ -138,32 +138,52 @@ def _transcribe_utterance(
138138
_emit(result, json_mode=json_mode)
139139

140140

141+
def _capture_and_transcribe(
    keys: TerminalKeys,
    api_key: str,
    opts: DictateOptions,
    state: AppState,
    *,
    json_mode: bool,
) -> None:
    """Capture a single utterance from the microphone and emit its transcript.

    Builds a ``MicrophoneSource`` for ``opts.device``, hands it to ``_record``
    (bounded by ``opts.max_seconds``), and passes the recorded audio on to
    ``_transcribe_utterance``.
    """

    def announce_recording():
        # Passed to the microphone source as its ``on_open`` hook.
        return _note(
            "● Recording — press Enter to stop.", json_mode=json_mode, quiet=state.quiet
        )

    source = MicrophoneSource(
        target_rate=TARGET_RATE,
        device=opts.device,
        on_open=announce_recording,
    )
    audio = _record(keys, source, max_seconds=opts.max_seconds)
    _transcribe_utterance(api_key, audio, opts, state, json_mode=json_mode)
159+
160+
141161
def _session(
    keys: TerminalKeys,
    api_key: str,
    opts: DictateOptions,
    state: AppState,
    *,
    json_mode: bool,
    single: bool,
) -> None:
    """Run a dictation session in single-shot or interactive mode.

    With ``single`` set (stdout is piped, or --once was given) one utterance
    is captured right away, with no start keypress, and the function returns;
    exiting is what closes a piped stdout and unblocks the downstream command.

    Otherwise keys are read in a loop: a key in ``TOGGLE_KEYS`` captures and
    transcribes one utterance, any other key is ignored, and a key in
    ``QUIT_KEYS`` (or a ``None`` read) ends the session.
    """

    def capture_once() -> None:
        _capture_and_transcribe(keys, api_key, opts, state, json_mode=json_mode)

    if single:
        capture_once()
        return

    # Idle read with no timeout; stop on a None read or a quit key.
    while (pressed := keys.read(None)) is not None and pressed not in QUIT_KEYS:
        if pressed in TOGGLE_KEYS:
            capture_once()
167187

168188

169189
def run_dictate(opts: DictateOptions, state: AppState, *, json_mode: bool) -> None:
@@ -187,12 +207,20 @@ def run_dictate(opts: DictateOptions, state: AppState, *, json_mode: bool) -> No
187207
"state the language inside the prompt.",
188208
json_mode=json_mode,
189209
)
190-
_note(
191-
"Press Enter to start recording, Enter again to transcribe. q quits.",
192-
json_mode=json_mode,
193-
quiet=state.quiet,
194-
)
195-
_session(keys, api_key, opts, state, json_mode=json_mode)
210+
# A piped stdout (`assembly dictate | assembly llm …`) only closes when
211+
# dictate exits, so a looping session would keep the downstream consumer
212+
# blocked on stdin forever. Single-shot mode (piped or --once) records
213+
# one utterance and exits so the transcript drains to the next stage.
214+
single = opts.once or not stdio.stdout_is_tty()
215+
if not single:
216+
# Only the interactive loop needs a start prompt; single-shot
217+
# auto-starts and announces "● Recording" when the mic opens.
218+
_note(
219+
"Press Enter to start recording, Enter again to transcribe. q quits.",
220+
json_mode=json_mode,
221+
quiet=state.quiet,
222+
)
223+
_session(keys, api_key, opts, state, json_mode=json_mode, single=single)
196224
except KeyboardInterrupt:
197225
# Ctrl-C is the normal "done dictating" signal: end cleanly, not as an error.
198226
return

tests/__snapshots__/test_snapshots_help_run.ambr

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,9 @@
353353
Press Enter (or Space) to start recording and press it again to stop; the
354354
utterance is sent to the AssemblyAI Sync API and the transcript prints
355355
immediately — no polling. Press q (or Esc/Ctrl-C) to finish. Each utterance
356-
can be up to 120 seconds long.
356+
can be up to 120 seconds long. With --once, or when stdout is piped,
357+
recording starts immediately and dictate exits after one utterance so the
358+
transcript flows to the next command.
357359

358360
╭─ Options ────────────────────────────────────────────────────────────────────╮
359361
│ --language TEXT ISO 639-1 language code, │
@@ -366,8 +368,8 @@
366368
│ --word-boost TEXT Bias recognition toward a │
367369
│ term (repeatable) │
368370
│ --device INTEGER Microphone device index │
369-
│ --once Transcribe one utterance,
370-
│ then exit
371+
│ --once Record one utterance
372+
immediately, then exit │
371373
│ --max-seconds FLOAT RANGE Auto-stop a recording │
372374
│ [1.0<=x<=120.0] after this many seconds │
373375
│ [default: 120.0] │
@@ -386,6 +388,8 @@
386388
$ assembly dictate
387389
One utterance, then exit
388390
$ assembly dictate --once
391+
Pipe one utterance into another command
392+
$ assembly dictate | assembly llm "write a conventional commit"
389393
Dictate in Spanish
390394
$ assembly dictate --language es
391395
Bias recognition toward tricky terms

tests/test_dictate_exec.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,10 @@ def seams(monkeypatch):
7070
harness = {"keys": FakeKeys([]), "chunks": [CHUNK, CHUNK], "mic": {}, "calls": []}
7171

7272
monkeypatch.setattr(dictate_exec, "TerminalKeys", lambda: harness["keys"])
73+
# Default to interactive stdout (a real terminal); the piped tests flip this.
74+
# capsys leaves stdout a non-tty, which would otherwise force single-utterance
75+
# mode and end every looping session after one utterance.
76+
monkeypatch.setattr(dictate_exec.stdio, "stdout_is_tty", lambda: True)
7377

7478
def fake_mic(*, target_rate, device=None, on_open=None):
7579
harness["mic"].update(target_rate=target_rate, device=device)
@@ -189,6 +193,28 @@ def test_once_exits_after_a_single_utterance(seams):
189193
assert seams["keys"].script
190194

191195

196+
def test_piped_stdout_auto_starts_one_utterance_then_exits(seams, monkeypatch, capsys):
    """Piped stdout records exactly one auto-started utterance, then exits."""

    # Simulate `assembly dictate | assembly llm …`: stdout is a pipe, not a tty.
    # A looping session would hold the pipe open and hang the consumer.
    def stdout_is_piped():
        return False

    monkeypatch.setattr(dictate_exec.stdio, "stdout_is_tty", stdout_is_piped)

    # The script has no leading toggle to start recording and no quit key; the
    # first Enter stops the auto-started recording and the rest stay unread.
    seams["keys"] = FakeKeys(["\r", "\r", "\r"])

    _run()

    # Exactly one utterance was transcribed.
    assert len(seams["calls"]) == 1

    keys = seams["keys"]
    # The session ended on its own, not because the key script ran dry.
    assert keys.script
    # Only the zero-timeout in-recording poll happened; there was no blocking
    # read(None) waiting for a start keypress.
    assert keys.timeouts == [0]

    out, err = capsys.readouterr()
    assert out.strip() == "hello world"
    # The mic-open note is printed, but the interactive start prompt is not.
    assert "Recording — press Enter to stop" in err
    assert "start recording" not in err
216+
217+
192218
@pytest.mark.parametrize("quit_key", ["q", "Q", "\x1b", "\x04"])
193219
def test_quit_keys_end_the_session_without_recording(seams, quit_key, capsys):
194220
seams["keys"] = FakeKeys([quit_key, "\r", "\r"])

0 commit comments

Comments
 (0)