Support piped stdout in dictate: auto-start single utterance (#193)

alexkroman · claude · web-flow · commit 95effaf6f97a · 2026-06-16T22:41:30.000Z
Enable `assembly dictate` to work in pipelines by detecting when stdout is not a TTY and automatically recording a single utterance without requiring a toggle keystroke to start.

## Summary

When `assembly dictate` is piped to another command (e.g., `assembly dictate | assembly llm "…"`), the downstream consumer blocks waiting for input while dictate idles in its interactive loop. This change detects piped stdout and switches to single-shot mode, which auto-starts recording and exits after one utterance so the transcript flows to the next stage.

## Key Changes

- **Extract `_capture_and_transcribe()` helper**: Moves the record-and-transcribe logic that was previously inlined in the session loop into a helper, so that both the interactive and single-shot modes can reuse it.
- **Add `single` parameter to `_session()`**: Controls whether to auto-start one utterance (piped or `--once`) or enter the interactive idle-toggle loop. The docstring clarifies the two modes and their use cases.
- **Detect piped stdout in `run_dictate()`**: Import the `stdio` module and call `stdio.stdout_is_tty()` to determine whether stdout is a pipe. Set `single = opts.once or not stdio.stdout_is_tty()` to enable single-shot mode for both the `--once` flag and piped scenarios.
- **Conditional start prompt**: Only show the interactive "Press Enter to start recording…" prompt when in interactive mode (`not single`), since single-shot mode announces "● Recording" when the mic opens.
- **Update help text and examples**:
  - Clarify `--once` help: "Record one utterance immediately, then exit"
  - Expand command docstring to document piped and `--once` behavior
  - Add example: `assembly dictate | assembly llm "write a conventional commit"`
- **Test coverage**: Add `test_piped_stdout_auto_starts_one_utterance_then_exits()` to verify that piped stdout triggers single-shot mode, auto-starts recording, and exits after one utterance.
Mock `stdio.stdout_is_tty()` to return `False` and verify that the session performs no blocking idle key read (only the zero-timeout in-recording poll).

## Implementation Details

- The `stdio` module is imported alongside `sync_stt` in `_exec.py` to check TTY status.
- The test `seams` fixture mocks `stdio.stdout_is_tty()` to default to `True` (interactive), preventing capsys from forcing single-utterance mode in unrelated tests.
- The single-shot path calls `_capture_and_transcribe()` once and returns, while the interactive path loops until a quit key is read (or the key read returns `None`). `--once` no longer exits from inside the loop; it now takes the single-shot path.

https://claude.ai/code/session_01KchiKPHFyhKBpQf6QkeyfT

Co-authored-by: Claude <noreply@anthropic.com>
diff --git a/aai_cli/commands/dictate/__init__.py b/aai_cli/commands/dictate/__init__.py
@@ -24,6 +24,10 @@
         [
             ("Dictate: Enter starts a recording, Enter transcribes it", "assembly dictate"),
             ("One utterance, then exit", "assembly dictate --once"),
+            (
+                "Pipe one utterance into another command",
+                'assembly dictate | assembly llm "write a conventional commit"',
+            ),
             ("Dictate in Spanish", "assembly dictate --language es"),
             (
                 "Bias recognition toward tricky terms",
@@ -51,7 +55,7 @@ def dictate(
         None, "--word-boost", help="Bias recognition toward a term (repeatable)"
     ),
     device: int | None = typer.Option(None, "--device", help="Microphone device index"),
-    once: bool = typer.Option(False, "--once", help="Transcribe one utterance, then exit"),
+    once: bool = typer.Option(False, "--once", help="Record one utterance immediately, then exit"),
     max_seconds: float = typer.Option(
         float(MAX_AUDIO_SECONDS),
         "--max-seconds",
@@ -72,7 +76,9 @@ def dictate(
     Press Enter (or Space) to start recording and press it again to stop; the
     utterance is sent to the AssemblyAI Sync API and the transcript prints
     immediately — no polling. Press q (or Esc/Ctrl-C) to finish. Each utterance
-    can be up to 120 seconds long.
+    can be up to 120 seconds long. With --once, or when stdout is piped,
+    recording starts immediately and dictate exits after one utterance so the
+    transcript flows to the next command.
     """
     opts = dictate_exec.DictateOptions(
         language=language,
diff --git a/aai_cli/commands/dictate/_exec.py b/aai_cli/commands/dictate/_exec.py
@@ -13,7 +13,7 @@
 from dataclasses import dataclass
 
 from aai_cli.app.context import AppState
-from aai_cli.core import choices, sync_stt
+from aai_cli.core import choices, stdio, sync_stt
 from aai_cli.core.config_builder import split_csv
 from aai_cli.core.hotkey import CTRL_C, CTRL_D, ESC, TerminalKeys
 from aai_cli.core.microphone import MicrophoneSource
@@ -138,32 +138,52 @@ def _transcribe_utterance(
     _emit(result, json_mode=json_mode)
 
 
+def _capture_and_transcribe(
+    keys: TerminalKeys,
+    api_key: str,
+    opts: DictateOptions,
+    state: AppState,
+    *,
+    json_mode: bool,
+) -> None:
+    """Record one utterance from the mic and print its transcript."""
+    mic = MicrophoneSource(
+        target_rate=TARGET_RATE,
+        device=opts.device,
+        on_open=lambda: _note(
+            "● Recording — press Enter to stop.", json_mode=json_mode, quiet=state.quiet
+        ),
+    )
+    pcm = _record(keys, mic, max_seconds=opts.max_seconds)
+    _transcribe_utterance(api_key, pcm, opts, state, json_mode=json_mode)
+
+
 def _session(
     keys: TerminalKeys,
     api_key: str,
     opts: DictateOptions,
     state: AppState,
     *,
     json_mode: bool,
+    single: bool,
 ) -> None:
-    """The dictation loop: idle until a toggle key, record, transcribe, repeat."""
+    """Drive recording: one auto-started utterance, or the idle-toggle loop.
+
+    ``single`` (a piped stdout or --once) starts recording immediately so a
+    one-off capture takes a single keystroke to stop and then exits — which
+    closes a piped stdout and unblocks the downstream command. Otherwise it's
+    the interactive loop: idle until a toggle key, record, transcribe, repeat.
+    """
+    if single:
+        _capture_and_transcribe(keys, api_key, opts, state, json_mode=json_mode)
+        return
     while True:
         key = keys.read(None)
         if key is None or key in QUIT_KEYS:
             return
         if key not in TOGGLE_KEYS:
             continue
-        mic = MicrophoneSource(
-            target_rate=TARGET_RATE,
-            device=opts.device,
-            on_open=lambda: _note(
-                "● Recording — press Enter to stop.", json_mode=json_mode, quiet=state.quiet
-            ),
-        )
-        pcm = _record(keys, mic, max_seconds=opts.max_seconds)
-        _transcribe_utterance(api_key, pcm, opts, state, json_mode=json_mode)
-        if opts.once:
-            return
+        _capture_and_transcribe(keys, api_key, opts, state, json_mode=json_mode)
 
 
 def run_dictate(opts: DictateOptions, state: AppState, *, json_mode: bool) -> None:
@@ -187,12 +207,20 @@ def run_dictate(opts: DictateOptions, state: AppState, *, json_mode: bool) -> No
                     "state the language inside the prompt.",
                     json_mode=json_mode,
                 )
-            _note(
-                "Press Enter to start recording, Enter again to transcribe. q quits.",
-                json_mode=json_mode,
-                quiet=state.quiet,
-            )
-            _session(keys, api_key, opts, state, json_mode=json_mode)
+            # A piped stdout (`assembly dictate | assembly llm …`) only closes when
+            # dictate exits, so a looping session would keep the downstream consumer
+            # blocked on stdin forever. Single-shot mode (piped or --once) records
+            # one utterance and exits so the transcript drains to the next stage.
+            single = opts.once or not stdio.stdout_is_tty()
+            if not single:
+                # Only the interactive loop needs a start prompt; single-shot
+                # auto-starts and announces "● Recording" when the mic opens.
+                _note(
+                    "Press Enter to start recording, Enter again to transcribe. q quits.",
+                    json_mode=json_mode,
+                    quiet=state.quiet,
+                )
+            _session(keys, api_key, opts, state, json_mode=json_mode, single=single)
     except KeyboardInterrupt:
         # Ctrl-C is the normal "done dictating" signal: end cleanly, not as an error.
         return
diff --git a/tests/__snapshots__/test_snapshots_help_run.ambr b/tests/__snapshots__/test_snapshots_help_run.ambr
@@ -353,7 +353,9 @@
    Press Enter (or Space) to start recording and press it again to stop; the
    utterance is sent to the AssemblyAI Sync API and the transcript prints
    immediately — no polling. Press q (or Esc/Ctrl-C) to finish. Each utterance
-   can be up to 120 seconds long.
+   can be up to 120 seconds long. With --once, or when stdout is piped,
+   recording starts immediately and dictate exits after one utterance so the
+   transcript flows to the next command.
   
   ╭─ Options ────────────────────────────────────────────────────────────────────╮
   │ --language             TEXT                       ISO 639-1 language code,   │
@@ -366,8 +368,8 @@
   │ --word-boost           TEXT                       Bias recognition toward a  │
   │                                                   term (repeatable)          │
   │ --device               INTEGER                    Microphone device index    │
-  │ --once                                            Transcribe one utterance,  │
-  │                                                   then exit                  │
+  │ --once                                            Record one utterance       │
+  │                                                   immediately, then exit     │
   │ --max-seconds          FLOAT RANGE                Auto-stop a recording      │
   │                        [1.0<=x<=120.0]            after this many seconds    │
   │                                                   [default: 120.0]           │
@@ -386,6 +388,8 @@
    $ assembly dictate
    One utterance, then exit
    $ assembly dictate --once
+   Pipe one utterance into another command
+   $ assembly dictate | assembly llm "write a conventional commit"
    Dictate in Spanish
    $ assembly dictate --language es
    Bias recognition toward tricky terms
diff --git a/tests/test_dictate_exec.py b/tests/test_dictate_exec.py
@@ -70,6 +70,10 @@ def seams(monkeypatch):
     harness = {"keys": FakeKeys([]), "chunks": [CHUNK, CHUNK], "mic": {}, "calls": []}
 
     monkeypatch.setattr(dictate_exec, "TerminalKeys", lambda: harness["keys"])
+    # Default to interactive stdout (a real terminal); the piped tests flip this.
+    # capsys leaves stdout a non-tty, which would otherwise force single-utterance
+    # mode and end every looping session after one utterance.
+    monkeypatch.setattr(dictate_exec.stdio, "stdout_is_tty", lambda: True)
 
     def fake_mic(*, target_rate, device=None, on_open=None):
         harness["mic"].update(target_rate=target_rate, device=device)
@@ -189,6 +193,28 @@ def test_once_exits_after_a_single_utterance(seams):
     assert seams["keys"].script
 
 
+def test_piped_stdout_auto_starts_one_utterance_then_exits(seams, monkeypatch, capsys):
+    # `assembly dictate | assembly llm …`: stdout is a pipe, not a tty. A looping
+    # session would keep the pipe open and hang the consumer, so recording
+    # auto-starts, the first Enter stops it, and the session exits on its own.
+    monkeypatch.setattr(dictate_exec.stdio, "stdout_is_tty", lambda: False)
+    # No leading toggle to *start* and no quit key: a single read(0) pops the
+    # Enter that stops the auto-started recording, then dictate exits.
+    seams["keys"] = FakeKeys(["\r", "\r", "\r"])
+    _run()
+    assert len(seams["calls"]) == 1
+    # Ended on the single-shot, not by draining the key script.
+    assert seams["keys"].script
+    # Auto-start: the only key read is the zero-timeout in-recording poll — no
+    # blocking idle read(None) waiting for a start keypress.
+    assert seams["keys"].timeouts == [0]
+    captured = capsys.readouterr()
+    assert captured.out.strip() == "hello world"
+    # The mic-open note fires immediately; the interactive start prompt is absent.
+    assert "Recording — press Enter to stop" in captured.err
+    assert "start recording" not in captured.err
+
+
 @pytest.mark.parametrize("quit_key", ["q", "Q", "\x1b", "\x04"])
 def test_quit_keys_end_the_session_without_recording(seams, quit_key, capsys):
     seams["keys"] = FakeKeys([quit_key, "\r", "\r"])