Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions aai_cli/commands/dictate/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from aai_cli import command_registry, help_panels, options
from aai_cli.app.context import run_with_options
from aai_cli.commands.dictate import _exec as dictate_exec
from aai_cli.core import choices
from aai_cli.core.sync_stt import MAX_AUDIO_SECONDS
from aai_cli.ui.help_text import examples_epilog

Expand All @@ -29,6 +30,7 @@
"assembly dictate --word-boost AssemblyAI --word-boost LeMUR",
),
("One JSON object per utterance", "assembly dictate --json"),
("Pipe the bare transcript onward", "assembly dictate -o text | assembly llm -f"),
]
),
)
Expand Down Expand Up @@ -58,6 +60,12 @@ def dictate(
max=float(MAX_AUDIO_SECONDS),
),
json_out: bool = options.json_option("Emit one JSON object per utterance"),
output_field: choices.TextOrJson | None = typer.Option(
None,
"-o",
"--output",
help="Output mode: text (the bare transcript per utterance, pipe-friendly) or json",
),
) -> None:
"""Push-to-talk dictation: record the mic, get the transcript back

Expand All @@ -73,5 +81,6 @@ def dictate(
device=device,
once=once,
max_seconds=max_seconds,
output_field=output_field,
)
run_with_options(ctx, dictate_exec.run_dictate, opts, json=json_out)
10 changes: 9 additions & 1 deletion aai_cli/commands/dictate/_exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@
from dataclasses import dataclass

from aai_cli.app.context import AppState
from aai_cli.core import sync_stt
from aai_cli.core import choices, sync_stt
from aai_cli.core.config_builder import split_csv
from aai_cli.core.hotkey import CTRL_C, CTRL_D, ESC, TerminalKeys
from aai_cli.core.microphone import MicrophoneSource
from aai_cli.streaming.session import resolve_output_modes
from aai_cli.ui import output

# Capture is resampled to one rate the Sync API accepts; 16 kHz mono PCM16 keeps
Expand All @@ -41,6 +42,8 @@ class DictateOptions:
device: int | None
once: bool
max_seconds: float
# -o/--output: text (the default bare-transcript shape) or json (== --json).
output_field: choices.TextOrJson | None = None


def _note(message: str, *, json_mode: bool, quiet: bool) -> None:
Expand Down Expand Up @@ -165,6 +168,11 @@ def _session(

def run_dictate(opts: DictateOptions, state: AppState, *, json_mode: bool) -> None:
"""Execute one `assembly dictate` invocation from already-parsed flags."""
# Fold -o/--output into json_mode (-o json == --json) and reject the
# contradictory --json + -o text pair, the same way `stream`/`agent` do.
# dictate has no live panel, so the text_mode half is unused — plain
# transcript text is already the non-JSON default in `_emit`.
_, json_mode = resolve_output_modes(opts.output_field, json_mode=json_mode)
try:
# Entering TerminalKeys validates the terminal (a usage precondition)
# before credentials, so a piped stdin reads as "needs a terminal" — not
Expand Down
6 changes: 6 additions & 0 deletions tests/__snapshots__/test_snapshots_help_run.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,10 @@
│ [default: 120.0] │
│ --json -j Emit one JSON object per │
│ utterance │
│ --output -o [text|json] Output mode: text (the │
│ bare transcript per │
│ utterance, pipe-friendly) │
│ or json │
│ --help Show this message and │
│ exit. │
╰──────────────────────────────────────────────────────────────────────────────╯
Expand All @@ -388,6 +392,8 @@
$ assembly dictate --word-boost AssemblyAI --word-boost LeMUR
One JSON object per utterance
$ assembly dictate --json
Pipe the bare transcript onward
$ assembly dictate -o text | assembly llm -f



Expand Down
32 changes: 30 additions & 2 deletions tests/test_dictate_exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@

from aai_cli.app.context import AppState
from aai_cli.commands.dictate import _exec as dictate_exec
from aai_cli.core import config, sync_stt
from aai_cli.core.errors import CLIError
from aai_cli.core import choices, config, sync_stt
from aai_cli.core.errors import CLIError, UsageError

DICTATE_DEFAULTS = dictate_exec.DictateOptions(
language=None,
Expand Down Expand Up @@ -145,6 +145,34 @@ def test_json_mode_emits_one_ndjson_object_per_utterance(seams, capsys):
assert captured.err == ""


def test_output_json_folds_into_ndjson_without_the_json_flag(seams, capsys):
    """`-o json` on its own must switch the output to NDJSON."""
    # The --json flag is not passed to _run here, so a parseable JSON object
    # on stdout shows that the -o/--output resolution ran.
    seams["keys"] = FakeKeys(["\r", "\r"])
    opts = dataclasses.replace(
        DICTATE_DEFAULTS,
        output_field=choices.TextOrJson.json,
    )
    _run(opts)
    payload = json.loads(capsys.readouterr().out)
    assert payload["text"] == "hello world"


def test_output_text_emits_bare_transcript(seams, capsys):
    """`-o text` spells out the human default: the bare transcript, no JSON."""
    seams["keys"] = FakeKeys(["\r", "\r"])
    opts = dataclasses.replace(
        DICTATE_DEFAULTS,
        output_field=choices.TextOrJson.text,
    )
    _run(opts)
    stdout = capsys.readouterr().out
    # Only the transcript itself is printed...
    assert stdout.strip() == "hello world"
    # ...and nothing that looks like a JSON object.
    assert "{" not in stdout


def test_output_text_conflicts_with_json_flag(seams):
    """`--json` together with `-o text` is rejected as a usage error."""
    # The two flags ask for contradictory output shapes; `stream`/`agent`
    # reject the same pair.
    seams["keys"] = FakeKeys(["\r", "\r"])
    text_opts = dataclasses.replace(
        DICTATE_DEFAULTS,
        output_field=choices.TextOrJson.text,
    )
    with pytest.raises(UsageError):
        _run(text_opts, json_mode=True)


def test_quiet_suppresses_the_interactive_hints(seams, capsys):
seams["keys"] = FakeKeys(["\r", "\r"])
_run(state=AppState(quiet=True))
Expand Down
Loading