Skip to content

Commit e22191c

Browse files
alexkromanalexkroman-assemblyclaude
authored
Add --turn-detection presets to assembly stream (#169)
## What

Adds `assembly stream --turn-detection [aggressive|balanced|conservative]`, mapping the documented turn-detection quick-start configurations to one flag instead of three raw numbers.

| Preset | `end_of_turn_confidence` | `min_turn_silence` | `max_turn_silence` |
|---|---|---|---|
| aggressive | 0.4 | 160 | 400 |
| balanced | 0.4 | 400 | 1280 |
| conservative | 0.7 | 800 | 3600 |

Values are verbatim from `streaming/universal-streaming/turn-detection` in the docs.

## Behavior

- **Explicit flag wins its slot.** `--turn-detection conservative --min-turn-silence 200` → min=200, max stays 3600. Lets users start from a preset and tweak one knob.
- **No preset → unchanged.** All three turn flags pass through as before (server defaults apply).
- `--vad-threshold` is not part of any preset and is untouched.
- `--show-code` shows the resolved numbers for free, since the live path and code-gen both read `base_flags()`.

## Design

A new isolated module `aai_cli/streaming/turn_presets.py` holds the preset table + a `resolve()` merge function — disjoint from shared files, testable on its own. `stream/__init__.py` gains the option; `_exec.py`'s `base_flags()` calls `resolve()`.

## Scope

`stream` only. `agent` has no turn-detection surface today (its `AgentRunConfig`/voice-agent session never sets turn params), so presets there are a larger follow-up.

## Tests

- `tests/test_turn_presets.py` — resolver unit tests (each preset's exact values, per-slot override, passthrough)
- `tests/test_stream_exec.py` — `base_flags()` seam
- `tests/test_stream_command_flags.py` — argv → `StreamingParameters` end-to-end
- Regenerated `test_snapshots_help_run.ambr`

Full gate (`./scripts/check.sh`) green: 100% patch coverage, mutation gate clean.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-authored-by: Alex Kroman <alex@assemblyai.com>
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 13c02ef commit e22191c

7 files changed

Lines changed: 184 additions & 3 deletions

File tree

aai_cli/commands/stream/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from aai_cli.app.context import run_with_options
1111
from aai_cli.commands.stream import _exec as stream_exec
1212
from aai_cli.core import choices, llm
13+
from aai_cli.streaming.turn_presets import TurnDetectionPreset
1314
from aai_cli.ui.help_text import examples_epilog
1415

1516
app = typer.Typer()
@@ -113,6 +114,12 @@ def stream(
113114
rich_help_panel=help_panels.OPT_MODEL,
114115
),
115116
# turn detection
117+
turn_detection: TurnDetectionPreset | None = typer.Option(
118+
None,
119+
"--turn-detection",
120+
help="Turn-detection sensitivity preset",
121+
rich_help_panel=help_panels.OPT_TURNS,
122+
),
116123
end_of_turn_confidence_threshold: float | None = typer.Option(
117124
None,
118125
# Not "--end-of-turn-confidence-threshold": at 34 chars the name can't render
@@ -315,6 +322,7 @@ def stream(
315322
end_of_turn_confidence_threshold=end_of_turn_confidence_threshold,
316323
min_turn_silence=min_turn_silence,
317324
max_turn_silence=max_turn_silence,
325+
turn_detection=turn_detection,
318326
vad_threshold=vad_threshold,
319327
format_turns=format_turns,
320328
include_partial_turns=include_partial_turns,

aai_cli/commands/stream/_exec.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from aai_cli.core import choices, client, config_builder, youtube
2222
from aai_cli.core.errors import UsageError
2323
from aai_cli.core.microphone import MicrophoneSource
24+
from aai_cli.streaming import turn_presets
2425
from aai_cli.streaming.macos import MacSystemAudioSource
2526
from aai_cli.streaming.render import StreamRenderer
2627
from aai_cli.streaming.session import (
@@ -30,6 +31,7 @@
3031
validate_sources,
3132
)
3233
from aai_cli.streaming.sources import TARGET_RATE, FileSource, StdinSource
34+
from aai_cli.streaming.turn_presets import TurnDetectionPreset
3335
from aai_cli.ui import output
3436
from aai_cli.ui.follow import FollowRenderer
3537

@@ -57,6 +59,7 @@ class StreamOptions:
5759
end_of_turn_confidence_threshold: float | None
5860
min_turn_silence: int | None
5961
max_turn_silence: int | None
62+
turn_detection: TurnDetectionPreset | None
6063
vad_threshold: float | None
6164
format_turns: bool | None
6265
include_partial_turns: bool | None
@@ -93,15 +96,21 @@ def source_options(self) -> SourceOptions:
9396

9497
def base_flags(self) -> dict[str, object]:
9598
"""Every streaming flag except sample_rate, which is set per source at stream time."""
99+
end_of_turn_confidence_threshold, min_turn_silence, max_turn_silence = turn_presets.resolve(
100+
self.turn_detection,
101+
self.end_of_turn_confidence_threshold,
102+
self.min_turn_silence,
103+
self.max_turn_silence,
104+
)
96105
flags: dict[str, object] = {
97106
"speech_model": config_builder.enum_value(self.speech_model),
98107
"format_turns": self.format_turns if self.format_turns is not None else True,
99108
"encoding": config_builder.enum_value(self.encoding),
100109
"language_detection": self.language_detection,
101110
"domain": self.domain,
102-
"end_of_turn_confidence_threshold": self.end_of_turn_confidence_threshold,
103-
"min_turn_silence": self.min_turn_silence,
104-
"max_turn_silence": self.max_turn_silence,
111+
"end_of_turn_confidence_threshold": end_of_turn_confidence_threshold,
112+
"min_turn_silence": min_turn_silence,
113+
"max_turn_silence": max_turn_silence,
105114
"vad_threshold": self.vad_threshold,
106115
"include_partial_turns": self.include_partial_turns,
107116
"keyterms_prompt": list(self.keyterms_prompt) if self.keyterms_prompt else None,

aai_cli/streaming/turn_presets.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
"""Documented turn-detection quick-start presets for `assembly stream`.
2+
3+
The Aggressive/Balanced/Conservative configurations mirror the streaming
4+
turn-detection docs (streaming/universal-streaming/turn-detection). A preset
5+
sets the three end-of-turn knobs together; `resolve` lets any explicitly-passed
6+
raw flag override its slot so users can start from a preset and tweak one value.
7+
"""
8+
9+
from __future__ import annotations
10+
11+
import enum
12+
13+
14+
class TurnDetectionPreset(enum.StrEnum):
15+
"""Named end-of-turn sensitivity presets from the streaming turn-detection docs."""
16+
17+
aggressive = "aggressive"
18+
balanced = "balanced"
19+
conservative = "conservative"
20+
21+
22+
# (end_of_turn_confidence_threshold, min_turn_silence, max_turn_silence) per the docs'
# quick-start configurations. Keep these verbatim — they're the published recommendations.
# The tuple order matters: callers unpack the three slots positionally.
_PRESETS: dict[TurnDetectionPreset, tuple[float, int, int]] = {
    TurnDetectionPreset.aggressive: (0.4, 160, 400),
    TurnDetectionPreset.balanced: (0.4, 400, 1280),
    TurnDetectionPreset.conservative: (0.7, 800, 3600),
}
29+
30+
31+
def resolve(
    preset: TurnDetectionPreset | None,
    end_of_turn_confidence_threshold: float | None,
    min_turn_silence: int | None,
    max_turn_silence: int | None,
) -> tuple[float | None, int | None, int | None]:
    """Merge a preset with raw flags, where an explicitly-passed value wins its slot.

    With no preset the three values pass through unchanged (server defaults apply).

    Args:
        preset: The selected preset, or ``None`` when no preset was requested.
        end_of_turn_confidence_threshold: Explicit confidence value, or ``None`` if unset.
        min_turn_silence: Explicit minimum turn silence, or ``None`` if unset.
        max_turn_silence: Explicit maximum turn silence, or ``None`` if unset.

    Returns:
        ``(end_of_turn_confidence_threshold, min_turn_silence, max_turn_silence)``.
        With a preset every slot is filled; without one, unset slots stay ``None``.
    """
    if preset is None:
        return end_of_turn_confidence_threshold, min_turn_silence, max_turn_silence

    preset_eot, preset_min, preset_max = _PRESETS[preset]
    # "Explicit" means "is not None", not truthiness: a deliberate 0 / 0.0 must
    # still override the preset's value.
    # NOTE: the merged trio is not cross-checked here; an explicit min above the
    # preset's max (or the reverse) is returned as-is.
    return (
        end_of_turn_confidence_threshold
        if end_of_turn_confidence_threshold is not None
        else preset_eot,
        min_turn_silence if min_turn_silence is not None else preset_min,
        max_turn_silence if max_turn_silence is not None else preset_max,
    )

tests/__snapshots__/test_snapshots_help_run.ambr

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,9 @@
615615
│ (repeatable) │
616616
╰──────────────────────────────────────────────────────────────────────────────╯
617617
╭─ Turn Detection ─────────────────────────────────────────────────────────────╮
618+
│ --turn-detection [aggressive| Turn-detecti… │
619+
│ balanced|con sensitivity │
620+
│ servative] preset │
618621
│ --end-of-turn-confidence FLOAT RANGE End-of-turn │
619622
│ [0.0<=x<=1.0 confidence │
620623
│ ] (0-1) │

tests/test_stream_command_flags.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,41 @@ def test_stream_turn_silence_below_minimum_is_rejected(monkeypatch):
8181
assert result.exit_code == 2
8282

8383

84+
def test_stream_turn_detection_preset_reaches_params(monkeypatch):
    # --turn-detection balanced must thread through the command wiring into the
    # documented (0.4, 400, 1280) trio on StreamingParameters.
    config.set_api_key("default", "sk_live")
    captured = {}
    # Stub the streaming client so the test only records the params it was handed.
    monkeypatch.setattr(
        "aai_cli.commands.stream._exec.client.stream_audio",
        lambda api_key, source, *, params, **kw: captured.update(params=params),
    )

    result = runner.invoke(app, ["stream", "--sample", "--turn-detection", "balanced"])
    # If the command bailed before reaching the stub, fail with the CLI output
    # rather than an opaque KeyError on the lookup below.
    assert "params" in captured, result.output
    params = captured["params"]
    assert params.end_of_turn_confidence_threshold == 0.4
    assert params.min_turn_silence == 400
    assert params.max_turn_silence == 1280
99+
100+
101+
def test_stream_explicit_flag_overrides_preset_via_cli(monkeypatch):
    # A raw flag passed alongside the preset wins its slot through the real argv path.
    config.set_api_key("default", "sk_live")
    captured = {}
    # Stub the streaming client so the test only records the params it was handed.
    monkeypatch.setattr(
        "aai_cli.commands.stream._exec.client.stream_audio",
        lambda api_key, source, *, params, **kw: captured.update(params=params),
    )

    result = runner.invoke(
        app,
        ["stream", "--sample", "--turn-detection", "conservative", "--min-turn-silence", "200"],
    )
    # If the command bailed before reaching the stub, fail with the CLI output
    # rather than an opaque KeyError on the lookup below.
    assert "params" in captured, result.output
    params = captured["params"]
    assert params.min_turn_silence == 200  # explicit flag, not the preset's 800
    assert params.max_turn_silence == 3600  # preset's value survives
    assert params.end_of_turn_confidence_threshold == 0.7  # untouched preset slot
117+
118+
84119
def test_stream_config_escape_hatch(monkeypatch):
85120
config.set_api_key("default", "sk_live")
86121
captured = {}

tests/test_stream_exec.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from aai_cli.commands.stream import _exec as stream_exec
1818
from aai_cli.core import config, llm
1919
from aai_cli.core.errors import UsageError
20+
from aai_cli.streaming.turn_presets import TurnDetectionPreset
2021

2122
# The CLI's flag defaults, as data. Tests override per-case with dataclasses.replace.
2223
DEFAULTS = stream_exec.StreamOptions(
@@ -35,6 +36,7 @@
3536
end_of_turn_confidence_threshold=None,
3637
min_turn_silence=None,
3738
max_turn_silence=None,
39+
turn_detection=None,
3840
vad_threshold=None,
3941
format_turns=None,
4042
include_partial_turns=None,
@@ -123,6 +125,32 @@ def test_redact_pii_sub_enum_maps_to_its_string_value():
123125
assert DEFAULTS.base_flags()["redact_pii_sub"] is None # unset stays None
124126

125127

128+
def test_turn_detection_preset_fills_base_flags():
    # --turn-detection balanced supplies the documented (0.4, 400, 1280) trio.
    opts = dataclasses.replace(DEFAULTS, turn_detection=TurnDetectionPreset.balanced)
    flags = opts.base_flags()
    assert flags["end_of_turn_confidence_threshold"] == 0.4
    assert flags["min_turn_silence"] == 400
    assert flags["max_turn_silence"] == 1280
135+
136+
137+
def test_explicit_turn_flag_overrides_the_preset_slot():
    # A raw --min-turn-silence wins over the preset's value; the other slots stay.
    opts = dataclasses.replace(
        DEFAULTS, turn_detection=TurnDetectionPreset.balanced, min_turn_silence=900
    )
    flags = opts.base_flags()
    assert flags["min_turn_silence"] == 900
    # Both untouched slots keep the balanced preset's values.
    assert flags["max_turn_silence"] == 1280
    assert flags["end_of_turn_confidence_threshold"] == 0.4
145+
146+
147+
def test_no_preset_leaves_turn_flags_unset():
    # With neither a preset nor raw flags, all three turn slots stay None.
    flags = DEFAULTS.base_flags()
    assert flags["end_of_turn_confidence_threshold"] is None
    assert flags["min_turn_silence"] is None
    assert flags["max_turn_silence"] is None
152+
153+
126154
def test_stream_options_are_immutable():
127155
field_name = "sample"
128156
with pytest.raises(dataclasses.FrozenInstanceError):

tests/test_turn_presets.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
"""Unit tests for the streaming turn-detection presets (aai_cli.streaming.turn_presets).
2+
3+
The presets mirror the documented Aggressive/Balanced/Conservative quick-start
4+
configurations (streaming/universal-streaming/turn-detection). `resolve` merges a
5+
preset with explicitly-passed raw flags, where an explicit value always wins.
6+
"""
7+
8+
from __future__ import annotations
9+
10+
import pytest
11+
12+
from aai_cli.streaming import turn_presets
13+
from aai_cli.streaming.turn_presets import TurnDetectionPreset
14+
15+
16+
def test_no_preset_passes_raw_values_through_unchanged():
    # No preset: unset slots stay None and explicit values are returned untouched.
    assert turn_presets.resolve(None, None, None, None) == (None, None, None)
    assert turn_presets.resolve(None, 0.5, 300, 900) == (0.5, 300, 900)
19+
20+
21+
@pytest.mark.parametrize(
    ("preset", "expected"),
    [
        (TurnDetectionPreset.aggressive, (0.4, 160, 400)),
        (TurnDetectionPreset.balanced, (0.4, 400, 1280)),
        (TurnDetectionPreset.conservative, (0.7, 800, 3600)),
    ],
)
def test_preset_supplies_documented_values(preset, expected):
    # With no raw flags, each preset resolves to its documented trio.
    assert turn_presets.resolve(preset, None, None, None) == expected
31+
32+
33+
def test_explicit_min_turn_silence_overrides_only_its_slot():
    # balanced is (0.4, 400, 1280); overriding min_turn_silence keeps the other two.
    assert turn_presets.resolve(TurnDetectionPreset.balanced, None, 500, None) == (0.4, 500, 1280)
36+
37+
38+
def test_explicit_confidence_overrides_preset_confidence():
    # conservative is (0.7, 800, 3600); an explicit eot threshold wins.
    assert turn_presets.resolve(TurnDetectionPreset.conservative, 0.9, None, None) == (
        0.9,
        800,
        3600,
    )
45+
46+
47+
def test_all_explicit_flags_override_every_preset_slot():
    assert turn_presets.resolve(TurnDetectionPreset.aggressive, 0.1, 50, 100) == (0.1, 50, 100)
    # Falsy-but-explicit values must still win: the merge keys on "is not None",
    # so 0 / 0.0 may not fall back to the preset.
    assert turn_presets.resolve(TurnDetectionPreset.aggressive, 0.0, 0, 0) == (0.0, 0, 0)

0 commit comments

Comments
 (0)