Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .importlinter
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ name = Core modules do not import command modules
type = forbidden
source_modules =
aai_cli.agent
aai_cli.agent_exec
aai_cli.argscan
aai_cli.auth
aai_cli.client
Expand All @@ -24,11 +25,14 @@ source_modules =
aai_cli.help_text
aai_cli.init
aai_cli.llm
aai_cli.llm_exec
aai_cli.microphone
aai_cli.options
aai_cli.output
aai_cli.render
aai_cli.speak_exec
aai_cli.stdio
aai_cli.stream_exec
aai_cli.streaming
aai_cli.telemetry
aai_cli.theme
Expand Down
2 changes: 2 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@ A Typer CLI. `aai_cli/main.py` builds the `app`, registers each command sub-app,

Each file in `aai_cli/commands/` is a Typer sub-app (`transcribe`, `stream`, `agent`, `speak`, `llm`, `transcripts`, `login` (login/logout/whoami), `doctor`, `init`, `dev`, `share`, `deploy`, `setup`, `onboard`, `account` (balance/usage/limits), `keys`, `sessions`, `audit`, `telemetry` (status/enable/disable)). Command bodies run through `context.run_command(ctx, fn, json=...)`, which maps any `CLIError` to clean stderr output + the error's exit code. Commands never print tracebacks for expected failures.

**Options/run split for flag-heavy commands** (gh-CLI style): the Typer function only parses argv into a frozen `<Cmd>Options` dataclass and hands it to a module-level `run_<cmd>(opts, state, *, json_mode)` through a thin lambda adapter in `run_command(ctx, ..., json=...)`. The five run commands follow it — `aai_cli/stream_exec.py` (the reference implementation), `transcribe_exec.py`, `agent_exec.py`, `speak_exec.py`, `llm_exec.py`. Because the run path is a plain function of data, tests construct options directly (`dataclasses.replace` off a defaults instance, see `tests/test_stream_exec.py` and `tests/test_command_options_seam.py`) instead of round-tripping argv through `CliRunner` — which is also the cheap way to kill mutation-gate mutants on orchestration lines. Follow this for new or heavily-reworked commands with long bodies; small commands keep the inline `body()` closure — the dataclass is pure ceremony there.

### Cross-cutting state (resolution order matters)

- **`context.py`** — `AppState` (profile, env) is attached to the Typer context in the root `@app.callback()`. `run_command` is the standard command wrapper.
Expand Down
154 changes: 154 additions & 0 deletions aai_cli/agent_exec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
"""Run logic for `assembly agent`: the options/run split (see AGENTS.md).

The command module (aai_cli/commands/agent.py) only parses argv — it builds an
``AgentOptions`` and hands it to ``run_agent`` via ``context.run_command``, so tests
can drive validation, --show-code, and session wiring by constructing options
directly, with no CliRunner argv round-trip.
"""

from __future__ import annotations

import contextlib
from dataclasses import dataclass
from pathlib import Path
from typing import Any

import typer

from aai_cli import choices, client, code_gen, output
from aai_cli.agent.audio import SAMPLE_RATE, DuplexAudio, NullPlayer
from aai_cli.agent.render import AgentRenderer
from aai_cli.agent.session import AgentRunConfig, run_session
from aai_cli.agent.voices import VOICE_NAMES
from aai_cli.context import AppState
from aai_cli.errors import CLIError, UsageError
from aai_cli.streaming.session import validate_output_flags
from aai_cli.streaming.sources import FileSource


@dataclass(frozen=True)
class AgentOptions:
    """Immutable bundle of the parsed `assembly agent` conversation flags.

    Two flags are deliberately absent. ``--list-voices`` is dispatched by the
    command module to its own auth-free body, and ``--json`` is resolved by
    run_command into the ``json_mode`` argument of ``run_agent``.
    """

    # Audio input selection; either one being set makes the run file-driven.
    source: str | None
    sample: bool

    # Persona: the voice must be one of VOICE_NAMES, and a system_prompt_file
    # (if given) overrides system_prompt.
    voice: str
    system_prompt: str
    system_prompt_file: Path | None
    greeting: str

    # Microphone device; rejected when the run is file-driven.
    device: int | None

    # Output shaping, checked together with json_mode before the run starts.
    output_field: choices.TextOrJson | None

    # Print the equivalent script and exit instead of running a session.
    show_code: bool


def _resolve_system_prompt(system_prompt: str, system_prompt_file: Path | None) -> str:
    """Return the persona text for the session.

    A ``--system-prompt-file`` (if given) overrides ``--system-prompt``; the
    file is decoded as UTF-8.

    Args:
        system_prompt: The inline ``--system-prompt`` text.
        system_prompt_file: Path given via ``--system-prompt-file``, or ``None``.

    Returns:
        The file's contents when a file was given, otherwise ``system_prompt``.

    Raises:
        CLIError: With exit code 2 when the file cannot be read or is not
            valid UTF-8 text.
    """
    if system_prompt_file is None:
        return system_prompt
    try:
        return system_prompt_file.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError: unless it is
        # caught here, a binary or non-UTF-8 file escapes as a raw traceback
        # instead of the clean CLIError every other read failure produces.
        raise CLIError(
            f"Could not read --system-prompt-file {system_prompt_file}: {exc}",
            error_type="file_not_found",
            exit_code=2,
            suggestion="Check the path and that the file is readable.",
        ) from exc


def _open_audio(
    renderer: AgentRenderer,
    *,
    source: str | None,
    sample: bool,
    device: int | None,
    from_file: bool,
) -> tuple[Any, Any]:
    """Return the ``(mic, player)`` pair for a file-driven or live-mic run.

    A live run shares one ``DuplexAudio`` stream between capture and playback
    and emits a headphones advisory through ``renderer.notice``. A file-driven
    run pairs a ``FileSource`` over the resolved clip with a ``NullPlayer``.
    """
    if not from_file:
        # Mic and speaker share a single full-duplex stream: macOS rejects two
        # separate streams on a device, which silently kills capture.
        stream = DuplexAudio(target_rate=SAMPLE_RATE, device=device)
        # notice() self-suppresses in JSON mode and routes to stderr otherwise,
        # so a piped `assembly agent | …` never reads this advisory as
        # transcript data.
        renderer.notice(
            "Use headphones — the mic stays open while the agent speaks, "
            "so speakers would let it hear itself.\n"
        )
        return stream.mic, stream.player

    # The clip is streamed as the user's speech and the run stops after the
    # agent replies. No greeting and full-duplex so no part of the clip is
    # muted/dropped, and a NullPlayer since nobody listens to the reply audio.
    clip = client.resolve_audio_source(source, sample=sample)
    return FileSource(clip), NullPlayer()


def _print_show_code(opts: AgentOptions, system_prompt_text: str) -> None:
    """Emit the equivalent agent script without authenticating or opening audio.

    The script goes out as raw stdout so `--show-code > script.py` works; the
    only other output is an advisory on stderr when an audio source was passed.
    """
    source_given = bool(opts.source) or opts.sample
    if source_given:
        # The snippet is microphone-driven: a faithful file-driven script would
        # need the CLI's whole ffmpeg-decode + ready-gate + exit-after-reply
        # machinery, which is impractical to inline. Say so rather than
        # silently dropping the source, and say it on stderr so
        # `--show-code > script.py` stays byte-clean.
        output.error_console.print(
            "[aai.warn]Note:[/aai.warn] the generated script uses the microphone; "
            "it does not stream the audio source you passed."
        )
    script = code_gen.agent(opts.voice, system_prompt_text, opts.greeting)
    output.print_code(script)


def run_agent(opts: AgentOptions, state: AppState, *, json_mode: bool) -> None:
    """Run one `assembly agent` conversation from already-parsed flags.

    Steps, in order: validate the output flags and the voice, resolve the
    persona text, short-circuit on ``--show-code``, check the audio source,
    resolve the API key, then open audio and drive the session.

    Raises:
        UsageError: For an unknown voice, or ``--device`` on a file-driven run.
        typer.Exit: With code 0 when a downstream consumer closes the pipe.
    """
    validate_output_flags(json_mode=json_mode, output_field=opts.output_field)
    as_text, as_json = output.stream_output_modes(opts.output_field, json_mode=json_mode)
    if opts.voice not in VOICE_NAMES:
        raise UsageError(
            f"Unknown voice {opts.voice!r}.",
            suggestion="Run 'assembly agent --list-voices' to see the options.",
        )
    persona = _resolve_system_prompt(opts.system_prompt, opts.system_prompt_file)

    if opts.show_code:
        _print_show_code(opts, persona)
        return

    from_file = bool(opts.source) or opts.sample
    if from_file:
        if opts.device is not None:
            raise UsageError("--device applies only to microphone input.")
        # Existence-check the clip ahead of credentials, so a typo'd path reads
        # as "file not found" instead of triggering a login.
        client.resolve_audio_source(opts.source, sample=opts.sample)
    api_key = state.resolve_api_key()

    renderer = AgentRenderer(json_mode=as_json, text_mode=as_text, mic_input=not from_file)
    mic, player = _open_audio(
        renderer,
        source=opts.source,
        sample=opts.sample,
        device=opts.device,
        from_file=from_file,
    )
    session_config = AgentRunConfig(
        voice=opts.voice,
        system_prompt=persona,
        # File-driven runs skip the greeting.
        greeting="" if from_file else opts.greeting,
        full_duplex=True,  # one duplex stream -> mic always open (use headphones)
        exit_after_reply=from_file,
    )
    try:
        run_session(api_key, renderer=renderer, player=player, mic=mic, config=session_config)
    except KeyboardInterrupt:
        renderer.stopped()
    except BrokenPipeError as exc:
        # Downstream consumer (e.g. `| head`) closed the pipe; stop quietly.
        raise typer.Exit(code=0) from exc
    finally:
        # Closing may hit the same closed pipe; that is not worth reporting.
        with contextlib.suppress(BrokenPipeError):
            renderer.close()
136 changes: 18 additions & 118 deletions aai_cli/commands/agent.py
Original file line number Diff line number Diff line change
@@ -1,77 +1,23 @@
from __future__ import annotations

import contextlib
from pathlib import Path
from typing import Any

import typer

from aai_cli import choices, client, code_gen, help_panels, options, output
from aai_cli.agent.audio import SAMPLE_RATE, DuplexAudio, NullPlayer
from aai_cli.agent.render import AgentRenderer
from aai_cli.agent.session import (
DEFAULT_GREETING,
DEFAULT_PROMPT,
AgentRunConfig,
run_session,
)
from aai_cli import agent_exec, choices, help_panels, options, output
from aai_cli.agent.session import DEFAULT_GREETING, DEFAULT_PROMPT
from aai_cli.agent.voices import (
DEFAULT_VOICE,
VOICE_NAMES,
VOICES,
complete_voice,
format_voice_list,
)
from aai_cli.context import AppState, run_command
from aai_cli.errors import CLIError, UsageError
from aai_cli.help_text import examples_epilog
from aai_cli.streaming.session import validate_output_flags
from aai_cli.streaming.sources import FileSource

app = typer.Typer()


def _resolve_system_prompt(system_prompt: str, system_prompt_file: Path | None) -> str:
    """Return the persona text for the session.

    A ``--system-prompt-file`` (if given) overrides ``--system-prompt``; the
    file is decoded as UTF-8.

    Args:
        system_prompt: The inline ``--system-prompt`` text.
        system_prompt_file: Path given via ``--system-prompt-file``, or ``None``.

    Returns:
        The file's contents when a file was given, otherwise ``system_prompt``.

    Raises:
        CLIError: With exit code 2 when the file cannot be read or is not
            valid UTF-8 text.
    """
    if system_prompt_file is None:
        return system_prompt
    try:
        return system_prompt_file.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError: unless it is
        # caught here, a binary or non-UTF-8 file escapes as a raw traceback
        # instead of the clean CLIError every other read failure produces.
        raise CLIError(
            f"Could not read --system-prompt-file {system_prompt_file}: {exc}",
            error_type="file_not_found",
            exit_code=2,
            suggestion="Check the path and that the file is readable.",
        ) from exc


def _open_audio(
    renderer: AgentRenderer,
    *,
    source: str | None,
    sample: bool,
    device: int | None,
    from_file: bool,
) -> tuple[Any, Any]:
    """Return the ``(mic, player)`` pair for a file-driven or live-mic run.

    A live run shares one ``DuplexAudio`` stream between capture and playback
    and emits a headphones advisory through ``renderer.notice``. A file-driven
    run pairs a ``FileSource`` over the resolved clip with a ``NullPlayer``.
    """
    if not from_file:
        # Mic and speaker share a single full-duplex stream: macOS rejects two
        # separate streams on a device, which silently kills capture.
        stream = DuplexAudio(target_rate=SAMPLE_RATE, device=device)
        # notice() self-suppresses in JSON mode and routes to stderr otherwise,
        # so a piped `assembly agent | …` never reads this advisory as
        # transcript data.
        renderer.notice(
            "Use headphones — the mic stays open while the agent speaks, "
            "so speakers would let it hear itself.\n"
        )
        return stream.mic, stream.player

    # The clip is streamed as the user's speech and the run stops after the
    # agent replies. No greeting and full-duplex so no part of the clip is
    # muted/dropped, and a NullPlayer since nobody listens to the reply audio.
    clip = client.resolve_audio_source(source, sample=sample)
    return FileSource(clip), NullPlayer()


def _emit_voice_list(_state: AppState, json_mode: bool) -> None:
"""--list-voices body, routed through run_command so --json yields a
machine-readable array instead of the human list; needs no auth."""
Expand Down Expand Up @@ -149,65 +95,19 @@ def agent(
run_command(ctx, _emit_voice_list, json=json_out)
return

def body(state: AppState, json_mode: bool) -> None:
validate_output_flags(json_mode=json_mode, output_field=output_field)
text_mode, json_mode = output.stream_output_modes(output_field, json_mode=json_mode)
if voice not in VOICE_NAMES:
raise UsageError(
f"Unknown voice {voice!r}.",
suggestion="Run 'assembly agent --list-voices' to see the options.",
)
system_prompt_text = _resolve_system_prompt(system_prompt, system_prompt_file)

if show_code:
# Print-only: emit the equivalent agent script from the flags and exit
# without authenticating or opening audio. Raw stdout for `> script.py`.
if source or sample:
# A faithful file-driven agent script would need the CLI's whole
# ffmpeg-decode + ready-gate + exit-after-reply machinery, which is
# impractical to inline; the snippet is microphone-driven, so say so
# on stderr instead of silently dropping the source. stderr keeps
# `--show-code > script.py` byte-clean.
output.error_console.print(
"[aai.warn]Note:[/aai.warn] the generated script uses the microphone; "
"it does not stream the audio source you passed."
)
output.print_code(code_gen.agent(voice, system_prompt_text, greeting))
return

from_file = bool(source) or sample
if from_file and device is not None:
raise UsageError("--device applies only to microphone input.")
if from_file:
# Existence-check the clip before credentials, so a typo'd path reads as
# "file not found" instead of triggering a login.
client.resolve_audio_source(source, sample=sample)
api_key = state.resolve_api_key()

renderer = AgentRenderer(
json_mode=json_mode,
text_mode=text_mode,
mic_input=not from_file,
)
audio, player = _open_audio(
renderer, source=source, sample=sample, device=device, from_file=from_file
)
run_config = AgentRunConfig(
voice=voice,
system_prompt=system_prompt_text,
greeting="" if from_file else greeting,
full_duplex=True, # one duplex stream -> mic always open (use headphones)
exit_after_reply=from_file,
)
try:
run_session(api_key, renderer=renderer, player=player, mic=audio, config=run_config)
except KeyboardInterrupt:
renderer.stopped()
except BrokenPipeError as exc:
# Downstream consumer (e.g. `| head`) closed the pipe; stop quietly.
raise typer.Exit(code=0) from exc
finally:
with contextlib.suppress(BrokenPipeError):
renderer.close()

run_command(ctx, body, json=json_out)
opts = agent_exec.AgentOptions(
source=source,
sample=sample,
voice=voice,
system_prompt=system_prompt,
system_prompt_file=system_prompt_file,
greeting=greeting,
device=device,
output_field=output_field,
show_code=show_code,
)
run_command(
ctx,
lambda state, json_mode: agent_exec.run_agent(opts, state, json_mode=json_mode),
json=json_out,
)
Loading
Loading