Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 28 additions & 1 deletion aai_cli/code_agent/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,19 @@ class AssistantText:
text: str


@dataclass(frozen=True)
class AssistantDelta:
    """A single streamed token of the reply that is still being generated.

    Deltas come from langgraph's per-token ``messages`` stream so a front-end can paint the
    reply while it is produced. They are shown live and then superseded: the authoritative
    full text still arrives as an AssistantText when the step lands, so a consumer that
    ignores deltas (the headless renderer) loses nothing.
    """

    # The text of this one streamed token.
    text: str


@dataclass(frozen=True)
class ToolCall:
"""The agent's request to run a tool (announced when not gated by approval)."""
Expand All @@ -44,7 +57,21 @@ class ErrorText:
text: str


Event = AssistantText | ToolCall | ToolResult | ErrorText
Event = AssistantText | AssistantDelta | ToolCall | ToolResult | ErrorText


def assistant_delta(payload: object) -> AssistantDelta | None:
    """Pull a streaming assistant-text token out of a ``messages``-mode stream payload.

    langgraph's ``messages`` mode yields ``(message_chunk, metadata)``. Only text carried by
    an AI message is surfaced: tool-call requests and tool results hold no prose, and other
    message kinds are not the assistant speaking, so the live region shows just the reply.

    Returns:
        An ``AssistantDelta`` with the token text, or ``None`` when the payload is not an AI
        message or carries no text.
    """
    # Accept either the (chunk, metadata) tuple or a bare chunk.
    if isinstance(payload, tuple) and payload:
        message = payload[0]
    else:
        message = payload

    # Matched by class name rather than isinstance.
    kind = type(message).__name__
    if kind != "AIMessage" and kind != "AIMessageChunk":
        return None

    token = _text_of(getattr(message, "content", ""))
    if not token:
        return None
    return AssistantDelta(token)


def _text_of(content: object) -> str:
Expand Down
110 changes: 110 additions & 0 deletions aai_cli/code_agent/messages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
"""Mounted transcript widgets for the coding-agent TUI.

The transcript is a ``VerticalScroll`` of these widgets rather than an append-only ``RichLog``,
which buys two things deepagents-code has: the assistant reply updates *in place* as it streams
(no separate live region), and a tool's output is a collapsible row — a clipped preview that
expands to the full output on Ctrl+O or a click.

Dynamic content (model/tool/user strings) is wrapped in ``rich.text.Text`` so it's shown
literally — Text doesn't parse console markup, so a stray ``[`` can't raise or inject styling.
"""

from __future__ import annotations

from collections.abc import Mapping

from rich.markdown import Markdown
from rich.text import Text
from textual.widgets import Static

from aai_cli.code_agent.summarize import summarize_call, summarize_result

_DIM = "#8a8f98"  # muted gray for tool lines / notes
_ERROR = "#f04438"  # colour of the failed-turn line (ErrorMessage)


class Note(Static):
    """A single dimmed aside in the transcript (``cancelling…``, ``copied…``, ``voice off…``)."""

    def __init__(self, text: str) -> None:
        # Wrapped in Text so the note is displayed literally, in the muted colour.
        dimmed = Text(text, style=_DIM)
        super().__init__(dimmed)


class UserMessage(Static):
    """Echo of the prompt the user sent; a top margin visually separates each turn."""

    DEFAULT_CSS = "UserMessage { margin-top: 1; }"

    def __init__(self, text: str) -> None:
        # Text shows the prompt literally — user input is never parsed as console markup.
        echoed = Text(f"» {text}", style="bold #38bdf8")
        super().__init__(echoed)


class AssistantMessage(Static):
    """The assistant's reply: plain text while tokens stream in, Markdown once finalized."""

    def __init__(self) -> None:
        super().__init__()
        # Streamed pieces in arrival order; ``text`` joins them on demand.
        self._tokens: list[str] = []

    @property
    def text(self) -> str:
        """Everything streamed so far as one string (used to finalize a cancelled generation)."""
        pieces = self._tokens
        return "".join(pieces)

    def stream(self, delta: str) -> None:
        """Record one streamed token and repaint the reply as plain text.

        No Markdown is parsed per token; that happens once, in ``finalize``.
        """
        self._tokens.append(delta)
        so_far = Text(self.text)
        self.update(so_far)

    def finalize(self, text: str) -> None:
        """Swap the streamed text for the authoritative reply and render it as Markdown."""
        # The authoritative text replaces whatever was accumulated while streaming.
        self._tokens = [text]
        rendered = Markdown(text)
        self.update(rendered)


class ToolCallLine(Static):
    """One dim line announcing a tool call, e.g. ``→ write_file(app.py)``."""

    def __init__(self, name: str, args: Mapping[str, object]) -> None:
        # summarize_call produces the compact ``name(arg)`` form shown after the arrow.
        summary = summarize_call(name, args)
        super().__init__(Text(f"→ {summary}", style=_DIM))


class ErrorMessage(Static):
    """A turn that failed, reported in the transcript rather than crashing the UI."""

    def __init__(self, text: str) -> None:
        # Text keeps the error literal, so a "[" in the message is not read as markup.
        failure = Text(f"✗ {text}", style=_ERROR)
        super().__init__(failure)


class ToolOutput(Static):
    """A tool's output: a clipped preview that expands to the full content (Ctrl+O / click).

    The preview comes from ``summarize_result``. When it already equals the stripped output
    there is nothing to expand, so ``toggle`` is a no-op and no hint is appended.
    """

    def __init__(self, name: str, content: str) -> None:
        """Keep the tool ``name`` and both renderings of ``content``; painting waits for mount."""
        super().__init__()
        # Not ``_name``: Textual's DOMNode stores the widget's own ``name`` in ``self._name``,
        # so putting the tool name there would silently overwrite it.
        self._tool_name = name
        self._full = content.strip()
        self._preview = summarize_result(content)
        self._expandable = self._preview != self._full  # nothing to expand when it fits already
        self._expanded = False

    def on_mount(self) -> None:
        """Paint the initial (collapsed) view once the widget is mounted."""
        self._repaint()

    def on_click(self) -> None:
        """A click toggles the row, same as the keyboard shortcut."""
        self.toggle()

    def toggle(self) -> None:
        """Flip between the clipped preview and the full output (no-op when it all fits)."""
        if not self._expandable:
            return
        self._expanded = not self._expanded
        self._repaint()

    def _repaint(self) -> None:
        """Rebuild the dim ``name: body`` line, plus an expand/collapse hint when relevant."""
        body = self._full if self._expanded else self._preview
        # Built from Text pieces so tool output is shown literally, never parsed as markup.
        line = Text(f" {self._tool_name}: ", style=_DIM)
        line.append(body, style=_DIM)
        if self._expandable:
            hint = " (Ctrl+O to collapse)" if self._expanded else " (Ctrl+O to expand)"
            line.append(hint, style=f"{_DIM} italic")
        self.update(line)
202 changes: 202 additions & 0 deletions aai_cli/code_agent/modals.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
"""Bottom-docked modal screens for the coding-agent TUI: tool approval and agent questions.

Split out of `tui.py` to keep each module under the file-length gate. Both are transparent
``ModalScreen``s docked at the bottom, so the transcript stays visible above them (see the
``ModalScreen { background: transparent }`` rule in :class:`~aai_cli.code_agent.tui.CodeAgentApp`).

In voice mode each modal is also **spoken and voice-answerable**: when constructed with a
``voice`` IO it speaks the prompt and listens for a spoken reply (approve / auto / reject, or a
free-text answer), off the UI thread. The keyboard path always stays available as a fallback.
"""

from __future__ import annotations

import re
import threading
from typing import TYPE_CHECKING, ClassVar

from rich.markup import escape
from textual.app import ComposeResult
from textual.containers import Vertical
from textual.screen import ModalScreen
from textual.widgets import Input, Label

from aai_cli.code_agent import banner, risk
from aai_cli.code_agent.summarize import describe_args, full_args
from aai_cli.core import errors

if TYPE_CHECKING:
from collections.abc import Callable, Mapping

from aai_cli.code_agent.voice_ui import _VoiceIO


def _spawn(target: Callable[[], None]) -> None:
    """Start ``target`` on a daemon thread, keeping the blocking voice legs off the UI thread."""
    worker = threading.Thread(target=target, daemon=True)  # pragma: no mutate
    worker.start()


# Spoken-answer vocabulary. "auto" wins first (it implies approval); an unclear answer falls
# back to "reject" — the same safe default as the keyboard, so a tool never runs on a guess.
_REJECT_WORDS = frozenset({"no", "reject", "deny", "stop", "cancel", "nope", "nah"})
_APPROVE_WORDS = frozenset({"yes", "approve", "yeah", "yep", "yup", "sure", "ok", "okay"})


def approval_from_speech(text: str) -> str:
    """Map a spoken reply to ``"approve"`` / ``"auto"`` / ``"reject"`` (unclear → reject).

    Precedence is auto, then reject, then approve; anything unrecognised is a reject.
    A transcript may spell the negation as ``don't``, ``don’t`` (typographic apostrophe) or
    ``dont``. All three count as a reject, so "don’t do it" can never fall through to the
    "do it" approve phrase.

    Args:
        text: The transcribed reply; matching is case-insensitive.

    Returns:
        One of ``"approve"``, ``"auto"`` or ``"reject"``.
    """
    # Fold the typographic apostrophe to ASCII so the "don't" check sees both spellings.
    lowered = text.lower().replace("\u2019", "'")
    words = set(re.findall(r"[a-z]+", lowered))
    # NOTE(review): auto is tested before any negation, so "don't always approve" still maps
    # to "auto" — confirm that precedence is intended.
    if "auto" in lowered or "always" in lowered:
        return "auto"
    # "dont" (no apostrophe) survives the word split intact; "don't" splits into don + t.
    negated = "don't" in lowered or "do not" in lowered or "dont" in words
    if negated or words & _REJECT_WORDS:
        return "reject"
    if words & _APPROVE_WORDS or "go ahead" in lowered or "do it" in lowered:
        return "approve"
    return "reject"


class ApprovalScreen(ModalScreen[str]):
    """Bottom-docked prompt asking the user to approve, auto-approve or reject one tool call.

    Answer with ``y`` / ``a`` / ``n`` (``e`` expands the args). When a ``voice`` IO is given
    the prompt is also spoken and a spoken approve/auto/reject is accepted. The background is
    transparent so the transcript stays visible, and a risky call (``rm -rf``, an internal
    fetch) gets a warning line. The screen dismisses with ``"approve"``, ``"auto"`` or
    ``"reject"``.
    """

    DEFAULT_CSS = """
    ApprovalScreen { align: center bottom; background: transparent; }
    ApprovalScreen #approvalbox {
        dock: bottom; width: 1fr; height: auto;
        border: round #f59e0b; background: #000000; padding: 0 1; margin: 0 1 1 1;
    }
    ApprovalScreen #approvalbox Label { height: auto; }
    """
    BINDINGS: ClassVar = [
        ("y", "approve", "Approve"),
        ("a", "auto", "Auto-approve"),
        ("n", "reject", "Reject"),
        ("e", "expand", "Expand"),
    ]

    def __init__(
        self, name: str, args: Mapping[str, object], *, voice: _VoiceIO | None = None
    ) -> None:
        super().__init__()
        # Stored as _tool_name, not _name: the latter shadows Textual Widget's str|None attr.
        self._tool_name = name
        self._args = args
        # Optional voice IO: when present the prompt is spoken and a spoken answer accepted.
        self._voice = voice
        # Args start collapsed to one line; `e` toggles this.
        self._expanded = False
        # Set on the first answer so a voice reply and a keypress cannot both dismiss.
        self._answered = False

    def compose(self) -> ComposeResult:
        """Lay out the optional risk warning, the 'Run tool' line and the key hints."""
        with Vertical(id="approvalbox"):
            if warning := risk.risk_warning(self._tool_name, self._args):
                yield Label(f"[b #f04438]⚠ {escape(warning)}[/]", id="approvalwarn")
            yield Label(self._detail_markup(), id="approvaldetail")
            yield Label(
                f"[b #22c55e]y[/] approve [b {banner.BRAND_HEX}]a[/] auto-approve "
                "[b #f04438]n[/] reject [b]e[/] expand"
            )

    def on_mount(self) -> None:
        """In voice mode, start the spoken prompt/answer leg on a background thread."""
        voice = self._voice
        if voice is None:
            return
        # The voice leg blocks, so it must stay off the UI thread.
        _spawn(lambda: self._drive_by_voice(voice))

    def _drive_by_voice(self, voice: _VoiceIO) -> None:
        """Speak the prompt and accept a spoken approve/auto/reject (keyboard still works)."""
        try:
            voice.speak(self._spoken_prompt())
            heard = voice.listen()
        except errors.CLIError:
            # Mic/STT failed: the keyboard hint remains the way to answer.
            return
        if not heard:
            # Silence (None) must not auto-reject a tool — wait for speech or a key.
            return
        self.app.call_from_thread(self._decide, approval_from_speech(heard))

    def _spoken_prompt(self) -> str:
        """The read-aloud version of the prompt: the tool, its arg, any warning, the options."""
        tool = self._tool_name
        spoken = [f"Run {tool}."]
        if detail := describe_args(self._args):
            spoken.append(f"{detail}.")
        if warning := risk.risk_warning(tool, self._args):
            spoken.append(f"Warning: {warning}")
        spoken.append("Say approve, auto-approve, or reject.")
        return " ".join(spoken)

    def _decide(self, decision: str) -> None:
        """Dismiss with ``decision`` once; later answers (voice or key) are ignored."""
        if not self._answered:
            self._answered = True
            self.dismiss(decision)

    def _detail_markup(self) -> str:
        """The 'Run tool X?' line — the compact arg, or the full args when expanded."""
        if self._expanded:
            shown = full_args(self._args)
        else:
            shown = describe_args(self._args)
        # escape() both dynamic parts so a "[" in them cannot be read as markup.
        return f"Run tool [b]{escape(self._tool_name)}[/b]? [dim]{escape(shown)}[/dim]"

    def action_expand(self) -> None:
        """Toggle between the compact identifying arg and the full args (``e``)."""
        self._expanded = not self._expanded
        detail_label = self.query_one("#approvaldetail", Label)
        detail_label.update(self._detail_markup())

    def action_approve(self) -> None:
        """``y``: approve this call."""
        self._decide("approve")

    def action_auto(self) -> None:
        """``a``: answer ``auto`` (auto-approve)."""
        self._decide("auto")

    def action_reject(self) -> None:
        """``n``: reject this call."""
        self._decide("reject")


class AskScreen(ModalScreen[str]):
    """Bottom-docked prompt that shows a question from the agent and returns the answer.

    With a ``voice`` IO the question is spoken and a spoken answer is accepted; the text
    input stays available either way. The screen dismisses with the answer string.
    """

    DEFAULT_CSS = """
    AskScreen { align: center bottom; background: transparent; }
    AskScreen #askbox {
        dock: bottom; width: 1fr; height: auto;
        border: round #3a3f55; background: #000000; padding: 0 1; margin: 0 1 1 1;
    }
    """

    def __init__(self, question: str, *, voice: _VoiceIO | None = None) -> None:
        super().__init__()
        self._question = question
        # Optional voice IO; None means typed answers only.
        self._voice = voice
        # Set on the first answer so a spoken and a typed reply cannot both dismiss.
        self._answered = False

    def compose(self) -> ComposeResult:
        """Lay out the question label above the answer input."""
        with Vertical(id="askbox"):
            # escape() the question so a "[" in it cannot be read as markup.
            yield Label(f"[b]The agent asks:[/b] {escape(self._question)}")
            yield Input(id="answer", placeholder="Type your answer and press Enter…")

    def on_mount(self) -> None:
        """In voice mode, start the spoken question/answer leg on a background thread."""
        voice = self._voice
        if voice is None:
            return
        _spawn(lambda: self._drive_by_voice(voice))

    def _drive_by_voice(self, voice: _VoiceIO) -> None:
        """Speak the question and submit a spoken answer (typing still works)."""
        try:
            voice.speak(f"The agent asks: {self._question}")
            heard = voice.listen()
        except errors.CLIError:
            # Voice failed; the text input is still there to answer with.
            return
        if not heard:
            return
        self.app.call_from_thread(self._answer, heard)

    def _answer(self, text: str) -> None:
        """Dismiss with ``text`` once; any later answer (spoken or typed) is ignored."""
        if not self._answered:
            self._answered = True
            self.dismiss(text)

    def on_input_submitted(self, event: Input.Submitted) -> None:
        """Enter in the input submits the typed value as the answer."""
        self._answer(event.value)
19 changes: 8 additions & 11 deletions aai_cli/code_agent/render.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,14 @@

from collections.abc import Callable

from rich.markdown import Markdown
from rich.markup import escape

from aai_cli.code_agent.events import AssistantText, ErrorText, Event, ToolCall, ToolResult
from aai_cli.code_agent.session import Approver
from aai_cli.code_agent.summarize import summarize_call, summarize_result
from aai_cli.ui import output

# Tool output can be long; clip it for the inline transcript.
_RESULT_PREVIEW = 2000


def _format_args(args: dict[str, object]) -> str:
    """Render a tool call's arguments as one compact ``key=repr(value)`` line."""
    rendered = (f"{name}={val!r}" for name, val in args.items())
    return ", ".join(rendered)


class RichRenderer:
"""An :data:`~aai_cli.code_agent.session.EventSink` that prints to the Rich console."""
Expand All @@ -31,13 +25,16 @@ def __call__(self, event: Event) -> None:
# escape() dynamic content so a model/tool string with "[" can't inject Rich
# markup or raise MarkupError (matches the inline-escape convention in output.py).
if isinstance(event, AssistantText):
output.console.print(escape(event.text))
# Render as Markdown so fenced code blocks are syntax-highlighted (and lists/
# headings format) instead of showing raw ``` markers — Markdown parses its own
# syntax, not console markup, so no escape()/injection concern.
output.console.print(Markdown(event.text))
elif isinstance(event, ToolCall):
output.console.print(
f"[aai.muted]→ {escape(event.name)}({escape(_format_args(event.args))})[/aai.muted]"
f"[aai.muted]→ {escape(summarize_call(event.name, event.args))}[/aai.muted]"
)
elif isinstance(event, ToolResult):
preview = escape(event.content.strip()[:_RESULT_PREVIEW])
preview = escape(summarize_result(event.content))
output.console.print(f"[aai.muted] {escape(event.name)}: {preview}[/aai.muted]")
elif isinstance(event, ErrorText):
output.error_console.print(output.fail(escape(event.text)))
Expand Down
Loading
Loading