AssemblyAI · alexkroman · Jun 18, 2026 · Jun 18, 2026 · Jun 18, 2026 · Jun 18, 2026
diff --git a/aai_cli/code_agent/events.py b/aai_cli/code_agent/events.py
@@ -21,6 +21,19 @@ class AssistantText:
     text: str
 
 
+@dataclass(frozen=True)
+class AssistantDelta:
+    """One streamed token of the in-progress reply, shown live then superseded by AssistantText.
+
+    Emitted from langgraph's per-token ``messages`` stream so the front-end can render the
+    reply as it's generated; the authoritative full text still arrives as an AssistantText
+    when the step lands, so a consumer that ignores deltas (the headless renderer) loses
+    nothing.
+    """
+
+    text: str  # the token's text; assistant_delta() only builds one when this is non-empty
+
+
 @dataclass(frozen=True)
 class ToolCall:
     """The agent's request to run a tool (announced when not gated by approval)."""
@@ -44,7 +57,21 @@ class ErrorText:
     text: str
 
 
-Event = AssistantText | ToolCall | ToolResult | ErrorText
+Event = AssistantText | AssistantDelta | ToolCall | ToolResult | ErrorText
+
+
+def assistant_delta(payload: object) -> AssistantDelta | None:
+    """Return the assistant-text token carried by a ``messages``-mode stream payload, if any.
+
+    langgraph's ``messages`` mode yields ``(message_chunk, metadata)``; a bare chunk is taken
+    as-is. Only an ``AIMessage``/``AIMessageChunk`` whose content has text becomes a delta —
+    every other message kind, and a chunk with no text, gives ``None``.
+    """
+    message = payload[0] if isinstance(payload, tuple) and payload else payload
+    if type(message).__name__ in ("AIMessage", "AIMessageChunk"):
+        token = _text_of(getattr(message, "content", ""))
+        return AssistantDelta(token) if token else None
+    return None
 
 
 def _text_of(content: object) -> str:

diff --git a/aai_cli/code_agent/messages.py b/aai_cli/code_agent/messages.py
@@ -0,0 +1,110 @@
+"""Mounted transcript widgets for the coding-agent TUI.
+
+The transcript is a ``VerticalScroll`` of these widgets rather than an append-only ``RichLog``,
+which buys two things deepagents-code has: the assistant reply updates *in place* as it streams
+(no separate live region), and a tool's output is a collapsible row — a clipped preview that
+expands to the full output on Ctrl+O or a click.
+
+Dynamic content (model/tool/user strings) is wrapped in ``rich.text.Text`` so it's shown
+literally — Text doesn't parse console markup, so a stray ``[`` can't raise or inject styling.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+
+from rich.markdown import Markdown
+from rich.text import Text
+from textual.widgets import Static
+
+from aai_cli.code_agent.summarize import summarize_call, summarize_result
+
+_DIM = "#8a8f98"  # muted gray for tool lines / notes
+_ERROR = "#f04438"
+
+
+class Note(Static):
+    """A dim one-line transcript aside (``cancelling…``, ``copied…``, ``voice off…``)."""
+
+    def __init__(self, text: str) -> None:
+        super().__init__(Text(text, style=_DIM))  # Text shows it literally (no markup parse)
+
+
+class UserMessage(Static):
+    """The echoed user prompt, with a top margin so each turn is visually separated."""
+
+    DEFAULT_CSS = "UserMessage { margin-top: 1; }"  # the gap above each new turn
+
+    def __init__(self, text: str) -> None:
+        super().__init__(Text(f"» {text}", style="bold #38bdf8"))  # "» " marks the user's line
+
+
+class AssistantMessage(Static):
+    """The assistant's reply: streams plain text token-by-token, then renders as Markdown."""
+
+    def __init__(self) -> None:
+        super().__init__()
+        self._tokens: list[str] = []  # a list, not str +=, so appends stay cheap; `text` joins it
+
+    @property
+    def text(self) -> str:
+        """The reply text streamed so far (used to finalize a cancelled generation)."""
+        return "".join(self._tokens)
+
+    def stream(self, delta: str) -> None:
+        """Append a streamed token and repaint as plain text (cheap; no per-token markdown)."""
+        self._tokens.append(delta)
+        self.update(Text(self.text))  # re-joins every token so far; Text shows it literally
+
+    def finalize(self, text: str) -> None:
+        """Replace the streamed text with the authoritative reply, rendered as Markdown."""
+        self._tokens = [text]  # so `text` now returns the final reply, not the streamed pieces
+        self.update(Markdown(text))
+
+
+class ToolCallLine(Static):
+    """A compact tool-call line, e.g. ``→ write_file(app.py)`` (text from ``summarize_call``)."""
+
+    def __init__(self, name: str, args: Mapping[str, object]) -> None:
+        super().__init__(Text(f"→ {summarize_call(name, args)}", style=_DIM))
+
+
+class ErrorMessage(Static):
+    """A failed turn, shown as a ``✗ …`` line in the ``_ERROR`` colour instead of crashing the UI."""
+
+    def __init__(self, text: str) -> None:
+        super().__init__(Text(f"✗ {text}", style=_ERROR))
+
+
+class ToolOutput(Static):
+    """A tool's output: a clipped preview that expands to the full content (Ctrl+O / click)."""
+
+    def __init__(self, name: str, content: str) -> None:
+        super().__init__()
+        self._tool_name = name  # not _name: that would overwrite Textual Widget's own attr
+        self._full = content.strip()
+        self._preview = summarize_result(content)
+        self._expandable = self._preview != self._full  # nothing to expand when it fits already
+        self._expanded = False
+
+    def on_mount(self) -> None:  # first paint happens once the widget is mounted
+        self._repaint()
+
+    def on_click(self) -> None:  # a click toggles the row
+        self.toggle()
+
+    def toggle(self) -> None:
+        """Flip between the clipped preview and the full output (no-op when it all fits)."""
+        if not self._expandable:
+            return
+        self._expanded = not self._expanded
+        self._repaint()
+
+    def _repaint(self) -> None:  # rebuild the row for the current expanded/collapsed state
+        body = self._full if self._expanded else self._preview
+        line = Text(f"  {self._tool_name}: ", style=_DIM)
+        line.append(body, style=_DIM)
+        if self._expandable:
+            hint = " (Ctrl+O to collapse)" if self._expanded else " (Ctrl+O to expand)"
+            line.append(hint, style=f"{_DIM} italic")
+        self.update(line)
diff --git a/aai_cli/code_agent/modals.py b/aai_cli/code_agent/modals.py
@@ -0,0 +1,202 @@
+"""Bottom-docked modal screens for the coding-agent TUI: tool approval and agent questions.
+
+Split out of `tui.py` to keep each module under the file-length gate. Both are transparent
+``ModalScreen``s docked at the bottom, so the transcript stays visible above them (see the
+``ModalScreen { background: transparent }`` rule in :class:`~aai_cli.code_agent.tui.CodeAgentApp`).
+
+In voice mode each modal is also **spoken and voice-answerable**: when constructed with a
+``voice`` IO it speaks the prompt and listens for a spoken reply (approve / auto / reject, or a
+free-text answer), off the UI thread. The keyboard path always stays available as a fallback.
+"""
+
+from __future__ import annotations
+
+import re
+import threading
+from typing import TYPE_CHECKING, ClassVar
+
+from rich.markup import escape
+from textual.app import ComposeResult
+from textual.containers import Vertical
+from textual.screen import ModalScreen
+from textual.widgets import Input, Label
+
+from aai_cli.code_agent import banner, risk
+from aai_cli.code_agent.summarize import describe_args, full_args
+from aai_cli.core import errors
+
+if TYPE_CHECKING:
+    from collections.abc import Callable, Mapping
+
+    from aai_cli.code_agent.voice_ui import _VoiceIO
+
+
+def _spawn(target: Callable[[], None]) -> None:  # fire-and-forget: the thread is never joined
+    """Run ``target`` on a daemon thread — the voice legs block, so they stay off the UI thread."""
+    threading.Thread(target=target, daemon=True).start()  # pragma: no mutate
+
+
+# Spoken-answer vocabulary. A refusal wins first, so "don't auto-approve" can never switch
+# auto-approval on; an unclear answer also falls back to "reject" (the keyboard's safe default).
+_REJECT_WORDS = frozenset({"no", "reject", "deny", "stop", "cancel", "nope", "nah", "never"})
+_APPROVE_WORDS = frozenset({"yes", "approve", "yeah", "yep", "yup", "sure", "ok", "okay"})
+
+
+def approval_from_speech(text: str) -> str:
+    """Map a spoken reply to ``"approve"`` / ``"auto"`` / ``"reject"`` (refusal/unclear → reject)."""
+    lowered = text.lower().replace("’", "'")  # STT may emit a typographic apostrophe
+    words = set(re.findall(r"[a-z]+", lowered))
+    if words & _REJECT_WORDS or "don't" in lowered or "do not" in lowered:
+        return "reject"  # checked before "auto" so a negated auto-approve is never honoured
+    if "auto" in lowered or "always" in lowered:
+        return "auto"
+    if words & _APPROVE_WORDS or "go ahead" in lowered or "do it" in lowered:
+        return "approve"
+    return "reject"
+
+
+class ApprovalScreen(ModalScreen[str]):
+    """A compact, bottom-docked prompt to approve/auto-approve/reject one tool call.
+
+    Keyboard ``y / a / n`` (and ``e`` to expand the args); in voice mode it also speaks the
+    prompt and accepts a spoken approve/auto/reject. The transparent background leaves the
+    transcript visible, and a risky call (``rm -rf``, an internal fetch) carries a warning.
+    """
+
+    DEFAULT_CSS = """
+    ApprovalScreen { align: center bottom; background: transparent; }
+    ApprovalScreen #approvalbox {
+        dock: bottom; width: 1fr; height: auto;
+        border: round #f59e0b; background: #000000; padding: 0 1; margin: 0 1 1 1;
+    }
+    ApprovalScreen #approvalbox Label { height: auto; }
+    """
+    BINDINGS: ClassVar = [  # (key, action, label); each action maps to an action_<name> below
+        ("y", "approve", "Approve"),
+        ("a", "auto", "Auto-approve"),
+        ("n", "reject", "Reject"),
+        ("e", "expand", "Expand"),
+    ]
+
+    def __init__(
+        self, name: str, args: Mapping[str, object], *, voice: _VoiceIO | None = None
+    ) -> None:
+        super().__init__()
+        self._tool_name = name  # not _name: that shadows Textual Widget's str|None attr
+        self._args = args  # shown compactly via describe_args, in full via full_args
+        self._expanded = False  # toggled by `e`; collapsed (one-line) by default
+        self._voice = voice  # when set, the prompt is spoken and a spoken answer is accepted
+        self._answered = False  # guards against a voice answer and a keypress both dismissing
+
+    def compose(self) -> ComposeResult:  # optional warning, the detail line, then the key hints
+        with Vertical(id="approvalbox"):
+            warning = risk.risk_warning(self._tool_name, self._args)
+            if warning:  # a falsy result means no warning label is shown
+                yield Label(f"[b #f04438]⚠ {escape(warning)}[/]", id="approvalwarn")
+            yield Label(self._detail_markup(), id="approvaldetail")
+            yield Label(
+                f"[b #22c55e]y[/] approve   [b {banner.BRAND_HEX}]a[/] auto-approve   "
+                "[b #f04438]n[/] reject   [b]e[/] expand"
+            )
+
+    def on_mount(self) -> None:
+        if (voice := self._voice) is not None:  # drive the decision by voice, off the UI thread
+            _spawn(lambda: self._drive_by_voice(voice))
+
+    def _drive_by_voice(self, voice: _VoiceIO) -> None:
+        """Speak the prompt and accept a spoken approve/auto/reject (keyboard still works)."""
+        try:
+            voice.speak(self._spoken_prompt())
+            transcript = voice.listen()
+        except errors.CLIError:
+            return  # mic/STT failed: leave the keyboard hint as the way to answer
+        if transcript:  # silence (None) must not auto-reject a tool — wait for speech or a key
+            self.app.call_from_thread(self._decide, approval_from_speech(transcript))
+
+    def _spoken_prompt(self) -> str:
+        """The read-aloud version of the prompt: the tool, its arg, any warning, the options."""
+        parts = [f"Run {self._tool_name}."]
+        detail = describe_args(self._args)
+        if detail:  # skipped when describe_args returns nothing to read out
+            parts.append(f"{detail}.")
+        warning = risk.risk_warning(self._tool_name, self._args)
+        if warning:
+            parts.append(f"Warning: {warning}")
+        parts.append("Say approve, auto-approve, or reject.")
+        return " ".join(parts)
+
+    def _decide(self, decision: str) -> None:
+        """Dismiss once with the decision string, whether it came by spoken reply or keypress."""
+        if self._answered:  # a second answer (voice after key, or the reverse) is ignored
+            return
+        self._answered = True
+        self.dismiss(decision)
+
+    def _detail_markup(self) -> str:
+        """The 'Run tool X?' line — the compact arg, or the full args when expanded."""
+        args = full_args(self._args) if self._expanded else describe_args(self._args)
+        return f"Run tool [b]{escape(self._tool_name)}[/b]?  [dim]{escape(args)}[/dim]"
+
+    def action_expand(self) -> None:
+        """Toggle between the compact identifying arg and the full args (``e``)."""
+        self._expanded = not self._expanded
+        self.query_one("#approvaldetail", Label).update(self._detail_markup())
+
+    def action_approve(self) -> None:  # bound to ``y``
+        self._decide("approve")
+
+    def action_auto(self) -> None:  # bound to ``a``
+        self._decide("auto")
+
+    def action_reject(self) -> None:  # bound to ``n``
+        self._decide("reject")
+
+
+class AskScreen(ModalScreen[str]):
+    """A bottom-docked prompt that relays a question from the agent and returns the answer.
+
+    In voice mode it speaks the question and takes a spoken answer; otherwise the user types.
+    """
+
+    DEFAULT_CSS = """
+    AskScreen { align: center bottom; background: transparent; }
+    AskScreen #askbox {
+        dock: bottom; width: 1fr; height: auto;
+        border: round #3a3f55; background: #000000; padding: 0 1; margin: 0 1 1 1;
+    }
+    """
+
+    def __init__(self, question: str, *, voice: _VoiceIO | None = None) -> None:
+        super().__init__()
+        self._question = question  # escaped for the Label, spoken as-is in voice mode
+        self._voice = voice  # when set, the question is spoken and a spoken answer is accepted
+        self._answered = False  # guards against a spoken answer and Enter both dismissing
+
+    def compose(self) -> ComposeResult:  # the question label above a one-line answer Input
+        with Vertical(id="askbox"):
+            yield Label(f"[b]The agent asks:[/b] {escape(self._question)}")
+            yield Input(id="answer", placeholder="Type your answer and press Enter…")
+
+    def on_mount(self) -> None:
+        if (voice := self._voice) is not None:  # run the voice leg off the UI thread
+            _spawn(lambda: self._drive_by_voice(voice))
+
+    def _drive_by_voice(self, voice: _VoiceIO) -> None:
+        """Speak the question and submit a spoken answer (typing still works)."""
+        try:
+            voice.speak(f"The agent asks: {self._question}")
+            transcript = voice.listen()
+        except errors.CLIError:
+            return  # the voice leg failed: leave the typed Input as the way to answer
+        if transcript:  # nothing heard: keep the modal open for a typed answer
+            self.app.call_from_thread(self._answer, transcript)
+
+    def _answer(self, text: str) -> None:
+        """Dismiss once with the answer, whether spoken or typed."""
+        if self._answered:  # a second answer (voice after Enter, or the reverse) is ignored
+            return
+        self._answered = True
+        self.dismiss(text)
+
+    def on_input_submitted(self, event: Input.Submitted) -> None:  # Enter in the Input
+        self._answer(event.value)
diff --git a/aai_cli/code_agent/render.py b/aai_cli/code_agent/render.py
@@ -9,20 +9,14 @@
 
 from collections.abc import Callable
 
+from rich.markdown import Markdown
 from rich.markup import escape
 
 from aai_cli.code_agent.events import AssistantText, ErrorText, Event, ToolCall, ToolResult
 from aai_cli.code_agent.session import Approver
+from aai_cli.code_agent.summarize import summarize_call, summarize_result
 from aai_cli.ui import output
 
-# Tool output can be long; clip it for the inline transcript.
-_RESULT_PREVIEW = 2000
-
-
-def _format_args(args: dict[str, object]) -> str:
-    """A compact one-line view of a tool call's arguments."""
-    return ", ".join(f"{key}={value!r}" for key, value in args.items())
-
 
 class RichRenderer:
     """An :data:`~aai_cli.code_agent.session.EventSink` that prints to the Rich console."""
@@ -31,13 +25,16 @@ def __call__(self, event: Event) -> None:
         # escape() dynamic content so a model/tool string with "[" can't inject Rich
         # markup or raise MarkupError (matches the inline-escape convention in output.py).
         if isinstance(event, AssistantText):
-            output.console.print(escape(event.text))
+            # Render as Markdown so fenced code blocks are syntax-highlighted (and lists/
+            # headings format) instead of showing raw ``` markers — Markdown parses its own
+            # syntax, not console markup, so no escape()/injection concern.
+            output.console.print(Markdown(event.text))
         elif isinstance(event, ToolCall):
             output.console.print(
-                f"[aai.muted]→ {escape(event.name)}({escape(_format_args(event.args))})[/aai.muted]"
+                f"[aai.muted]→ {escape(summarize_call(event.name, event.args))}[/aai.muted]"
             )
         elif isinstance(event, ToolResult):
-            preview = escape(event.content.strip()[:_RESULT_PREVIEW])
+            preview = escape(summarize_result(event.content))
             output.console.print(f"[aai.muted]  {escape(event.name)}: {preview}[/aai.muted]")
         elif isinstance(event, ErrorText):
             output.error_console.print(output.fail(escape(event.text)))