Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion aai_cli/agent_cascade/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@
from aai_cli.agent_cascade.voices import DEFAULT_VOICE
from aai_cli.core import llm

DEFAULT_MODEL = llm.DEFAULT_MODEL
# `assembly live` defaults to a capable gateway model (override with --model); kept a
# literal rather than llm.DEFAULT_MODEL so the live agent's default is independent of the
# one-shot `assembly llm` default.
DEFAULT_MODEL = "gpt-5.1"
DEFAULT_MAX_TOKENS = llm.DEFAULT_MAX_TOKENS
# The realtime model the cascade transcribes with (same as the agent-cascade template).
DEFAULT_SPEECH_MODEL = "u3-rt-pro"
Expand Down
119 changes: 97 additions & 22 deletions aai_cli/code_agent/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,17 @@

from __future__ import annotations

import json
from collections.abc import Mapping
from typing import TYPE_CHECKING

from aai_cli.core import environments

# The gateway omits Anthropic's required ``tool_use.input`` when an OpenAI tool call's
# ``arguments`` is empty (``""`` / ``"{}"``); substitute a minimal non-empty object so the
# field is emitted. See :func:`_ensure_tool_call_arguments`.
_PLACEHOLDER_ARGUMENTS = '{"_": ""}'

if TYPE_CHECKING:
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.outputs import ChatGenerationChunk
Expand Down Expand Up @@ -40,18 +46,23 @@ def _flatten_content(messages: object) -> None:


def _hoist_tool_call_ids(chunk: object) -> None:
"""Move each streamed tool-call ``id`` from inside ``function`` up to the tool-call top level.

The AssemblyAI LLM Gateway's *streaming* ``/v1/chat/completions`` nests the tool-call
``id`` under ``function`` — ``{"function": {"id": …, "name": …}}`` — instead of at the
tool-call's top level, which is where the OpenAI streaming spec (and
``langchain_openai``, via ``id=rtc.get("id")``) reads it. Left alone, every streamed
tool call parses with a name and arguments but ``id=None``, so the reply ``ToolMessage``
fails Pydantic validation (``tool_call_id`` must be a string) and the whole turn errors
out. We move the id back up before langchain converts the chunk; the id rides only the
first delta of a call, so later argument-only deltas (no ``function.id``) are left
untouched. (The non-streaming endpoint already places the id correctly, so only the
streaming path needs this.)
"""Normalize a streamed chunk's tool-call deltas: drop blank ones, hoist nested ids.

Two AssemblyAI LLM Gateway streaming quirks, both fixed in place before langchain
converts the chunk:

1. **Spurious blank deltas.** Every streamed turn (when tools are available) starts with
an empty tool-call delta — ``{"function": {"id": "", "name": "", "arguments": ""}}``.
On a pure-text turn no real call follows, so langchain is left with a tool call whose
``name`` is ``""``; deepagents then dispatches it and the turn dies with
``Error: is not a valid tool``. We drop any delta with no name, id, or arguments
(which also harmlessly drops the gateway's empty argument-continuation deltas).
2. **Misplaced id.** The id is nested under ``function`` instead of at the tool-call top
level where the OpenAI spec and ``langchain_openai`` (``id=rtc.get("id")``) read it,
so without help every call parses with ``id=None`` and its reply ``ToolMessage`` fails
validation. We move it back up; the id rides only a call's first delta.

(The non-streaming endpoint has neither quirk, so only the streaming path needs this.)
"""
if not isinstance(chunk, dict):
return
Expand All @@ -62,11 +73,26 @@ def _hoist_tool_call_ids(chunk: object) -> None:


def _hoist_in_choice(choice: object) -> None:
    """Drop blank tool-call deltas, then hoist ids, within one streamed choice's delta.

    Helper for ``_hoist_tool_call_ids``. Works in place and is a no-op unless ``choice``
    is a dict whose ``delta`` is a dict carrying a list under ``tool_calls``. The filtered
    list replaces ``delta["tool_calls"]`` so the gateway's spurious empty deltas never
    reach langchain, and id hoisting (delegated to ``_hoist_call_list``) runs only on the
    deltas that survive the filter.
    """
    if not isinstance(choice, dict):
        return
    delta = choice.get("delta")
    if not isinstance(delta, dict):
        return
    tool_calls = delta.get("tool_calls")
    if not isinstance(tool_calls, list):
        return
    # Filter first: a blank delta has nothing worth hoisting and must not be dispatched.
    kept = [tc for tc in tool_calls if not _is_blank_tool_call(tc)]
    delta["tool_calls"] = kept
    _hoist_call_list(kept)


def _is_blank_tool_call(tool_call: object) -> bool:
"""True for the gateway's spurious empty tool-call delta (no name, id, or arguments)."""
if not isinstance(tool_call, dict):
return False
function = tool_call.get("function")
if not isinstance(function, dict):
return False
return not function.get("name") and not function.get("id") and not function.get("arguments")


def _hoist_call_list(tool_calls: list[object]) -> None:
Expand All @@ -86,6 +112,52 @@ def _hoist_call_list(tool_calls: list[object]) -> None:
tool_call["id"] = function.pop("id")


def _ensure_tool_call_arguments(messages: object) -> None:
"""Give every empty tool-call ``arguments`` a non-empty placeholder object, in place.

The AssemblyAI LLM Gateway maps each OpenAI tool call's ``arguments`` (a JSON string)
onto Anthropic's ``tool_use.input`` object, but drops ``input`` entirely when the
arguments are empty (``""`` or ``"{}"``). Anthropic *requires* ``input`` to be present,
so replaying any argument-less tool call is rejected (400, surfaced as a 500 while
streaming) — and because the failing call sits in the conversation history, every later
turn fails too, wedging the session. We swap in a minimal non-empty object so the gateway
emits a valid ``input``. This only rewrites the request we send: the tool already ran
locally with its real (empty) arguments, and the gateway accepts the placeholder even for
tools that declare ``additionalProperties: false``. (Drop this once the gateway maps empty
arguments to ``input: {}`` itself.)
"""
if not isinstance(messages, list):
return
for message in messages:
tool_calls = message.get("tool_calls") if isinstance(message, dict) else None
if isinstance(tool_calls, list):
_fill_empty_arguments(tool_calls)


def _fill_empty_arguments(tool_calls: list[object]) -> None:
"""Replace each empty ``function.arguments`` with the placeholder (helper for the above)."""
for tool_call in tool_calls:
if not isinstance(tool_call, dict):
continue
function = tool_call.get("function")
if isinstance(function, dict) and _is_empty_arguments(function.get("arguments")):
function["arguments"] = _PLACEHOLDER_ARGUMENTS


def _is_empty_arguments(arguments: object) -> bool:
"""True when ``arguments`` is an OpenAI args string carrying no fields (``""``/``"{}"``)."""
if not isinstance(arguments, str):
return False
stripped = arguments.strip()
if not stripped:
return True
try:
parsed = json.loads(stripped)
except ValueError:
return False
return isinstance(parsed, dict) and not parsed


def build_model(
api_key: str,
*,
Expand Down Expand Up @@ -114,18 +186,21 @@ def build_model(
class _GatewayChatOpenAI(ChatOpenAI):
"""ChatOpenAI that adapts the gateway's OpenAI-incompatible quirks for langchain.

Two fix-ups, each working around a gateway response/request bug the upstream client
doesn't expect: flatten list-content messages the gateway 500s on (request side, see
:func:`_flatten_content`), and hoist each streamed tool-call ``id`` back to the
tool-call top level where langchain reads it (response side, see
:func:`_hoist_tool_call_ids`).
Three fix-ups, each working around a gateway request/response bug the upstream client
doesn't expect: flatten list-content messages the gateway 500s on and give empty
tool-call arguments a placeholder the gateway can map to ``tool_use.input`` (request
side, see :func:`_flatten_content` / :func:`_ensure_tool_call_arguments`), and hoist
each streamed tool-call ``id`` back to the tool-call top level where langchain reads it
(response side, see :func:`_hoist_tool_call_ids`).
"""

def _get_request_payload(
    self, input_: object, *, stop: list[str] | None = None, **kwargs: object
) -> dict:
    """Build the upstream request payload, then patch it for the gateway's request quirks.

    The payload comes from the parent implementation unchanged; its ``messages`` entry is
    then fixed up in place, once each: list-content messages are flattened
    (``_flatten_content``) and empty tool-call arguments get a placeholder
    (``_ensure_tool_call_arguments``). The same payload object is returned.
    """
    payload = super()._get_request_payload(input_, stop=stop, **kwargs)
    # Look the list up once; both fix-ups mutate it in place.
    messages = payload.get("messages")
    _flatten_content(messages)
    _ensure_tool_call_arguments(messages)
    return payload

def _convert_chunk_to_generation_chunk(
Expand Down
2 changes: 1 addition & 1 deletion aai_cli/code_agent/prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

# A capable gateway model by default; override with `--model`. The gateway is the
# source of truth for what's accepted, so this is only a sensible default.
DEFAULT_MODEL = "claude-sonnet-4-6"
DEFAULT_MODEL = "gpt-5.1"
# Generous ceiling so long edits/explanations aren't clipped; the gateway only bills
# tokens actually generated, so a high cap costs nothing on short replies.
DEFAULT_MAX_TOKENS = 8192
Expand Down
82 changes: 70 additions & 12 deletions aai_cli/code_agent/skills.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
"""Import installed agent skills (notably the `assemblyai` skill) into the agent.

`assembly setup` installs the `assemblyai` skill under the coding-agent config root
(`~/.claude/skills/assemblyai/`, honoring `CLAUDE_CONFIG_DIR`). deepagents can surface
skills to the model via progressive disclosure, but its `SkillsMiddleware` reads them
through a backend — and our main file backend is confined to the working directory.
So we give skills their *own* `FilesystemBackend` rooted at the skills directory and
inject a standalone `SkillsMiddleware`, independent of the cwd-scoped file tools.
`assembly setup` installs skills under the coding-agent config root
(`~/.claude/skills/<skill>/SKILL.md`, honoring `CLAUDE_CONFIG_DIR`). deepagents can
surface skills to the model via progressive disclosure, but its `SkillsMiddleware` reads
them through a backend — and our main file backend is confined to the working directory.
So we give skills their *own* `FilesystemBackend` rooted at the skills directory.

deepagents' stock skills prompt tells the model to open each `SKILL.md` with `read_file`,
but that tool is bound to the cwd-scoped backend and so can't reach a skill living under
`~/.claude/skills` (the model just gets ``File '/aai-cli/SKILL.md' not found``). We close
that gap with a dedicated read-only `read_skill` tool bound to the skills directory, and a
prompt that points the model at it instead of `read_file`.
"""

from __future__ import annotations
Expand All @@ -17,11 +22,34 @@

if TYPE_CHECKING:
from langchain.agents.middleware import AgentMiddleware
from langchain_core.tools import BaseTool

# Mirrors aai_cli.app.coding_agent.skills_root without importing the app layer (a
# feature slice stays below it): the agent config root, overridable for tests/agents.
_CLAUDE_CONFIG_DIR = "CLAUDE_CONFIG_DIR"

READ_SKILL_TOOL_NAME = "read_skill"

# Skills prompt fragment. Must keep the three slots deepagents substitutes at runtime
# (`{skills_locations}`, `{skills_load_warnings}`, `{skills_list}`); the constructor
# raises if any is missing. The one behavioral change from deepagents' stock prompt is
# steering the model to `read_skill` — skills live outside the cwd sandbox, so the
# ordinary `read_file` tool can't open them.
_SKILLS_PROMPT = """## Skills

You have a library of skills — specialized instructions and workflows for specific tasks.

{skills_locations}{skills_load_warnings}
**Available skills:**

{skills_list}

**How to use a skill (progressive disclosure):** you see each skill's name, description, and
path above, but read its full instructions only when a skill matches the task. Read it with
the `read_skill` tool, passing the path shown above — e.g. `read_skill("/assemblyai/SKILL.md")`
— then follow what it says. Do **not** use `read_file` for these paths: skills live outside the
working directory, so only `read_skill` can reach them."""


def skills_root() -> Path:
"""Directory holding installed skills (one subdir per skill, each with SKILL.md)."""
Expand All @@ -35,12 +63,41 @@ def _has_skills(root: Path) -> bool:
return root.is_dir() and any(child.joinpath("SKILL.md").is_file() for child in root.iterdir())


def build_skills_middleware(root: Path | None = None) -> AgentMiddleware | None:
"""A ``SkillsMiddleware`` over the installed skills, or ``None`` if none are present.
def _read_skill_file(root: Path, path: str) -> str:
"""Read ``path`` (as surfaced in the skills list) from under ``root``, guarding traversal.

``path`` is the backend-virtual path shown in the prompt (e.g. ``/assemblyai/SKILL.md``),
so it is resolved relative to ``root``. A path that escapes ``root`` (``..`` segments) or
names a missing file returns an error string the model can recover from rather than raising.
"""
target = (root / path.lstrip("/")).resolve()
if not target.is_relative_to(root.resolve()):
return f"Error: '{path}' is outside the skills directory."
if not target.is_file():
return f"Error: skill file '{path}' not found."
return target.read_text(encoding="utf-8")


def build_skill_reader(root: Path) -> BaseTool:
    """Build the ``read_skill`` tool: :func:`_read_skill_file` with ``root`` bound in.

    The inner function's docstring is what the ``tool`` factory receives alongside the
    function, so its wording is kept as-is.
    """
    from langchain_core.tools import tool

    def read_skill(path: str) -> str:
        """Read a skill's file (e.g. its SKILL.md) by the path shown in the skills list.
        Use this — not read_file — for any path under the skills library."""
        return _read_skill_file(root, path)

    # Equivalent to decorating ``read_skill`` with ``@tool(READ_SKILL_TOOL_NAME)``.
    as_tool = tool(READ_SKILL_TOOL_NAME)
    return as_tool(read_skill)


def build_skills(root: Path | None = None) -> tuple[AgentMiddleware, BaseTool] | None:
"""The skills ``(middleware, read_skill tool)`` pair, or ``None`` if no skills are present.

Returns ``None`` (rather than an empty middleware) so the caller simply omits it
from the stack when the user has run no `assembly setup` — the agent then starts
with no skills section instead of an empty one.
Returns ``None`` (rather than an empty middleware) so the caller simply omits both from
the stack when the user has run no `assembly setup` — the agent then starts with no skills
section and no `read_skill` tool instead of empty ones. The tool is paired with the
middleware because the prompt the middleware injects directs the model to it.
"""
root = root if root is not None else skills_root()
if not _has_skills(root):
Expand All @@ -50,4 +107,5 @@ def build_skills_middleware(root: Path | None = None) -> AgentMiddleware | None:
from deepagents.middleware.skills import SkillsMiddleware

backend = FilesystemBackend(root_dir=str(root), virtual_mode=True)
return SkillsMiddleware(backend=backend, sources=["/"])
middleware = SkillsMiddleware(backend=backend, sources=["/"], system_prompt=_SKILLS_PROMPT)
return middleware, build_skill_reader(root)
4 changes: 3 additions & 1 deletion aai_cli/code_agent/tui.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,9 @@ def _stop_spinner(self) -> None:
self.query_one("#spinner", Static).display = False

def on_worker_state_changed(self, event: Worker.StateChanged) -> None:
    """Finish the current turn once a worker has finished, if the app is still running.

    ``event.worker.is_finished`` says the worker is done; ``self.is_running`` says the app
    has not torn down yet. Both must hold before ``_finish_turn`` is called.
    """
    # Guard on is_running: a worker finishing *after* the app tears down (quit / test exit)
    # would drive _finish_turn against an unmounted DOM — NoMatches on "#spinner", a flake.
    if event.worker.is_finished and self.is_running:
        self._finish_turn()

def _finish_turn(self) -> None:
Expand Down
12 changes: 9 additions & 3 deletions aai_cli/code_agent/voice.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,10 @@ def __enter__(self) -> Player:
def __exit__(self, exc_type: object, *exc: object) -> object:
    """Drain on a clean exit, abort otherwise; never suppress.

    Interface stub: the body is this docstring only. ``exc_type`` is ``None`` when the
    ``with`` body finished without raising (the "clean exit" case) and the exception
    class otherwise; the remaining context-manager arguments arrive in ``*exc``.
    "Never suppress" means the return value must be falsy, so an in-flight exception
    keeps propagating to the caller.
    """

def feed(
    self, pcm: bytes, sample_rate: int, *, cancelled: Callable[[], bool] | None = None
) -> None:
    """Play one PCM chunk, polling ``cancelled`` between writes to stop mid-chunk.

    Interface stub: the body is this docstring only. ``pcm`` is the chunk's audio bytes
    and ``sample_rate`` its rate. ``cancelled`` is an optional zero-argument callable
    returning ``True`` once playback should stop; it is keyword-only and defaults to
    ``None``, so existing two-argument calls stay valid.
    """


def _stt_params(sample_rate: int) -> StreamingParameters:
Expand Down Expand Up @@ -219,7 +221,11 @@ def speak(self, text: str) -> None:
def feed(pcm: bytes, sample_rate: int) -> None:
    """Forward one synthesized chunk to the player, honoring the cancel flag.

    The flag is checked before playback, handed to the player so it can poll it
    during playback, and checked again afterwards; a set flag calls
    ``_abort_readback()`` (presumably raising the interruption that the enclosing
    ``speak`` catches — confirm against its definition). Each chunk is played
    exactly once.
    """
    if self._cancel.is_set():
        _abort_readback()
    # Poll cancel *during* playback too: a chunk can be seconds of audio, and
    # in the TUI the only cancel signal is this flag set from another thread.
    player.feed(pcm, sample_rate, cancelled=self._cancel.is_set)
    if self._cancel.is_set():
        _abort_readback()

self.synth_fn(self.api_key, config, on_audio=feed)
except _ReadbackInterrupted:
Expand Down
22 changes: 14 additions & 8 deletions aai_cli/commands/code/_exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from aai_cli.code_agent.prompt import DEFAULT_MODEL
from aai_cli.code_agent.render import RichRenderer, make_approver
from aai_cli.code_agent.session import CodeSession, EventSink, run_repl
from aai_cli.code_agent.skills import build_skills_middleware
from aai_cli.code_agent.skills import build_skills
from aai_cli.code_agent.store import build_checkpointer
from aai_cli.code_agent.voice import (
AUDIO_ERROR_TYPES,
Expand Down Expand Up @@ -82,24 +82,30 @@ def _assemble_tools(api_key: str, opts: CodeOptions, bridge: AskBridge) -> list[


def _assemble_middlewares(opts: CodeOptions) -> list[AgentMiddleware]:
"""Skills + long-term memory middleware, in load order."""
"""The long-term memory middleware (skills are wired in :func:`_build_agent`, since the
skills middleware pairs with a tool)."""
middlewares: list[AgentMiddleware] = []
if opts.skills:
skills = build_skills_middleware()
if skills is not None:
middlewares.append(skills)
if opts.memory:
middlewares.append(build_memory_middleware())
return middlewares


def _build_agent(api_key: str, opts: CodeOptions, bridge: AskBridge) -> CompiledAgent:
    """Wire the gateway model + tools + middlewares + checkpointer into the agent.

    Tools and middlewares are assembled once up front. When ``opts.skills`` is set and
    ``build_skills()`` finds installed skills, its middleware and ``read_skill`` tool are
    added as a pair. The agent is rooted at the resolved ``opts.root_dir``;
    ``opts.persist`` and ``opts.auto`` are passed through to the checkpointer and the
    auto-approve setting.
    """
    tools = _assemble_tools(api_key, opts, bridge)
    middlewares = _assemble_middlewares(opts)
    # Skills add both a middleware (the skills prompt section) and the `read_skill` tool the
    # prompt directs the model to; load the middleware ahead of memory to match prior order.
    skills = build_skills() if opts.skills else None
    if skills is not None:
        middleware, reader = skills
        middlewares.insert(0, middleware)
        tools.append(reader)
    return build_agent(
        model=build_model(api_key, model=opts.model),
        root_dir=opts.root_dir.resolve(),
        tools=tools,
        middlewares=middlewares,
        checkpointer=build_checkpointer(persist=opts.persist),
        auto_approve=opts.auto,
    )
Expand Down
Loading
Loading