AssemblyAI · alexkroman · Jun 18, 2026 · Jun 18, 2026 · aikido-pr-checks · Jun 18, 2026
diff --git a/REFERENCE.md b/REFERENCE.md
@@ -139,3 +139,27 @@ writes:
 derives that title from the transcript via the LLM Gateway once the stream ends,
 renaming the files to match (the timestamp stem is kept if the title is empty).
 The two are mutually exclusive.
+
+## Live agent tools (MCP)
+
+`assembly live` answers each spoken turn with a tool-using agent, so it can reach
+external tools mid-conversation. Out of the box it loads its built-in URL fetch,
+the AssemblyAI docs, and a curated, no-auth MCP toolset: `time` and `fetch`
+(`uvx`), `memory` and `filesystem` (`npx`, the latter rooted at the working
+directory), and an NWS-backed `weather` server.
+
+Firecrawl web search also loads when a `FIRECRAWL_API_KEY` is set; without it the
+session prints a one-line notice and runs without web search (every other default
+tool needs no key).
+
+`--mcp-config FILE` adds your own servers on top of the defaults, from a standard
+`mcpServers` JSON file — the same
+`{"mcpServers": {"name": {"command": "…", "args": […]}}}` shape Claude Desktop and
+Claude Code use. Repeat the flag to merge several files; a later file (or a config
+entry sharing a default's name) wins on a clash. Remote servers use `{"url": "…"}`
+instead of `command`/`args`.
+
+Each server is launched independently and best-effort: one that won't start (a
+missing `npx`/`uvx`, an offline host) drops only its own tools, so a single broken
+tool never sinks the session. MCP tools are a live-run feature and are not
+reflected in `--show-code` output.
diff --git a/aai_cli/agent_cascade/brain.py b/aai_cli/agent_cascade/brain.py
@@ -22,7 +22,7 @@
 from aai_cli.agent_cascade.config import CascadeConfig
 from aai_cli.code_agent.agent import CompiledAgent
 from aai_cli.code_agent.fetch_tool import FETCH_TOOL_NAME
-from aai_cli.code_agent.web_search import WEB_SEARCH_TOOL_NAME
+from aai_cli.code_agent.firecrawl_search import WEB_SEARCH_TOOL_NAME
 
 if TYPE_CHECKING:
     from langchain_core.tools import BaseTool
@@ -74,15 +74,35 @@ def _tool_capabilities(tools: Sequence[BaseTool]) -> list[str]:
     return capabilities
 
 
-def build_system_prompt(persona: str, *, tools: Sequence[BaseTool]) -> str:
+def _extra_capability(extra_tools: Sequence[BaseTool]) -> str | None:
+    """Name the user-configured MCP tools in one spoken-capability phrase.
+
+    The deepagents graph already shows the model each tool's schema, so all this adds
+    is the tool names, sorted alphabetically. That keeps the guidance from claiming
+    "no external tools" while MCP tools are bound, and tells the model they are there
+    to reach for. Returns ``None`` when ``extra_tools`` is empty.
+    """
+    ordered = sorted(tool.name for tool in extra_tools)
+    return f"use your connected tools ({', '.join(ordered)})" if ordered else None
+
+
+def build_system_prompt(
+    persona: str, *, tools: Sequence[BaseTool], extra_tools: Sequence[BaseTool] = ()
+) -> str:
     """The live agent's system prompt: the user's persona plus tool guidance.
 
-    The guidance is tailored to ``tools`` so the model is only told about capabilities it
-    actually has — advertising a missing tool (web search without a ``TAVILY_API_KEY``) made
-    the agent announce an action it then couldn't take, leaving the turn hanging with no
-    answer. With no tools at all the model is told to answer from its own knowledge.
+    The guidance is tailored to the bound tools so the model is only told about
+    capabilities it actually has — advertising a missing tool (web search without a
+    ``FIRECRAWL_API_KEY``) makes the agent announce an action it then can't take, leaving
+    the turn hanging with no answer. ``tools`` are the built-in legs (web search, URL
+    fetch, AssemblyAI docs); ``extra_tools`` are user-configured MCP tools, advertised
+    generically by name. With no tools at all the model answers from its own knowledge.
     """
     capabilities = _tool_capabilities(tools)
+    extra = _extra_capability(extra_tools)
+    if extra is not None:
+        capabilities.append(extra)
     if not capabilities:
         return f"{persona}\n\n{_NO_TOOLS_GUIDANCE}"
     guidance = (
@@ -100,12 +120,12 @@ def build_live_tools() -> list[BaseTool]:
     All three are reused from the coding agent's tool modules. Unlike there they are
     *not* approval-gated — a spoken turn can't wait for a keyboard confirmation, so the
     live agent only gets read-only tools and runs them automatically. Web search is
-    present only when ``TAVILY_API_KEY`` is set; the docs MCP is best-effort (an empty
+    present only when ``FIRECRAWL_API_KEY`` is set; the docs MCP is best-effort (an empty
     list when the host is unreachable), so neither blocks a session.
     """
     from aai_cli.code_agent.docs_mcp import load_docs_tools
     from aai_cli.code_agent.fetch_tool import build_fetch_tool
-    from aai_cli.code_agent.web_search import build_web_search_tool
+    from aai_cli.code_agent.firecrawl_search import build_web_search_tool
 
     tools: list[BaseTool] = [build_fetch_tool()]
     search = build_web_search_tool()
@@ -116,27 +136,36 @@ def build_live_tools() -> list[BaseTool]:
 
 
 def build_graph(
-    api_key: str, config: CascadeConfig, *, tools: Sequence[BaseTool] | None = None
+    api_key: str,
+    config: CascadeConfig,
+    *,
+    tools: Sequence[BaseTool] | None = None,
+    mcp_tools: Sequence[BaseTool] | None = None,
 ) -> CompiledAgent:
     """Compile the deepagents graph for one live session over the gateway model.
 
     Reuses the coding agent's gateway-bound ``ChatOpenAI`` (so the live agent can only
     ever reach AssemblyAI), threading the cascade's ``--max-tokens``/``--llm-config``
-    through it. ``tools`` defaults to :func:`build_live_tools`; tests pass an explicit
-    (possibly empty) list to skip the network-touching docs probe.
+    through it. ``tools`` defaults to :func:`build_live_tools`; ``mcp_tools`` defaults to
+    the tools of the servers in ``config.mcp_servers``. The two are kept apart so the
+    system prompt advertises the built-in legs and the MCP tools differently, but the
+    model is bound to both. Tests pass explicit (possibly empty) lists to skip the
+    network-touching docs/MCP probes.
     """
     from deepagents import create_deep_agent
 
+    from aai_cli.agent_cascade.mcp_tools import load_mcp_tools
     from aai_cli.code_agent.model import build_model
 
     model = build_model(
         api_key, model=config.model, max_tokens=config.max_tokens, extra=config.llm_extra
     )
-    resolved = build_live_tools() if tools is None else list(tools)
+    # Only ``None`` selects a default loader; an explicit list, even an empty one, is
+    # copied and used as given.
+    builtin = build_live_tools() if tools is None else list(tools)
+    extra = load_mcp_tools(config.mcp_servers) if mcp_tools is None else list(mcp_tools)
     return create_deep_agent(
         model=model,
-        tools=resolved,
-        system_prompt=build_system_prompt(config.system_prompt, tools=resolved),
+        # The model is bound to both sets; the prompt builder receives them separately.
+        tools=builtin + extra,
+        system_prompt=build_system_prompt(config.system_prompt, tools=builtin, extra_tools=extra),
     )
 
 

diff --git a/aai_cli/agent_cascade/config.py b/aai_cli/agent_cascade/config.py
@@ -43,6 +43,11 @@ class CascadeConfig:
     llm_extra: Mapping[str, object] = field(default_factory=dict[str, object])
     # Extra streaming-TTS query params (the --tts-config escape hatch).
     tts_extra: Mapping[str, str] = field(default_factory=dict[str, str])
+    # MCP servers (name -> launch spec) whose tools the deepagents brain can call. Empty
+    # by default; populated from --mcp-config files.
+    mcp_servers: Mapping[str, Mapping[str, object]] = field(
+        default_factory=dict[str, Mapping[str, object]]
+    )
     # Whether STT formats finalized turns. The reply trigger waits for the formatted
     # turn when on; with it off, an unformatted end-of-turn is the cue instead.
     format_turns: bool = True
diff --git a/aai_cli/agent_cascade/mcp_tools.py b/aai_cli/agent_cascade/mcp_tools.py
@@ -0,0 +1,146 @@
+"""Load tools from user-configured MCP servers for the `assembly live` agent.
+
+The live voice agent's brain is a deepagents graph, so any Model Context Protocol
+server's tools can be threaded into it through ``langchain-mcp-adapters`` — the same
+adapter `docs_mcp.py` uses for the hosted AssemblyAI docs. This lets a spoken
+conversation reach real tools (clock, weather, memory, a notes folder, …), bringing
+`assembly live` toward Gemini-Live / ChatGPT-voice parity.
+
+Two entry points feed the brain:
+
+- :func:`default_servers` returns a curated, zero/low-auth set (time, fetch, memory,
+  filesystem, weather) that every live session loads out of the box.
+- :func:`parse_mcp_config` reads one or more standard ``mcpServers`` JSON files — the
+  exact shape Claude Desktop / Claude Code use — so an existing config drops in
+  unchanged and can extend or override the defaults.
+
+Launching a server is **best-effort per server**: a missing ``npx``/``uvx`` or an
+offline run skips that one server (the others still load) rather than aborting the
+session — a single broken tool can't sink a live demo.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+from collections.abc import Callable, Mapping, Sequence
+from pathlib import Path
+from typing import TYPE_CHECKING
+
+from aai_cli.core import jsonshape
+from aai_cli.core.errors import UsageError
+
+if TYPE_CHECKING:
+    from langchain_core.tools import BaseTool
+    from langchain_mcp_adapters.sessions import Connection
+
+# One MCP server's launch spec, as it appears under "mcpServers" in a standard config:
+# stdio servers carry {command, args, env}; remote servers carry {url}.
+ServerSpec = Mapping[str, object]
+# A loader maps (server name, adapter connection dict) -> the server's tools. Injected in
+# tests so the per-server orchestration runs without subprocesses or sockets.
+Loader = Callable[[str, "Connection"], "list[BaseTool]"]
+
+
+def default_servers(filesystem_root: Path) -> dict[str, ServerSpec]:
+    """Return the curated MCP servers that every live session loads by default.
+
+    Each entry is a published reference server that runs without an API key. ``time``
+    and ``fetch`` launch through ``uvx`` (PyPI); ``memory``, ``filesystem`` and the
+    NWS-backed ``weather`` server launch through ``npx -y`` (npm). ``filesystem`` gets
+    ``filesystem_root`` (the working directory) as its final argument, so a request
+    like "summarize my notes file" stays scoped to that one folder.
+    """
+
+    def _uvx(package: str) -> ServerSpec:
+        # A PyPI-published server started as ``uvx <package>``.
+        return {"command": "uvx", "args": [package]}
+
+    def _npx(package: str, *extra: str) -> ServerSpec:
+        # An npm-published server started as ``npx -y <package> [extra...]``.
+        return {"command": "npx", "args": ["-y", package, *extra]}
+
+    return {
+        "time": _uvx("mcp-server-time"),
+        "fetch": _uvx("mcp-server-fetch"),
+        "memory": _npx("@modelcontextprotocol/server-memory"),
+        "filesystem": _npx("@modelcontextprotocol/server-filesystem", str(filesystem_root)),
+        "weather": _npx("@h1deya/mcp-server-weather"),
+    }
+
+
+def parse_mcp_config(paths: Sequence[Path]) -> dict[str, ServerSpec]:
+    """Merge the ``mcpServers`` maps from one or more standard MCP config JSON files.
+
+    Each file must be ``{"mcpServers": {name: spec, …}}`` (the Claude Desktop / Claude
+    Code shape). Files are read as UTF-8 in the order given, and later files win on a
+    name clash. An unreadable file, one that is not valid UTF-8 or not valid JSON, a
+    missing ``mcpServers`` object, or a spec with neither ``command`` nor ``url`` is
+    raised as a :class:`UsageError`, surfaced before any audio device opens.
+    """
+    servers: dict[str, ServerSpec] = {}
+    for path in paths:
+        try:
+            data = jsonshape.as_mapping(json.loads(path.read_text(encoding="utf-8")))
+        except (OSError, ValueError) as exc:
+            # ValueError is the parent of both json.JSONDecodeError and the
+            # UnicodeDecodeError that read_text raises for a non-UTF-8 file, so a badly
+            # encoded config is reported as a usage error instead of a raw traceback.
+            raise UsageError(f"Could not read MCP config {str(path)!r}: {exc}") from exc
+        entries = jsonshape.as_mapping(data.get("mcpServers")) if data is not None else None
+        if entries is None:
+            raise UsageError(
+                f"MCP config {str(path)!r} has no 'mcpServers' object.",
+                suggestion='Expected {"mcpServers": {"name": {"command": "…"}}}.',
+            )
+        for name, spec in entries.items():
+            # Plain assignment is what lets a later file override an earlier one.
+            servers[name] = _validate_spec(name, spec)
+    return servers
+
+
+def _validate_spec(name: str, spec: object) -> dict[str, object]:
+    """Hand back one ``mcpServers`` entry as a mapping, insisting on a launch target.
+
+    The entry is accepted when ``jsonshape.as_mapping`` yields a mapping that carries a
+    ``command`` key or a ``url`` key. Anything else raises a :class:`UsageError` naming
+    the offending server.
+    """
+    candidate = jsonshape.as_mapping(spec)
+    if candidate is not None and ("command" in candidate or "url" in candidate):
+        return candidate
+    raise UsageError(
+        f"MCP server {name!r} needs a 'command' or 'url'.",
+        suggestion='e.g. {"command": "uvx", "args": ["mcp-server-time"]}.',
+    )
+
+
+def _to_connection(spec: ServerSpec) -> Connection:
+    """Convert a standard ``mcpServers`` spec into a langchain-mcp-adapters connection.
+
+    A spec with a ``url`` key maps to the ``streamable_http`` transport. Every other
+    spec maps to the ``stdio`` transport, launched from ``command`` with stringified
+    ``args``; ``env`` is stringified when present and ``None`` otherwise.
+    """
+    if "url" not in spec:
+        argv = [str(item) for item in jsonshape.object_list(spec.get("args"))]
+        raw_env = jsonshape.as_mapping(spec.get("env"))
+        environ = None if raw_env is None else {str(k): str(v) for k, v in raw_env.items()}
+        return {"transport": "stdio", "command": str(spec["command"]), "args": argv, "env": environ}
+    return {"transport": "streamable_http", "url": str(spec["url"])}
+
+
+def _load_server(name: str, conn: Connection) -> list[BaseTool]:
+    """Fetch one MCP server's tools, driving the async adapter to completion.
+
+    A single-server ``MultiServerMCPClient`` is built for ``name`` and its
+    ``get_tools()`` coroutine is run with ``asyncio.run``.
+    """
+    from langchain_mcp_adapters.client import MultiServerMCPClient
+
+    async def _collect_tools() -> list[BaseTool]:
+        return await MultiServerMCPClient({name: conn}).get_tools()
+
+    return asyncio.run(_collect_tools())
+
+
+def _safe_load(loader: Loader, name: str, spec: ServerSpec) -> list[BaseTool]:
+    """One server's tools, or ``[]`` if it won't start — so a failure is never fatal.
+
+    Both translating ``spec`` into a connection and calling ``loader`` happen inside the
+    guard, so a malformed spec and a server that fails to launch are handled alike. The
+    failure is recorded at debug level (with its traceback) so a missing tool can be
+    diagnosed, and is never re-raised.
+    """
+    import logging
+
+    try:
+        return loader(name, _to_connection(spec))
+    except Exception:
+        # Deliberately broad: loading is best-effort per server, so any failure only
+        # drops this server's tools. Log it rather than swallowing it without a trace.
+        logging.getLogger(__name__).debug(
+            "MCP server %r failed to load; skipping its tools", name, exc_info=True
+        )
+        return []
+
+
+def load_mcp_tools(
+    servers: Mapping[str, ServerSpec], *, loader: Loader = _load_server
+) -> list[BaseTool]:
+    """Gather the tools of every configured MCP server into one flat list.
+
+    Servers are visited in mapping order and each goes through :func:`_safe_load` on
+    its own, so one that cannot start (npx not installed, an offline host) adds
+    nothing while the rest still load. ``loader`` is the only network/subprocess
+    seam, injected in tests.
+    """
+    return [
+        tool
+        for server_name, server_spec in servers.items()
+        for tool in _safe_load(loader, server_name, server_spec)
+    ]
diff --git a/aai_cli/code_agent/firecrawl_search.py b/aai_cli/code_agent/firecrawl_search.py
@@ -0,0 +1,37 @@
+"""Optional Firecrawl web search for the live voice agent.
+
+Firecrawl grounds the agent with live web search, enabled when a ``FIRECRAWL_API_KEY``
+is present in the environment. Search is read-only, so it is *not* gated behind the
+approval flow. With no key set we simply omit the tool (the agent still has its URL
+fetch and the AssemblyAI docs MCP), rather than erroring.
+
+This mirrors ``web_search.py`` (Tavily) but reuses Firecrawl's official LangChain
+integration; the live agent prefers it as its default search tool.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from aai_cli.core import env
+
+if TYPE_CHECKING:
+    from langchain_core.tools import BaseTool
+
+# Firecrawl's SDK reads this from the environment; we gate on its presence so we never
+# hand the agent a search tool that will fail on first use for lack of a key.
+FIRECRAWL_API_KEY_ENV = "FIRECRAWL_API_KEY"
+
+# The name ``FirecrawlSearch`` registers itself under. The prompt builder detects
+# web-search availability by this name, so a test pins it against the tool.
+WEB_SEARCH_TOOL_NAME = "firecrawl_search"
+
+
+def build_web_search_tool() -> BaseTool | None:
+    """Build the Firecrawl web-search tool, or return ``None`` without a key.
+
+    The tool is created only when ``FIRECRAWL_API_KEY`` has a non-empty value in the
+    environment. The Firecrawl integration is imported lazily, on that path alone.
+    """
+    if env.get(FIRECRAWL_API_KEY_ENV):
+        from langchain_firecrawl import FirecrawlSearch
+
+        return FirecrawlSearch()
+    return None
diff --git a/aai_cli/commands/agent_cascade/__init__.py b/aai_cli/commands/agent_cascade/__init__.py
@@ -25,6 +25,7 @@
 _PANEL_STT = "Speech-to-text"
 _PANEL_LLM = "Language model"
 _PANEL_TTS = "Text-to-speech"
+_PANEL_TOOLS = "Tools"
 
 app = typer.Typer()
 
@@ -56,6 +57,10 @@ def _emit_voice_list(_state: AppState, json_mode: bool) -> None:
                 "Give the agent a persona",
                 'assembly --sandbox live --system-prompt "You are a terse pirate."',
             ),
+            (
+                "Add your own MCP servers on top of the defaults",
+                "assembly --sandbox live --mcp-config ~/.config/mcp/servers.json",
+            ),
             ("See available voices", "assembly --sandbox live --list-voices"),
             (
                 "Print equivalent Python instead of running",
@@ -154,6 +159,14 @@ def live(
         dir_okay=False,
     ),
     greeting: str = typer.Option(DEFAULT_GREETING, "--greeting", help="Spoken greeting"),
+    mcp_config: list[Path] | None = typer.Option(
+        None,
+        "--mcp-config",
+        help='Extra MCP servers config JSON ({"mcpServers": {…}}) on top of the defaults (repeatable)',
+        exists=True,
+        dir_okay=False,
+        rich_help_panel=_PANEL_TOOLS,
+    ),
     device: int | None = typer.Option(None, "--device", help="Microphone device index"),
     list_voices: bool = typer.Option(False, "--list-voices", help="Print known voices and exit"),
     json_out: bool = options.json_option("Emit newline-delimited JSON events"),
@@ -187,8 +200,12 @@ def live(
     This only runs a conversation in the terminal — it writes no code. To build
     an agent-cascade app, run 'assembly init agent-cascade' instead.
 
-    Web search needs a TAVILY_API_KEY in the environment; without it the agent
-    keeps its URL-fetch and docs tools.
+    By default the agent loads a curated, no-auth MCP toolset (time, fetch,
+    memory, filesystem, weather) alongside its built-in URL fetch and AssemblyAI
+    docs. Firecrawl web search also loads when a FIRECRAWL_API_KEY is set (you'll
+    get a one-line notice when it isn't). Add your own servers with --mcp-config,
+    pointing at any standard mcpServers JSON file. A server that won't start is
+    skipped, so one broken tool never sinks the session.
     """
 
     if list_voices:
@@ -214,6 +231,7 @@ def live(
         llm_config=tuple(llm_config or ()),
         language=language,
         tts_config=tuple(tts_config or ()),
+        mcp_config=tuple(mcp_config or ()),
         show_code=show_code,
     )
     run_with_options(ctx, agent_cascade_exec.run_agent_cascade, opts, json=json_out)