From e50dfed18289b7f89640713ca171df8836f873d4 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Thu, 18 Jun 2026 20:09:30 +0000
Subject: [PATCH] code agent: switch web search to Firecrawl, drop Tavily

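The coding agent (`assembly code`) now shares the same Firecrawl-backed
web search tool as the live voice agent, gated on FIRECRAWL_API_KEY.

The Tavily integration is removed: code_agent/web_search.py is deleted,
the langchain-tavily dependency is dropped, and --web now keys off
FIRECRAWL_API_KEY. The coding agent no longer reads TAVILY_API_KEY, so
anyone who relied on it must set FIRECRAWL_API_KEY to keep web search
enabled.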

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_019AiP3DXiv1c9QJh4Sa6fFZ
---
 aai_cli/AGENTS.md                             |  2 +-
 aai_cli/code_agent/fetch_tool.py              |  2 +-
 aai_cli/code_agent/firecrawl_search.py        |  6 +--
 aai_cli/code_agent/web_search.py              | 37 -------------------
 aai_cli/commands/code/__init__.py             |  2 +-
 aai_cli/commands/code/_exec.py                |  9 +++--
 pyproject.toml                                |  1 -
 .../test_snapshots_help_run.ambr              |  4 +-
 tests/test_code_agent.py                      | 12 +++---
 tests/test_code_command.py                    |  4 +-
 uv.lock                                       | 17 ---------
 11 files changed, 21 insertions(+), 75 deletions(-)
 delete mode 100644 aai_cli/code_agent/web_search.py

diff --git a/aai_cli/AGENTS.md b/aai_cli/AGENTS.md
index 8a27f3c..2a4e848 100644
--- a/aai_cli/AGENTS.md
+++ b/aai_cli/AGENTS.md
@@ -153,7 +153,7 @@ heavily-reworked commands with long bodies; small commands keep the inline
 - **`agent/`** — full-duplex voice agent (mic in, TTS out via `voices.py`).
 - **`agent_cascade/`** + `commands/agent_cascade/` — `assembly agent-cascade`: the same live terminal conversation as `assembly agent`, but **client-orchestrated** — `engine.run_cascade` wires Streaming STT → the LLM Gateway → streaming TTS itself instead of talking to the Voice Agent endpoint, mirroring what the `agent-cascade` `assembly init` template does server-side. **Sandbox-only** (streaming TTS has no prod host; guarded via `tts.session.require_available`). Reuses the agent slice's `DuplexAudio`/`AgentRenderer` and `core.client.stream_audio`/`core.llm.complete`/`tts.session.synthesize`; the three network legs are injected through `engine.CascadeDeps` (the `tts/session.py` seam) so the cascade — greeting, per-sentence TTS, barge-in, history window — is unit-tested against fakes with no sockets/mic/speaker. The LLM leg is a deepagents graph (`brain.py`); under `-v` (`debuglog.active()`) `brain._run_graph` *streams* that graph instead of `invoke`-ing it and logs each tool call/result/interim line as it lands (reusing `code_agent.events.message_events`), so a spoken turn that stalls mid-tool is debuggable — plain `invoke` runs the whole loop internally and `-v` would otherwise show only the httpx lines.
 - **`tts/`** + `commands/speak.py` — `assembly speak` synthesizes text to speech over the sandbox streaming-TTS WebSocket (`streaming-tts.sandbox000.…`). **Sandbox-only:** `session.is_available()` is false in production (empty `Environment.streaming_tts_host`), so the command exits 2 with a `--sandbox` hint. `session.synthesize` drives a Begin→Generate→Flush→Audio→Terminate protocol with an injectable `connect` for hermetic tests (mirrors `agent/session.py`); `audio.py` plays the PCM (default) or writes a WAV (`--out`). The single-voice default-playback path **streams**: `synthesize`'s `on_audio(chunk, sample_rate)` callback is wired to `audio.PcmPlayer.feed`, so speech starts on the first Audio frame (it opens the device lazily, since the rate is only known at Begin) instead of after the whole text — the win for a long `--url` page. `--out` (needs the full buffer) and the multi-voice dialogue path (`synthesize_dialogue` → `_output_audio` → buffered `play_pcm`) stay buffered; `synthesize` still returns the complete PCM for the summary regardless.
-- **`code_agent/`** + `commands/code/` — `assembly code`: a terminal coding agent (a bespoke port of langchain-ai/deepagents' `code` agent) that talks **only** to the LLM Gateway. `model.py` pins the model to `ChatOpenAI` against `llm_gateway_base`; `agent.py` builds the deepagents graph over a cwd-scoped `LocalShellBackend` (filesystem + shell tools), plus extra tools: the custom `assembly` CLI tool (`cli_tool.py`, runs `python -m aai_cli` with the key via child env, never argv), a URL `fetch_url` tool (`fetch_tool.py`), Tavily web search when `TAVILY_API_KEY` is set (`web_search.py`), an `ask_user` tool routed through an `AskBridge` to the front-end (`ask_tool.py`), and best-effort docs MCP tools (`docs_mcp.py`). Middleware adds installed skills (`skills.py`) and long-term memory (`memory.py`), each over its own dedicated backend. Sessions persist via a SQLite checkpointer (`store.py`) keyed by `--session`, so conversations resume. Approval gates the mutating tools (write/edit/execute/`assembly`/`fetch_url`); the general-purpose `task` subagent comes from deepagents by default. `session.py` drives the graph turn-by-turn (interrupt/resume = human approval), emitting framework-agnostic `events.py` to either the Textual TUI (`tui.py`, modeled on deepagents-code: transcript + input + approval/ask modals + clipboard copy) or the Rich fallback (`render.py`). The whole orchestration is tested by driving the **real** graph with a fake `BaseChatModel` (`tests/test_code_agent.py`), so no network/TTY is needed. **Voice is the default front-end in an interactive TTY** (`voice.py` + `_exec._run_voice`): `VoiceSession.listen` captures one spoken turn over Streaming STT (gating the mic shut the instant a turn finalizes) and `VoiceSession.speak` reads each assistant reply back over streaming TTS. It runs the **Rich REPL** loop (not the keyboard TUI) with a voice `read_line` + a reply-speaking sink. 
Readback needs streaming TTS, so it's **sandbox-only** (`tts.session.is_available`); in production the mic input still works and replies stay on screen. A mic-less box degrades to typed input on the first `AUDIO_ERROR_TYPES` `CLIError`; `--no-voice` selects the TUI, and a non-TTY (pipe/CI) the headless loop. Both legs (STT/TTS) are injected like the cascade's, so `tests/test_code_voice.py` drives it with fakes — no mic/speaker/socket.
+- **`code_agent/`** + `commands/code/` — `assembly code`: a terminal coding agent (a bespoke port of langchain-ai/deepagents' `code` agent) that talks **only** to the LLM Gateway. `model.py` pins the model to `ChatOpenAI` against `llm_gateway_base`; `agent.py` builds the deepagents graph over a cwd-scoped `LocalShellBackend` (filesystem + shell tools), plus extra tools: the custom `assembly` CLI tool (`cli_tool.py`, runs `python -m aai_cli` with the key via child env, never argv), a URL `fetch_url` tool (`fetch_tool.py`), Firecrawl web search when `FIRECRAWL_API_KEY` is set (`firecrawl_search.py`, shared with the live voice agent), an `ask_user` tool routed through an `AskBridge` to the front-end (`ask_tool.py`), and best-effort docs MCP tools (`docs_mcp.py`). Middleware adds installed skills (`skills.py`) and long-term memory (`memory.py`), each over its own dedicated backend. Sessions persist via a SQLite checkpointer (`store.py`) keyed by `--session`, so conversations resume. Approval gates the mutating tools (write/edit/execute/`assembly`/`fetch_url`); the general-purpose `task` subagent comes from deepagents by default. `session.py` drives the graph turn-by-turn (interrupt/resume = human approval), emitting framework-agnostic `events.py` to either the Textual TUI (`tui.py`, modeled on deepagents-code: transcript + input + approval/ask modals + clipboard copy) or the Rich fallback (`render.py`). The whole orchestration is tested by driving the **real** graph with a fake `BaseChatModel` (`tests/test_code_agent.py`), so no network/TTY is needed. **Voice is the default front-end in an interactive TTY** (`voice.py` + `_exec._run_voice`): `VoiceSession.listen` captures one spoken turn over Streaming STT (gating the mic shut the instant a turn finalizes) and `VoiceSession.speak` reads each assistant reply back over streaming TTS. It runs the **Rich REPL** loop (not the keyboard TUI) with a voice `read_line` + a reply-speaking sink. 
Readback needs streaming TTS, so it's **sandbox-only** (`tts.session.is_available`); in production the mic input still works and replies stay on screen. A mic-less box degrades to typed input on the first `AUDIO_ERROR_TYPES` `CLIError`; `--no-voice` selects the TUI, and a non-TTY (pipe/CI) the headless loop. Both legs (STT/TTS) are injected like the cascade's, so `tests/test_code_voice.py` drives it with fakes — no mic/speaker/socket.
 - **`code_gen/`** — backs `--show-code` on `transcribe`/`stream`/`agent`: builds a ready-to-run Python SDK script from exactly the flags passed (no API key needed; generated code reads `ASSEMBLYAI_API_KEY`).
 - **`auth/`** — browser-assisted `assembly login` via AMS + **Stytch B2B OAuth discovery** (`discovery.py`, `flow.py`, `loopback.py`, `ams.py`). Not Stytch Connected Apps.
 - **`init/`** — scaffolds a self-contained FastAPI + HTML starter (`audio-transcription`/`live-captions`/`voice-agent` templates), optionally installs deps and opens the browser; writes the key to a git-ignored `.env`.
diff --git a/aai_cli/code_agent/fetch_tool.py b/aai_cli/code_agent/fetch_tool.py
index 2c538e1..b473897 100644
--- a/aai_cli/code_agent/fetch_tool.py
+++ b/aai_cli/code_agent/fetch_tool.py
@@ -1,6 +1,6 @@
 """A URL-fetch tool for the coding agent (deepagents-code parity).
 
-Distinct from web *search* (Tavily): this fetches a specific URL the agent already
+Distinct from web *search* (Firecrawl): this fetches a specific URL the agent already
 knows and returns its text. It is approval-gated (see ``MUTATING_TOOLS``) because an
 arbitrary fetch can reach internal/SSRF targets, so the user confirms each one.
 """
diff --git a/aai_cli/code_agent/firecrawl_search.py b/aai_cli/code_agent/firecrawl_search.py
index 7a97134..e66be97 100644
--- a/aai_cli/code_agent/firecrawl_search.py
+++ b/aai_cli/code_agent/firecrawl_search.py
@@ -1,12 +1,12 @@
-"""Optional Firecrawl web search for the live voice agent.
+"""Optional Firecrawl web search for the coding and live voice agents.
 
 Firecrawl grounds the agent with live web search, enabled when a ``FIRECRAWL_API_KEY``
 is present in the environment. Search is read-only, so it is *not* gated behind the
 approval flow. With no key set we simply omit the tool (the agent still has its URL
 fetch and the AssemblyAI docs MCP), rather than erroring.
 
-This mirrors ``web_search.py`` (Tavily) but reuses Firecrawl's official LangChain
-integration; the live agent prefers it as its default search tool.
+Both ``assembly code`` (opt-out via ``--no-web``) and the live voice agent share
+this single search tool via Firecrawl's official LangChain integration.
 """
 
 from __future__ import annotations
diff --git a/aai_cli/code_agent/web_search.py b/aai_cli/code_agent/web_search.py
deleted file mode 100644
index 71ed2bf..0000000
--- a/aai_cli/code_agent/web_search.py
+++ /dev/null
@@ -1,37 +0,0 @@
-"""Optional Tavily web search for the coding agent (matching deepagents-code).
-
-dcode grounds the agent with Tavily web search; we offer the same as an opt-in tool,
-enabled when a ``TAVILY_API_KEY`` is present in the environment. Search is read-only,
-so it is *not* gated behind the approval flow. With no key set we simply omit the tool
-(the agent still has the AssemblyAI docs MCP for reference), rather than erroring.
-"""
-
-from __future__ import annotations
-
-from typing import TYPE_CHECKING
-
-from aai_cli.core import env
-
-if TYPE_CHECKING:
-    from langchain_core.tools import BaseTool
-
-# Tavily reads this from the environment; we gate on its presence so we never hand the
-# agent a tool that will fail on first use for lack of a key.
-TAVILY_API_KEY_ENV = "TAVILY_API_KEY"
-
-# The name ``TavilySearch`` registers itself under. Callers (e.g. the live agent's prompt
-# builder) detect web-search availability by this name, so a test pins it against the tool.
-WEB_SEARCH_TOOL_NAME = "tavily_search"
-
-# A small result cap keeps search responses inside the model's context budget.
-_DEFAULT_MAX_RESULTS = 5
-
-
-def build_web_search_tool(max_results: int = _DEFAULT_MAX_RESULTS) -> BaseTool | None:
-    """The Tavily web-search tool, or ``None`` when no ``TAVILY_API_KEY`` is set."""
-    if not env.get(TAVILY_API_KEY_ENV):
-        return None
-
-    from langchain_tavily import TavilySearch
-
-    return TavilySearch(max_results=max_results)
diff --git a/aai_cli/commands/code/__init__.py b/aai_cli/commands/code/__init__.py
index 9d71d40..6045e4f 100644
--- a/aai_cli/commands/code/__init__.py
+++ b/aai_cli/commands/code/__init__.py
@@ -58,7 +58,7 @@ def code(
         True, "--skills/--no-skills", help="Load installed agent skills (e.g. the assemblyai skill)"
     ),
     web: bool = typer.Option(
-        True, "--web/--no-web", help="Enable Tavily web search when TAVILY_API_KEY is set"
+        True, "--web/--no-web", help="Enable Firecrawl web search when FIRECRAWL_API_KEY is set"
     ),
     memory: bool = typer.Option(
         True, "--memory/--no-memory", help="Load and persist the agent's long-term memory"
diff --git a/aai_cli/commands/code/_exec.py b/aai_cli/commands/code/_exec.py
index 1628647..2985629 100644
--- a/aai_cli/commands/code/_exec.py
+++ b/aai_cli/commands/code/_exec.py
@@ -25,6 +25,7 @@
 from aai_cli.code_agent.docs_mcp import load_docs_tools
 from aai_cli.code_agent.events import AssistantText, Event
 from aai_cli.code_agent.fetch_tool import build_fetch_tool
+from aai_cli.code_agent.firecrawl_search import FIRECRAWL_API_KEY_ENV, build_web_search_tool
 from aai_cli.code_agent.memory import build_memory_middleware
 from aai_cli.code_agent.model import build_model
 from aai_cli.code_agent.prompt import DEFAULT_MODEL
@@ -38,7 +39,6 @@
     build_voice_session,
     spoken_summary,
 )
-from aai_cli.code_agent.web_search import TAVILY_API_KEY_ENV, build_web_search_tool
 from aai_cli.core import env, errors, stdio
 from aai_cli.ui import output
 
@@ -136,10 +136,11 @@ def _read_line() -> str | None:
 
 
 def _web_note(opts: CodeOptions) -> str | None:
-    """The "web search disabled" notice when --web is on but no Tavily key is set."""
-    if opts.web and not env.get(TAVILY_API_KEY_ENV):
+    """The "web search disabled" notice when --web is on but no Firecrawl key is set."""
+    if opts.web and not env.get(FIRECRAWL_API_KEY_ENV):
         return (
-            "TAVILY_API_KEY is not set, so web search is disabled. Get a key at https://tavily.com"
+            "FIRECRAWL_API_KEY is not set, so web search is disabled. "
+            "Get a key at https://firecrawl.dev"
         )
     return None
 
diff --git a/pyproject.toml b/pyproject.toml
index 04be6b4..6412592 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -80,7 +80,6 @@ dependencies = [
     "langchain-core>=1.4.7",
     "langchain-mcp-adapters>=0.3.0",
     "textual>=8.2.7",
-    "langchain-tavily>=0.2.18",
     "langgraph-checkpoint-sqlite>=3.1.0",
     "pyperclip>=1.11.0",
     "langchain-text-splitters>=1.0.0",
diff --git a/tests/__snapshots__/test_snapshots_help_run.ambr b/tests/__snapshots__/test_snapshots_help_run.ambr
index e25e9a8..65db349 100644
--- a/tests/__snapshots__/test_snapshots_help_run.ambr
+++ b/tests/__snapshots__/test_snapshots_help_run.ambr
@@ -299,8 +299,8 @@
   │ --skills       --no-skills               Load installed agent skills (e.g.   │
   │                                          the assemblyai skill)               │
   │                                          [default: skills]                   │
-  │ --web          --no-web                  Enable Tavily web search when       │
-  │                                          TAVILY_API_KEY is set               │
+  │ --web          --no-web                  Enable Firecrawl web search when    │
+  │                                          FIRECRAWL_API_KEY is set            │
   │                                          [default: web]                      │
   │ --memory       --no-memory               Load and persist the agent's        │
   │                                          long-term memory                    │
diff --git a/tests/test_code_agent.py b/tests/test_code_agent.py
index 05d9fcb..28a9561 100644
--- a/tests/test_code_agent.py
+++ b/tests/test_code_agent.py
@@ -20,10 +20,10 @@
     docs_mcp,
     events,
     fetch_tool,
+    firecrawl_search,
     memory,
     skills,
     store,
-    web_search,
 )
 from aai_cli.code_agent import model as model_mod
 from aai_cli.code_agent.agent import MUTATING_TOOLS, build_agent
@@ -225,12 +225,12 @@ def test_skills_middleware_present_and_absent(tmp_path: Path) -> None:
 
 
 def test_web_search_tool_gated_on_api_key(monkeypatch: pytest.MonkeyPatch) -> None:
-    monkeypatch.delenv("TAVILY_API_KEY", raising=False)
-    assert web_search.build_web_search_tool() is None
+    monkeypatch.delenv("FIRECRAWL_API_KEY", raising=False)
+    assert firecrawl_search.build_web_search_tool() is None
 
-    monkeypatch.setenv("TAVILY_API_KEY", "tvly-key")
-    tool = web_search.build_web_search_tool()
-    assert tool is not None and tool.name == "tavily_search"
+    monkeypatch.setenv("FIRECRAWL_API_KEY", "fc-key")
+    tool = firecrawl_search.build_web_search_tool()
+    assert tool is not None and tool.name == "firecrawl_search"
 
 
 def test_message_events_coerces_list_content() -> None:
diff --git a/tests/test_code_command.py b/tests/test_code_command.py
index b548cd6..044fc87 100644
--- a/tests/test_code_command.py
+++ b/tests/test_code_command.py
@@ -192,10 +192,10 @@ def test_build_agent_wires_model_tools_and_checkpointer(monkeypatch):
 
 
 def test_web_note_only_without_key(monkeypatch):
-    monkeypatch.delenv("TAVILY_API_KEY", raising=False)
+    monkeypatch.delenv("FIRECRAWL_API_KEY", raising=False)
     assert _exec._web_note(_opts(web=True)) is not None
     assert _exec._web_note(_opts(web=False)) is None
-    monkeypatch.setenv("TAVILY_API_KEY", "tvly")
+    monkeypatch.setenv("FIRECRAWL_API_KEY", "fc-x")
     assert _exec._web_note(_opts(web=True)) is None
 
 
diff --git a/uv.lock b/uv.lock
index 64baa45..9d8a95b 100644
--- a/uv.lock
+++ b/uv.lock
@@ -32,7 +32,6 @@ dependencies = [
     { name = "langchain-firecrawl" },
     { name = "langchain-mcp-adapters" },
     { name = "langchain-openai" },
-    { name = "langchain-tavily" },
     { name = "langchain-text-splitters" },
     { name = "langgraph" },
     { name = "langgraph-checkpoint-sqlite" },
@@ -98,7 +97,6 @@ requires-dist = [
     { name = "langchain-firecrawl", specifier = ">=0.1.0" },
     { name = "langchain-mcp-adapters", specifier = ">=0.3.0" },
     { name = "langchain-openai", specifier = ">=1.3.2" },
-    { name = "langchain-tavily", specifier = ">=0.2.18" },
     { name = "langchain-text-splitters", specifier = ">=1.0.0" },
     { name = "langgraph", specifier = ">=1.2.2" },
     { name = "langgraph-checkpoint-sqlite", specifier = ">=3.1.0" },
@@ -1674,21 +1672,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/13/0a/a1bfe72c6ec856e99773bbd96c8086421e554b3693d0142b9ea009c6ac92/langchain_protocol-0.0.17-py3-none-any.whl", hash = "sha256:982a08fe152586ed10d4ff3d538c2e0b5766e5f307cdea325e10be3f2c17cae6", size = 7096, upload-time = "2026-06-12T18:39:50.973Z" },
 ]
 
-[[package]]
-name = "langchain-tavily"
-version = "0.2.18"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "aiohttp" },
-    { name = "langchain" },
-    { name = "langchain-core" },
-    { name = "requests" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/d6/6c/b309ef3062b189a82463dc93553804566e71aa393f9ba8954750793c1a6f/langchain_tavily-0.2.18.tar.gz", hash = "sha256:cd7859ae1a6ce79236580ef67072ff5fc43c7ded94e7eac38ff04209ca85a320", size = 25378, upload-time = "2026-04-16T15:23:24.526Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/71/9c/0c043e4434b1823f0ac194f66036cbb0569275a99dcb890e0891ecd34fb2/langchain_tavily-0.2.18-py3-none-any.whl", hash = "sha256:dccf3ad1c50e2cb2a89bec11727555805c9df8abd42c1f3ad42ccad86e28aa44", size = 30814, upload-time = "2026-04-16T15:23:23.424Z" },
-]
-
 [[package]]
 name = "langchain-text-splitters"
 version = "1.1.2"