From bdee1bb440b1dc93e01cfbac56ca63c5516512c6 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 17 May 2026 21:13:12 -0500 Subject: [PATCH 01/16] agentgrep(refactor[mcp]): Split mcp.py into mcp/ subpackage why: A single 625-line mcp.py is fine for two tools but blocks the planned domain coverage expansion. Per-domain tool modules with a register(mcp) dispatcher mirror the libtmux-mcp pattern and keep each module focused. what: - Replace src/agentgrep/mcp.py with the src/agentgrep/mcp/ subpackage. - Models, protocols, instructions, resources, prompts, and tools each get their own module. - tools/__init__.py exposes register_tools(mcp); server.py imports and calls it during build_mcp_server(). - Entry point agentgrep-mcp = agentgrep.mcp:main keeps working via the mcp/__init__.py re-export. --- src/agentgrep/mcp.py | 625 --------------------- src/agentgrep/mcp/__init__.py | 88 +++ src/agentgrep/mcp/_library.py | 165 ++++++ src/agentgrep/mcp/instructions.py | 15 + src/agentgrep/mcp/models.py | 167 ++++++ src/agentgrep/mcp/prompts.py | 54 ++ src/agentgrep/mcp/resources.py | 101 ++++ src/agentgrep/mcp/server.py | 35 ++ src/agentgrep/mcp/tools/__init__.py | 16 + src/agentgrep/mcp/tools/discovery_tools.py | 78 +++ src/agentgrep/mcp/tools/search_tools.py | 111 ++++ 11 files changed, 830 insertions(+), 625 deletions(-) delete mode 100644 src/agentgrep/mcp.py create mode 100644 src/agentgrep/mcp/__init__.py create mode 100644 src/agentgrep/mcp/_library.py create mode 100644 src/agentgrep/mcp/instructions.py create mode 100644 src/agentgrep/mcp/models.py create mode 100644 src/agentgrep/mcp/prompts.py create mode 100644 src/agentgrep/mcp/resources.py create mode 100644 src/agentgrep/mcp/server.py create mode 100644 src/agentgrep/mcp/tools/__init__.py create mode 100644 src/agentgrep/mcp/tools/discovery_tools.py create mode 100644 src/agentgrep/mcp/tools/search_tools.py diff --git a/src/agentgrep/mcp.py b/src/agentgrep/mcp.py deleted file mode 100644 index
cfb0685..0000000 --- a/src/agentgrep/mcp.py +++ /dev/null @@ -1,625 +0,0 @@ -#!/usr/bin/env python3 -# /// script -# requires-python = ">=3.14" -# dependencies = ["fastmcp>=3.0.0", "pydantic>=2.11.3"] -# /// -"""FastMCP server exposing ``agentgrep`` search and discovery. - -Examples --------- -Run the MCP server over stdio: - -```console -$ uv run agentgrep-mcp -``` - -Use the FastMCP config: - -```console -$ uv run fastmcp run fastmcp.json -``` -""" - -from __future__ import annotations - -import asyncio -import importlib -import pathlib -import typing as t - -from fastmcp import FastMCP -from pydantic import BaseModel, ConfigDict, Field, TypeAdapter - -AgentName = t.Literal["codex", "claude", "cursor", "gemini"] -AgentSelector = t.Literal["codex", "claude", "cursor", "gemini", "all"] -SearchTypeName = t.Literal["prompts", "history", "all"] - -SERVER_VERSION = "0.1.0" -KNOWN_ADAPTERS: tuple[str, ...] = ( - "codex.history_json.v1", - "codex.sessions_jsonl.v1", - "claude.projects_jsonl.v1", - "cursor.ai_tracking_sqlite.v1", - "cursor.cli_jsonl.v1", - "cursor.state_vscdb_legacy.v1", - "cursor.state_vscdb_modern.v1", - "gemini.tmp_chats_jsonl.v1", - "gemini.tmp_chats_legacy_json.v1", - "gemini.tmp_logs_json.v1", -) -READONLY_TAGS = {"readonly", "agentgrep"} -RESOURCE_ANNOTATIONS = {"readOnlyHint": True, "idempotentHint": True} - - -class SearchRecordLike(t.Protocol): - """Structural type for shared ``agentgrep`` search records.""" - - kind: str - agent: str - store: str - adapter_id: str - path: pathlib.Path - text: str - title: str | None - role: str | None - timestamp: str | None - model: str | None - session_id: str | None - conversation_id: str | None - metadata: dict[str, object] - - -class FindRecordLike(t.Protocol): - """Structural type for shared ``agentgrep`` find records.""" - - kind: str - agent: str - store: str - adapter_id: str - path: pathlib.Path - path_kind: str - metadata: dict[str, object] - - -class SourceHandleLike(t.Protocol): - """Structural 
type for discovered ``agentgrep`` sources.""" - - agent: str - store: str - adapter_id: str - path: pathlib.Path - path_kind: str - source_kind: str - search_root: pathlib.Path | None - mtime_ns: int - - -class SearchQueryFactory(t.Protocol): - """Factory protocol for ``agentgrep.SearchQuery``.""" - - def __call__( # noqa: D102 - self, - *, - terms: tuple[str, ...], - search_type: str, - any_term: bool, - regex: bool, - case_sensitive: bool, - agents: tuple[str, ...], - limit: int | None, - ) -> object: ... - - -class BackendSelectionLike(t.Protocol): - """Structural type for subprocess backend selection.""" - - find_tool: str | None - grep_tool: str | None - json_tool: str | None - - -class AgentGrepModule(t.Protocol): - """Structural type for the imported ``agentgrep`` module.""" - - SCHEMA_VERSION: str - AGENT_CHOICES: tuple[AgentName, ...] - SearchQuery: SearchQueryFactory - - def parse_agents(self, values: list[str]) -> tuple[str, ...]: ... # noqa: D102 - - def select_backends(self) -> BackendSelectionLike: ... # noqa: D102 - - def discover_sources( # noqa: D102 - self, - home: pathlib.Path, - agents: tuple[str, ...], - backends: BackendSelectionLike, - ) -> list[SourceHandleLike]: ... - - def run_search_query( # noqa: D102 - self, - home: pathlib.Path, - query: object, - *, - backends: BackendSelectionLike | None = None, - ) -> list[SearchRecordLike]: ... - - def run_find_query( # noqa: D102 - self, - home: pathlib.Path, - agents: tuple[str, ...], - *, - pattern: str | None, - limit: int | None, - backends: BackendSelectionLike | None = None, - ) -> list[FindRecordLike]: ... - - def serialize_search_record( # noqa: D102 - self, - record: SearchRecordLike, - ) -> dict[str, object]: ... - - def serialize_find_record( # noqa: D102 - self, - record: FindRecordLike, - ) -> dict[str, object]: ... - - def serialize_source_handle( # noqa: D102 - self, - source: SourceHandleLike, - ) -> dict[str, object]: ... 
- - -agentgrep = t.cast( - "AgentGrepModule", - t.cast("object", importlib.import_module("agentgrep")), -) - - -class AgentGrepModel(BaseModel): - """Base model for MCP payloads.""" - - model_config: t.ClassVar[ConfigDict] = ConfigDict(extra="forbid") - - -class SearchRecordModel(AgentGrepModel): - """Normalized search result payload.""" - - schema_version: str = agentgrep.SCHEMA_VERSION - kind: t.Literal["prompt", "history"] - agent: t.Literal["codex", "claude", "cursor", "gemini"] - store: str - adapter_id: str - path: str - text: str - title: str | None = None - role: str | None = None - timestamp: str | None = None - model: str | None = None - session_id: str | None = None - conversation_id: str | None = None - metadata: dict[str, t.Any] = Field(default_factory=dict) - - @classmethod - def from_record(cls, record: SearchRecordLike) -> SearchRecordModel: - """Build a typed result from an ``agentgrep`` search record.""" - return cls.model_validate(agentgrep.serialize_search_record(record)) - - -class FindRecordModel(AgentGrepModel): - """Normalized find result payload.""" - - schema_version: str = agentgrep.SCHEMA_VERSION - kind: t.Literal["find"] - agent: t.Literal["codex", "claude", "cursor", "gemini"] - store: str - adapter_id: str - path: str - path_kind: t.Literal["history_file", "session_file", "sqlite_db"] - metadata: dict[str, t.Any] = Field(default_factory=dict) - - @classmethod - def from_record(cls, record: FindRecordLike) -> FindRecordModel: - """Build a typed result from an ``agentgrep`` find record.""" - return cls.model_validate(agentgrep.serialize_find_record(record)) - - -class SourceRecordModel(AgentGrepModel): - """Discovered source summary payload.""" - - schema_version: str = agentgrep.SCHEMA_VERSION - agent: t.Literal["codex", "claude", "cursor", "gemini"] - store: str - adapter_id: str - path: str - path_kind: t.Literal["history_file", "session_file", "sqlite_db"] - source_kind: t.Literal["json", "jsonl", "sqlite"] - search_root: str | None 
= None - mtime_ns: int - - @classmethod - def from_source(cls, source: SourceHandleLike) -> SourceRecordModel: - """Build a typed result from a discovered source.""" - return cls.model_validate(agentgrep.serialize_source_handle(source)) - - -class SearchToolQuery(AgentGrepModel): - """Echo of normalized search tool inputs.""" - - terms: list[str] - agent: AgentSelector - search_type: SearchTypeName - any_term: bool - regex: bool - case_sensitive: bool - limit: int | None = None - - -class SearchToolResponse(AgentGrepModel): - """Structured response for the MCP search tool.""" - - schema_version: str = agentgrep.SCHEMA_VERSION - query: SearchToolQuery - results: list[SearchRecordModel] - - -class FindToolQuery(AgentGrepModel): - """Echo of normalized find tool inputs.""" - - pattern: str | None = None - agent: AgentSelector - limit: int | None = None - - -class FindToolResponse(AgentGrepModel): - """Structured response for the MCP find tool.""" - - schema_version: str = agentgrep.SCHEMA_VERSION - query: FindToolQuery - results: list[FindRecordModel] - - -class BackendAvailabilityModel(AgentGrepModel): - """Selected read-only subprocess backends.""" - - find_tool: str | None = None - grep_tool: str | None = None - json_tool: str | None = None - - -class CapabilitiesModel(AgentGrepModel): - """Static MCP capability summary.""" - - schema_version: str = agentgrep.SCHEMA_VERSION - name: str = "agentgrep" - version: str = SERVER_VERSION - read_only: bool = True - agents: list[t.Literal["codex", "claude", "cursor", "gemini"]] - search_types: list[SearchTypeName] - adapters: list[str] - tools: list[str] - resources: list[str] - prompts: list[str] - backends: BackendAvailabilityModel - - -SourceListAdapter = TypeAdapter(list[SourceRecordModel]) - - -def normalize_agent_selection(agent: AgentSelector) -> tuple[str, ...]: - """Convert a single MCP agent selector into ``agentgrep`` agents.""" - values: list[str] = [] if agent == "all" else [agent] - return 
agentgrep.parse_agents(values) - - -def list_source_models(agent: AgentSelector = "all") -> list[SourceRecordModel]: - """Return discovered sources as typed MCP payloads.""" - backends = agentgrep.select_backends() - sources = agentgrep.discover_sources( - pathlib.Path.home(), - normalize_agent_selection(agent), - backends, - ) - return [SourceRecordModel.from_source(source) for source in sources] - - -def build_capabilities() -> CapabilitiesModel: - """Build a typed capability summary.""" - backends = agentgrep.select_backends() - return CapabilitiesModel( - agents=list(agentgrep.AGENT_CHOICES), - search_types=["prompts", "history", "all"], - adapters=list(KNOWN_ADAPTERS), - tools=["search", "find"], - resources=[ - "agentgrep://capabilities", - "agentgrep://sources", - "agentgrep://sources/{agent}", - ], - prompts=["search_prompts", "search_history", "inspect_stores"], - backends=BackendAvailabilityModel( - find_tool=backends.find_tool, - grep_tool=backends.grep_tool, - json_tool=backends.json_tool, - ), - ) - - -def _build_instructions() -> str: - """Return server instructions for MCP clients.""" - return ( - "agentgrep is a read-only MCP server for local AI agent history search. " - "Use `search` to retrieve full prompt/history matches and `find` to inspect " - "discovered stores and session files. Search results are newest-first and " - "duplicate prompts within the same session are collapsed. " - "This server never mutates agent stores, never opens SQLite in write mode, " - "and never executes arbitrary shell commands." 
- ) - - -class SearchRequestModel(AgentGrepModel): - """Validated search request payload.""" - - terms: list[str] - agent: AgentSelector - search_type: SearchTypeName - any_term: bool - regex: bool - case_sensitive: bool - limit: int | None = None - - -class FindRequestModel(AgentGrepModel): - """Validated find request payload.""" - - pattern: str | None = None - agent: AgentSelector - limit: int | None = None - - -def _search_sync(request: SearchRequestModel) -> SearchToolResponse: - """Run the blocking search work and build a typed response.""" - query = agentgrep.SearchQuery( - terms=tuple(request.terms), - search_type=request.search_type, - any_term=request.any_term, - regex=request.regex, - case_sensitive=request.case_sensitive, - agents=normalize_agent_selection(request.agent), - limit=request.limit, - ) - records = agentgrep.run_search_query(pathlib.Path.home(), query) - return SearchToolResponse( - query=SearchToolQuery( - terms=request.terms, - agent=request.agent, - search_type=request.search_type, - any_term=request.any_term, - regex=request.regex, - case_sensitive=request.case_sensitive, - limit=request.limit, - ), - results=[SearchRecordModel.from_record(record) for record in records], - ) - - -def _find_sync(request: FindRequestModel) -> FindToolResponse: - """Run the blocking find work and build a typed response.""" - records = agentgrep.run_find_query( - pathlib.Path.home(), - normalize_agent_selection(request.agent), - pattern=request.pattern, - limit=request.limit, - ) - return FindToolResponse( - query=FindToolQuery( - pattern=request.pattern, - agent=request.agent, - limit=request.limit, - ), - results=[FindRecordModel.from_record(record) for record in records], - ) - - -def _register_tools(mcp: FastMCP) -> None: - """Register tool handlers on the server.""" - - @mcp.tool( - name="search", - tags=READONLY_TAGS | {"search"}, - description="Search normalized prompts or history across local agent stores.", - ) - async def search_tool( - terms: 
t.Annotated[ - list[str], - Field( - min_length=1, - description="One or more literal or regex search terms.", - ), - ], - agent: t.Annotated[ - AgentSelector, - Field(description="Limit search to one agent or search all agents."), - ] = "all", - search_type: t.Annotated[ - SearchTypeName, - Field(description="Search prompts, history, or both."), - ] = "prompts", - any_term: t.Annotated[ - bool, - Field(description="Match any term instead of requiring all terms."), - ] = False, - regex: t.Annotated[ - bool, - Field(description="Treat search terms as regular expressions."), - ] = False, - case_sensitive: t.Annotated[ - bool, - Field(description="Perform case-sensitive matching."), - ] = False, - limit: t.Annotated[ - int | None, - Field( - default=20, - ge=1, - description="Maximum number of search results to return.", - ), - ] = 20, - ) -> SearchToolResponse: - request = SearchRequestModel( - terms=terms, - agent=agent, - search_type=search_type, - any_term=any_term, - regex=regex, - case_sensitive=case_sensitive, - limit=limit, - ) - return await asyncio.to_thread(_search_sync, request) - - _ = search_tool - - @mcp.tool( - name="find", - tags=READONLY_TAGS | {"discovery"}, - description="Find known agent stores, session files, and SQLite databases.", - ) - async def find_tool( - pattern: t.Annotated[ - str | None, - Field( - default=None, - description="Optional substring filter against discovered paths and adapters.", - ), - ] = None, - agent: t.Annotated[ - AgentSelector, - Field(description="Limit discovery to one agent or search all agents."), - ] = "all", - limit: t.Annotated[ - int | None, - Field( - default=50, - ge=1, - description="Maximum number of discovered sources to return.", - ), - ] = 50, - ) -> FindToolResponse: - request = FindRequestModel(pattern=pattern, agent=agent, limit=limit) - return await asyncio.to_thread(_find_sync, request) - - _ = find_tool - - -def _register_resources(mcp: FastMCP) -> None: - """Register static and templated 
resources.""" - - @mcp.resource( - "agentgrep://capabilities", - name="agentgrep_capabilities", - description="Read-only capability summary for the agentgrep MCP server.", - mime_type="application/json", - tags=READONLY_TAGS | {"capabilities"}, - annotations=RESOURCE_ANNOTATIONS, - ) - def capabilities_resource() -> str: - return build_capabilities().model_dump_json(indent=2) - - _ = capabilities_resource - - @mcp.resource( - "agentgrep://sources", - name="agentgrep_sources", - description="All discovered read-only agent stores known to agentgrep.", - mime_type="application/json", - tags=READONLY_TAGS | {"discovery"}, - annotations=RESOURCE_ANNOTATIONS, - ) - def sources_resource() -> str: - return SourceListAdapter.dump_json(list_source_models()).decode("utf-8") - - _ = sources_resource - - @mcp.resource( - "agentgrep://sources/{agent}", - name="agentgrep_sources_by_agent", - description="Discovered sources filtered to one agent.", - mime_type="application/json", - tags=READONLY_TAGS | {"discovery"}, - annotations=RESOURCE_ANNOTATIONS, - ) - def sources_by_agent_resource(agent: str) -> str: - selected_agent = t.cast("AgentSelector", agent) - return SourceListAdapter.dump_json(list_source_models(selected_agent)).decode("utf-8") - - _ = sources_by_agent_resource - - -def _register_prompts(mcp: FastMCP) -> None: - """Register prompt templates that guide MCP clients.""" - - @mcp.prompt( - name="search_prompts", - description="Guide the client to search for matching user prompts.", - tags={"search", "prompts", "readonly"}, - ) - def search_prompts_prompt(topic: str, agent: str = "all") -> str: - return ( - "Use the `search` tool to find full user prompts about " - f"{topic!r}. Search `prompts` only, keep newest-first ordering, " - f"and limit the search to agent={agent!r} if requested." 
- ) - - _ = search_prompts_prompt - - @mcp.prompt( - name="search_history", - description="Guide the client to search assistant or command history records.", - tags={"search", "history", "readonly"}, - ) - def search_history_prompt(topic: str, agent: str = "all") -> str: - return ( - "Use the `search` tool to find matching history records about " - f"{topic!r}. Search `history` only, and restrict to " - f"agent={agent!r} when appropriate." - ) - - _ = search_history_prompt - - @mcp.prompt( - name="inspect_stores", - description="Guide the client to inspect discovered agent stores and session files.", - tags={"discovery", "readonly"}, - ) - def inspect_stores_prompt(agent: str = "all", pattern: str = "") -> str: - return ( - "Use the `find` tool to inspect discovered stores, session files, and " - f"SQLite databases for agent={agent!r}. " - f"Apply the pattern {pattern!r} when it is non-empty." - ) - - _ = inspect_stores_prompt - - -def build_mcp_server() -> FastMCP: - """Build and return the FastMCP server instance.""" - mcp = FastMCP( - name="agentgrep", - version=SERVER_VERSION, - instructions=_build_instructions(), - on_duplicate="error", - ) - _register_tools(mcp) - _register_resources(mcp) - _register_prompts(mcp) - return mcp - - -def main() -> int: - """Run the MCP server over stdio.""" - build_mcp_server().run() - return 0 - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/src/agentgrep/mcp/__init__.py b/src/agentgrep/mcp/__init__.py new file mode 100644 index 0000000..294e14d --- /dev/null +++ b/src/agentgrep/mcp/__init__.py @@ -0,0 +1,88 @@ +"""FastMCP server exposing ``agentgrep`` search and discovery. 
+ +Examples +-------- +Run the MCP server over stdio: + +```console +$ uv run agentgrep-mcp +``` + +Use the FastMCP config: + +```console +$ uv run fastmcp run fastmcp.json +``` +""" + +from __future__ import annotations + +from agentgrep.mcp._library import ( + KNOWN_ADAPTERS, + READONLY_TAGS, + RESOURCE_ANNOTATIONS, + SERVER_VERSION, + AgentGrepModule, + AgentName, + AgentSelector, + BackendSelectionLike, + FindRecordLike, + SearchQueryFactory, + SearchRecordLike, + SearchTypeName, + SourceHandleLike, + agentgrep, + normalize_agent_selection, +) +from agentgrep.mcp.models import ( + AgentGrepModel, + BackendAvailabilityModel, + CapabilitiesModel, + FindRecordModel, + FindRequestModel, + FindToolQuery, + FindToolResponse, + SearchRecordModel, + SearchRequestModel, + SearchToolQuery, + SearchToolResponse, + SourceListAdapter, + SourceRecordModel, +) +from agentgrep.mcp.resources import build_capabilities, list_source_models +from agentgrep.mcp.server import build_mcp_server, main + +__all__ = ( + "KNOWN_ADAPTERS", + "READONLY_TAGS", + "RESOURCE_ANNOTATIONS", + "SERVER_VERSION", + "AgentGrepModel", + "AgentGrepModule", + "AgentName", + "AgentSelector", + "BackendAvailabilityModel", + "BackendSelectionLike", + "CapabilitiesModel", + "FindRecordLike", + "FindRecordModel", + "FindRequestModel", + "FindToolQuery", + "FindToolResponse", + "SearchQueryFactory", + "SearchRecordLike", + "SearchRecordModel", + "SearchRequestModel", + "SearchToolQuery", + "SearchToolResponse", + "SearchTypeName", + "SourceHandleLike", + "SourceListAdapter", + "SourceRecordModel", + "agentgrep", + "build_capabilities", + "build_mcp_server", + "list_source_models", + "main", + "normalize_agent_selection", +) diff --git a/src/agentgrep/mcp/_library.py b/src/agentgrep/mcp/_library.py new file mode 100644 index 0000000..4f01085 --- /dev/null +++ b/src/agentgrep/mcp/_library.py @@ -0,0 +1,165 @@ +"""Library facade for the ``agentgrep`` MCP server. 
+ +Holds the protocol-typed view of the parent :mod:`agentgrep` package along +with the shared constants and type aliases that the rest of the +``agentgrep.mcp`` subpackage consumes. The dynamic import here breaks a +circular import that would otherwise arise from ``agentgrep.__init__`` +trying to load its own MCP subpackage during library setup. +""" + +from __future__ import annotations + +import importlib +import pathlib +import typing as t + +AgentName = t.Literal["codex", "claude", "cursor", "gemini"] +AgentSelector = t.Literal["codex", "claude", "cursor", "gemini", "all"] +SearchTypeName = t.Literal["prompts", "history", "all"] + +SERVER_VERSION = "0.1.0" +KNOWN_ADAPTERS: tuple[str, ...] = ( + "codex.history_json.v1", + "codex.sessions_jsonl.v1", + "claude.projects_jsonl.v1", + "cursor.ai_tracking_sqlite.v1", + "cursor.cli_jsonl.v1", + "cursor.state_vscdb_legacy.v1", + "cursor.state_vscdb_modern.v1", + "gemini.tmp_chats_jsonl.v1", + "gemini.tmp_chats_legacy_json.v1", + "gemini.tmp_logs_json.v1", +) +READONLY_TAGS = {"readonly", "agentgrep"} +RESOURCE_ANNOTATIONS = {"readOnlyHint": True, "idempotentHint": True} + + +class SearchRecordLike(t.Protocol): + """Structural type for shared ``agentgrep`` search records.""" + + kind: str + agent: str + store: str + adapter_id: str + path: pathlib.Path + text: str + title: str | None + role: str | None + timestamp: str | None + model: str | None + session_id: str | None + conversation_id: str | None + metadata: dict[str, object] + + +class FindRecordLike(t.Protocol): + """Structural type for shared ``agentgrep`` find records.""" + + kind: str + agent: str + store: str + adapter_id: str + path: pathlib.Path + path_kind: str + metadata: dict[str, object] + + +class SourceHandleLike(t.Protocol): + """Structural type for discovered ``agentgrep`` sources.""" + + agent: str + store: str + adapter_id: str + path: pathlib.Path + path_kind: str + source_kind: str + search_root: pathlib.Path | None + mtime_ns: int + + +class 
SearchQueryFactory(t.Protocol): + """Factory protocol for ``agentgrep.SearchQuery``.""" + + def __call__( + self, + *, + terms: tuple[str, ...], + search_type: str, + any_term: bool, + regex: bool, + case_sensitive: bool, + agents: tuple[str, ...], + limit: int | None, + ) -> object: ... + + +class BackendSelectionLike(t.Protocol): + """Structural type for subprocess backend selection.""" + + find_tool: str | None + grep_tool: str | None + json_tool: str | None + + +class AgentGrepModule(t.Protocol): + """Structural type for the imported ``agentgrep`` module.""" + + SCHEMA_VERSION: str + AGENT_CHOICES: tuple[AgentName, ...] + SearchQuery: SearchQueryFactory + + def parse_agents(self, values: list[str]) -> tuple[str, ...]: ... + + def select_backends(self) -> BackendSelectionLike: ... + + def discover_sources( + self, + home: pathlib.Path, + agents: tuple[str, ...], + backends: BackendSelectionLike, + ) -> list[SourceHandleLike]: ... + + def run_search_query( + self, + home: pathlib.Path, + query: object, + *, + backends: BackendSelectionLike | None = None, + ) -> list[SearchRecordLike]: ... + + def run_find_query( + self, + home: pathlib.Path, + agents: tuple[str, ...], + *, + pattern: str | None, + limit: int | None, + backends: BackendSelectionLike | None = None, + ) -> list[FindRecordLike]: ... + + def serialize_search_record( + self, + record: SearchRecordLike, + ) -> dict[str, object]: ... + + def serialize_find_record( + self, + record: FindRecordLike, + ) -> dict[str, object]: ... + + def serialize_source_handle( + self, + source: SourceHandleLike, + ) -> dict[str, object]: ... 
+ + +agentgrep = t.cast( + "AgentGrepModule", + t.cast("object", importlib.import_module("agentgrep")), +) + + +def normalize_agent_selection(agent: AgentSelector) -> tuple[str, ...]: + """Convert a single MCP agent selector into ``agentgrep`` agents.""" + values: list[str] = [] if agent == "all" else [agent] + return agentgrep.parse_agents(values) diff --git a/src/agentgrep/mcp/instructions.py b/src/agentgrep/mcp/instructions.py new file mode 100644 index 0000000..a7666fc --- /dev/null +++ b/src/agentgrep/mcp/instructions.py @@ -0,0 +1,15 @@ +"""Server instructions shown to MCP clients on handshake.""" + +from __future__ import annotations + + +def _build_instructions() -> str: + """Return server instructions for MCP clients.""" + return ( + "agentgrep is a read-only MCP server for local AI agent history search. " + "Use `search` to retrieve full prompt/history matches and `find` to inspect " + "discovered stores and session files. Search results are newest-first and " + "duplicate prompts within the same session are collapsed. " + "This server never mutates agent stores, never opens SQLite in write mode, " + "and never executes arbitrary shell commands." 
+ ) diff --git a/src/agentgrep/mcp/models.py b/src/agentgrep/mcp/models.py new file mode 100644 index 0000000..caf3b9b --- /dev/null +++ b/src/agentgrep/mcp/models.py @@ -0,0 +1,167 @@ +"""Pydantic models for ``agentgrep`` MCP tool inputs and outputs.""" + +from __future__ import annotations + +import typing as t + +from pydantic import BaseModel, ConfigDict, Field, TypeAdapter + +from agentgrep.mcp._library import ( + SERVER_VERSION, + AgentSelector, + FindRecordLike, + SearchRecordLike, + SearchTypeName, + SourceHandleLike, + agentgrep, +) + + +class AgentGrepModel(BaseModel): + """Base model for MCP payloads.""" + + model_config: t.ClassVar[ConfigDict] = ConfigDict(extra="forbid") + + +class SearchRecordModel(AgentGrepModel): + """Normalized search result payload.""" + + schema_version: str = agentgrep.SCHEMA_VERSION + kind: t.Literal["prompt", "history"] + agent: t.Literal["codex", "claude", "cursor", "gemini"] + store: str + adapter_id: str + path: str + text: str + title: str | None = None + role: str | None = None + timestamp: str | None = None + model: str | None = None + session_id: str | None = None + conversation_id: str | None = None + metadata: dict[str, t.Any] = Field(default_factory=dict) + + @classmethod + def from_record(cls, record: SearchRecordLike) -> SearchRecordModel: + """Build a typed result from an ``agentgrep`` search record.""" + return cls.model_validate(agentgrep.serialize_search_record(record)) + + +class FindRecordModel(AgentGrepModel): + """Normalized find result payload.""" + + schema_version: str = agentgrep.SCHEMA_VERSION + kind: t.Literal["find"] + agent: t.Literal["codex", "claude", "cursor", "gemini"] + store: str + adapter_id: str + path: str + path_kind: t.Literal["history_file", "session_file", "sqlite_db"] + metadata: dict[str, t.Any] = Field(default_factory=dict) + + @classmethod + def from_record(cls, record: FindRecordLike) -> FindRecordModel: + """Build a typed result from an ``agentgrep`` find record.""" + return 
cls.model_validate(agentgrep.serialize_find_record(record)) + + +class SourceRecordModel(AgentGrepModel): + """Discovered source summary payload.""" + + schema_version: str = agentgrep.SCHEMA_VERSION + agent: t.Literal["codex", "claude", "cursor", "gemini"] + store: str + adapter_id: str + path: str + path_kind: t.Literal["history_file", "session_file", "sqlite_db"] + source_kind: t.Literal["json", "jsonl", "sqlite"] + search_root: str | None = None + mtime_ns: int + + @classmethod + def from_source(cls, source: SourceHandleLike) -> SourceRecordModel: + """Build a typed result from a discovered source.""" + return cls.model_validate(agentgrep.serialize_source_handle(source)) + + +class SearchToolQuery(AgentGrepModel): + """Echo of normalized search tool inputs.""" + + terms: list[str] + agent: AgentSelector + search_type: SearchTypeName + any_term: bool + regex: bool + case_sensitive: bool + limit: int | None = None + + +class SearchToolResponse(AgentGrepModel): + """Structured response for the MCP search tool.""" + + schema_version: str = agentgrep.SCHEMA_VERSION + query: SearchToolQuery + results: list[SearchRecordModel] + + +class FindToolQuery(AgentGrepModel): + """Echo of normalized find tool inputs.""" + + pattern: str | None = None + agent: AgentSelector + limit: int | None = None + + +class FindToolResponse(AgentGrepModel): + """Structured response for the MCP find tool.""" + + schema_version: str = agentgrep.SCHEMA_VERSION + query: FindToolQuery + results: list[FindRecordModel] + + +class BackendAvailabilityModel(AgentGrepModel): + """Selected read-only subprocess backends.""" + + find_tool: str | None = None + grep_tool: str | None = None + json_tool: str | None = None + + +class CapabilitiesModel(AgentGrepModel): + """Static MCP capability summary.""" + + schema_version: str = agentgrep.SCHEMA_VERSION + name: str = "agentgrep" + version: str = SERVER_VERSION + read_only: bool = True + agents: list[t.Literal["codex", "claude", "cursor", "gemini"]] + 
search_types: list[SearchTypeName] + adapters: list[str] + tools: list[str] + resources: list[str] + prompts: list[str] + backends: BackendAvailabilityModel + + +SourceListAdapter = TypeAdapter(list[SourceRecordModel]) + + +class SearchRequestModel(AgentGrepModel): + """Validated search request payload.""" + + terms: list[str] + agent: AgentSelector + search_type: SearchTypeName + any_term: bool + regex: bool + case_sensitive: bool + limit: int | None = None + + +class FindRequestModel(AgentGrepModel): + """Validated find request payload.""" + + pattern: str | None = None + agent: AgentSelector + limit: int | None = None diff --git a/src/agentgrep/mcp/prompts.py b/src/agentgrep/mcp/prompts.py new file mode 100644 index 0000000..b58fdd7 --- /dev/null +++ b/src/agentgrep/mcp/prompts.py @@ -0,0 +1,54 @@ +"""MCP prompt templates that guide clients.""" + +from __future__ import annotations + +import typing as t + +if t.TYPE_CHECKING: + from fastmcp import FastMCP + + +def register_prompts(mcp: FastMCP) -> None: + """Register every ``agentgrep`` prompt on ``mcp``.""" + + @mcp.prompt( + name="search_prompts", + description="Guide the client to search for matching user prompts.", + tags={"search", "prompts", "readonly"}, + ) + def search_prompts_prompt(topic: str, agent: str = "all") -> str: + return ( + "Use the `search` tool to find full user prompts about " + f"{topic!r}. Search `prompts` only, keep newest-first ordering, " + f"and limit the search to agent={agent!r} if requested." + ) + + _ = search_prompts_prompt + + @mcp.prompt( + name="search_history", + description="Guide the client to search assistant or command history records.", + tags={"search", "history", "readonly"}, + ) + def search_history_prompt(topic: str, agent: str = "all") -> str: + return ( + "Use the `search` tool to find matching history records about " + f"{topic!r}. Search `history` only, and restrict to " + f"agent={agent!r} when appropriate." 
+ ) + + _ = search_history_prompt + + @mcp.prompt( + name="inspect_stores", + description="Guide the client to inspect discovered agent stores and session files.", + tags={"discovery", "readonly"}, + ) + def inspect_stores_prompt(agent: str = "all", pattern: str = "") -> str: + return ( + "Use the `find` tool to inspect discovered stores, session files, and " + f"SQLite databases for agent={agent!r}. " + f"Apply the pattern {pattern!r} when it is non-empty." + ) + + _ = inspect_stores_prompt diff --git a/src/agentgrep/mcp/resources.py b/src/agentgrep/mcp/resources.py new file mode 100644 index 0000000..7af20ca --- /dev/null +++ b/src/agentgrep/mcp/resources.py @@ -0,0 +1,101 @@ +"""Static and templated MCP resources for ``agentgrep``.""" + +from __future__ import annotations + +import pathlib +import typing as t + +from agentgrep.mcp._library import ( + KNOWN_ADAPTERS, + READONLY_TAGS, + RESOURCE_ANNOTATIONS, + AgentSelector, + agentgrep, + normalize_agent_selection, +) +from agentgrep.mcp.models import ( + BackendAvailabilityModel, + CapabilitiesModel, + SourceListAdapter, + SourceRecordModel, +) + +if t.TYPE_CHECKING: + from fastmcp import FastMCP + + +def list_source_models(agent: AgentSelector = "all") -> list[SourceRecordModel]: + """Return discovered sources as typed MCP payloads.""" + backends = agentgrep.select_backends() + sources = agentgrep.discover_sources( + pathlib.Path.home(), + normalize_agent_selection(agent), + backends, + ) + return [SourceRecordModel.from_source(source) for source in sources] + + +def build_capabilities() -> CapabilitiesModel: + """Build a typed capability summary.""" + backends = agentgrep.select_backends() + return CapabilitiesModel( + agents=list(agentgrep.AGENT_CHOICES), + search_types=["prompts", "history", "all"], + adapters=list(KNOWN_ADAPTERS), + tools=["search", "find"], + resources=[ + "agentgrep://capabilities", + "agentgrep://sources", + "agentgrep://sources/{agent}", + ], + prompts=["search_prompts", 
"search_history", "inspect_stores"], + backends=BackendAvailabilityModel( + find_tool=backends.find_tool, + grep_tool=backends.grep_tool, + json_tool=backends.json_tool, + ), + ) + + +def register_resources(mcp: FastMCP) -> None: + """Register every ``agentgrep`` resource on ``mcp``.""" + + @mcp.resource( + "agentgrep://capabilities", + name="agentgrep_capabilities", + description="Read-only capability summary for the agentgrep MCP server.", + mime_type="application/json", + tags=READONLY_TAGS | {"capabilities"}, + annotations=RESOURCE_ANNOTATIONS, + ) + def capabilities_resource() -> str: + return build_capabilities().model_dump_json(indent=2) + + _ = capabilities_resource + + @mcp.resource( + "agentgrep://sources", + name="agentgrep_sources", + description="All discovered read-only agent stores known to agentgrep.", + mime_type="application/json", + tags=READONLY_TAGS | {"discovery"}, + annotations=RESOURCE_ANNOTATIONS, + ) + def sources_resource() -> str: + return SourceListAdapter.dump_json(list_source_models()).decode("utf-8") + + _ = sources_resource + + @mcp.resource( + "agentgrep://sources/{agent}", + name="agentgrep_sources_by_agent", + description="Discovered sources filtered to one agent.", + mime_type="application/json", + tags=READONLY_TAGS | {"discovery"}, + annotations=RESOURCE_ANNOTATIONS, + ) + def sources_by_agent_resource(agent: str) -> str: + selected_agent = t.cast("AgentSelector", agent) + return SourceListAdapter.dump_json(list_source_models(selected_agent)).decode("utf-8") + + _ = sources_by_agent_resource diff --git a/src/agentgrep/mcp/server.py b/src/agentgrep/mcp/server.py new file mode 100644 index 0000000..2a15535 --- /dev/null +++ b/src/agentgrep/mcp/server.py @@ -0,0 +1,35 @@ +"""FastMCP server assembly and stdio entry point.""" + +from __future__ import annotations + +from fastmcp import FastMCP + +from agentgrep.mcp._library import SERVER_VERSION +from agentgrep.mcp.instructions import _build_instructions +from agentgrep.mcp.prompts 
def build_mcp_server() -> FastMCP:
    """Assemble the ``agentgrep`` FastMCP server.

    Returns
    -------
    FastMCP
        Server named ``agentgrep`` with tools, resources, and prompts
        registered in that order. Duplicate registrations raise because the
        server is created with ``on_duplicate="error"``.
    """
    server = FastMCP(
        name="agentgrep",
        version=SERVER_VERSION,
        instructions=_build_instructions(),
        on_duplicate="error",
    )
    # Registration order is kept explicit: tools, then resources, then prompts.
    for registrar in (register_tools, register_resources, register_prompts):
        registrar(server)
    return server


def main() -> int:
    """Run the MCP server over stdio.

    Returns
    -------
    int
        Always ``0``; reached once ``run()`` returns.
    """
    server = build_mcp_server()
    server.run()
    return 0
def register(mcp: FastMCP) -> None:
    """Expose the discovery-domain ``find`` tool on ``mcp``.

    Parameters
    ----------
    mcp : FastMCP
        Server instance that receives the tool registration.
    """
    tool_options: dict[str, t.Any] = {
        "name": "find",
        "tags": READONLY_TAGS | {"discovery"},
        "description": "Find known agent stores, session files, and SQLite databases.",
    }

    @mcp.tool(**tool_options)
    async def find_tool(
        pattern: t.Annotated[
            str | None,
            Field(
                default=None,
                description="Optional substring filter against discovered paths and adapters.",
            ),
        ] = None,
        agent: t.Annotated[
            AgentSelector,
            Field(description="Limit discovery to one agent or search all agents."),
        ] = "all",
        limit: t.Annotated[
            int | None,
            Field(
                default=50,
                ge=1,
                description="Maximum number of discovered sources to return.",
            ),
        ] = 50,
    ) -> FindToolResponse:
        # Build the validated request first, then run the blocking find work
        # on a worker thread so the event loop is not held up.
        return await asyncio.to_thread(
            _find_sync,
            FindRequestModel(pattern=pattern, agent=agent, limit=limit),
        )

    _ = find_tool
def register(mcp: FastMCP) -> None:
    """Expose the search-domain ``search`` tool on ``mcp``.

    Parameters
    ----------
    mcp : FastMCP
        Server instance that receives the tool registration.
    """
    tool_options: dict[str, t.Any] = {
        "name": "search",
        "tags": READONLY_TAGS | {"search"},
        "description": "Search normalized prompts or history across local agent stores.",
    }

    @mcp.tool(**tool_options)
    async def search_tool(
        terms: t.Annotated[
            list[str],
            Field(
                min_length=1,
                description="One or more literal or regex search terms.",
            ),
        ],
        agent: t.Annotated[
            AgentSelector,
            Field(description="Limit search to one agent or search all agents."),
        ] = "all",
        search_type: t.Annotated[
            SearchTypeName,
            Field(description="Search prompts, history, or both."),
        ] = "prompts",
        any_term: t.Annotated[
            bool,
            Field(description="Match any term instead of requiring all terms."),
        ] = False,
        regex: t.Annotated[
            bool,
            Field(description="Treat search terms as regular expressions."),
        ] = False,
        case_sensitive: t.Annotated[
            bool,
            Field(description="Perform case-sensitive matching."),
        ] = False,
        limit: t.Annotated[
            int | None,
            Field(
                default=20,
                ge=1,
                description="Maximum number of search results to return.",
            ),
        ] = 20,
    ) -> SearchToolResponse:
        # Gather the call arguments into one mapping, validate them through
        # the request model, then run the blocking search on a worker thread.
        payload: dict[str, t.Any] = {
            "terms": terms,
            "agent": agent,
            "search_type": search_type,
            "any_term": any_term,
            "regex": regex,
            "case_sensitive": case_sensitive,
            "limit": limit,
        }
        return await asyncio.to_thread(_search_sync, SearchRequestModel(**payload))

    _ = search_tool
Bare 'prompt', 'history', 'transcript', " + "'session', 'what did I ask Claude/Codex/Cursor/Gemini' default to " + "agentgrep.\n" + "ANTI-TRIGGERS: do NOT invoke for IDE editor history (VS Code timeline), " + "shell history (zsh/fish history), browser tabs, or live agent sessions " + "in progress. Use shell tools for filesystem-wide grep that is not " + "agent-history scoped." +) + +_INSTR_SEARCH_VS_DISCOVERY = ( + "search vs discovery: search() finds matching prompts/history text; " + "find() enumerates the on-disk stores agentgrep can read. Use the " + "agentgrep://capabilities and agentgrep://sources resources to inspect " + "the server's catalog before deciding which stores are worth searching." +) + +_INSTR_DEFAULTS = ( + "Defaults: results are newest-first and deduplicated by session. " + "search uses substring AND-matching across all terms; set any_term=true " + "for OR. Use regex=true for pattern matching; complex regex should be " + "validated locally before running a broad cross-agent search." +) + +_INSTR_RESOURCES = ( + "Resources: agentgrep://capabilities (server info), agentgrep://sources " + "(discovered stores), agentgrep://sources/{agent} (per-agent)." +) + +_INSTR_PRIVACY = ( + "Privacy: all paths returned are absolute. Treat record text as " + "potentially sensitive (it is the user's own prompt history). Do not " + "echo or forward record text outside the immediate request scope." +) + +_BASE_INSTRUCTIONS = "\n\n".join( + ( + _INSTR_HEADER, + _INSTR_SCOPE, + _INSTR_SEARCH_VS_DISCOVERY, + _INSTR_DEFAULTS, + _INSTR_RESOURCES, + _INSTR_PRIVACY, + ) +) + def _build_instructions() -> str: """Return server instructions for MCP clients.""" - return ( - "agentgrep is a read-only MCP server for local AI agent history search. " - "Use `search` to retrieve full prompt/history matches and `find` to inspect " - "discovered stores and session files. Search results are newest-first and " - "duplicate prompts within the same session are collapsed. 
def test_mcp_instructions_carry_every_segment_header() -> None:
    """Rendered instructions must contain a sentinel from every segment.

    The text is joined from named ``_INSTR_*`` segments, so dropping one
    would quietly shorten what clients receive on handshake. Checking short
    sentinels guards against that without pinning the full wording.
    """
    from agentgrep.mcp.instructions import _build_instructions

    sentinels = (
        "agentgrep MCP server",
        "TRIGGERS:",
        "ANTI-TRIGGERS:",
        "search vs discovery:",
        "Defaults:",
        "Resources:",
        "Privacy:",
    )
    instructions_text = _build_instructions()
    for sentinel in sentinels:
        assert sentinel in instructions_text, sentinel
- Wire FastMCP's TimingMiddleware, ResponseLimitingMiddleware (512KB cap), and ErrorHandlingMiddleware(transform_errors=True). - Tests cover audit extras, pattern redaction, and middleware wiring assertion. --- src/agentgrep/mcp/middleware.py | 158 ++++++++++++++++++++++++++++++++ src/agentgrep/mcp/server.py | 26 ++++++ tests/test_agentgrep_mcp.py | 70 ++++++++++++++ 3 files changed, 254 insertions(+) create mode 100644 src/agentgrep/mcp/middleware.py diff --git a/src/agentgrep/mcp/middleware.py b/src/agentgrep/mcp/middleware.py new file mode 100644 index 0000000..4bf27cb --- /dev/null +++ b/src/agentgrep/mcp/middleware.py @@ -0,0 +1,158 @@ +"""FastMCP middleware for the ``agentgrep`` server. + +Holds :class:`AgentgrepAuditMiddleware`, a per-tool structured-logging hook +that records each invocation with ``agentgrep_*`` ``extra`` keys. FastMCP's +own ``TimingMiddleware`` / ``ResponseLimitingMiddleware`` / +``ErrorHandlingMiddleware`` are wired alongside it from +:mod:`agentgrep.mcp.server`. +""" + +from __future__ import annotations + +import hashlib +import logging +import time +import typing as t + +from fastmcp.server.middleware import Middleware, MiddlewareContext + +_SENSITIVE_ARG_NAMES: frozenset[str] = frozenset({"terms", "pattern", "sample_text"}) +"""Tool argument names whose values get redacted before logging. + +``terms`` and ``pattern`` can carry user secrets when an agent searches its +own history for tokens; ``sample_text`` is the validate-query payload and may +contain anything the caller pastes in. +""" + +_MAX_LOGGED_STR_LEN: int = 200 + + +def _redact_digest(value: str) -> dict[str, t.Any]: + """Return a length and SHA-256 prefix summary of ``value``. + + The digest is stable and deterministic, so operators can correlate the + same payload across log lines without ever recording the payload itself. 
+ + Examples + -------- + >>> _redact_digest("hello") + {'len': 5, 'sha256_prefix': '2cf24dba5fb0'} + >>> _redact_digest("") + {'len': 0, 'sha256_prefix': 'e3b0c44298fc'} + """ + return { + "len": len(value), + "sha256_prefix": hashlib.sha256(value.encode("utf-8")).hexdigest()[:12], + } + + +def _summarize_args(args: dict[str, t.Any]) -> dict[str, t.Any]: + """Summarize tool arguments for audit logging. + + Sensitive scalars get replaced by a digest dict. Sensitive list payloads + (e.g. ``terms`` is ``list[str]``) get each element digested. Long + non-sensitive strings get truncated with a marker. Everything else passes + through as-is. + + Examples + -------- + Non-sensitive scalars pass through unchanged: + + >>> _summarize_args({"agent": "codex", "regex": True}) + {'agent': 'codex', 'regex': True} + + Sensitive scalar payloads are replaced by a digest dict: + + >>> _summarize_args({"pattern": "secret-token"})["pattern"]["len"] + 12 + + Sensitive list payloads digest each element: + + >>> redacted = _summarize_args({"terms": ["alpha", "beta"]}) + >>> [item["len"] for item in redacted["terms"]] + [5, 4] + >>> "alpha" in str(redacted) + False + """ + summary: dict[str, t.Any] = {} + for key, value in args.items(): + if key in _SENSITIVE_ARG_NAMES and isinstance(value, str): + summary[key] = _redact_digest(value) + elif key in _SENSITIVE_ARG_NAMES and isinstance(value, list): + summary[key] = [ + _redact_digest(str(item)) if isinstance(item, str) else item for item in value + ] + elif isinstance(value, str) and len(value) > _MAX_LOGGED_STR_LEN: + summary[key] = value[:_MAX_LOGGED_STR_LEN] + "..." + else: + summary[key] = value + return summary + + +class AgentgrepAuditMiddleware(Middleware): + """Emit a structured log record per ``agentgrep`` tool invocation. 
+ + Records carry ``agentgrep_tool``, ``agentgrep_outcome``, + ``agentgrep_duration_ms``, ``agentgrep_error_type`` (on failure), + ``agentgrep_client_id`` / ``agentgrep_request_id`` (when available), and + ``agentgrep_args_summary``. The logger name defaults to + ``agentgrep.audit`` so operators can route it independently of the + ``agentgrep`` library logger. + + Parameters + ---------- + logger_name : str + Name of the :mod:`logging` logger used for audit records. + """ + + def __init__(self, logger_name: str = "agentgrep.audit") -> None: + self._logger = logging.getLogger(logger_name) + + async def on_call_tool( + self, + context: MiddlewareContext[t.Any], + call_next: t.Callable[[MiddlewareContext[t.Any]], t.Awaitable[t.Any]], + ) -> t.Any: + """Wrap the tool call with a timer and emit one audit record.""" + start = time.monotonic() + tool_name = getattr(context.message, "name", "") + raw_args = getattr(context.message, "arguments", None) or {} + args_summary = _summarize_args(raw_args) + + client_id: str | None = None + request_id: str | None = None + if context.fastmcp_context is not None: + client_id = getattr(context.fastmcp_context, "client_id", None) + request_id = getattr(context.fastmcp_context, "request_id", None) + + try: + result = await call_next(context) + except Exception as exc: + duration_ms = (time.monotonic() - start) * 1000.0 + self._logger.info( + "tool call failed", + extra={ + "agentgrep_tool": tool_name, + "agentgrep_outcome": "error", + "agentgrep_error_type": type(exc).__name__, + "agentgrep_duration_ms": duration_ms, + "agentgrep_client_id": client_id, + "agentgrep_request_id": request_id, + "agentgrep_args_summary": args_summary, + }, + ) + raise + + duration_ms = (time.monotonic() - start) * 1000.0 + self._logger.info( + "tool call completed", + extra={ + "agentgrep_tool": tool_name, + "agentgrep_outcome": "ok", + "agentgrep_duration_ms": duration_ms, + "agentgrep_client_id": client_id, + "agentgrep_request_id": request_id, + 
"agentgrep_args_summary": args_summary, + }, + ) + return result diff --git a/src/agentgrep/mcp/server.py b/src/agentgrep/mcp/server.py index 2a15535..1b8dd95 100644 --- a/src/agentgrep/mcp/server.py +++ b/src/agentgrep/mcp/server.py @@ -3,13 +3,22 @@ from __future__ import annotations from fastmcp import FastMCP +from fastmcp.server.middleware.error_handling import ErrorHandlingMiddleware +from fastmcp.server.middleware.response_limiting import ResponseLimitingMiddleware +from fastmcp.server.middleware.timing import TimingMiddleware from agentgrep.mcp._library import SERVER_VERSION from agentgrep.mcp.instructions import _build_instructions +from agentgrep.mcp.middleware import AgentgrepAuditMiddleware from agentgrep.mcp.prompts import register_prompts from agentgrep.mcp.resources import register_resources from agentgrep.mcp.tools import register_tools +#: Byte ceiling for response truncation. Sized to fit a generous slice of +#: prompt/history records (a typical record is ~1 KB; 512 KB allows a few +#: hundred records before truncation fires). +DEFAULT_RESPONSE_LIMIT_BYTES = 512 * 1024 + def build_mcp_server() -> FastMCP: """Build and return the FastMCP server instance.""" @@ -17,6 +26,23 @@ def build_mcp_server() -> FastMCP: name="agentgrep", version=SERVER_VERSION, instructions=_build_instructions(), + # Middleware runs outermost-first. Order rationale: + # 1. TimingMiddleware — neutral observer; start clock early so + # timing captures middleware cost too. + # 2. ResponseLimitingMiddleware — bound the response before + # ErrorHandlingMiddleware can transform exceptions; keeps the + # size cap independent of error path. + # 3. ErrorHandlingMiddleware — transforms exceptions into proper + # MCP errors; sits outside Audit so failed-tool records still + # log the failure with structured extras. + # 4. AgentgrepAuditMiddleware — innermost log hook; records + # outcome=ok or outcome=error for every call. 
async def test_audit_middleware_emits_extras(
    tmp_path: pathlib.Path,
    monkeypatch: pytest.MonkeyPatch,
    caplog: pytest.LogCaptureFixture,
) -> None:
    """A successful ``find`` call leaves an ``ok`` audit record with a duration."""
    agentgrep_mcp = load_agentgrep_mcp_module()
    monkeypatch.setenv("HOME", str(tmp_path / "home"))
    arguments = {"pattern": "missing", "agent": "all", "limit": 5}

    with caplog.at_level(logging.INFO, logger="agentgrep.audit"):
        async with Client(agentgrep_mcp.build_mcp_server()) as client:
            _ = await client.call_tool("find", arguments)

    # Newest matching record wins, hence the reversed scan.
    latest = next(
        (
            entry
            for entry in reversed(caplog.records)
            if getattr(entry, "agentgrep_tool", None) == "find"
        ),
        None,
    )
    assert latest is not None, "expected at least one audit record for the find tool"
    assert getattr(latest, "agentgrep_outcome", None) == "ok"
    elapsed = t.cast("float", getattr(latest, "agentgrep_duration_ms", None))
    assert elapsed >= 0.0
def test_response_limit_middleware_is_wired() -> None:
    """The built server carries the response-size cap and the audit hook."""
    from fastmcp.server.middleware.response_limiting import ResponseLimitingMiddleware

    from agentgrep.mcp.middleware import AgentgrepAuditMiddleware

    server = load_agentgrep_mcp_module().build_mcp_server()
    installed = set(map(type, server.middleware))
    assert ResponseLimitingMiddleware in installed
    assert AgentgrepAuditMiddleware in installed
what: - list_stores / get_store_descriptor wrap the StoreCatalog (catalog_tools). - list_sources / filter_sources / summarize_discovery layer structured filters on top of discover_sources (discovery_tools). - validate_query exposes matches_text for cheap pre-flight regex checks (diagnostic_tools). - recent_sessions filters discovered sources by mtime (search_tools). - inspect_record_sample returns first-N records from a named adapter + path for schema validation (catalog_tools). - Each tool runs through asyncio.to_thread to keep the event loop unblocked. - Eleven new test cases plus the lockstep tool-count assertion bumped from 2 to 10. --- src/agentgrep/mcp/_library.py | 11 + src/agentgrep/mcp/models.py | 141 ++++++++++ src/agentgrep/mcp/resources.py | 13 +- src/agentgrep/mcp/tools/__init__.py | 9 +- src/agentgrep/mcp/tools/catalog_tools.py | 219 ++++++++++++++++ src/agentgrep/mcp/tools/diagnostic_tools.py | 82 ++++++ src/agentgrep/mcp/tools/discovery_tools.py | 144 +++++++++++ src/agentgrep/mcp/tools/search_tools.py | 61 +++++ tests/test_agentgrep_mcp.py | 272 +++++++++++++++++++- 9 files changed, 948 insertions(+), 4 deletions(-) create mode 100644 src/agentgrep/mcp/tools/catalog_tools.py create mode 100644 src/agentgrep/mcp/tools/diagnostic_tools.py diff --git a/src/agentgrep/mcp/_library.py b/src/agentgrep/mcp/_library.py index 4f01085..4beaf72 100644 --- a/src/agentgrep/mcp/_library.py +++ b/src/agentgrep/mcp/_library.py @@ -152,6 +152,17 @@ def serialize_source_handle( source: SourceHandleLike, ) -> dict[str, object]: ... + def matches_text( + self, + text: str, + query: object, + ) -> bool: ... + + def iter_source_records( + self, + source: SourceHandleLike, + ) -> t.Iterator[SearchRecordLike]: ... 
class StoreDescriptorModel(AgentGrepModel):
    """Catalog descriptor for one on-disk agent store.

    Returned by the ``list_stores`` and ``get_store_descriptor`` tools; the
    catalog tools build it from a library ``StoreDescriptor``.
    """

    # Defaults to the library's schema version constant.
    schema_version: str = agentgrep.SCHEMA_VERSION
    # Fixed discriminator; always "store".
    kind: t.Literal["store"] = "store"
    agent: t.Literal["codex", "claude", "cursor", "gemini"]
    store_id: str
    # ``role`` and ``format`` are plain strings here; the catalog tools fill
    # them from ``descriptor.role.value`` / ``descriptor.format.value``.
    role: str
    format: str
    path_pattern: str
    env_overrides: list[str] = Field(default_factory=list)
    platform_variants: dict[str, str] = Field(default_factory=dict)
    observed_version: str | None = None
    # ISO-8601 text (the catalog tools call ``isoformat()``), or None.
    observed_at: str | None = None
    upstream_ref: str | None = None
    schema_notes: str | None = None
    sample_record: str | None = None
    search_by_default: bool | None = None
    search_notes: str | None = None
    distinguishes_from: list[str] = Field(default_factory=list)


class ListStoresRequest(AgentGrepModel):
    """Validated list-stores request payload."""

    # "all" disables the per-agent filter.
    agent: AgentSelector = "all"
    # When set, compared for equality against a descriptor's role value.
    role_filter: str | None = None
    # When true, descriptors whose ``search_by_default`` is falsy are dropped.
    search_default_only: bool = False


class ListStoresResponse(AgentGrepModel):
    """Structured response for the MCP list_stores tool."""

    schema_version: str = agentgrep.SCHEMA_VERSION
    stores: list[StoreDescriptorModel]
    # Number of entries in ``stores`` as built by the list_stores helper.
    total: int
class ListSourcesRequest(AgentGrepModel):
    """Validated list-sources request payload."""

    agent: AgentSelector = "all"
    # Optional filters; None leaves the corresponding dimension unfiltered
    # (presumably -- the consuming tool is outside this view).
    path_kind_filter: t.Literal["history_file", "session_file", "sqlite_db"] | None = None
    source_kind_filter: t.Literal["json", "jsonl", "sqlite"] | None = None
    # No default cap; when given it must be at least 1.
    limit: int | None = Field(default=None, ge=1)


class ListSourcesResponse(AgentGrepModel):
    """Structured response for the MCP list_sources tool."""

    schema_version: str = agentgrep.SCHEMA_VERSION
    sources: list[SourceRecordModel]
    total: int


class FilterSourcesRequest(AgentGrepModel):
    """Validated filter-sources request payload."""

    # Required and non-empty.
    pattern: str = Field(min_length=1)
    agent: AgentSelector = "all"
    # Defaults to 50; when given it must be at least 1.
    limit: int | None = Field(default=50, ge=1)


class DiscoverySummaryRequest(AgentGrepModel):
    """Validated summarize-discovery request payload."""

    agent: AgentSelector = "all"


class DiscoverySummaryResponse(AgentGrepModel):
    """Aggregate counts of discovered sources."""

    schema_version: str = agentgrep.SCHEMA_VERSION
    total_sources: int
    # Count-per-label breakdowns of the discovered sources.
    sources_by_agent: dict[str, int]
    sources_by_format: dict[str, int]
    sources_by_kind: dict[str, int]


class ValidateQueryRequest(AgentGrepModel):
    """Validated validate-query request payload."""

    # At least one term is required.
    terms: list[str] = Field(min_length=1)
    regex: bool = False
    case_sensitive: bool = False
    any_term: bool = False
    # Text the query is tried against; required.
    sample_text: str


class ValidateQueryResponse(AgentGrepModel):
    """Result of a dry-run query validation."""

    schema_version: str = agentgrep.SCHEMA_VERSION
    matches: bool
    regex_valid: bool
    error_message: str | None = None


class RecentSessionsRequest(AgentGrepModel):
    """Validated recent-sessions request payload."""

    agent: AgentSelector = "all"
    # Look-back window in hours: 1 through 720 (24 * 30), default 24.
    hours: int = Field(default=24, ge=1, le=24 * 30)
    # Defaults to 10; when given it must be at least 1.
    limit: int | None = Field(default=10, ge=1)
class InspectSampleRequest(AgentGrepModel):
    """Validated inspect-record-sample request payload."""

    # Both identifiers are required and non-empty.
    adapter_id: str = Field(min_length=1)
    source_path: str = Field(min_length=1)
    # Number of records to read: 1 through 20, default 1.
    sample_size: int = Field(default=1, ge=1, le=20)


class InspectSampleResponse(AgentGrepModel):
    """Sample records read from one source."""

    schema_version: str = agentgrep.SCHEMA_VERSION
    adapter_id: str
    sample_count: int
    records: list[SearchRecordModel]
    # None by default; the catalog helper fills it when the source is not
    # found or reading it raised.
    error_message: str | None = None
def _descriptor_to_model(descriptor: t.Any) -> StoreDescriptorModel:
    """Translate a library ``StoreDescriptor`` into its MCP model.

    Enum-valued ``role`` / ``format`` are flattened to their ``.value``,
    ``observed_at`` becomes ISO text (or stays ``None``), and the collection
    fields are copied into fresh ``list`` / ``dict`` objects.
    """
    stamp = descriptor.observed_at
    fields: dict[str, t.Any] = {
        "agent": descriptor.agent,
        "store_id": descriptor.store_id,
        "role": descriptor.role.value,
        "format": descriptor.format.value,
        "path_pattern": descriptor.path_pattern,
        "env_overrides": list(descriptor.env_overrides),
        "platform_variants": dict(descriptor.platform_variants),
        "observed_version": descriptor.observed_version,
        "observed_at": None if stamp is None else stamp.isoformat(),
        "upstream_ref": descriptor.upstream_ref,
        "schema_notes": descriptor.schema_notes,
        "sample_record": descriptor.sample_record,
        "search_by_default": descriptor.search_by_default,
        "search_notes": descriptor.search_notes,
        "distinguishes_from": list(descriptor.distinguishes_from),
    }
    return StoreDescriptorModel(**fields)


def _list_stores_sync(request: ListStoresRequest) -> ListStoresResponse:
    """Return the catalog descriptors that pass every requested filter.

    A descriptor is kept when its agent matches (or ``agent`` is ``"all"``),
    its role value matches ``role_filter`` (when given), and, if
    ``search_default_only`` is set, it is searched by default.
    """

    def _is_selected(descriptor: t.Any) -> bool:
        agent_ok = request.agent == "all" or descriptor.agent == request.agent
        if not agent_ok:
            return False
        role_ok = request.role_filter is None or descriptor.role.value == request.role_filter
        if not role_ok:
            return False
        return bool(not request.search_default_only or descriptor.search_by_default)

    matching = [
        _descriptor_to_model(descriptor)
        for descriptor in CATALOG.stores
        if _is_selected(descriptor)
    ]
    return ListStoresResponse(stores=matching, total=len(matching))


def _get_store_descriptor_sync(request: GetStoreDescriptorRequest) -> StoreDescriptorModel:
    """Fetch one catalog descriptor by ``store_id``.

    Raises
    ------
    ToolError
        When the catalog lookup raises ``KeyError`` for the id.
    """
    store_id = request.store_id
    try:
        descriptor = CATALOG.by_id(store_id)
    except KeyError as exc:
        raise ToolError(f"unknown store_id: {store_id!r}") from exc
    else:
        return _descriptor_to_model(descriptor)


def _inspect_record_sample_sync(request: InspectSampleRequest) -> InspectSampleResponse:
    """Read up to ``sample_size`` records from one discovered source.

    The source is located among all discovered sources by adapter id plus
    resolved path. A missing source, or any error raised while reading, is
    reported through ``error_message`` with an empty record list rather than
    raised.
    """
    adapter_id = request.adapter_id
    wanted_path = pathlib.Path(request.source_path).expanduser().resolve()
    candidates = agentgrep.discover_sources(
        pathlib.Path.home(),
        agentgrep.AGENT_CHOICES,
        agentgrep.select_backends(),
    )

    target = None
    for candidate in candidates:
        # Compare the cheap adapter id first; resolve the path only on a hit.
        if candidate.adapter_id != adapter_id:
            continue
        if pathlib.Path(candidate.path).resolve() == wanted_path:
            target = candidate
            break

    if target is None:
        return InspectSampleResponse(
            adapter_id=adapter_id,
            sample_count=0,
            records=[],
            error_message="source not found",
        )

    try:
        # ``zip`` with a bounded ``range`` stops after ``sample_size`` records
        # without pulling an extra one from the record iterator.
        sampled = [
            SearchRecordModel.from_record(record)
            for _, record in zip(
                range(request.sample_size),
                agentgrep.iter_source_records(target),
            )
        ]
    except Exception as exc:
        return InspectSampleResponse(
            adapter_id=adapter_id,
            sample_count=0,
            records=[],
            error_message=f"{type(exc).__name__}: {exc}",
        )
    return InspectSampleResponse(
        adapter_id=adapter_id,
        sample_count=len(sampled),
        records=sampled,
    )
agent: t.Annotated[ + str, + Field( + default="all", + description="Filter to one agent or 'all' for every catalog entry.", + ), + ] = "all", + role_filter: t.Annotated[ + str | None, + Field( + default=None, + description="Filter to one StoreRole value (e.g. 'primary_chat').", + ), + ] = None, + search_default_only: t.Annotated[ + bool, + Field( + default=False, + description="Return only stores that are searched by default.", + ), + ] = False, + ) -> ListStoresResponse: + request = ListStoresRequest( + agent=t.cast("t.Any", agent), + role_filter=role_filter, + search_default_only=search_default_only, + ) + return await asyncio.to_thread(_list_stores_sync, request) + + _ = list_stores_tool + + @mcp.tool( + name="get_store_descriptor", + tags=READONLY_TAGS | {"catalog"}, + description="Return the catalog descriptor for a single store by id.", + ) + async def get_store_descriptor_tool( + store_id: t.Annotated[ + str, + Field( + min_length=1, + description="Store id (e.g. 'claude.projects.session').", + ), + ], + ) -> StoreDescriptorModel: + request = GetStoreDescriptorRequest(store_id=store_id) + return await asyncio.to_thread(_get_store_descriptor_sync, request) + + _ = get_store_descriptor_tool + + @mcp.tool( + name="inspect_record_sample", + tags=READONLY_TAGS | {"catalog"}, + description="Read the first N records from one adapter+path for schema inspection.", + ) + async def inspect_record_sample_tool( + adapter_id: t.Annotated[ + str, + Field( + min_length=1, + description="Adapter id (e.g. 
'claude.projects_jsonl.v1').", + ), + ], + source_path: t.Annotated[ + str, + Field( + min_length=1, + description="Absolute path to the source file.", + ), + ], + sample_size: t.Annotated[ + int, + Field( + default=1, + ge=1, + le=20, + description="Number of records to return (1-20).", + ), + ] = 1, + ) -> InspectSampleResponse: + request = InspectSampleRequest( + adapter_id=adapter_id, + source_path=source_path, + sample_size=sample_size, + ) + return await asyncio.to_thread(_inspect_record_sample_sync, request) + + _ = inspect_record_sample_tool diff --git a/src/agentgrep/mcp/tools/diagnostic_tools.py b/src/agentgrep/mcp/tools/diagnostic_tools.py new file mode 100644 index 0000000..efd9132 --- /dev/null +++ b/src/agentgrep/mcp/tools/diagnostic_tools.py @@ -0,0 +1,82 @@ +"""Diagnostic-domain MCP tools.""" + +from __future__ import annotations + +import asyncio +import re +import typing as t + +from pydantic import Field + +from agentgrep.mcp._library import READONLY_TAGS, agentgrep +from agentgrep.mcp.models import ValidateQueryRequest, ValidateQueryResponse + +if t.TYPE_CHECKING: + from fastmcp import FastMCP + + +def _validate_query_sync(request: ValidateQueryRequest) -> ValidateQueryResponse: + """Dry-run a ``SearchQuery`` against sample text without searching files.""" + query = agentgrep.SearchQuery( + terms=tuple(request.terms), + search_type="all", + any_term=request.any_term, + regex=request.regex, + case_sensitive=request.case_sensitive, + agents=agentgrep.AGENT_CHOICES, + limit=None, + ) + try: + matches = agentgrep.matches_text(request.sample_text, query) + except re.error as exc: + return ValidateQueryResponse( + matches=False, + regex_valid=False, + error_message=str(exc), + ) + return ValidateQueryResponse(matches=matches, regex_valid=True) + + +def register(mcp: FastMCP) -> None: + """Register diagnostic-domain tools.""" + + @mcp.tool( + name="validate_query", + tags=READONLY_TAGS | {"diagnostic"}, + description="Dry-run a query against sample 
text without searching files.", + ) + async def validate_query_tool( + terms: t.Annotated[ + list[str], + Field( + min_length=1, + description="One or more literal or regex search terms.", + ), + ], + sample_text: t.Annotated[ + str, + Field(description="Sample text to test the query against."), + ], + regex: t.Annotated[ + bool, + Field(description="Treat terms as regular expressions."), + ] = False, + case_sensitive: t.Annotated[ + bool, + Field(description="Perform case-sensitive matching."), + ] = False, + any_term: t.Annotated[ + bool, + Field(description="Match any term instead of requiring all terms."), + ] = False, + ) -> ValidateQueryResponse: + request = ValidateQueryRequest( + terms=terms, + sample_text=sample_text, + regex=regex, + case_sensitive=case_sensitive, + any_term=any_term, + ) + return await asyncio.to_thread(_validate_query_sync, request) + + _ = validate_query_tool diff --git a/src/agentgrep/mcp/tools/discovery_tools.py b/src/agentgrep/mcp/tools/discovery_tools.py index 51c57ab..3dc01dd 100644 --- a/src/agentgrep/mcp/tools/discovery_tools.py +++ b/src/agentgrep/mcp/tools/discovery_tools.py @@ -3,6 +3,7 @@ from __future__ import annotations import asyncio +import collections import pathlib import typing as t @@ -15,10 +16,16 @@ normalize_agent_selection, ) from agentgrep.mcp.models import ( + DiscoverySummaryRequest, + DiscoverySummaryResponse, + FilterSourcesRequest, FindRecordModel, FindRequestModel, FindToolQuery, FindToolResponse, + ListSourcesRequest, + ListSourcesResponse, + SourceRecordModel, ) if t.TYPE_CHECKING: @@ -43,6 +50,70 @@ def _find_sync(request: FindRequestModel) -> FindToolResponse: ) +def _list_sources_sync(request: ListSourcesRequest) -> ListSourcesResponse: + """Build a structured list of discovered sources.""" + backends = agentgrep.select_backends() + sources = agentgrep.discover_sources( + pathlib.Path.home(), + normalize_agent_selection(request.agent), + backends, + ) + filtered: list[SourceRecordModel] = [] + for 
source in sources: + if request.path_kind_filter is not None and source.path_kind != request.path_kind_filter: + continue + if ( + request.source_kind_filter is not None + and source.source_kind != request.source_kind_filter + ): + continue + filtered.append(SourceRecordModel.from_source(source)) + if request.limit is not None and len(filtered) >= request.limit: + break + return ListSourcesResponse(sources=filtered, total=len(filtered)) + + +def _filter_sources_sync(request: FilterSourcesRequest) -> FindToolResponse: + """Run the find pipeline with the requested pattern.""" + records = agentgrep.run_find_query( + pathlib.Path.home(), + normalize_agent_selection(request.agent), + pattern=request.pattern, + limit=request.limit, + ) + return FindToolResponse( + query=FindToolQuery( + pattern=request.pattern, + agent=request.agent, + limit=request.limit, + ), + results=[FindRecordModel.from_record(record) for record in records], + ) + + +def _summarize_discovery_sync(request: DiscoverySummaryRequest) -> DiscoverySummaryResponse: + """Aggregate counts of discovered sources by agent/format/path-kind.""" + backends = agentgrep.select_backends() + sources = agentgrep.discover_sources( + pathlib.Path.home(), + normalize_agent_selection(request.agent), + backends, + ) + by_agent: collections.Counter[str] = collections.Counter() + by_format: collections.Counter[str] = collections.Counter() + by_kind: collections.Counter[str] = collections.Counter() + for source in sources: + by_agent[source.agent] += 1 + by_format[source.source_kind] += 1 + by_kind[source.path_kind] += 1 + return DiscoverySummaryResponse( + total_sources=len(sources), + sources_by_agent=dict(by_agent), + sources_by_format=dict(by_format), + sources_by_kind=dict(by_kind), + ) + + def register(mcp: FastMCP) -> None: """Register discovery-domain tools.""" @@ -76,3 +147,76 @@ async def find_tool( return await asyncio.to_thread(_find_sync, request) _ = find_tool + + @mcp.tool( + name="list_sources", + 
tags=READONLY_TAGS | {"discovery"}, + description="List discovered sources with structured path-kind/source-kind filters.", + ) + async def list_sources_tool( + agent: t.Annotated[ + AgentSelector, + Field(description="Limit discovery to one agent or scan every agent."), + ] = "all", + path_kind_filter: t.Annotated[ + t.Literal["history_file", "session_file", "sqlite_db"] | None, + Field(default=None, description="Filter by path kind."), + ] = None, + source_kind_filter: t.Annotated[ + t.Literal["json", "jsonl", "sqlite"] | None, + Field(default=None, description="Filter by on-disk source kind."), + ] = None, + limit: t.Annotated[ + int | None, + Field(default=None, ge=1, description="Maximum number of sources to return."), + ] = None, + ) -> ListSourcesResponse: + request = ListSourcesRequest( + agent=agent, + path_kind_filter=path_kind_filter, + source_kind_filter=source_kind_filter, + limit=limit, + ) + return await asyncio.to_thread(_list_sources_sync, request) + + _ = list_sources_tool + + @mcp.tool( + name="filter_sources", + tags=READONLY_TAGS | {"discovery"}, + description="Filter discovered sources by required substring pattern.", + ) + async def filter_sources_tool( + pattern: t.Annotated[ + str, + Field(min_length=1, description="Required substring pattern."), + ], + agent: t.Annotated[ + AgentSelector, + Field(description="Limit discovery to one agent or scan every agent."), + ] = "all", + limit: t.Annotated[ + int | None, + Field(default=50, ge=1, description="Maximum number of sources to return."), + ] = 50, + ) -> FindToolResponse: + request = FilterSourcesRequest(pattern=pattern, agent=agent, limit=limit) + return await asyncio.to_thread(_filter_sources_sync, request) + + _ = filter_sources_tool + + @mcp.tool( + name="summarize_discovery", + tags=READONLY_TAGS | {"discovery"}, + description="Aggregate counts of discovered sources by agent, format, and kind.", + ) + async def summarize_discovery_tool( + agent: t.Annotated[ + AgentSelector, + 
Field(description="Limit discovery to one agent or scan every agent."), + ] = "all", + ) -> DiscoverySummaryResponse: + request = DiscoverySummaryRequest(agent=agent) + return await asyncio.to_thread(_summarize_discovery_sync, request) + + _ = summarize_discovery_tool diff --git a/src/agentgrep/mcp/tools/search_tools.py b/src/agentgrep/mcp/tools/search_tools.py index 00fe2fc..9ac4922 100644 --- a/src/agentgrep/mcp/tools/search_tools.py +++ b/src/agentgrep/mcp/tools/search_tools.py @@ -3,7 +3,9 @@ from __future__ import annotations import asyncio +import datetime import pathlib +import time import typing as t from pydantic import Field @@ -16,10 +18,13 @@ normalize_agent_selection, ) from agentgrep.mcp.models import ( + RecentSessionsRequest, + RecentSessionsResponse, SearchRecordModel, SearchRequestModel, SearchToolQuery, SearchToolResponse, + SourceRecordModel, ) if t.TYPE_CHECKING: @@ -52,6 +57,29 @@ def _search_sync(request: SearchRequestModel) -> SearchToolResponse: ) +def _recent_sessions_sync(request: RecentSessionsRequest) -> RecentSessionsResponse: + """Return recently modified sources sorted newest-first.""" + backends = agentgrep.select_backends() + sources = agentgrep.discover_sources( + pathlib.Path.home(), + normalize_agent_selection(request.agent), + backends, + ) + cutoff_ns = time.time_ns() - request.hours * 3600 * 1_000_000_000 + recent = [source for source in sources if source.mtime_ns >= cutoff_ns] + recent.sort(key=lambda s: s.mtime_ns, reverse=True) + if request.limit is not None: + recent = recent[: request.limit] + cutoff_iso = datetime.datetime.fromtimestamp( + cutoff_ns / 1_000_000_000, + tz=datetime.UTC, + ).isoformat() + return RecentSessionsResponse( + cutoff_iso=cutoff_iso, + sources=[SourceRecordModel.from_source(source) for source in recent], + ) + + def register(mcp: FastMCP) -> None: """Register search-domain tools.""" @@ -109,3 +137,36 @@ async def search_tool( return await asyncio.to_thread(_search_sync, request) _ = search_tool + 
+ @mcp.tool( + name="recent_sessions", + tags=READONLY_TAGS | {"search"}, + description="Return sources modified in the last N hours, newest-first.", + ) + async def recent_sessions_tool( + agent: t.Annotated[ + AgentSelector, + Field(description="Limit discovery to one agent or scan every agent."), + ] = "all", + hours: t.Annotated[ + int, + Field( + default=24, + ge=1, + le=24 * 30, + description="Look back this many hours (max 30 days).", + ), + ] = 24, + limit: t.Annotated[ + int | None, + Field( + default=10, + ge=1, + description="Maximum number of sources to return.", + ), + ] = 10, + ) -> RecentSessionsResponse: + request = RecentSessionsRequest(agent=agent, hours=hours, limit=limit) + return await asyncio.to_thread(_recent_sessions_sync, request) + + _ = recent_sessions_tool diff --git a/tests/test_agentgrep_mcp.py b/tests/test_agentgrep_mcp.py index da26828..665fce0 100644 --- a/tests/test_agentgrep_mcp.py +++ b/tests/test_agentgrep_mcp.py @@ -109,6 +109,21 @@ def extract_resource_text(contents: object) -> str: return items[0].text or "" +class ToolResultLike(t.Protocol): + """Minimal MCP tool-call result surface for response decoding.""" + + content: object + + +def tool_payload(result: object) -> dict[str, t.Any]: + """Decode a FastMCP tool result's JSON body into a dict.""" + typed = t.cast("ToolResultLike", result) + content = t.cast("cabc.Sequence[ResourceTextLike]", typed.content) + assert content + text = content[0].text or "" + return t.cast("dict[str, t.Any]", json.loads(text)) + + async def test_mcp_lists_tools_resources_prompts_and_templates() -> None: agentgrep_mcp = load_agentgrep_mcp_module() @@ -121,7 +136,18 @@ async def test_mcp_lists_tools_resources_prompts_and_templates() -> None: await client.list_resource_templates(), ) - assert {tool.name for tool in tools} == {"search", "find"} + assert {tool.name for tool in tools} == { + "search", + "find", + "list_sources", + "filter_sources", + "summarize_discovery", + "list_stores", + 
"get_store_descriptor", + "inspect_record_sample", + "validate_query", + "recent_sessions", + } assert any(str(resource.uri) == "agentgrep://capabilities" for resource in resources) assert any(str(resource.uri) == "agentgrep://sources" for resource in resources) assert any(prompt.name == "search_prompts" for prompt in prompts) @@ -222,7 +248,10 @@ async def test_mcp_capabilities_resource_reports_read_only() -> None: data = t.cast("dict[str, object]", json.loads(text)) assert data["read_only"] is True - assert data["tools"] == ["search", "find"] + tools_advertised = t.cast("list[str]", data["tools"]) + assert "search" in tools_advertised + assert "find" in tools_advertised + assert "list_stores" in tools_advertised prompts = t.cast("list[str]", data["prompts"]) assert "search_history" in prompts @@ -358,3 +387,242 @@ def test_mcp_instructions_carry_every_segment_header() -> None: "Privacy:", ): assert marker in rendered, marker + + +async def test_mcp_list_stores_returns_catalog_entries() -> None: + """``list_stores`` enumerates the StoreCatalog.""" + agentgrep_mcp = load_agentgrep_mcp_module() + + async with Client(agentgrep_mcp.build_mcp_server()) as client: + result = await client.call_tool("list_stores", {"agent": "all"}) + + data = tool_payload(result) + assert data["total"] >= 10 + assert {s["agent"] for s in data["stores"]} >= {"codex", "claude", "cursor", "gemini"} + + +async def test_mcp_list_stores_filters_by_agent() -> None: + """``list_stores`` respects the ``agent`` filter.""" + agentgrep_mcp = load_agentgrep_mcp_module() + + async with Client(agentgrep_mcp.build_mcp_server()) as client: + result = await client.call_tool("list_stores", {"agent": "cursor"}) + + data = tool_payload(result) + assert data["total"] >= 1 + assert {s["agent"] for s in data["stores"]} == {"cursor"} + + +async def test_mcp_get_store_descriptor_known_and_unknown() -> None: + """``get_store_descriptor`` returns one entry or raises for unknown ids.""" + from fastmcp.exceptions 
import ToolError + + agentgrep_mcp = load_agentgrep_mcp_module() + + async with Client(agentgrep_mcp.build_mcp_server()) as client: + ok = await client.call_tool( + "get_store_descriptor", + {"store_id": "claude.projects.session"}, + ) + try: + _ = await client.call_tool( + "get_store_descriptor", + {"store_id": "definitely.not.a.real.store"}, + ) + except ToolError as exc: + error_message = str(exc) + else: + error_message = "" + + data = tool_payload(ok) + assert data["store_id"] == "claude.projects.session" + assert error_message and "definitely.not.a.real.store" in error_message + + +async def test_mcp_list_sources_with_filters( + tmp_path: pathlib.Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """``list_sources`` honors path_kind_filter.""" + agentgrep_mcp = load_agentgrep_mcp_module() + home = tmp_path / "home" + monkeypatch.setenv("HOME", str(home)) + + state_db = home / ".cursor" / "state.vscdb" + state_db.parent.mkdir(parents=True, exist_ok=True) + state_db.touch() + history_path = home / ".codex" / "history.json" + history_path.parent.mkdir(parents=True, exist_ok=True) + _ = history_path.write_text("[]", encoding="utf-8") + + async with Client(agentgrep_mcp.build_mcp_server()) as client: + result = await client.call_tool( + "list_sources", + {"path_kind_filter": "sqlite_db"}, + ) + + data = tool_payload(result) + assert data["total"] >= 1 + assert all(s["path_kind"] == "sqlite_db" for s in data["sources"]) + + +async def test_mcp_filter_sources_requires_pattern() -> None: + """``filter_sources`` rejects an empty pattern at the validation layer.""" + from fastmcp.exceptions import ToolError + + agentgrep_mcp = load_agentgrep_mcp_module() + + async with Client(agentgrep_mcp.build_mcp_server()) as client: + try: + _ = await client.call_tool("filter_sources", {"pattern": ""}) + except ToolError as exc: + error_message = str(exc) + else: + error_message = "" + + assert error_message # validation should refuse the empty pattern + + +async def 
test_mcp_summarize_discovery_totals_match_list_sources( + tmp_path: pathlib.Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """``summarize_discovery.total_sources`` equals ``list_sources.total``.""" + agentgrep_mcp = load_agentgrep_mcp_module() + home = tmp_path / "home" + monkeypatch.setenv("HOME", str(home)) + + state_db = home / ".cursor" / "state.vscdb" + state_db.parent.mkdir(parents=True, exist_ok=True) + state_db.touch() + + async with Client(agentgrep_mcp.build_mcp_server()) as client: + summary = await client.call_tool("summarize_discovery", {}) + listing = await client.call_tool("list_sources", {}) + + summary_data = tool_payload(summary) + listing_data = tool_payload(listing) + assert summary_data["total_sources"] == listing_data["total"] + + +async def test_mcp_validate_query_invalid_regex() -> None: + """``validate_query`` reports ``regex_valid=False`` on unclosed character classes.""" + agentgrep_mcp = load_agentgrep_mcp_module() + + async with Client(agentgrep_mcp.build_mcp_server()) as client: + result = await client.call_tool( + "validate_query", + { + "terms": ["[unclosed"], + "regex": True, + "sample_text": "anything", + }, + ) + + data = tool_payload(result) + assert data["regex_valid"] is False + assert data["matches"] is False + assert data["error_message"] + + +async def test_mcp_validate_query_substring_match() -> None: + """``validate_query`` returns ``matches=True`` for a literal hit.""" + agentgrep_mcp = load_agentgrep_mcp_module() + + async with Client(agentgrep_mcp.build_mcp_server()) as client: + result = await client.call_tool( + "validate_query", + {"terms": ["foo"], "sample_text": "foobar baz"}, + ) + + data = tool_payload(result) + assert data["regex_valid"] is True + assert data["matches"] is True + + +async def test_mcp_recent_sessions_filters_by_mtime( + tmp_path: pathlib.Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Sources older than ``hours`` are excluded.""" + import os + + agentgrep_mcp = 
load_agentgrep_mcp_module() + home = tmp_path / "home" + monkeypatch.setenv("HOME", str(home)) + + state_db = home / ".cursor" / "state.vscdb" + state_db.parent.mkdir(parents=True, exist_ok=True) + state_db.touch() + # Backdate the file to 48 hours ago. + old = state_db.stat().st_mtime - (48 * 3600) + os.utime(state_db, (old, old)) + + async with Client(agentgrep_mcp.build_mcp_server()) as client: + recent = await client.call_tool("recent_sessions", {"hours": 24}) + broad = await client.call_tool("recent_sessions", {"hours": 24 * 7}) + + recent_data = tool_payload(recent) + broad_data = tool_payload(broad) + # Paths come back with the home directory collapsed to '~', so compare + # by suffix rather than by the absolute tmp_path string. + suffix = ".cursor/state.vscdb" + assert not any(s["path"].endswith(suffix) for s in recent_data["sources"]) + assert any(s["path"].endswith(suffix) for s in broad_data["sources"]) + _ = state_db # quiet F841 — kept for readability of the test setup + + +async def test_mcp_inspect_record_sample_unknown_path( + tmp_path: pathlib.Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """An unknown adapter+path returns an error_message and no records.""" + agentgrep_mcp = load_agentgrep_mcp_module() + home = tmp_path / "home" + monkeypatch.setenv("HOME", str(home)) + + async with Client(agentgrep_mcp.build_mcp_server()) as client: + result = await client.call_tool( + "inspect_record_sample", + { + "adapter_id": "codex.history_json.v1", + "source_path": str(tmp_path / "no_such_file.json"), + "sample_size": 1, + }, + ) + + data = tool_payload(result) + assert data["sample_count"] == 0 + assert data["records"] == [] + assert data["error_message"] == "source not found" + + +async def test_mcp_inspect_record_sample_returns_codex_history( + tmp_path: pathlib.Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """A known codex history file yields parsed sample records.""" + agentgrep_mcp = load_agentgrep_mcp_module() + home = tmp_path / 
"home" + monkeypatch.setenv("HOME", str(home)) + + history_path = home / ".codex" / "history.json" + history_path.parent.mkdir(parents=True, exist_ok=True) + _ = history_path.write_text( + json.dumps([{"command": "echo alpha", "timestamp": "2026-01-01T00:00:00Z"}]), + encoding="utf-8", + ) + + async with Client(agentgrep_mcp.build_mcp_server()) as client: + result = await client.call_tool( + "inspect_record_sample", + { + "adapter_id": "codex.history_json.v1", + "source_path": str(history_path), + "sample_size": 1, + }, + ) + + data = tool_payload(result) + assert data["error_message"] is None + assert data["sample_count"] >= 1 From 48c8947f647cbf3c05f75a93d43ae58dbeb554fb Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 17 May 2026 21:29:16 -0500 Subject: [PATCH 05/16] agentgrep(feat[mcp]): Resources for catalog, store-roles, store-formats why: Clients that want to reason about the on-disk layout without spawning a search shouldn't have to scrape the docs site. Surfacing the StoreCatalog plus its supporting enums as resources gives agents a self-describing data dictionary. what: - agentgrep://catalog returns the full StoreCatalog model. - agentgrep://store-roles and agentgrep://store-formats enumerate enum members with one-line descriptions. - CapabilitiesModel.resources is updated; the lockstep test covers the new URIs. 
--- src/agentgrep/mcp/resources.py | 84 ++++++++++++++++++++++++++++++++++ tests/test_agentgrep_mcp.py | 58 +++++++++++++++++++++++ 2 files changed, 142 insertions(+) diff --git a/src/agentgrep/mcp/resources.py b/src/agentgrep/mcp/resources.py index 02ae929..26441a8 100644 --- a/src/agentgrep/mcp/resources.py +++ b/src/agentgrep/mcp/resources.py @@ -2,6 +2,7 @@ from __future__ import annotations +import json import pathlib import typing as t @@ -19,10 +20,39 @@ SourceListAdapter, SourceRecordModel, ) +from agentgrep.store_catalog import CATALOG +from agentgrep.stores import StoreFormat, StoreRole if t.TYPE_CHECKING: from fastmcp import FastMCP +#: One-line descriptions for each :class:`StoreRole` value. Kept here rather +#: than on the enum so the wording can be tuned for MCP consumers without +#: touching the library surface. +_ROLE_DESCRIPTIONS: dict[str, str] = { + "primary_chat": "Primary conversation transcript for an agent.", + "supplementary_chat": "Secondary chat (e.g. composer, side panel).", + "prompt_history": "User-issued prompt history outside a session log.", + "persistent_memory": "Cross-session memory or notes the agent retains.", + "plan": "Plan-style step list the agent generated or maintains.", + "todo": "Task list or todo store driven by the agent.", + "app_state": "Application state (settings, UI, caches that aren't chat).", + "cache": "Throwaway caches; usually not search-by-default.", + "source_tree": "Source-tree snapshot or workspace index.", + "unknown": "Role not yet classified.", +} + +#: One-line descriptions for each :class:`StoreFormat` value. 
+_FORMAT_DESCRIPTIONS: dict[str, str] = { + "jsonl": "JSON Lines: one object per line.", + "json_array": "Single JSON array of records.", + "json_object": "Single JSON object holding records at known keys.", + "sqlite": "SQLite database opened read-only.", + "md_frontmatter": "Markdown with YAML/JSON frontmatter blocks.", + "protobuf": "Binary protobuf payload.", + "opaque": "Format not parsed by agentgrep.", +} + def list_source_models(agent: AgentSelector = "all") -> list[SourceRecordModel]: """Return discovered sources as typed MCP payloads.""" @@ -58,6 +88,9 @@ def build_capabilities() -> CapabilitiesModel: "agentgrep://capabilities", "agentgrep://sources", "agentgrep://sources/{agent}", + "agentgrep://catalog", + "agentgrep://store-roles", + "agentgrep://store-formats", ], prompts=["search_prompts", "search_history", "inspect_stores"], backends=BackendAvailabilityModel( @@ -110,3 +143,54 @@ def sources_by_agent_resource(agent: str) -> str: return SourceListAdapter.dump_json(list_source_models(selected_agent)).decode("utf-8") _ = sources_by_agent_resource + + @mcp.resource( + "agentgrep://catalog", + name="agentgrep_catalog", + description="Full StoreCatalog: every known store with role, format, and notes.", + mime_type="application/json", + tags=READONLY_TAGS | {"catalog"}, + annotations=RESOURCE_ANNOTATIONS, + ) + def catalog_resource() -> str: + return CATALOG.model_dump_json(indent=2) + + _ = catalog_resource + + @mcp.resource( + "agentgrep://store-roles", + name="agentgrep_store_roles", + description="StoreRole enum members with one-line descriptions.", + mime_type="application/json", + tags=READONLY_TAGS | {"catalog"}, + annotations=RESOURCE_ANNOTATIONS, + ) + def store_roles_resource() -> str: + rows = [ + {"name": role.name, "value": role.value, "description": _ROLE_DESCRIPTIONS[role.value]} + for role in StoreRole + ] + return json.dumps(rows, indent=2) + + _ = store_roles_resource + + @mcp.resource( + "agentgrep://store-formats", + 
name="agentgrep_store_formats", + description="StoreFormat enum members with one-line descriptions.", + mime_type="application/json", + tags=READONLY_TAGS | {"catalog"}, + annotations=RESOURCE_ANNOTATIONS, + ) + def store_formats_resource() -> str: + rows = [ + { + "name": fmt.name, + "value": fmt.value, + "description": _FORMAT_DESCRIPTIONS[fmt.value], + } + for fmt in StoreFormat + ] + return json.dumps(rows, indent=2) + + _ = store_formats_resource diff --git a/tests/test_agentgrep_mcp.py b/tests/test_agentgrep_mcp.py index 665fce0..acfa1ad 100644 --- a/tests/test_agentgrep_mcp.py +++ b/tests/test_agentgrep_mcp.py @@ -626,3 +626,61 @@ async def test_mcp_inspect_record_sample_returns_codex_history( data = tool_payload(result) assert data["error_message"] is None assert data["sample_count"] >= 1 + + +async def test_mcp_catalog_resource_returns_full_catalog() -> None: + """``agentgrep://catalog`` returns the StoreCatalog payload.""" + agentgrep_mcp = load_agentgrep_mcp_module() + + async with Client(agentgrep_mcp.build_mcp_server()) as client: + text = extract_resource_text(await client.read_resource("agentgrep://catalog")) + + data = t.cast("dict[str, t.Any]", json.loads(text)) + assert "stores" in data + assert len(data["stores"]) >= 10 + store_ids = {s["store_id"] for s in data["stores"]} + assert "claude.projects.session" in store_ids + + +async def test_mcp_store_roles_resource() -> None: + """``agentgrep://store-roles`` lists every StoreRole with a description.""" + agentgrep_mcp = load_agentgrep_mcp_module() + + async with Client(agentgrep_mcp.build_mcp_server()) as client: + text = extract_resource_text(await client.read_resource("agentgrep://store-roles")) + + rows = t.cast("list[dict[str, str]]", json.loads(text)) + values = {row["value"] for row in rows} + assert "primary_chat" in values + assert "prompt_history" in values + assert all(row["description"] for row in rows) + + +async def test_mcp_store_formats_resource() -> None: + 
"""``agentgrep://store-formats`` lists every StoreFormat with a description.""" + agentgrep_mcp = load_agentgrep_mcp_module() + + async with Client(agentgrep_mcp.build_mcp_server()) as client: + text = extract_resource_text(await client.read_resource("agentgrep://store-formats")) + + rows = t.cast("list[dict[str, str]]", json.loads(text)) + values = {row["value"] for row in rows} + assert "jsonl" in values + assert "sqlite" in values + assert all(row["description"] for row in rows) + + +async def test_mcp_capabilities_advertises_new_resources() -> None: + """The capabilities resource must list the three new resource URIs.""" + agentgrep_mcp = load_agentgrep_mcp_module() + + async with Client(agentgrep_mcp.build_mcp_server()) as client: + text = extract_resource_text(await client.read_resource("agentgrep://capabilities")) + + data = t.cast("dict[str, t.Any]", json.loads(text)) + advertised = set(data["resources"]) + assert { + "agentgrep://catalog", + "agentgrep://store-roles", + "agentgrep://store-formats", + } <= advertised From f582fa94b203968950c015e21b0f87cbbae78931 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 17 May 2026 21:34:37 -0500 Subject: [PATCH 06/16] agentgrep(docs[mcp]): Mirror new tools in fastmcp shim why: sphinx_autodoc_fastmcp introspects the docs-only shim, not the live server, so new tools stayed invisible to the docs site until we mirror their signatures and metadata here. what: - Add docs-only shims for the eight new tools (list_stores, get_store_descriptor, list_sources, filter_sources, summarize_discovery, validate_query, recent_sessions, inspect_record_sample). - Each shim carries the Pydantic Field annotations the live tool uses, plus examples. - Update fastmcp_model_classes in docs/conf.py with the new request and response models, and add Catalog/Diagnostic to the section badge map. - Add the new directives to docs/mcp/tools.md, picking H2 titles that don't collide with the tool slug anchors. 
- Document the three new resources in docs/mcp/resources.md. --- docs/_ext/agentgrep_fastmcp.py | 258 +++++++++++++++++++++++++++++++++ docs/conf.py | 17 +++ docs/mcp/resources.md | 21 +++ docs/mcp/tools.md | 78 ++++++++++ 4 files changed, 374 insertions(+) diff --git a/docs/_ext/agentgrep_fastmcp.py b/docs/_ext/agentgrep_fastmcp.py index 24a0d99..e9bee1e 100644 --- a/docs/_ext/agentgrep_fastmcp.py +++ b/docs/_ext/agentgrep_fastmcp.py @@ -19,6 +19,15 @@ SearchToolResponse, SearchTypeName, ) +from agentgrep.mcp.models import ( + DiscoverySummaryResponse, + InspectSampleResponse, + ListSourcesResponse, + ListStoresResponse, + RecentSessionsResponse, + StoreDescriptorModel, + ValidateQueryResponse, +) READONLY_TAGS = {"readonly", "agentgrep"} DOCS_ONLY_MESSAGE = "Documentation signature only." @@ -104,3 +113,252 @@ async def find( tags=READONLY_TAGS | {"discovery"}, annotations=None, ) + + +async def list_stores( + agent: t.Annotated[ + str, + Field( + default="all", + description="Filter to one agent or 'all' for every catalog entry.", + examples=["all", "claude", "cursor"], + ), + ] = "all", + role_filter: t.Annotated[ + str | None, + Field( + default=None, + description="Filter to one StoreRole value (e.g. 'primary_chat').", + examples=["primary_chat", "prompt_history"], + ), + ] = None, + search_default_only: t.Annotated[ + bool, + Field( + default=False, + description="Return only stores that are searched by default.", + ), + ] = False, +) -> ListStoresResponse: + """List on-disk agent stores from the agentgrep catalog.""" + raise NotImplementedError(DOCS_ONLY_MESSAGE) + + +t.cast(t.Any, list_stores).__fastmcp__ = types.SimpleNamespace( + name="list_stores", + title="List Stores", + tags=READONLY_TAGS | {"catalog"}, + annotations=None, +) + + +async def get_store_descriptor( + store_id: t.Annotated[ + str, + Field( + min_length=1, + description="Store id (e.g. 
'claude.projects.session').", + examples=["claude.projects.session", "codex.history"], + ), + ], +) -> StoreDescriptorModel: + """Return the catalog descriptor for a single store by id.""" + raise NotImplementedError(DOCS_ONLY_MESSAGE) + + +t.cast(t.Any, get_store_descriptor).__fastmcp__ = types.SimpleNamespace( + name="get_store_descriptor", + title="Get Store Descriptor", + tags=READONLY_TAGS | {"catalog"}, + annotations=None, +) + + +async def inspect_record_sample( + adapter_id: t.Annotated[ + str, + Field( + min_length=1, + description="Adapter id (e.g. 'claude.projects_jsonl.v1').", + examples=["claude.projects_jsonl.v1", "codex.history_json.v1"], + ), + ], + source_path: t.Annotated[ + str, + Field( + min_length=1, + description="Absolute path to the source file.", + ), + ], + sample_size: t.Annotated[ + int, + Field( + default=1, + ge=1, + le=20, + description="Number of records to return (1-20).", + ), + ] = 1, +) -> InspectSampleResponse: + """Read the first N records from one adapter+path for schema inspection.""" + raise NotImplementedError(DOCS_ONLY_MESSAGE) + + +t.cast(t.Any, inspect_record_sample).__fastmcp__ = types.SimpleNamespace( + name="inspect_record_sample", + title="Inspect Record Sample", + tags=READONLY_TAGS | {"catalog"}, + annotations=None, +) + + +async def list_sources( + agent: t.Annotated[ + AgentSelector, + Field(description="Limit discovery to one agent or scan every agent."), + ] = "all", + path_kind_filter: t.Annotated[ + t.Literal["history_file", "session_file", "sqlite_db"] | None, + Field(default=None, description="Filter by path kind."), + ] = None, + source_kind_filter: t.Annotated[ + t.Literal["json", "jsonl", "sqlite"] | None, + Field(default=None, description="Filter by on-disk source kind."), + ] = None, + limit: t.Annotated[ + int | None, + Field(default=None, ge=1, description="Maximum number of sources to return."), + ] = None, +) -> ListSourcesResponse: + """List discovered sources with structured 
path-kind/source-kind filters.""" + raise NotImplementedError(DOCS_ONLY_MESSAGE) + + +t.cast(t.Any, list_sources).__fastmcp__ = types.SimpleNamespace( + name="list_sources", + title="List Sources", + tags=READONLY_TAGS | {"discovery"}, + annotations=None, +) + + +async def filter_sources( + pattern: t.Annotated[ + str, + Field( + min_length=1, + description="Required substring pattern.", + examples=["state", ".jsonl"], + ), + ], + agent: t.Annotated[ + AgentSelector, + Field(description="Limit discovery to one agent or scan every agent."), + ] = "all", + limit: t.Annotated[ + int | None, + Field(default=50, ge=1, description="Maximum number of sources to return."), + ] = 50, +) -> FindToolResponse: + """Filter discovered sources by required substring pattern.""" + raise NotImplementedError(DOCS_ONLY_MESSAGE) + + +t.cast(t.Any, filter_sources).__fastmcp__ = types.SimpleNamespace( + name="filter_sources", + title="Filter Sources", + tags=READONLY_TAGS | {"discovery"}, + annotations=None, +) + + +async def summarize_discovery( + agent: t.Annotated[ + AgentSelector, + Field(description="Limit discovery to one agent or scan every agent."), + ] = "all", +) -> DiscoverySummaryResponse: + """Aggregate counts of discovered sources by agent, format, and kind.""" + raise NotImplementedError(DOCS_ONLY_MESSAGE) + + +t.cast(t.Any, summarize_discovery).__fastmcp__ = types.SimpleNamespace( + name="summarize_discovery", + title="Summarize Discovery", + tags=READONLY_TAGS | {"discovery"}, + annotations=None, +) + + +async def validate_query( + terms: t.Annotated[ + list[str], + Field( + min_length=1, + description="One or more literal or regex search terms.", + examples=[["alpha"], ["foo.*bar"]], + ), + ], + sample_text: t.Annotated[ + str, + Field(description="Sample text to test the query against."), + ], + regex: t.Annotated[ + bool, + Field(description="Treat terms as regular expressions."), + ] = False, + case_sensitive: t.Annotated[ + bool, + Field(description="Perform 
case-sensitive matching."), + ] = False, + any_term: t.Annotated[ + bool, + Field(description="Match any term instead of requiring all terms."), + ] = False, +) -> ValidateQueryResponse: + """Dry-run a query against sample text without searching files.""" + raise NotImplementedError(DOCS_ONLY_MESSAGE) + + +t.cast(t.Any, validate_query).__fastmcp__ = types.SimpleNamespace( + name="validate_query", + title="Validate Query", + tags=READONLY_TAGS | {"diagnostic"}, + annotations=None, +) + + +async def recent_sessions( + agent: t.Annotated[ + AgentSelector, + Field(description="Limit discovery to one agent or scan every agent."), + ] = "all", + hours: t.Annotated[ + int, + Field( + default=24, + ge=1, + le=24 * 30, + description="Look back this many hours (max 30 days).", + examples=[1, 24, 168], + ), + ] = 24, + limit: t.Annotated[ + int | None, + Field( + default=10, + ge=1, + description="Maximum number of sources to return.", + ), + ] = 10, +) -> RecentSessionsResponse: + """Return sources modified in the last N hours, newest-first.""" + raise NotImplementedError(DOCS_ONLY_MESSAGE) + + +t.cast(t.Any, recent_sessions).__fastmcp__ = types.SimpleNamespace( + name="recent_sessions", + title="Recent Sessions", + tags=READONLY_TAGS | {"search"}, + annotations=None, +) diff --git a/docs/conf.py b/docs/conf.py index 8fdd708..3c0c5c7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -71,10 +71,27 @@ "CapabilitiesModel", "SearchRequestModel", "FindRequestModel", + "StoreDescriptorModel", + "ListStoresRequest", + "ListStoresResponse", + "GetStoreDescriptorRequest", + "ListSourcesRequest", + "ListSourcesResponse", + "FilterSourcesRequest", + "DiscoverySummaryRequest", + "DiscoverySummaryResponse", + "ValidateQueryRequest", + "ValidateQueryResponse", + "RecentSessionsRequest", + "RecentSessionsResponse", + "InspectSampleRequest", + "InspectSampleResponse", ) conf["fastmcp_section_badge_map"] = { "Search": "readonly", "Discovery": "readonly", + "Catalog": "readonly", + 
"Diagnostic": "readonly", } conf["fastmcp_section_badge_pages"] = ("mcp/tools", "mcp/index", "index") diff --git a/docs/mcp/resources.md b/docs/mcp/resources.md index f236127..4c3f420 100644 --- a/docs/mcp/resources.md +++ b/docs/mcp/resources.md @@ -24,3 +24,24 @@ Read `agentgrep://sources` to list every discovered source. ``` Read `agentgrep://sources/codex`, `agentgrep://sources/claude`, or `agentgrep://sources/cursor` to filter discovery by agent. + +## Store catalog + +```{fastmcp-resource} agentgrep_catalog +``` + +Read `agentgrep://catalog` for the canonical catalog of every store agentgrep knows about — role, format, upstream reference, and schema notes per entry. + +## Store roles + +```{fastmcp-resource} agentgrep_store_roles +``` + +Read `agentgrep://store-roles` for the enumeration of role values (`primary_chat`, `prompt_history`, `app_state`, …) with one-line descriptions. + +## Store formats + +```{fastmcp-resource} agentgrep_store_formats +``` + +Read `agentgrep://store-formats` for the enumeration of on-disk format values (`jsonl`, `sqlite`, `md_frontmatter`, …) with one-line descriptions. diff --git a/docs/mcp/tools.md b/docs/mcp/tools.md index 5f7e959..9fd07ca 100644 --- a/docs/mcp/tools.md +++ b/docs/mcp/tools.md @@ -31,6 +31,18 @@ agentgrep's tools are read-only. They return structured Pydantic models and prot ```{fastmcp-tool-input} search ``` +## Time-Windowed Activity + +```{fastmcp-tool} recent_sessions +``` + +**Use when** you want the most-recently modified sources for an agent — newest-first, optionally bounded by a time window. + +**Returns:** the cutoff timestamp plus source records ordered by ``mtime_ns`` descending. + +```{fastmcp-tool-input} recent_sessions +``` + ## Store Discovery ```{fastmcp-tool} find @@ -55,3 +67,69 @@ agentgrep's tools are read-only. 
They return structured Pydantic models and prot ```{fastmcp-tool-input} find ``` + +## Structured Source Listing + +```{fastmcp-tool} list_sources +``` + +**Use when** you want a structured listing of discovered sources with optional path-kind / source-kind filters. + +```{fastmcp-tool-input} list_sources +``` + +## Required-Pattern Filtering + +```{fastmcp-tool} filter_sources +``` + +**Use when** you want to narrow discovered sources by required substring pattern (a stricter ``find``). + +```{fastmcp-tool-input} filter_sources +``` + +## Discovery Counts + +```{fastmcp-tool} summarize_discovery +``` + +**Use when** you want aggregate counts of discovered sources by agent, format, and path-kind. + +```{fastmcp-tool-input} summarize_discovery +``` + +## Catalog + +```{fastmcp-tool} list_stores +``` + +**Use when** you want the canonical catalog of on-disk stores agentgrep knows about — including stores that are not searched by default. + +```{fastmcp-tool-input} list_stores +``` + +```{fastmcp-tool} get_store_descriptor +``` + +**Use when** you need the full descriptor (role, format, upstream reference, schema notes) for a single store id. + +```{fastmcp-tool-input} get_store_descriptor +``` + +```{fastmcp-tool} inspect_record_sample +``` + +**Use when** you want a few raw records from one adapter+path to validate parser output or discover schema variations. + +```{fastmcp-tool-input} inspect_record_sample +``` + +## Diagnostics + +```{fastmcp-tool} validate_query +``` + +**Use when** you want to dry-run a regex or literal pattern against sample text before issuing a broad cross-agent search. 
+ +```{fastmcp-tool-input} validate_query +``` From 58d4bf06662ff5714860f21f1da6476afcfe4351 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 17 May 2026 21:37:55 -0500 Subject: [PATCH 07/16] agentgrep(docs[sidebar]): Promote Library and MCP to first-class sections why: The Packages > agentgrep nesting was premature scaffolding for a future package split that never materialized. Readers care about the library and the MCP server as products, not as members of a packages collection. Promoting both to top-level sidebar sections puts them at the same visual weight as Get started and Reference. what: - git mv docs/packages/agentgrep/*.md to docs/library/*.md. - Delete docs/packages/ entirely. - Rebuild the top-level toctrees in docs/index.md with explicit captions in this order: Get started, Library, MCP, Reference, Project. - Repoint the landing-page Library card and quickstart 'Next steps' link to library/*. - docs/redirects.txt redirects /packages/agentgrep/* to /library/* (dirhtml form, no .html suffix). --- docs/index.md | 10 ++++---- .../agentgrep => library}/examples.md | 0 .../{packages/agentgrep => library}/how-to.md | 0 docs/{packages/agentgrep => library}/index.md | 6 ++--- .../agentgrep => library}/reference.md | 0 .../agentgrep => library}/tutorial.md | 0 docs/packages/index.md | 24 ------------------- docs/quickstart.md | 2 +- docs/redirects.txt | 7 +++++- 9 files changed, 15 insertions(+), 34 deletions(-) rename docs/{packages/agentgrep => library}/examples.md (100%) rename docs/{packages/agentgrep => library}/how-to.md (100%) rename docs/{packages/agentgrep => library}/index.md (65%) rename docs/{packages/agentgrep => library}/reference.md (100%) rename docs/{packages/agentgrep => library}/tutorial.md (100%) delete mode 100644 docs/packages/index.md diff --git a/docs/index.md b/docs/index.md index ff78a71..9597d45 100644 --- a/docs/index.md +++ b/docs/index.md @@ -25,10 +25,10 @@ Run a first search and inspect the result shape. 
Tools, resources, and prompts for MCP clients. ::: -:::{grid-item-card} Package -:link: packages/agentgrep/index +:::{grid-item-card} Library +:link: library/index :link-type: doc -Tutorial, how-to, reference, and examples for the Python package. +Tutorial, how-to, reference, and examples for the Python library. ::: :::{grid-item-card} API Reference @@ -84,9 +84,9 @@ storage-catalog ```{toctree} :hidden: -:caption: Package +:caption: Library -packages/index +library/index ``` ```{toctree} diff --git a/docs/packages/agentgrep/examples.md b/docs/library/examples.md similarity index 100% rename from docs/packages/agentgrep/examples.md rename to docs/library/examples.md diff --git a/docs/packages/agentgrep/how-to.md b/docs/library/how-to.md similarity index 100% rename from docs/packages/agentgrep/how-to.md rename to docs/library/how-to.md diff --git a/docs/packages/agentgrep/index.md b/docs/library/index.md similarity index 65% rename from docs/packages/agentgrep/index.md rename to docs/library/index.md index 05fae74..3cfc859 100644 --- a/docs/packages/agentgrep/index.md +++ b/docs/library/index.md @@ -1,8 +1,8 @@ -(package-agentgrep)= +(library)= -# agentgrep +# Library -The `agentgrep` package provides both the terminal CLI and the FastMCP server. The CLI and MCP tools share the same source discovery, parsing, matching, serialization, and path privacy behavior. +Use `agentgrep` as a Python library from your own scripts and tools. The same search, discovery, parsing, serialization, and path-privacy layer powers the terminal CLI and the MCP server, so anything you can do from the command line you can drive directly in code. 
::::{grid} 1 1 2 2 :gutter: 2 diff --git a/docs/packages/agentgrep/reference.md b/docs/library/reference.md similarity index 100% rename from docs/packages/agentgrep/reference.md rename to docs/library/reference.md diff --git a/docs/packages/agentgrep/tutorial.md b/docs/library/tutorial.md similarity index 100% rename from docs/packages/agentgrep/tutorial.md rename to docs/library/tutorial.md diff --git a/docs/packages/index.md b/docs/packages/index.md deleted file mode 100644 index eeac160..0000000 --- a/docs/packages/index.md +++ /dev/null @@ -1,24 +0,0 @@ -(packages)= - -# Packages - -The current distribution ships one Python package: - -::::{grid} 1 1 1 1 -:gutter: 2 - -:::{grid-item-card} agentgrep -:link: agentgrep/index -:link-type: doc -CLI and FastMCP server for read-only local AI agent history search. -::: - -:::: - -The package docs use a Tutorial / How to / Reference / Examples structure so future package splits can live under the same navigation shape. - -```{toctree} -:hidden: - -agentgrep/index -``` diff --git a/docs/quickstart.md b/docs/quickstart.md index 2970be7..6e2e2da 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -58,6 +58,6 @@ See {ref}`clients` for MCP client snippets. ## Next steps -- {doc}`packages/agentgrep/tutorial` walks through CLI search in more detail. +- {doc}`library/tutorial` walks through CLI search in more detail. - {doc}`mcp/tools` documents the MCP tool payloads. - {doc}`configuration` explains output, progress, privacy, and source selection. 
diff --git a/docs/redirects.txt b/docs/redirects.txt index 8b13789..fa2864b 100644 --- a/docs/redirects.txt +++ b/docs/redirects.txt @@ -1 +1,6 @@ - +packages/index library/index +packages/agentgrep/index library/index +packages/agentgrep/tutorial library/tutorial +packages/agentgrep/how-to library/how-to +packages/agentgrep/reference library/reference +packages/agentgrep/examples library/examples From 9585544af36c9a7284522fad36065c759a600dd7 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 17 May 2026 21:43:14 -0500 Subject: [PATCH 08/16] agentgrep(docs[widgets]): Lift framework + MCP install widget from libtmux-mcp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit why: We want a polished, tabbed install widget on the MCP landing page rather than scattered install snippets. Building the widget infrastructure from scratch duplicates work already proven in libtmux-mcp — its autodiscovery + Jinja + prehydrate + asset-copy pipeline is exactly what we need, and lifting it verbatim keeps both projects on the same widget contract. The framework alone won't load (the prehydrate hook imports from mcp_install at module-import time), so the widget and framework land in the same commit. Asset retrofits cover the storage-key namespace, the CSS class prefix lm-/ag-, and the per-CLI install commands which target ``agentgrep-mcp`` inside the ``agentgrep`` PyPI package via ``--from`` (uvx) / ``--spec`` (pipx). what: - Copy docs/_ext/widgets/{__init__,_base,_directive,_assets, _discovery,_prehydrate}.py verbatim from libtmux-mcp. - Copy docs/_ext/widgets/mcp_install.py and retrofit install commands, JSON/TOML bodies, server slug from tmux/libtmux-mcp to agentgrep. - Copy docs/_widgets/mcp-install/{widget.html,widget.js, widget.css}; rename lm- prefix to ag- and storage namespace to agentgrep.mcp-install.*. - Register docs._ext.widgets in conf.py extra_extensions. - Embed {mcp-install} at the top of docs/mcp/index.md. 
--- docs/_ext/widgets/__init__.py | 66 ++++ docs/_ext/widgets/_assets.py | 58 +++ docs/_ext/widgets/_base.py | 196 ++++++++++ docs/_ext/widgets/_directive.py | 62 +++ docs/_ext/widgets/_discovery.py | 45 +++ docs/_ext/widgets/_prehydrate.py | 351 +++++++++++++++++ docs/_ext/widgets/mcp_install.py | 522 ++++++++++++++++++++++++++ docs/_widgets/mcp-install/widget.css | 370 ++++++++++++++++++ docs/_widgets/mcp-install/widget.html | 221 +++++++++++ docs/_widgets/mcp-install/widget.js | 503 +++++++++++++++++++++++++ docs/conf.py | 1 + docs/mcp/index.md | 7 + 12 files changed, 2402 insertions(+) create mode 100644 docs/_ext/widgets/__init__.py create mode 100644 docs/_ext/widgets/_assets.py create mode 100644 docs/_ext/widgets/_base.py create mode 100644 docs/_ext/widgets/_directive.py create mode 100644 docs/_ext/widgets/_discovery.py create mode 100644 docs/_ext/widgets/_prehydrate.py create mode 100644 docs/_ext/widgets/mcp_install.py create mode 100644 docs/_widgets/mcp-install/widget.css create mode 100644 docs/_widgets/mcp-install/widget.html create mode 100644 docs/_widgets/mcp-install/widget.js diff --git a/docs/_ext/widgets/__init__.py b/docs/_ext/widgets/__init__.py new file mode 100644 index 0000000..ee9b8b4 --- /dev/null +++ b/docs/_ext/widgets/__init__.py @@ -0,0 +1,66 @@ +"""Reusable widget framework for Sphinx docs. + +Each widget is a ``BaseWidget`` subclass in a sibling module (e.g. +``mcp_install.py``) plus a ``/_widgets//widget.{html,js,css}`` +asset directory. Widgets autodiscover at ``setup()`` time — adding a new one +requires no registry edits. Usage from Markdown/RST: + +.. 
code-block:: markdown + + ```{mcp-install} + :variant: compact + ``` +""" + +from __future__ import annotations + +import functools +import typing as t + +from ._assets import install_widget_assets +from ._base import ( + BaseWidget, + depart_widget_container, + visit_widget_container, + widget_container, +) +from ._directive import make_widget_directive +from ._discovery import discover +from ._prehydrate import inject_mcp_install_prehydrate + +if t.TYPE_CHECKING: + from sphinx.application import Sphinx + +__version__ = "0.1.0" + +__all__ = [ + "BaseWidget", + "__version__", + "setup", + "widget_container", +] + + +def setup(app: Sphinx) -> dict[str, t.Any]: + """Register every discovered widget and wire the asset pipeline.""" + widgets = discover() + + app.add_node( + widget_container, + html=(visit_widget_container, depart_widget_container), + ) + + for name, widget_cls in widgets.items(): + app.add_directive(name, make_widget_directive(widget_cls)) + + app.connect( + "builder-inited", + functools.partial(install_widget_assets, widgets=widgets), + ) + app.connect("html-page-context", inject_mcp_install_prehydrate) + + return { + "version": __version__, + "parallel_read_safe": True, + "parallel_write_safe": True, + } diff --git a/docs/_ext/widgets/_assets.py b/docs/_ext/widgets/_assets.py new file mode 100644 index 0000000..bfd7c61 --- /dev/null +++ b/docs/_ext/widgets/_assets.py @@ -0,0 +1,58 @@ +"""Copy widget assets into ``_static/widgets//`` and register them.""" + +from __future__ import annotations + +import pathlib +import shutil +import typing as t + +from sphinx.util import logging + +from ._base import BaseWidget + +if t.TYPE_CHECKING: + from sphinx.application import Sphinx + +logger = logging.getLogger(__name__) + +STATIC_SUBDIR = "widgets" + + +def install_widget_assets( + app: Sphinx, + widgets: dict[str, type[BaseWidget]], +) -> None: + """Copy each widget's ``widget.{css,js}`` into ``_static/widgets//``. 
+ + Assets are then registered via ``app.add_css_file`` / ``app.add_js_file`` so + every page includes them (same pattern as ``sphinx-copybutton``). This is + intentionally simpler than per-page inclusion — the files are small and the + docs are not bandwidth-constrained. + + Uses :func:`shutil.copy2` directly. Recent Sphinx releases tightened + ``copy_asset_file`` to refuse overwriting (it emits a + ``misc.copy_overwrite`` warning and aborts), which leaves stale widget + JS/CSS on every incremental rebuild. The fix is to do the byte copy + ourselves: the cache-busting ``?v=`` querystring on the + ``add_*_file`` registration line keeps browser caches honest. + """ + if app.builder.format != "html": + return + + srcdir = pathlib.Path(app.srcdir) + outdir_static = pathlib.Path(app.outdir) / "_static" / STATIC_SUBDIR + + for name, widget_cls in widgets.items(): + asset_dir = widget_cls.assets_dir(srcdir) + dest = outdir_static / name + + for filename, register in ( + ("widget.css", app.add_css_file), + ("widget.js", app.add_js_file), + ): + source = asset_dir / filename + if not source.is_file(): + continue + dest.mkdir(parents=True, exist_ok=True) + shutil.copy2(str(source), str(dest / filename)) + register(f"{STATIC_SUBDIR}/{name}/{filename}") diff --git a/docs/_ext/widgets/_base.py b/docs/_ext/widgets/_base.py new file mode 100644 index 0000000..d8d7882 --- /dev/null +++ b/docs/_ext/widgets/_base.py @@ -0,0 +1,196 @@ +"""Base class for widgets and the docutils node that wraps rendered output.""" + +from __future__ import annotations + +import abc +import collections.abc +import pathlib +import typing as t + +import jinja2 +import markupsafe +from docutils import nodes +from sphinx.builders.html import StandaloneHTMLBuilder + +if t.TYPE_CHECKING: + from sphinx.environment import BuildEnvironment + from sphinx.writers.html5 import HTML5Translator + + +class HighlightFilter(t.Protocol): + """Callable signature for the Jinja ``highlight`` filter.""" + + def 
__call__(self, code: str, language: str = "default") -> markupsafe.Markup: ... + + +class CooldownDaysSlotFilter(t.Protocol): + """Callable signature for the Jinja ``cooldown_days_slot`` filter.""" + + def __call__(self, html: object) -> markupsafe.Markup: ... + + +class widget_container(nodes.container): # type: ignore[misc] # docutils nodes are untyped + """Wraps a widget's rendered HTML; visit/depart emit the outer div.""" + + +def visit_widget_container( + translator: HTML5Translator, + node: widget_container, +) -> None: + """Open ``
`` for the widget.""" + name = node["widget_name"] + translator.body.append(f'
') + + +def depart_widget_container( + translator: HTML5Translator, + node: widget_container, +) -> None: + """Close the widget wrapper div.""" + translator.body.append("</div>
") + + +ASSET_FILES: tuple[str, ...] = ("widget.html", "widget.js", "widget.css") + + +class BaseWidget(abc.ABC): + """Base class every concrete widget subclasses. + + Subclasses declare ``name`` plus optional ``option_spec`` / ``default_options`` + and may override ``context(env)`` to feed data into the Jinja template. + Assets (``widget.html``, ``widget.js``, ``widget.css``) live at + ``/_widgets//``; only ``widget.html`` is required. + """ + + name: t.ClassVar[str] + option_spec: t.ClassVar[ + collections.abc.Mapping[str, collections.abc.Callable[[str], t.Any]] + ] = {} + default_options: t.ClassVar[collections.abc.Mapping[str, t.Any]] = {} + + @classmethod + def assets_dir(cls, srcdir: pathlib.Path) -> pathlib.Path: + return srcdir / "_widgets" / cls.name + + @classmethod + def template_path(cls, srcdir: pathlib.Path) -> pathlib.Path: + return cls.assets_dir(srcdir) / "widget.html" + + @classmethod + def has_asset(cls, srcdir: pathlib.Path, filename: str) -> bool: + return (cls.assets_dir(srcdir) / filename).is_file() + + @classmethod + def context(cls, env: BuildEnvironment) -> collections.abc.Mapping[str, t.Any]: + """Return extra Jinja context. 
Override in subclasses for widget data.""" + return {} + + @classmethod + def render( + cls, + *, + options: collections.abc.Mapping[str, t.Any], + env: BuildEnvironment, + ) -> str: + """Render the Jinja template with merged context, return HTML.""" + template_path = cls.template_path(pathlib.Path(env.srcdir)) + source = template_path.read_text(encoding="utf-8") + jenv = jinja2.Environment( + undefined=jinja2.StrictUndefined, + autoescape=jinja2.select_autoescape(["html"]), + keep_trailing_newline=False, + trim_blocks=True, + lstrip_blocks=True, + ) + jenv.filters["highlight"] = make_highlight_filter(env) + jenv.filters["cooldown_days_slot"] = make_cooldown_days_slot_filter() + template = jenv.from_string(source) + context: dict[str, t.Any] = { + **cls.default_options, + **options, + **cls.context(env), + "widget_name": cls.name, + } + return template.render(**context) + + +def make_cooldown_days_slot_filter() -> CooldownDaysSlotFilter: + """Return a Jinja filter that injects cooldown slot ````s. + + The Pygments highlighter escapes two days-mode sentinels emitted in + snippet bodies (see :mod:`docs._ext.widgets.mcp_install`): + + * ``<COOLDOWN_DURATION>`` — used by uvx and pip days bodies. + Swapped for a span whose default text content is ``PD`` (ISO + 8601 duration). uv stores the value as + ``ExcludeNewerValue::Relative(ExcludeNewerSpan)`` and recomputes + ``now - N days`` on every resolver call; pip 26.1+ does the same + at flag-parse time per invocation. The snippet stays fresh + forever once saved to an MCP config. + * ``<COOLDOWN_DATE>`` — used by pipx days bodies because + pipx 1.8.0 bundles a pip older than 26.1 that rejects the + duration form with ``Invalid isoformat``. Swapped for a span + whose default text content is an absolute ISO date + (``today - default-days``). Drifts daily but ``widget.js`` + refreshes the slot on every page load. + + Both spans live inside a Pygments string-literal span and inherit + the parent's color. 
Their ``textContent`` is rewritten by + ``widget.js`` whenever the user changes the days input. The filter + is a no-op for outputs without either sentinel (off and bypass + cooldown modes never emit one). + """ + from .mcp_install import DEFAULT_COOLDOWN_DAYS, default_cooldown_date + + default_date = default_cooldown_date(DEFAULT_COOLDOWN_DAYS) + duration_span = ( + 'P{DEFAULT_COOLDOWN_DAYS}D" + ) + date_span = ( + f'{default_date}' + ) + + def _filter(html: object) -> markupsafe.Markup: + s = str(html) + s = s.replace("<COOLDOWN_DURATION>", duration_span) + s = s.replace("<COOLDOWN_DATE>", date_span) + return markupsafe.Markup(s) + + return _filter + + +def make_highlight_filter(env: BuildEnvironment) -> HighlightFilter: + r"""Return a Jinja filter that runs Sphinx's Pygments highlighter. + + Output matches ``sphinx.writers.html5.HTML5Translator.visit_literal_block`` + byte-for-byte: the inner ``highlight_block`` call already returns + ``
<div class="highlight"><pre>...</pre></div>
\n``; we wrap it with the + ``
<div class="highlight-{language} notranslate">...</div>
\n`` starttag Sphinx + produces. This means sphinx-copybutton's default selector + (``div.highlight pre``) matches and the prompt-strip regex from gp-sphinx's + ``DEFAULT_COPYBUTTON_PROMPT_TEXT`` works automatically. + + ``highlighter`` is declared on ``StandaloneHTMLBuilder`` and its subclasses + (``DirectoryHTMLBuilder``, ``SingleFileHTMLBuilder``), not on the ``Builder`` + base. For non-HTML builders (``text``, ``linkcheck``, ``gettext``, ``man``, + ...), fall back to an HTML-escaped ``
<pre>`` block; it still flows through
+    the ``nodes.raw("html", ...)`` output path and is harmlessly ignored by
+    non-HTML writers.
+    """
+    builder = env.app.builder
+    if isinstance(builder, StandaloneHTMLBuilder):
+        highlighter = builder.highlighter
+
+        def _highlight(code: str, language: str = "default") -> markupsafe.Markup:
+            inner = highlighter.highlight_block(code, language)
+            return markupsafe.Markup(
+                f'
<div class="highlight-{language} notranslate">{inner}</div>
\n' + ) + else: + + def _highlight(code: str, language: str = "default") -> markupsafe.Markup: + escaped = markupsafe.escape(code) + return markupsafe.Markup(f"
<pre>{escaped}</pre>
\n") + + return _highlight diff --git a/docs/_ext/widgets/_directive.py b/docs/_ext/widgets/_directive.py new file mode 100644 index 0000000..568be6d --- /dev/null +++ b/docs/_ext/widgets/_directive.py @@ -0,0 +1,62 @@ +"""Factory that manufactures a Sphinx Directive class for a given widget.""" + +from __future__ import annotations + +import pathlib +import typing as t + +from docutils import nodes +from sphinx.util.docutils import SphinxDirective + +from ._base import ASSET_FILES, BaseWidget, widget_container + + +def make_widget_directive(widget_cls: type[BaseWidget]) -> type[SphinxDirective]: + """Create a ``SphinxDirective`` subclass bound to ``widget_cls``. + + Each widget gets its own Directive subclass (not a single dispatcher) because + docutils parses ``:option:`` lines against ``option_spec`` *before* calling + ``run()`` -- so the spec must be static per directive name. + """ + + class _WidgetDirective(SphinxDirective): + has_content = False + required_arguments = 0 + optional_arguments = 0 + final_argument_whitespace = False + # Copy the widget's option_spec so per-directive mutations don't leak. + option_spec: t.ClassVar[dict[str, t.Any]] = dict(widget_cls.option_spec) + + def run(self) -> list[nodes.Node]: + """Render the widget and return a single ``widget_container`` node.""" + merged: dict[str, t.Any] = { + **widget_cls.default_options, + **self.options, + } + self._note_asset_dependencies() + html = self._render(merged) + container = widget_container(widget_name=widget_cls.name) + container += nodes.raw("", html, format="html") + self.set_source_info(container) + return [container] + + def _render(self, options: dict[str, t.Any]) -> str: + try: + return widget_cls.render(options=options, env=self.env) + except FileNotFoundError as exc: + msg = f"widget {widget_cls.name!r}: template not found -- expected {exc.filename}" + raise self.severe(msg) from exc + except Exception as exc: # Jinja UndefinedError, etc. 
+ msg = f"widget {widget_cls.name!r} render failed: {exc}" + raise self.error(msg) from exc + + def _note_asset_dependencies(self) -> None: + assets_dir = widget_cls.assets_dir(pathlib.Path(self.env.srcdir)) + for filename in ASSET_FILES: + path = assets_dir / filename + if path.is_file(): + self.env.note_dependency(str(path)) + + _WidgetDirective.__name__ = f"{widget_cls.__name__}Directive" + _WidgetDirective.__qualname__ = _WidgetDirective.__name__ + return _WidgetDirective diff --git a/docs/_ext/widgets/_discovery.py b/docs/_ext/widgets/_discovery.py new file mode 100644 index 0000000..a09c96c --- /dev/null +++ b/docs/_ext/widgets/_discovery.py @@ -0,0 +1,45 @@ +"""Autodiscover widget classes from sibling modules in this package.""" + +from __future__ import annotations + +import importlib +import pkgutil + +from ._base import BaseWidget + + +def discover() -> dict[str, type[BaseWidget]]: + """Import every non-underscore submodule; collect ``BaseWidget`` subclasses. + + Adding a new widget means: drop ``mywidget.py`` next to ``mcp_install.py`` with a + ``MyWidget(BaseWidget)`` that sets ``name = "mywidget"`` -- the discovery sweep + at ``setup()`` time registers it automatically. + """ + from . 
import __name__ as pkg_name, __path__ as pkg_path + + registry: dict[str, type[BaseWidget]] = {} + for info in pkgutil.iter_modules(pkg_path): + if info.name.startswith("_"): + continue + module = importlib.import_module(f"{pkg_name}.{info.name}") + for obj in vars(module).values(): + if not _is_widget_class(obj): + continue + existing = registry.get(obj.name) + if existing is not None and existing is not obj: + msg = ( + f"Duplicate widget name {obj.name!r}: {existing.__module__} vs {obj.__module__}" + ) + raise RuntimeError(msg) + registry[obj.name] = obj + return registry + + +def _is_widget_class(obj: object) -> bool: + """Return True iff ``obj`` is a concrete ``BaseWidget`` subclass with a name.""" + return ( + isinstance(obj, type) + and issubclass(obj, BaseWidget) + and obj is not BaseWidget + and getattr(obj, "name", None) is not None + ) diff --git a/docs/_ext/widgets/_prehydrate.py b/docs/_ext/widgets/_prehydrate.py new file mode 100644 index 0000000..6c6fa29 --- /dev/null +++ b/docs/_ext/widgets/_prehydrate.py @@ -0,0 +1,351 @@ +"""Prevent flash-of-wrong-selection on the ``mcp-install`` widget. + +The widget's server-rendered HTML always marks the first +client/method/scope tab ``aria-selected="true"`` and ``hidden=""`` on +every panel except the ``(claude-code, uvx, local, off)`` cell. +``widget.js`` then reads ``localStorage`` and mutates the DOM to the +user's saved selection — a visible flash on initial page paint and on +every gp-sphinx SPA navigation between docs pages. 
+ +This module emits an inline ```` script that copies the saved +selection from ``localStorage`` onto ```` as +``data-mcp-install-client`` / ``data-mcp-install-method`` / +``data-mcp-install-scope`` / ``data-mcp-install-cooldown-enabled`` / +``data-mcp-install-cooldown-type`` / ``data-mcp-install-cooldown-days`` +attributes *before first paint*, plus a ``" + + +def _snippet() -> str: + return _build_style() + _script() + + +def inject_mcp_install_prehydrate( + app: Sphinx, + pagename: str, + templatename: str, + context: dict[str, t.Any], + doctree: object, +) -> None: + """Inject the prehydrate ``