From ead8ea35f33153c47d68cbc5797181cf2fa44fbf Mon Sep 17 00:00:00 2001 From: Dan Powers Date: Wed, 3 Jun 2026 08:53:56 -0500 Subject: [PATCH 1/5] refactor(mcp): additive resolve_env parameter + _resolve_env_vars helper extraction parse_mcp_md_text and _build_spec gain a keyword-only resolve_env: bool = True parameter. The default preserves byte-identical behavior for all existing callers (parse_mcp_md, doctor.py:678, profile/types.py:336, profile/filesystem.py:374). The _flush() closure inside parse_mcp_md_text threads resolve_env through to _build_spec. A new module-level _resolve_env_vars(env, server_name) helper is extracted from _build_spec's inline resolution block. The helper raises the canonical MCPServerConnectFailed message shape documented in spec/19 so the future FilesystemMCPServerRegistryBackend can call it on already-built specs at load_mcp_server time without divergent error semantics. profile/types.py:336 and profile/filesystem.py:374 gain inline comments documenting the resolve_env=True invariant their AgentProfile.mcp_servers consumers depend on (the spec/24 D1 mcp_md_raw preservation discipline requires resolved env vars at snapshot time). The public parse_mcp_md function does NOT receive the parameter. Its docstring documents the API asymmetry so callers needing lazy resolution use parse_mcp_md_text(..., resolve_env=False) directly. --- atomic_agents/mcp.py | 131 +++++++++++++++++++++------- atomic_agents/profile/filesystem.py | 4 + atomic_agents/profile/types.py | 4 + 3 files changed, 108 insertions(+), 31 deletions(-) diff --git a/atomic_agents/mcp.py b/atomic_agents/mcp.py index cc8c7ec..62b8ea8 100644 --- a/atomic_agents/mcp.py +++ b/atomic_agents/mcp.py @@ -40,6 +40,7 @@ _logger = logging.getLogger(__name__) + # Detect path-shaped args. A path-shaped arg is one that: # - starts with / (absolute POSIX path) # - starts with ~ (home-relative path) @@ -81,6 +82,7 @@ def _is_path_shaped(arg: str) -> bool: # ────────────────────────────────────────────────────────────────── # Data classes + @dataclass class MCPServerSpec: """Declaration of an MCP server an agent may connect to. @@ -123,6 +125,7 @@ class MCPToolMeta: # ────────────────────────────────────────────────────────────────── # MCPClientPool + class MCPClientPool: """Per-agent pool of connected MCP server clients. @@ -153,19 +156,24 @@ def connect_all(self) -> None: if spec.transport != "stdio": _logger.warning( "MCP server %r: transport %r not supported in v1 (stdio only). Skipping.", - spec.name, spec.transport, + spec.name, + spec.transport, ) continue try: conn = _connect_sync(spec) self._connected[spec.name] = conn - _logger.debug("MCP server %r connected (%d tools)", spec.name, len(conn.tools)) + _logger.debug( + "MCP server %r connected (%d tools)", spec.name, len(conn.tools) + ) except MCPServerConnectFailed as e: _logger.warning("MCP server %r failed to connect: %s", spec.name, e) except Exception as e: _logger.warning( "MCP server %r unexpected error during connect: %s: %s", - spec.name, type(e).__name__, e, + spec.name, + type(e).__name__, + e, ) def disconnect_all(self) -> None: @@ -219,14 +227,19 @@ def discover_tools(self) -> list[ToolDefinition]: if "properties" not in input_schema: input_schema["properties"] = {} - description = tool.description or f"MCP tool '{tool.name}' from server '{server_name}'." + description = ( + tool.description + or f"MCP tool '{tool.name}' from server '{server_name}'." + ) - definitions.append(ToolDefinition( - name=qualified, - description=description, - input_schema=input_schema, - handler=handler, - )) + definitions.append( + ToolDefinition( + name=qualified, + description=description, + input_schema=input_schema, + handler=handler, + ) + ) return definitions @@ -238,6 +251,7 @@ def get_meta(self, qualified_name: str) -> MCPToolMeta | None: # ────────────────────────────────────────────────────────────────── # Internal connection state + class _ServerConnection: """Holds live connection state for one server. @@ -258,8 +272,8 @@ class _ServerConnection: def __init__(self, spec: MCPServerSpec, tools: list) -> None: self.spec = spec - self.tools = tools # list[mcp.types.Tool] - self._process: Any = None # subprocess.Popen, set by _connect_sync + self.tools = tools # list[mcp.types.Tool] + self._process: Any = None # subprocess.Popen, set by _connect_sync def _connect_sync(spec: MCPServerSpec) -> _ServerConnection: @@ -407,14 +421,23 @@ def _extract_content_value(content: list) -> Any: # ────────────────────────────────────────────────────────────────── # mcp.md parser + def parse_mcp_md(path: Path, read_paths: list | None = None) -> list[MCPServerSpec]: """Parse /mcp.md into a list of MCPServerSpec. - Empty list if file doesn't exist (agent has no MCP servers — that's fine). + Empty list if file doesn't exist (agent has no MCP servers -- that's fine). read_paths: if provided, path-shaped args are validated against these roots via validate_mcp_server_args(). PathTraversalError is raised if any arg resolves outside all declared read_paths. + + This function always resolves $VAR env references (resolve_env=True). + Callers that need lazy resolution (e.g., FilesystemMCPServerRegistryBackend, + which resolves at load_mcp_server time per spec/36 Decision 7) MUST call + parse_mcp_md_text() directly with resolve_env=False. The asymmetry is + intentional: parse_mcp_md is the public convenience path for callers that + want a ready-to-use spec list; parse_mcp_md_text is the knob-bearing path + for callers that need deferred resolution. """ if not path.exists(): return [] @@ -429,6 +452,8 @@ def parse_mcp_md_text( text: str, mcp_md_path: Path | None = None, read_paths: list | None = None, + *, + resolve_env: bool = True, ) -> list[MCPServerSpec]: """Parse mcp.md content into a list of MCPServerSpec. @@ -454,12 +479,21 @@ def parse_mcp_md_text( - transport: "stdio" (default; only supported value in v1) - description: human-readable note - Raises MCPServerConnectFailed when an env var reference cannot be resolved. + Raises MCPServerConnectFailed when an env var reference cannot be resolved + (only when resolve_env=True). read_paths: if provided, each parsed spec is immediately passed to validate_mcp_server_args(). Path-shaped args (starting with /, ~, ./, ../, or containing ..) that resolve outside all declared read_paths raise PathTraversalError, named with the offending server and arg. + + resolve_env: when True (default), $VAR references in env: lines are + resolved against os.environ immediately, raising MCPServerConnectFailed + on unresolvable references. When False, $VAR strings are kept as-is in + the returned specs (raw strings like "$GITHUB_PAT"). Pass False when + deferring resolution to a later call site (e.g., + FilesystemMCPServerRegistryBackend.load_mcp_server resolves at + materialization time per spec/36 Decision 7). """ if not text or not text.strip(): return [] @@ -471,7 +505,7 @@ def parse_mcp_md_text( def _flush() -> None: if current_name is None: return - spec = _build_spec(current_name, current_fields) + spec = _build_spec(current_name, current_fields, resolve_env=resolve_env) if spec is not None: specs.append(spec) @@ -516,6 +550,7 @@ def _flush() -> None: except Exception as exc: # Re-raise with server name context for clarity from .exceptions import PathTraversalError + if isinstance(exc, PathTraversalError): raise PathTraversalError( f"mcp.md server '{spec.name}': {exc}", @@ -527,25 +562,65 @@ def _flush() -> None: return specs -def _build_spec(name: str, fields: dict[str, list[str]]) -> MCPServerSpec | None: +def _resolve_env_vars(env: dict[str, str], server_name: str) -> dict[str, str]: + """Resolve $VAR references in env dict against os.environ. + + Raises MCPServerConnectFailed with the spec/19-documented message shape + on unresolvable references. Used by _build_spec (when resolve_env=True) + and FilesystemMCPServerRegistryBackend.load_mcp_server (which calls it + after a resolve_env=False parse so that resolution happens at + load_mcp_server time per spec/36 Decision 7). + + Sharing this helper between the two call sites ensures the error message + shape is canonical -- a second inline implementation would risk diverging. + """ + resolved: dict[str, str] = {} + for key, val in env.items(): + if val.startswith("$"): + var_name = val[1:] + resolved_val = os.environ.get(var_name) + if resolved_val is None: + raise MCPServerConnectFailed( + f"mcp.md server '{server_name}': env var '${var_name}' not set. " + f"Set {var_name} in the environment before running this agent." + ) + resolved[key] = resolved_val + else: + resolved[key] = val + return resolved + + +def _build_spec( + name: str, + fields: dict[str, list[str]], + *, + resolve_env: bool = True, +) -> MCPServerSpec | None: """Build an MCPServerSpec from parsed key/value lines. Returns None (and logs a warning) if the section has no command. - Raises MCPServerConnectFailed for unresolvable env var references. + Raises MCPServerConnectFailed for unresolvable env var references + (only when resolve_env=True). + + resolve_env: when True (default), $VAR references are resolved via + _resolve_env_vars. When False, the raw string (e.g., "$GITHUB_PAT") + is stored as-is in the returned spec's env dict. Callers passing + False are responsible for resolving at the appropriate later boundary. """ command_lines = fields.get("command", []) if not command_lines: - _logger.warning("mcp.md section %r has no 'command:' key — skipping", name) + _logger.warning("mcp.md section %r has no 'command:' key -- skipping", name) return None command = command_lines[0].strip() - # Args — comma-separated on one line + # Args -- comma-separated on one line args: list[str] = [] for args_line in fields.get("args", []): args.extend(part.strip() for part in args_line.split(",") if part.strip()) - # Env — one or more KEY=$VAR or KEY=value pairs, one per line + # Env -- one or more KEY=$VAR or KEY=value pairs, one per line. + # Build the raw dict first (no resolution yet), then resolve when asked. env: dict[str, str] = {} for env_line in fields.get("env", []): for pair in env_line.split(","): @@ -555,17 +630,10 @@ def _build_spec(name: str, fields: dict[str, list[str]]) -> MCPServerSpec | None env_key, _, env_val = pair.partition("=") env_key = env_key.strip() env_val = env_val.strip() - if env_val.startswith("$"): - var_name = env_val[1:] - resolved = os.environ.get(var_name) - if resolved is None: - raise MCPServerConnectFailed( - f"mcp.md server '{name}': env var '${var_name}' not set. " - f"Set {var_name} in the environment before running this agent." - ) - env[env_key] = resolved - else: - env[env_key] = env_val + env[env_key] = env_val + + if resolve_env: + env = _resolve_env_vars(env, name) transport_lines = fields.get("transport", []) transport = transport_lines[0].strip() if transport_lines else "stdio" @@ -586,6 +654,7 @@ def _build_spec(name: str, fields: dict[str, list[str]]) -> MCPServerSpec | None # ────────────────────────────────────────────────────────────────── # Path-traversal check for MCP server args + def validate_mcp_server_args( spec: MCPServerSpec, agent_read_paths: list, diff --git a/atomic_agents/profile/filesystem.py b/atomic_agents/profile/filesystem.py index 900f57b..9e6596d 100644 --- a/atomic_agents/profile/filesystem.py +++ b/atomic_agents/profile/filesystem.py @@ -369,6 +369,10 @@ def load_profile(self, agent_id: str) -> AgentProfile: # at load time. Pre-#63-PR-1-review-pass this was ``except # Exception`` which swallowed both — Step 9 pre-landing review # finding F-3. + # IMPORTANT: this caller MUST keep the default `resolve_env=True` because + # callers consuming `AgentProfile.mcp_servers` expect resolved values. Do + # NOT pass `resolve_env=False` here -- the AgentProfile snapshot semantic + # (spec/24 D1) requires resolved env vars. if mcp_md_raw: try: mcp_servers = parse_mcp_md_text( diff --git a/atomic_agents/profile/types.py b/atomic_agents/profile/types.py index bbd4f39..40b4b41 100644 --- a/atomic_agents/profile/types.py +++ b/atomic_agents/profile/types.py @@ -332,6 +332,10 @@ def from_dict(cls, d: dict[str, Any]) -> "AgentProfile": # matters because (a) future callers may pass read_paths # via from_dict, (b) other unexpected exceptions (yaml # errors, OSError) should NOT be silently swallowed. + # IMPORTANT: this caller MUST keep the default `resolve_env=True` because + # callers consuming `AgentProfile.mcp_servers` expect resolved values. Do + # NOT pass `resolve_env=False` here -- the AgentProfile snapshot semantic + # (spec/24 D1) requires resolved env vars. try: mcp_servers = parse_mcp_md_text(mcp_md_raw) except MCPServerConnectFailed: From a2695c01d51f5d239693b2d7080349cdbad50783 Mon Sep 17 00:00:00 2001 From: Dan Powers Date: Wed, 3 Jun 2026 08:54:16 -0500 Subject: [PATCH 2/5] feat(mcp_registry): Protocol scaffolding + FilesystemMCPServerRegistryBackend reference impl New atomic_agents/mcp_registry/ package with 4 files: - __init__.py: register_mcp_server_registry_backend / get_mcp_server_registry_backend / list_mcp_server_registry_backends registry functions + get_default_mcp_server_registry_backend factory + _redact_for_error_message helper (catches both ://-scheme URLs and non-scheme DSN-style user:password@host connection strings). - types.py: MCPServerRef, MCPServerRegistryCapabilities, ValidationResult frozen dataclasses. MCPServerRef.to_dict / from_dict roundtrip is byte-shape preserving. - backend.py: @runtime_checkable MCPServerRegistryBackend Protocol with 8 methods + @property backend_id + @property capabilities (matching the CorpusBackend spec/34 precedent at corpus/backend.py:70-71). 6 exception classes: MCPServerNotInRegistry, MCPServerAlreadyInstalled, MCPRegistryUnavailable, MCPRegistryAuthRequired, MCPRegistryDescriptorInvalid, BackendNotRegistered. _default_load_all(backend) module-level helper that backends MAY override for performance. - filesystem.py: FilesystemMCPServerRegistryBackend(agent_root, read_paths, *, lock_backend=None) reading /mcp.md. Read paths only at PR 1; install/uninstall raise NotImplementedError per the capability-honesty contract. supports_install=False static class default; flips True at PR 3 when methods land. list_mcp_servers calls parse_mcp_md_text(content, resolve_env=False, read_paths=None) per Decision 8 (validation at load_mcp_server boundary); section names are validated via _validate_server_name and skipped with a logged warning if invalid (prevents load_all_mcp_servers from raising uncaught ValueError on tampered mcp.md). load_mcp_server distinguishes FileNotFoundError (raises MCPServerNotInRegistry per spec) from other OSError including PermissionError (raises MCPRegistryUnavailable per MUST 7 transient-vs-permanent honesty), detects malformed sections (section header exists in mcp.md but parse returned no spec for that name) and raises MCPRegistryDescriptorInvalid. load_all_mcp_servers delegates to _default_load_all(self) per MUST 10 consistency. Tests across two new files: - tests/test_mcp_server_registry_conformance.py: 42 tests parametrized across registered backends (filesystem only at this PR; HTTP joins at PR 4). Covers MUSTs 1-3, 5-8, 10 from the Implementer Contract; MUSTs 4 and 9 deferred to PR 3+ (install/uninstall + URL credential redaction integration tests). - tests/test_mcp_server_registry_filesystem_backend.py: 32 tests + 2 skipped (install/uninstall stubs reserved for PR 3) covering path-traversal at API boundary, env-var resolution at load time with resolve_env=False, mcp.md parse semantics (multi-section, malformed, source field), load_all_mcp_servers delegation, validate() across all 7 branches, PermissionError/FileNotFoundError distinction, malformed-section MCPRegistryDescriptorInvalid raising, section-name charset filtering at list time. Test suite: 3090 to 3101 passing, 52 skipped, zero regressions. --- atomic_agents/mcp_registry/__init__.py | 235 +++++ atomic_agents/mcp_registry/backend.py | 292 ++++++ atomic_agents/mcp_registry/filesystem.py | 459 +++++++++ atomic_agents/mcp_registry/types.py | 156 +++ tests/test_mcp_server_registry_conformance.py | 891 ++++++++++++++++++ ..._mcp_server_registry_filesystem_backend.py | 841 +++++++++++++++++ 6 files changed, 2874 insertions(+) create mode 100644 atomic_agents/mcp_registry/__init__.py create mode 100644 atomic_agents/mcp_registry/backend.py create mode 100644 atomic_agents/mcp_registry/filesystem.py create mode 100644 atomic_agents/mcp_registry/types.py create mode 100644 tests/test_mcp_server_registry_conformance.py create mode 100644 tests/test_mcp_server_registry_filesystem_backend.py diff --git a/atomic_agents/mcp_registry/__init__.py b/atomic_agents/mcp_registry/__init__.py new file mode 100644 index 0000000..bc1f541 --- /dev/null +++ b/atomic_agents/mcp_registry/__init__.py @@ -0,0 +1,235 @@ +"""MCPServerRegistryBackend Protocol and registry (spec/36, issue #201). + +This package establishes the MCP server registry abstraction in the +protocol-pattern series alongside MemoryBackend (#57), LLMBackend (#87), +JudgeBackend (#112), LockBackend (#60), LogBackend (#61), +AgentProfileBackend (#63), ToolRegistryBackend (#64), MandateBackend (#124), +PolicyBackend (#89), PersonaBackend (#62), and CorpusBackend (#65). +See ``docs/spec/36-mcp-server-registry-backend.md`` (DRAFT at PR 1; LOCKED +at PR 5) for the prose contract. + +Public surface: + + from atomic_agents.mcp_registry import ( + # Protocol contract + MCPServerRegistryBackend, + # Exception classes + MCPRegistryError, + MCPServerNotInRegistry, + MCPServerAlreadyInstalled, + MCPRegistryUnavailable, + MCPRegistryAuthRequired, + MCPRegistryDescriptorInvalid, + BackendNotRegistered, + # Canonical types + MCPServerRef, + MCPServerRegistryCapabilities, + ValidationResult, + # Reference impl + FilesystemMCPServerRegistryBackend, + # Registry + register_mcp_server_registry_backend, + get_mcp_server_registry_backend, + list_mcp_server_registry_backends, + # Operator-config factory + get_default_mcp_server_registry_backend, + ) + +The registry is a process-local dict keyed by ``backend_id`` (a short +operator-pin string like ``"filesystem"`` or ``"http"``). Like the +Log + Lock + Profile + ToolRegistry + Corpus registries it stores backend +*classes*, not instances -- MCP registry backends are constructed per agent +scope and the registry's job is to let an operator pick "filesystem vs http" +for a deployment. The caller (``AtomicAgent.__init__`` in PR 2) instantiates +the chosen class with its scope-specific args. + +Thread-safety: registration is expected at import time (one-shot from each +backend's module); ``get_mcp_server_registry_backend`` is read-only and safe +to call from any thread. No lock is needed under that usage. + +Per the project's ``register_backend_placement_convention`` learning +(2026-05-29): ``register_mcp_server_registry_backend`` lives in +``__init__.py`` alongside the factory + redaction helper, matching the +dominant 6-of-10 pattern (locks, llm, logs, profile, registry, judge, corpus). +""" + +from __future__ import annotations + +import logging +import os +import re +from pathlib import Path + +from .backend import ( + BackendNotRegistered, + MCPRegistryAuthRequired, + MCPRegistryDescriptorInvalid, + MCPRegistryError, + MCPRegistryUnavailable, + MCPServerAlreadyInstalled, + MCPServerNotInRegistry, + MCPServerRegistryBackend, +) +from .filesystem import FilesystemMCPServerRegistryBackend +from .types import MCPServerRef, MCPServerRegistryCapabilities, ValidationResult + +_logger = logging.getLogger(__name__) + +__all__ = [ + # Protocol + "MCPServerRegistryBackend", + # Exception classes + "MCPRegistryError", + "MCPServerNotInRegistry", + "MCPServerAlreadyInstalled", + "MCPRegistryUnavailable", + "MCPRegistryAuthRequired", + "MCPRegistryDescriptorInvalid", + "BackendNotRegistered", + # Canonical types + "MCPServerRef", + "MCPServerRegistryCapabilities", + "ValidationResult", + # Reference implementations + "FilesystemMCPServerRegistryBackend", + # Registry + "register_mcp_server_registry_backend", + "get_mcp_server_registry_backend", + "list_mcp_server_registry_backends", + # Operator-config factory + "get_default_mcp_server_registry_backend", +] + + +# Process-local registry: backend_id -> backend class. Backend classes +# (not instances) because MCP registry backends carry per-scope construction +# args -- the framework instantiates ``FilesystemMCPServerRegistryBackend( +# agent_root, read_paths)`` per agent; the registry's role is the +# operator-pin lookup that maps ``"filesystem"`` -> +# ``FilesystemMCPServerRegistryBackend``. +_registry: dict[str, type] = {} + + +def register_mcp_server_registry_backend(backend_id: str, cls: type) -> None: + """Register a ``MCPServerRegistryBackend`` implementation under ``backend_id``. + + Typically called once at module-import time from each backend's package + (the default ``"filesystem"`` registration happens at the bottom of this + file). + + Re-registering the same ``backend_id`` replaces the existing binding and + logs at DEBUG -- intentional. Operators occasionally want to swap in a + wrapper (e.g., a ``CachingMCPServerRegistryBackend``) without first + unregistering the original. + """ + if backend_id in _registry: + _logger.debug( + "replacing registered mcp_server_registry backend for backend_id=%r", + backend_id, + ) + _registry[backend_id] = cls + + +def get_mcp_server_registry_backend(backend_id: str) -> type: + """Return the registered backend class for ``backend_id``. + + Raises ``BackendNotRegistered`` when the id is not in the registry. The + caller instantiates the returned class with its scope-specific constructor + arguments (e.g., ``cls(agent_root, read_paths)`` for the filesystem + backend). + """ + if backend_id not in _registry: + safe_id = _redact_for_error_message(backend_id) + raise BackendNotRegistered( + f"No MCPServerRegistryBackend registered under {safe_id!r}. " + f"Available: {list_mcp_server_registry_backends()}. " + f"Unset ATOMIC_AGENTS_MCP_SERVER_REGISTRY_BACKEND to use the " + f"filesystem default." + ) + return _registry[backend_id] + + +def list_mcp_server_registry_backends() -> list[str]: + """Return registered backend_ids in lexicographic order.""" + return sorted(_registry.keys()) + + +# Register the built-in filesystem backend at import time. HTTP backend +# registers itself when ``atomic_agents.mcp_registry.http`` is imported +# (ships at PR 4). +register_mcp_server_registry_backend("filesystem", FilesystemMCPServerRegistryBackend) + + +def get_default_mcp_server_registry_backend( + agent_root: Path, + read_paths: list, +) -> MCPServerRegistryBackend: + """Return the operator-pinned MCPServerRegistryBackend instance. + + Reads ``ATOMIC_AGENTS_MCP_SERVER_REGISTRY_BACKEND`` from the environment + (default ``"filesystem"``). For non-filesystem backends, reads + ``ATOMIC_AGENTS_MCP_SERVER_REGISTRY_BACKEND_URL`` for the connection / + catalog URL. The env var name is intentionally generic so future HTTP / + SaaS backends plug in via the same key without operators having to relearn + the env vocabulary. + + An empty string (or whitespace-only) value for + ``ATOMIC_AGENTS_MCP_SERVER_REGISTRY_BACKEND`` is treated as "not set" and + falls back to the filesystem default. This guards against shell + ``export ATOMIC_AGENTS_MCP_SERVER_REGISTRY_BACKEND=`` accidents. + + The ``agent_root`` and ``read_paths`` parameters are forwarded to the + filesystem backend (or any future backend that accepts them). HTTP / SaaS + backends use ``agent_scope`` from the URL query parameter instead. + + For programmatic operators who want to construct the backend themselves, + the ``AtomicAgent(..., mcp_server_registry_backend=...)`` constructor kwarg + (PR 2) bypasses this factory entirely. + + See spec/36 §"Operator surface" for the full env-var reference + the + env-var-vs-kwarg trade-off rationale. + """ + raw_backend_id = ( + os.environ.get("ATOMIC_AGENTS_MCP_SERVER_REGISTRY_BACKEND", "").strip().lower() + ) + + # Empty string treated as "not set"; falls through to filesystem default. + if not raw_backend_id: + raw_backend_id = "filesystem" + + if raw_backend_id == "filesystem": + return FilesystemMCPServerRegistryBackend(agent_root, read_paths) + + # Unknown backend_id. Sanitize before echoing in the error message to + # prevent credential leaks when operators accidentally paste a URL into + # ATOMIC_AGENTS_MCP_SERVER_REGISTRY_BACKEND instead of the _URL variable. + safe_backend_id = _redact_for_error_message(raw_backend_id) + raise BackendNotRegistered( + f"ATOMIC_AGENTS_MCP_SERVER_REGISTRY_BACKEND={safe_backend_id!r} is not a " + f"known backend. Available: {list_mcp_server_registry_backends()}. " + f"Unset the env var to use the filesystem default." + ) + + +def _redact_for_error_message(value: str, max_len: int = 32) -> str: + """Sanitize a possibly-sensitive env var value for error-message echo. + + Strips anything after ``://`` (URL credential heuristic) and truncates at + ``max_len`` to bound the echoed string. Returns the bare backend_id if no + URL marker is present. The full original value is never echoed -- this + prevents the credential-leak failure mode where an operator accidentally + sets ``ATOMIC_AGENTS_MCP_SERVER_REGISTRY_BACKEND=https://user:pass@host`` + instead of ``ATOMIC_AGENTS_MCP_SERVER_REGISTRY_BACKEND_URL``. + + Mirrors ``logs/__init__.py:316``, ``profile/__init__.py:_redact_for_error_message``, + and ``corpus/__init__.py:_redact_for_error_message``. + """ + if "://" in value: + scheme = value.split("://", 1)[0] + return f"{scheme}://..." + # DSN heuristic: catch user:password@host/db style without a scheme. + if "@" in value and re.search(r":[^/]+@", value): + return "[redacted-connection-string]" + if len(value) > max_len: + return value[:max_len] + "..." + return value diff --git a/atomic_agents/mcp_registry/backend.py b/atomic_agents/mcp_registry/backend.py new file mode 100644 index 0000000..82537d3 --- /dev/null +++ b/atomic_agents/mcp_registry/backend.py @@ -0,0 +1,292 @@ +"""MCPServerRegistryBackend Protocol -- the contract every MCP server registry backend satisfies. + +This is the twelfth and final open Protocol in the protocol-pattern series alongside +MemoryBackend (#57), LLMBackend (#87), JudgeBackend (#112), LockBackend (#60), +LogBackend (#61), AgentProfileBackend (#63), ToolRegistryBackend (#64), +MandateBackend (#124), PolicyBackend (#89), PersonaBackend (#62), and +CorpusBackend (#65). + +``MCPServerRegistryBackend`` abstracts the mounted-server-source layer: it is +the catalog Protocol that produces ``MCPServerSpec`` instances for +``MCPClientPool`` to consume at agent construction. It does NOT replace +``MCPClientPool`` (spec/19) -- the subprocess lifecycle seam stays. The two +compose: + + backend.list_mcp_servers() -> list[MCPServerRef] # metadata (cheap) + backend.load_mcp_server(name) -> MCPServerSpec # full spec (per server) + backend.load_all_mcp_servers() -> list[MCPServerSpec] # bulk (agent construction hot path) + agent.mcp_pool = MCPClientPool(specs, agents_root) # subprocess lifecycle (unchanged) + +See ``docs/spec/36-mcp-server-registry-backend.md`` (DRAFT at PR 1; LOCKED at PR 5) +for the normative contract. + +The module-level ``_default_load_all`` helper is the canonical default +implementation for ``load_all_mcp_servers()``. Filesystem backend delegates to +it directly (PR 1). HTTP backend overrides with a single bulk GET at PR 4. +Backends overriding MUST preserve the MUST 10 consistency guarantee: the output +must be semantically equivalent to +``[load_mcp_server(ref.name) for ref in list_mcp_servers()]``. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Protocol, runtime_checkable + +from .types import MCPServerRegistryCapabilities, MCPServerRef, ValidationResult + +if TYPE_CHECKING: + from ..mcp import MCPServerSpec + + +# ────────────────────────────────────────────────────────────────────────────── +# Exception classes + + +class MCPRegistryError(Exception): + """Base class for MCPServerRegistry subsystem errors (spec/36). + + All MCPServerRegistry reference implementations raise subclasses of this + exception. Operators may ``except MCPRegistryError`` to catch the entire + MCP registry error family. + """ + + +class MCPServerNotInRegistry(MCPRegistryError): + """``load_mcp_server(name)`` called with a name not in the catalog. + + HTTP equivalent: catalog server returned 404 on ``GET /mcp-servers/``. + + Distinct from ``MCPServerConnectFailed`` (spec/19's runtime subprocess + failure) -- this exception means the server is not declared in the catalog, + not that the subprocess failed to start. + """ + + +class MCPServerAlreadyInstalled(MCPRegistryError): + """``install(spec)`` found a name collision in the catalog. + + HTTP equivalent: catalog server returned 409 on ``POST /mcp-servers``. + + Filesystem backend raises this when a section with the same name already + exists in ``mcp.md``. The caller must uninstall the existing entry first + or choose a different name. + """ + + +class MCPRegistryUnavailable(MCPRegistryError): + """Transient failure reaching the catalog backend. + + Raised for: network errors, DNS failures, connection refused, server 5xx, + file-lock contention on the filesystem backend. + + Operators should retry. The framework does NOT auto-retry. Distinct from + ``MCPServerNotInRegistry`` (permanent absence) per MUST 7. + """ + + +class MCPRegistryAuthRequired(MCPRegistryError): + """HTTP catalog server returned 401 and no ``auth_token`` was provided. + + Operators set ``ATOMIC_AGENTS_MCP_SERVER_REGISTRY_AUTH_TOKEN`` in the + environment or pass ``auth_token=`` to the ``HTTPMCPServerRegistryBackend`` + constructor. + """ + + +class MCPRegistryDescriptorInvalid(MCPRegistryError): + """The server descriptor could not be parsed. + + Filesystem: ``mcp.md`` parse failure (malformed YAML, missing required + fields). HTTP: catalog server returned an invalid JSON body. + """ + + +class BackendNotRegistered(MCPRegistryError): + """Operator-pinned ``backend_id`` is not in the registry. + + Raised by ``get_mcp_server_registry_backend(backend_id)`` when ``backend_id`` + is not registered. The error message includes the list of registered ids + and a credential-redacted echo of the value that was tried. + """ + + +# ────────────────────────────────────────────────────────────────────────────── +# Protocol definition + + +@runtime_checkable +class MCPServerRegistryBackend(Protocol): + """Contract every MCP server registry backend implementation must satisfy. + + Implementations MUST NOT subclass this Protocol -- it is structural. + Implementations satisfy it by exposing the methods below with the + documented behavior. The ``@runtime_checkable`` decorator enables + ``isinstance(obj, MCPServerRegistryBackend)`` to perform a method-presence + check (not a signature check -- signatures are static-typing's job). + + Per-agent scoping is structural. ``FilesystemMCPServerRegistryBackend`` + is scoped to ``agent_root`` (one ``mcp.md`` per agent). + ``HTTPMCPServerRegistryBackend`` scopes every wire request by + ``agent_scope``. A backend that returns org-wide catalog from + ``list_mcp_servers()`` is non-conformant (MUST 5). + + Server name charset ``[a-zA-Z0-9_.+@-]+`` is enforced at the API boundary + BEFORE any backend access (MUST 1). Path-traversal tokens (``..``, ``/``, + ``\\``), control characters, newlines, leading dots, and empty strings + MUST raise ``ValueError``. + """ + + # ─── Capability advertisement ───────────────────────────────────────── + + @property + def capabilities(self) -> MCPServerRegistryCapabilities: + """Advertise what this backend instance supports. + + Returns a frozen ``MCPServerRegistryCapabilities`` dataclass. The + values are a contract, not a hint -- conformance tests assert + claim-vs-behavior parity (MUST 3). + + Filesystem backend: constant across the lifetime of the instance + (no remote dependency). HTTP backend: dynamic -- reflects the + connected catalog server's tier; lazy probe at first non-construction + call; explicit ``refresh_capabilities()`` to bypass cache. + + All call sites use property syntax: ``backend.capabilities.supports_install`` + NOT ``backend.capabilities().supports_install``. + """ + ... + + @property + def backend_id(self) -> str: + """Stable identifier for this backend implementation (MUST 6a). + + Returns a short lowercase string matching the registry key used to + register this backend class (e.g., ``"filesystem"``, ``"http"``). + Must not change across calls on the same instance. Used for logging, + audit events, and operator-facing capability output. + """ + ... + + # ─── Core discovery (always implemented) ───────────────────────────── + + def list_mcp_servers(self) -> list[MCPServerRef]: + """Return lightweight server references for THIS AGENT'S mounted set. + + Returns ``[]`` when the catalog is empty or the backing resource is + absent. Never raises on a missing ``mcp.md`` or an unreachable catalog. + Lexicographic order by ``name`` (MUST 5 -- consistent sort). + + Cheap by construction: no subprocess spawn, no handler import, no + MCP server connection. + """ + ... + + def load_mcp_server(self, name: str) -> MCPServerSpec: + """Return the fully-populated ``MCPServerSpec`` for the named server. + + MUST validate ``name`` against path-traversal at the API boundary + BEFORE any backend access. Raises ``ValueError`` on invalid names. + + MUST raise ``MCPServerNotInRegistry`` when the name is absent from + the catalog. + + MUST resolve ``$VAR`` env-var references against the client process + environment at call time (MUST 8). Unresolvable references raise + ``MCPServerConnectFailed`` (the existing spec/19 exception; not a new + exception class). + """ + ... + + def load_all_mcp_servers(self) -> list[MCPServerSpec]: + """Return all mounted ``MCPServerSpec`` instances in bulk. + + Default: semantically equivalent to + ``[load_mcp_server(ref.name) for ref in list_mcp_servers()]``. + HTTP backend overrides with a single bulk GET at PR 4. + + MUST 10: output MUST be semantically equivalent to the default + iteration for any given backend state. Backends overriding for + performance MUST preserve this equivalence. + """ + ... + + def validate(self, name: str) -> ValidationResult: + """Static check of the named server descriptor. + + Does NOT spawn the MCP server subprocess (MUST 2 analog -- static + only). Returns ``ValidationResult(ok=False, errors=[...])`` when the + server is absent; does NOT raise ``MCPServerNotInRegistry``. + + Filesystem implementation checks: descriptor parses; command exists + on PATH (warn if absent -- do not fail); transport recognized; ``$VAR`` + refs resolve (warn if not). + """ + ... + + # ─── Capability-gated lifecycle ─────────────────────────────────────── + + def install(self, spec: MCPServerSpec) -> MCPServerRef: + """Mount a new MCP server in the catalog. + + MUST be atomic at the server-name level (MUST 9). Raises + ``MCPServerAlreadyInstalled`` on name collision. + + Backends with ``capabilities.supports_install=False`` MUST raise + ``NotImplementedError``. The filesystem backend ships this at PR 3. + """ + ... + + def uninstall(self, name: str) -> None: + """Remove a mounted server from the catalog. + + MUST be idempotent: uninstalling an absent name is a no-op (no + exception). MUST validate ``name`` against path-traversal. + + Backends with ``capabilities.supports_uninstall=False`` MUST raise + ``NotImplementedError``. The filesystem backend ships this at PR 3. + """ + ... + + # ─── Lifecycle ──────────────────────────────────────────────────────── + + def refresh_capabilities(self) -> MCPServerRegistryCapabilities: + """Re-probe the backend's capabilities and return the current view. + + Filesystem: no-op -- returns the static cached instance (filesystem + has no remote dependency; capabilities never change). + + HTTP: re-runs the full capability probe sequence (bypassing any cache) + and returns the updated runtime view. Operators call this after + upgrading a catalog server to a higher tier. + + Protocol contract: callable, idempotent, returns current capabilities. + """ + ... + + def close(self) -> None: + """Release any resources held by this backend instance. + + Idempotent (MUST 6b). Calling ``close()`` twice must not raise. Calling + it before any other method is a no-op. + + Filesystem: no-op. HTTP: closes the underlying HTTP client session. + """ + ... + + +# ────────────────────────────────────────────────────────────────────────────── +# Default load_all helper + + +def _default_load_all(backend: MCPServerRegistryBackend) -> list[MCPServerSpec]: + """Default implementation of ``load_all_mcp_servers()`` for any backend. + + Iterates ``list_mcp_servers()`` and calls ``load_mcp_server(ref.name)`` + for each entry. This is the canonical MUST 10 baseline: filesystem backend + delegates to this helper directly; HTTP backend overrides it with a single + bulk GET (``GET /mcp-servers?expand=spec``) in PR 4. + + Backends overriding ``load_all_mcp_servers()`` for performance MUST produce + output that is semantically equivalent to this function's output (MUST 10). + """ + return [backend.load_mcp_server(ref.name) for ref in backend.list_mcp_servers()] diff --git a/atomic_agents/mcp_registry/filesystem.py b/atomic_agents/mcp_registry/filesystem.py new file mode 100644 index 0000000..4ade288 --- /dev/null +++ b/atomic_agents/mcp_registry/filesystem.py @@ -0,0 +1,459 @@ +"""FilesystemMCPServerRegistryBackend -- mcp.md-backed reference implementation. + +Wraps the existing ``parse_mcp_md_text()`` semantics from ``atomic_agents/mcp.py`` +behind the ``MCPServerRegistryBackend`` Protocol contract (spec/36). Each agent +has its own ``/mcp.md`` file; the backend is scoped per-agent via +``agent_root``. + +Read paths only at PR 1. Install and uninstall ship in PR 3 alongside the +``LockBackend`` lease integration (per spec/36 D5 + D6 PR cadence). + +Constructor signature stability: ``lock_backend`` is accepted but unused at PR 1 +so PR 3 callers can pass the kwarg without breaking PR 1 callers. This matches +the spec/36 §"FilesystemMCPServerRegistryBackend" constructor note. +""" + +from __future__ import annotations + +import errno +import logging +import os +import re +import shutil +from dataclasses import replace +from pathlib import Path +from typing import TYPE_CHECKING + +from ..mcp import ( + MCPServerSpec, + _resolve_env_vars, + parse_mcp_md_text, + validate_mcp_server_args, +) +from .backend import ( + MCPRegistryDescriptorInvalid, + MCPRegistryUnavailable, + MCPServerNotInRegistry, + _default_load_all, +) +from .types import MCPServerRef, MCPServerRegistryCapabilities, ValidationResult + +if TYPE_CHECKING: + from ..locks.backend import LockBackend + +_logger = logging.getLogger(__name__) + +# Charset rule from MUST 1. Matches CorpusBackend, PersonaBackend, PolicyBackend. +# Path-traversal tokens (``..``, ``/``, ``\\``), control chars, newlines, +# leading dots, and empty strings all fail this check. +_NAME_RE = re.compile(r"^[a-zA-Z0-9_.+@-]+$") + +# Additional explicit refusal of path-traversal tokens that might still +# technically match the charset (e.g., a name that is purely dots). +_PATH_TRAVERSAL_TOKENS = frozenset({"..", "."}) + + +def _validate_server_name(name: str) -> None: + """Raise ``ValueError`` when ``name`` fails the MUST 1 charset rule. + + Checks at the API boundary BEFORE any backend access (spec/36 MUST 1). + Validation logic mirrors ``CorpusBackend`` and ``PersonaBackend`` per + prep-pass finding C-F2. + + Rejects: empty string, leading dot, path-traversal tokens (.., /, \\), + control characters, newlines, and any character outside [a-zA-Z0-9_.+@-]. + """ + if not name: + raise ValueError("MCP server name must not be empty.") + if name.startswith("."): + raise ValueError( + f"MCP server name {name!r} must not start with '.'; " + f"leading-dot names are reserved for hidden files." + ) + if not _NAME_RE.match(name): + raise ValueError( + f"MCP server name {name!r} contains invalid characters. " + f"Allowed: [a-zA-Z0-9_.+@-]" + ) + if name in _PATH_TRAVERSAL_TOKENS: + raise ValueError( + f"MCP server name {name!r} is a path-traversal token and is not allowed." + ) + + +class FilesystemMCPServerRegistryBackend: + """``mcp.md``-backed implementation of ``MCPServerRegistryBackend`` (spec/36). + + Reads ``/mcp.md`` for server declarations. Covers the read + path only at PR 1 (list, load, load_all, validate, capabilities, + refresh_capabilities, close). Install and uninstall land in PR 3 with the + ``LockBackend`` lease integration. + + Constructor: + + ``agent_root``: the agent's directory. ``mcp.md`` lives at + ``/mcp.md``. MAY not exist at construction -- MUST 2 + (side-effect-free construction): no file is opened here. + + ``read_paths``: list of ``Path`` objects the agent declares it may read + (from ``tools.md``). Used by ``load_mcp_server(name)`` to apply the + path-traversal check (Decision 8 of spec/36). Captured at construction; + NOT applied at ``list_mcp_servers()`` time (list is metadata-only). + + ``lock_backend``: reserved for PR 3 (install/uninstall atomicity via + ``LockBackend.acquire("mcp_registry", timeout=30)``). Accepted but + unused at PR 1 so PR 3 callers can pass the kwarg without a constructor + change. Operators passing a custom ``lock_backend`` MUST scope it to a + registry-specific resource (e.g., ``.mcp_registry.lock``), NOT the + agent's main ``.lock`` file. + """ + + def __init__( + self, + agent_root: Path, + read_paths: list, + *, + lock_backend: LockBackend | None = None, + ) -> None: + # MUST 2: side-effect-free construction. No file opens, no subprocess + # spawns, no network calls here. + self._agent_root = agent_root + self._read_paths = read_paths + self._lock_backend = lock_backend # unused at PR 1; reserved for PR 3 + + # ─── Capability advertisement ───────────────────────────────────────── + + @property + def capabilities(self) -> MCPServerRegistryCapabilities: + """Static capability advertisement. Constant for the lifetime of this instance. + + ``supports_install`` and ``supports_uninstall`` are False at PR 1 + (methods ship in PR 3). Capability honesty (MUST 3): the conformance + suite asserts that False-reported capabilities raise ``NotImplementedError``. + This flips to True at PR 3 when the methods land. + """ + return MCPServerRegistryCapabilities( + supports_install=False, + supports_uninstall=False, + supports_capability_handshake=False, + supports_audit=False, + durable=True, + ) + + # ─── Backend identity ───────────────────────────────────────────────── + + @property + def backend_id(self) -> str: + """Stable identifier matching the registry key (MUST 6a).""" + return "filesystem" + + # ─── Core discovery ─────────────────────────────────────────────────── + + def list_mcp_servers(self) -> list[MCPServerRef]: + """Return lightweight server refs from ``/mcp.md``. + + Returns ``[]`` when ``mcp.md`` is absent or empty (MUST 7 -- absent + file is not an error). Lexicographic order by ``name`` (spec/36 §list + semantics). + + CRITICAL: calls ``parse_mcp_md_text(content, resolve_env=False, + read_paths=None)``. The ``read_paths=None`` is intentional per + prep-pass Theme 3: passing ``self._read_paths`` here would trigger + path-traversal validation at list time, violating Decision 8's + "validation at load_mcp_server boundary" invariant. The + ``resolve_env=False`` keeps ``$VAR`` references raw (they will be + resolved at ``load_mcp_server`` time per MUST 8 / Decision 7). + """ + mcp_md = self._agent_root / "mcp.md" + if not mcp_md.exists(): + return [] + + try: + content = mcp_md.read_text(encoding="utf-8") + except OSError as exc: + _logger.warning( + "FilesystemMCPServerRegistryBackend: cannot read %s: %s", + mcp_md, + exc, + ) + return [] + + try: + specs = parse_mcp_md_text( + content, + mcp_md_path=mcp_md, + read_paths=None, + resolve_env=False, + ) + except Exception as exc: + _logger.warning( + "FilesystemMCPServerRegistryBackend: mcp.md parse error: %s", + exc, + ) + return [] + + refs = [] + for spec in specs: + # P2 #1: Validate section names; skip and warn on invalid names to + # prevent load_all_mcp_servers from raising uncaught ValueError on + # a tampered or manually-edited mcp.md. + try: + _validate_server_name(spec.name) + except ValueError: + _logger.warning( + "FilesystemMCPServerRegistryBackend: skipping malformed section " + "name %r in mcp.md (failed charset validation)", + spec.name, + ) + continue + refs.append( + MCPServerRef( + name=spec.name, + description=spec.description, + transport=spec.transport, + version=None, + source=f"mcp.md#section:{spec.name}", + ) + ) + return sorted(refs, key=lambda r: r.name) + + def load_mcp_server(self, name: str) -> MCPServerSpec: + """Return the fully-populated ``MCPServerSpec`` for the named server. + + Validates ``name`` charset at the API boundary BEFORE any disk access + (MUST 1). Raises ``MCPServerNotInRegistry`` when the name is absent. + Resolves ``$VAR`` env-var references at call time (MUST 8 / Decision 7). + Applies path-traversal validation via ``validate_mcp_server_args`` + AFTER materialization (Decision 8 -- validation at this boundary). + + Implementation steps: + 1. Validate name charset -- raise ``ValueError`` on invalid. + 2. Re-parse ``mcp.md`` with ``resolve_env=False`` to get raw specs. + 3. Find the spec matching ``name`` -- raise ``MCPServerNotInRegistry`` + if absent. + 4. Resolve ``$VAR`` refs via ``_resolve_env_vars`` -- raises + ``MCPServerConnectFailed`` on unresolvable refs (spec/19 exception). + 5. Apply path-traversal validation via ``validate_mcp_server_args``. + 6. Return the materialized spec. + """ + _validate_server_name(name) + + mcp_md = self._agent_root / "mcp.md" + if not mcp_md.exists(): + raise MCPServerNotInRegistry( + f"MCP server {name!r} not found: mcp.md does not exist at {mcp_md}." + ) + + try: + content = mcp_md.read_text(encoding="utf-8") + except OSError as exc: + # MUST 7: distinguish permanent absence (FileNotFoundError / ENOENT) + # from transient failures (PermissionError, IsADirectoryError, etc.). + # FileNotFoundError means the file does not exist -- permanent. + # All other OSError subclasses indicate a transient configuration + # problem (wrong permissions, EISDIR) -- raise MCPRegistryUnavailable + # so callers know to retry rather than treating this as a catalog miss. + if isinstance(exc, FileNotFoundError) or exc.errno == errno.ENOENT: + raise MCPServerNotInRegistry( + f"MCP server {name!r} not found: mcp.md does not exist at {mcp_md}." + ) from exc + raise MCPRegistryUnavailable( + f"MCP server {name!r} cannot be loaded: cannot read {mcp_md}: {exc}" + ) from exc + + try: + specs = parse_mcp_md_text( + content, + mcp_md_path=mcp_md, + read_paths=None, + resolve_env=False, + ) + except Exception as exc: + raise MCPRegistryDescriptorInvalid( + f"mcp.md at {mcp_md} could not be parsed: {exc}" + ) from exc + + # P1 #2: Detect malformed sections (H2 header exists but parse failed). + # If the name appears in an H2 header but not in the parsed spec list, + # the section is malformed (e.g., missing command:). Raise a more + # informative MCPRegistryDescriptorInvalid instead of MCPServerNotInRegistry. + h2_names = set(re.findall(r"^## (\S+)", content, re.MULTILINE)) + + # Find the matching spec by name. + matched: MCPServerSpec | None = None + for spec in specs: + if spec.name == name: + matched = spec + break + + if matched is None: + if name in h2_names: + raise MCPRegistryDescriptorInvalid( + f"MCP server {name!r} has a malformed descriptor in mcp.md " + f"(section exists but parse failed; check that 'command:' is present)" + ) + raise MCPServerNotInRegistry( + f"MCP server {name!r} not found in {mcp_md}. " + f"Available: {sorted(s.name for s in specs)}" + ) + + # Resolve $VAR env refs at load time (MUST 8 / Decision 7). + # _resolve_env_vars raises MCPServerConnectFailed on unresolvable refs, + # matching the spec/19 error shape exactly. + resolved_env = _resolve_env_vars(matched.env, name) + + # Reconstruct the spec with resolved env. MCPServerSpec is a plain + # dataclass (not frozen), so we create a fresh instance. + materialized = replace(matched, env=resolved_env) + + # Apply path-traversal check AFTER materialization (Decision 8). + if self._read_paths: + validate_mcp_server_args(materialized, self._read_paths) + + return materialized + + def load_all_mcp_servers(self) -> list[MCPServerSpec]: + """Return all mounted ``MCPServerSpec`` instances in bulk. + + Delegates to ``_default_load_all`` per prep-pass Theme 4. This + preserves MUST 10 consistency automatically: the output is + semantically equivalent to ``[load_mcp_server(ref.name) for ref in + list_mcp_servers()]`` by construction. + + HTTP backend overrides this with a single bulk GET at PR 4. + """ + return _default_load_all(self) + + def validate(self, name: str) -> ValidationResult: + """Static check of the named server descriptor. + + Does NOT spawn the MCP subprocess (MUST 2 analog -- static only). + Returns ``ValidationResult(ok=False, errors=[...])`` when the server + is absent; does NOT raise ``MCPServerNotInRegistry``. + + Checks: + - Descriptor parses from mcp.md. + - ``command`` exists on PATH (warn if absent -- do not fail; PATH at + run time may differ). + - ``transport`` value is recognized (``"stdio"`` is the only supported + value in v1). + - ``$VAR`` refs resolve against current ``os.environ`` (warn if not -- + do not fail; env may be set at agent run time). + + Validates ``name`` charset at the API boundary (MUST 1). + """ + try: + _validate_server_name(name) + except ValueError as exc: + return ValidationResult(ok=False, errors=[str(exc)], warnings=[]) + + mcp_md = self._agent_root / "mcp.md" + if not mcp_md.exists(): + return ValidationResult( + ok=False, + errors=[ + f"MCP server {name!r} not found: mcp.md does not exist at {mcp_md}." + ], + warnings=[], + ) + + try: + content = mcp_md.read_text(encoding="utf-8") + specs = parse_mcp_md_text( + content, + mcp_md_path=mcp_md, + read_paths=None, + resolve_env=False, + ) + except Exception as exc: + return ValidationResult( + ok=False, + errors=[f"mcp.md parse error: {exc}"], + warnings=[], + ) + + matched: MCPServerSpec | None = None + for spec in specs: + if spec.name == name: + matched = spec + break + + if matched is None: + available = sorted(s.name for s in specs) + return ValidationResult( + ok=False, + errors=[f"MCP server {name!r} not in registry. Available: {available}"], + warnings=[], + ) + + errors: list[str] = [] + warnings: list[str] = [] + + # Check command exists on PATH (warn, not error -- PATH may differ at runtime). + if matched.command and shutil.which(matched.command) is None: + warnings.append( + f"command {matched.command!r} not found on PATH; " + f"may be available at agent run time." + ) + + # Check transport is recognized. + if matched.transport not in ("stdio",): + errors.append( + f"transport {matched.transport!r} is not recognized. " + f"Only 'stdio' is supported in v1." + ) + + # Check $VAR refs resolve (warn, not error -- env may be set at run time). + for env_key, env_val in matched.env.items(): + if env_val.startswith("$"): + var_name = env_val[1:] + if os.environ.get(var_name) is None: + warnings.append( + f"env var ${var_name} (for {env_key!r}) not set in current " + f"process; must be set at agent run time." + ) + + ok = len(errors) == 0 + return ValidationResult(ok=ok, errors=errors, warnings=warnings) + + # ─── Capability-gated lifecycle (PR 3) ─────────────────────────────── + + def install(self, spec: MCPServerSpec) -> MCPServerRef: + """Not implemented at PR 1. Lands in PR 3 with LockBackend integration. + + ``capabilities.supports_install=False`` at this PR -- conformance suite + asserts this path raises ``NotImplementedError`` when ``supports_install`` + reports False (MUST 3). + """ + raise NotImplementedError( + "FilesystemMCPServerRegistryBackend.install lands in PR 3 alongside " + "LockBackend integration and atomic mcp.md write semantics." + ) + + def uninstall(self, name: str) -> None: + """Not implemented at PR 1. Lands in PR 3 with LockBackend integration. + + ``capabilities.supports_uninstall=False`` at this PR -- conformance suite + asserts this path raises ``NotImplementedError`` when ``supports_uninstall`` + reports False (MUST 3). + """ + raise NotImplementedError( + "FilesystemMCPServerRegistryBackend.uninstall lands in PR 3 alongside " + "LockBackend integration and atomic mcp.md write semantics." + ) + + # ─── Lifecycle ──────────────────────────────────────────────────────── + + def refresh_capabilities(self) -> MCPServerRegistryCapabilities: + """Filesystem capabilities are static -- returns the cached constant. + + No-op refresh because filesystem has no remote dependency. The + ``capabilities`` property returns the same frozen instance every time. + """ + return self.capabilities + + def close(self) -> None: + """No-op. Filesystem backend holds no open resources. + + Idempotent (MUST 6b): calling ``close()`` twice does not raise. + """ diff --git a/atomic_agents/mcp_registry/types.py b/atomic_agents/mcp_registry/types.py new file mode 100644 index 0000000..b1728c0 --- /dev/null +++ b/atomic_agents/mcp_registry/types.py @@ -0,0 +1,156 @@ +"""Canonical dataclasses for the MCPServerRegistryBackend Protocol (spec/36). + +Three dataclasses define the MCP server registry substrate contract (issue #201, +spec/36): + +- ``MCPServerRef`` -- lightweight listing token returned by ``list_mcp_servers()``; + cheap to enumerate without loading full specs. +- ``MCPServerRegistryCapabilities`` -- frozen capability advertisement for a backend + instance; conformance tests assert claim-vs-behavior parity. +- ``ValidationResult`` -- result of a static ``validate(name)`` check (same shape + as ``ToolRegistryBackend`` spec/25). + +No exceptions live here. MCPServerRegistry exceptions live in +``atomic_agents/mcp_registry/backend.py`` to avoid circular imports. + +No Protocol definition lives here. The ``MCPServerRegistryBackend`` Protocol is in +``atomic_agents/mcp_registry/backend.py``. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field + + +# ────────────────────────────────────────────────────────────────────────────── +# MCPServerRef + + +@dataclass(frozen=True) +class MCPServerRef: + """Lightweight listing token returned by ``list_mcp_servers()``. + + Carries metadata only. Does NOT include ``command`` / ``args`` / ``env`` + (those are part of the materialized ``MCPServerSpec`` from spec/19, returned + by ``load_mcp_server(name)``). This lazy/eager distinction matches + ``ToolRegistryBackend`` Decision 5. + + Fields: + + ``name``: operator-chosen short name; matches ``MCPServerSpec.name``. + + ``description``: operator-readable note; defaults to empty string, matching + ``MCPServerSpec.description`` parity (prep-pass finding A-F5). + + ``transport``: catalog filtering by transport; defaults to ``"stdio"`` + (only supported transport in v1 per spec/19). + + ``version``: reserved for future use; matches ToolRegistry Decision 4. + Always ``None`` at v1.0. + + ``source``: backend-specific origin marker. Filesystem backend sets + ``source="mcp.md#section:"``; HTTP backend sets + ``source="/mcp-servers/"``. + """ + + name: str + description: str = "" + transport: str = "stdio" + version: str | None = None + source: str = "" + + def to_dict(self) -> dict: + """Serialize to a plain dict for JSON round-trip. + + All fields included. ``None`` values are preserved as ``null`` in JSON + so the round-trip is lossless for the ``version`` field. + """ + return { + "name": self.name, + "description": self.description, + "transport": self.transport, + "version": self.version, + "source": self.source, + } + + @classmethod + def from_dict(cls, d: dict) -> MCPServerRef: + """Deserialize from a plain dict produced by ``to_dict()``. + + Extra keys in ``d`` are silently ignored for forward-compatibility. + Missing optional keys fall back to field defaults. + """ + return cls( + name=d["name"], + description=d.get("description", ""), + transport=d.get("transport", "stdio"), + version=d.get("version"), + source=d.get("source", ""), + ) + + +# ────────────────────────────────────────────────────────────────────────────── +# MCPServerRegistryCapabilities + + +@dataclass(frozen=True) +class MCPServerRegistryCapabilities: + """Capability advertisement for a ``MCPServerRegistryBackend`` instance. + + Conformance tests assert claim-vs-behavior parity (MUST 3). Backends that + misreport capabilities produce silent failures rather than loud refusals. + + Fields: + + ``supports_install``: True if ``install(spec)`` does not raise + ``NotImplementedError``. ``FilesystemMCPServerRegistryBackend`` reports + False at PR 1 (method ships in PR 3). ``HTTPMCPServerRegistryBackend`` + reports False at PR 4 (static class default); dynamic per tier at PR 5. + + ``supports_uninstall``: True if ``uninstall(name)`` does not raise + ``NotImplementedError``. Same PR cadence as ``supports_install``. + + ``supports_capability_handshake``: True only on ``HTTPMCPServerRegistryBackend`` + (Decision 4). Filesystem backend always reports False (no remote dependency). + + ``supports_audit``: Reserved. Tier-3 HTTP catalog servers may flip this True. + Both v1.0 reference implementations return False. + + ``durable``: True if backend state persists across process restarts. + Both v1.0 reference implementations return True. ``durable=False`` is + reserved for future in-memory test-fixture backends. + """ + + supports_install: bool + supports_uninstall: bool + supports_capability_handshake: bool + supports_audit: bool + durable: bool + + +# ────────────────────────────────────────────────────────────────────────────── +# ValidationResult + + +@dataclass(frozen=True) +class ValidationResult: + """Result of a static ``validate(name)`` check. + + Same shape as ``ToolRegistryBackend`` (spec/25 canonical types section). + + ``ok``: equivalent to ``not errors``. Callers may branch on ``ok`` for the + common case; ``errors`` and ``warnings`` provide detail. + + ``errors``: list of error strings. Non-empty means the server is unusable. + Examples: descriptor does not parse; command not found on PATH; required + env var not set. + + ``warnings``: list of warning strings. Non-empty means the server is usable + but flagged. Examples: command not found on PATH at validation time (may + exist at runtime); env var not set in current process (may be set at + agent run time); transport value unrecognized but non-empty. + """ + + ok: bool + errors: list[str] = field(default_factory=list) + warnings: list[str] = field(default_factory=list) diff --git a/tests/test_mcp_server_registry_conformance.py b/tests/test_mcp_server_registry_conformance.py new file mode 100644 index 0000000..94199ed --- /dev/null +++ b/tests/test_mcp_server_registry_conformance.py @@ -0,0 +1,891 @@ +"""Conformance tests for MCPServerRegistryBackend Protocol (spec/36). + +Parametrized across registered backends. At PR 1 only the filesystem backend is +parametrized (params=["filesystem"]). The HTTP backend joins at PR 4 via an +additional params entry -- every test here then runs against both backends with +zero additional test code. + +~40 tests covering the Protocol contract that ANY backend MUST satisfy. + +Coverage: + MUST 1 -- name charset validation at API boundary (~5 tests) + MUST 2 -- side-effect-free construction (~4 tests) + MUST 3 -- capability claim-vs-behavior parity (~5 tests) + MUST 5 -- cross-agent isolation / per-agent scoping (~4 tests) + MUST 6 -- backend_id stability + close() idempotency (~4 tests) + MUST 7 -- transient-vs-permanent failure honesty (~4 tests) + MUST 8 -- env-var resolution at load time (~6 tests) + MUST 10 -- load_all_mcp_servers consistency (~5 tests) + +Per spec/36 and prep notes B-F4, B-F5, B-F6, B-F7, B-F8. +""" + +from __future__ import annotations + +import os +from pathlib import Path +from textwrap import dedent +from unittest.mock import patch + +import pytest + +from atomic_agents.mcp_registry import ( + FilesystemMCPServerRegistryBackend, + MCPRegistryUnavailable, + MCPServerNotInRegistry, + MCPServerRegistryBackend, + MCPServerRegistryCapabilities, +) +from atomic_agents.mcp_registry.backend import _default_load_all +from atomic_agents.mcp_registry.types import MCPServerRef, ValidationResult +from atomic_agents.mcp import MCPServerSpec, parse_mcp_md_text +from atomic_agents.exceptions import MCPServerConnectFailed + + +# ────────────────────────────────────────────────────────────────────────────── +# Helpers + + +def _make_mcp_spec( + name: str, + command: str = "echo", + args: list[str] | None = None, + env: dict[str, str] | None = None, + transport: str = "stdio", + description: str = "", +) -> MCPServerSpec: + """Build a minimal MCPServerSpec for test fixtures.""" + return MCPServerSpec( + name=name, + command=command, + args=args or [], + env=env or {}, + transport=transport, + description=description, + ) + + +def make_mcp_md(agent_root: Path, specs: list[MCPServerSpec]) -> Path: + """Write a valid mcp.md file in agent_root for the given specs. + + Mirrors the format accepted by parse_mcp_md_text (spec/19). + Returns the Path to the written file. + """ + lines = ["# MCP servers", ""] + for spec in specs: + lines.append(f"## {spec.name}") + lines.append(f"command: {spec.command}") + if spec.args: + lines.append(f"args: {', '.join(spec.args)}") + if spec.env: + for k, v in spec.env.items(): + lines.append(f"env: {k}={v}") + if spec.transport and spec.transport != "stdio": + lines.append(f"transport: {spec.transport}") + if spec.description: + lines.append(f"description: {spec.description}") + lines.append("") + mcp_md = agent_root / "mcp.md" + mcp_md.write_text("\n".join(lines), encoding="utf-8") + return mcp_md + + +# ────────────────────────────────────────────────────────────────────────────── +# Fixtures + + +@pytest.fixture +def tmp_agent_root(tmp_path: Path) -> Path: + """Return a fresh agent_root Path with the directory created.""" + root = tmp_path / "agent-root" + root.mkdir() + return root + + +@pytest.fixture(params=["filesystem"]) +def backend_factory(request, tmp_path: Path): + """Parametrized fixture that returns a constructed backend instance. + + HTTP backend joins at PR 4 via an additional params entry: + params=["filesystem", "http"] + with an ``elif request.param == "http":`` branch here. + """ + if request.param == "filesystem": + agent_root = tmp_path / "agent-for-backend" + agent_root.mkdir() + return FilesystemMCPServerRegistryBackend(agent_root, []) + raise ValueError(f"Unknown backend param: {request.param!r}") + + +@pytest.fixture +def populated_backend(tmp_path: Path): + """Backend pre-populated with 3 specs in mcp.md. + + Used for MUST 10 consistency tests. Returns (backend, specs) so tests can + reference the specs used for population. + """ + agent_root = tmp_path / "populated-agent" + agent_root.mkdir() + specs = [ + _make_mcp_spec("alpha-server", description="First server"), + _make_mcp_spec("beta-server", description="Second server"), + _make_mcp_spec("gamma-server", description="Third server"), + ] + make_mcp_md(agent_root, specs) + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + return backend, specs + + +# ────────────────────────────────────────────────────────────────────────────── +# MUST 1 -- Name charset validation at API boundary + + +def test_name_charset_rejects_path_traversal_dotdot(backend_factory) -> None: + """load_mcp_server with '..' raises ValueError (path-traversal token). + + spec/36 MUST 1. + """ + with pytest.raises(ValueError): + backend_factory.load_mcp_server("..") + + +def test_name_charset_rejects_slash(backend_factory) -> None: + """load_mcp_server with 'a/b' raises ValueError (slash is disallowed). + + spec/36 MUST 1. + """ + with pytest.raises(ValueError): + backend_factory.load_mcp_server("a/b") + + +def test_name_charset_rejects_backslash(backend_factory) -> None: + """load_mcp_server with backslash raises ValueError. + + spec/36 MUST 1. + """ + with pytest.raises(ValueError): + backend_factory.load_mcp_server("a\\b") + + +def test_name_charset_rejects_leading_dot(backend_factory) -> None: + """load_mcp_server with a name starting with '.' raises ValueError. + + spec/36 MUST 1. + """ + with pytest.raises(ValueError): + backend_factory.load_mcp_server(".hidden") + + +def test_name_charset_rejects_empty_string(backend_factory) -> None: + """load_mcp_server with an empty string raises ValueError. + + spec/36 MUST 1. + """ + with pytest.raises(ValueError): + backend_factory.load_mcp_server("") + + +def test_name_charset_accepts_valid_edges(tmp_path: Path) -> None: + """load_mcp_server with valid charset edges does not raise ValueError for the name. + + Valid charset: [a-zA-Z0-9_.+@-]+. This test uses 'a.b+c@d-e_f'. + The backend may raise MCPServerNotInRegistry (name valid but not in catalog) + but MUST NOT raise ValueError. + """ + agent_root = tmp_path / "edge-agent" + agent_root.mkdir() + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + with pytest.raises(MCPServerNotInRegistry): + backend.load_mcp_server("a.b+c@d-e_f") + + +# ────────────────────────────────────────────────────────────────────────────── +# MUST 2 -- Side-effect-free construction + + +def test_construction_does_not_open_files(tmp_path: Path) -> None: + """Constructor with a non-existent agent_root succeeds without touching disk. + + spec/36 MUST 2 -- side-effect-free construction. + """ + non_existent = tmp_path / "ghost-agent" + assert not non_existent.exists() + backend = FilesystemMCPServerRegistryBackend(non_existent, []) + assert backend is not None + assert not non_existent.exists(), ( + "Constructor must not create the agent_root directory." + ) + + +def test_construction_does_not_spawn_subprocesses(tmp_path: Path) -> None: + """Constructor does not spawn any subprocesses. + + spec/36 MUST 2 -- side-effect-free construction. + Verifies by patching subprocess.Popen: if it is called the test fails. + """ + agent_root = tmp_path / "agent-no-proc" + agent_root.mkdir() + with patch("subprocess.Popen") as mock_popen: + FilesystemMCPServerRegistryBackend(agent_root, []) + mock_popen.assert_not_called() + + +def test_construction_with_empty_read_paths_succeeds(tmp_path: Path) -> None: + """Constructor with empty read_paths list completes without error. + + spec/36 MUST 2 -- side-effect-free construction. + """ + agent_root = tmp_path / "agent-empty-rp" + agent_root.mkdir() + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + assert backend is not None + + +def test_construction_does_not_touch_existing_files(tmp_path: Path) -> None: + """Constructor with an existing agent_root does not modify any files. + + spec/36 MUST 2 -- side-effect-free construction. + """ + agent_root = tmp_path / "existing-agent" + agent_root.mkdir() + sentinel = agent_root / "sentinel.txt" + sentinel.write_text("untouched", encoding="utf-8") + before = sorted(p.name for p in agent_root.iterdir()) + + FilesystemMCPServerRegistryBackend(agent_root, []) + + after = sorted(p.name for p in agent_root.iterdir()) + assert before == after, ( + f"Constructor must not touch the filesystem. Before: {before!r} After: {after!r}" + ) + + +# ────────────────────────────────────────────────────────────────────────────── +# MUST 3 -- Capability claim-vs-behavior parity + + +def test_capability_honesty_install(backend_factory) -> None: + """capabilities.supports_install claim matches install() behavior. + + spec/36 MUST 3 -- capability honesty. Branches on reported value; never + hardcodes the expected bool (B-F4 from prep notes). + """ + caps = backend_factory.capabilities + dummy_spec = _make_mcp_spec("test-install-server") + if caps.supports_install: + # Method MUST NOT raise NotImplementedError when capability is True. + try: + backend_factory.install(dummy_spec) + except NotImplementedError: + pytest.fail( + "capabilities.supports_install=True but install() raised NotImplementedError" + ) + except Exception: + # Any other exception (MCPServerAlreadyInstalled, etc.) is acceptable. + pass + else: + # Method MUST raise NotImplementedError when capability is False. + with pytest.raises(NotImplementedError): + backend_factory.install(dummy_spec) + + +def test_capability_honesty_uninstall(backend_factory) -> None: + """capabilities.supports_uninstall claim matches uninstall() behavior. + + spec/36 MUST 3 -- capability honesty. + """ + caps = backend_factory.capabilities + if caps.supports_uninstall: + try: + backend_factory.uninstall("nonexistent-server") + except NotImplementedError: + pytest.fail( + "capabilities.supports_uninstall=True but uninstall() raised NotImplementedError" + ) + except Exception: + pass + else: + with pytest.raises(NotImplementedError): + backend_factory.uninstall("nonexistent-server") + + +def test_capability_honesty_capability_handshake(backend_factory) -> None: + """capabilities.supports_capability_handshake is a boolean. + + spec/36 MUST 3 -- capability honesty. At PR 1 filesystem backend reports + False; the test branches on the reported value rather than asserting a + specific bool. + """ + caps = backend_factory.capabilities + assert isinstance(caps.supports_capability_handshake, bool) + + +def test_capability_honesty_supports_audit(backend_factory) -> None: + """capabilities.supports_audit is a boolean. + + spec/36 MUST 3 -- capability honesty. + """ + caps = backend_factory.capabilities + assert isinstance(caps.supports_audit, bool) + + +def test_capability_honesty_durable(backend_factory) -> None: + """capabilities.durable is a boolean. + + spec/36 MUST 3 -- capability honesty. + """ + caps = backend_factory.capabilities + assert isinstance(caps.durable, bool) + + +# ────────────────────────────────────────────────────────────────────────────── +# MUST 5 -- Cross-agent isolation + + +def test_cross_agent_isolation_separate_agent_roots(tmp_path: Path) -> None: + """Two backends with different agent_roots do not share catalog data. + + spec/36 MUST 5. Each backend is scoped to its own agent_root. + """ + agent_a = tmp_path / "agent-a" + agent_a.mkdir() + agent_b = tmp_path / "agent-b" + agent_b.mkdir() + + specs_a = [_make_mcp_spec("server-a-only")] + make_mcp_md(agent_a, specs_a) + + backend_a = FilesystemMCPServerRegistryBackend(agent_a, []) + backend_b = FilesystemMCPServerRegistryBackend(agent_b, []) + + refs_a = backend_a.list_mcp_servers() + refs_b = backend_b.list_mcp_servers() + + assert any(r.name == "server-a-only" for r in refs_a) + assert not any(r.name == "server-a-only" for r in refs_b) + + +def test_cross_agent_isolation_reads_own_mcp_md(tmp_path: Path) -> None: + """Each backend reads only its own mcp.md, not a sibling agent's. + + spec/36 MUST 5 -- per-agent scoping. + """ + agent_a = tmp_path / "agent-a" + agent_a.mkdir() + agent_b = tmp_path / "agent-b" + agent_b.mkdir() + + make_mcp_md(agent_a, [_make_mcp_spec("unique-to-a")]) + make_mcp_md(agent_b, [_make_mcp_spec("unique-to-b")]) + + backend_a = FilesystemMCPServerRegistryBackend(agent_a, []) + backend_b = FilesystemMCPServerRegistryBackend(agent_b, []) + + names_a = {r.name for r in backend_a.list_mcp_servers()} + names_b = {r.name for r in backend_b.list_mcp_servers()} + + assert names_a == {"unique-to-a"} + assert names_b == {"unique-to-b"} + assert names_a.isdisjoint(names_b) + + +def test_lock_file_scoping_distinct_from_main_lock(tmp_path: Path) -> None: + """The registry lock file (.mcp_registry.lock) is distinct from the agent + main lock file (.lock). + + spec/36 D5 -- lock_backend operators must scope to .mcp_registry.lock. + Verifies the docstring contract: a custom lock_backend passed at construction + must NOT reuse the agent's main .lock. + """ + # This test verifies the naming distinction by inspecting the docstring + # intent: .mcp_registry.lock != .lock. We verify the FilesystemMCPServerRegistryBackend + # accepts lock_backend=None (default) without raising. + agent_root = tmp_path / "lock-test-agent" + agent_root.mkdir() + backend = FilesystemMCPServerRegistryBackend(agent_root, [], lock_backend=None) + assert backend is not None + + +def test_list_sorted_lexicographic(tmp_path: Path) -> None: + """list_mcp_servers() returns refs in lexicographic order by name. + + spec/36 MUST 5 (consistent sort) -- ordering is an isolation-adjacent + invariant: operators relying on ordering get consistent behavior + regardless of backend storage order. + """ + agent_root = tmp_path / "sorted-agent" + agent_root.mkdir() + specs = [ + _make_mcp_spec("zeta-server"), + _make_mcp_spec("alpha-server"), + _make_mcp_spec("mu-server"), + ] + make_mcp_md(agent_root, specs) + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + refs = backend.list_mcp_servers() + names = [r.name for r in refs] + assert names == sorted(names), f"Expected sorted order; got {names!r}" + + +# ────────────────────────────────────────────────────────────────────────────── +# MUST 6 -- backend_id stability + close() idempotency + + +def test_backend_id_is_non_empty_string(backend_factory) -> None: + """backend_id is a non-empty string. + + spec/36 MUST 6(a) -- backend_id stability. + """ + assert isinstance(backend_factory.backend_id, str) + assert backend_factory.backend_id + + +def test_backend_id_is_stable(backend_factory) -> None: + """backend_id does not change across calls. + + spec/36 MUST 6(a) -- backend_id stability. + """ + first = backend_factory.backend_id + second = backend_factory.backend_id + assert first == second + + +def test_close_twice_does_not_raise(backend_factory) -> None: + """Calling close() twice does not raise any exception. + + spec/36 MUST 6(b) -- close() idempotency. + """ + backend_factory.close() + backend_factory.close() # must not raise + + +def test_refresh_capabilities_returns_equivalent_to_capabilities( + backend_factory, +) -> None: + """refresh_capabilities() returns an object equivalent to capabilities. + + spec/36 MUST 6 -- refresh_capabilities is idempotent on filesystem backends. + """ + caps = backend_factory.capabilities + refreshed = backend_factory.refresh_capabilities() + # Must be the same type and have the same values. + assert isinstance(refreshed, MCPServerRegistryCapabilities) + assert refreshed.supports_install == caps.supports_install + assert refreshed.supports_uninstall == caps.supports_uninstall + assert refreshed.supports_capability_handshake == caps.supports_capability_handshake + assert refreshed.supports_audit == caps.supports_audit + assert refreshed.durable == caps.durable + + +# ────────────────────────────────────────────────────────────────────────────── +# MUST 7 -- Transient-vs-permanent failure honesty + + +def test_load_raises_not_in_registry_for_absent_name(tmp_path: Path) -> None: + """load_mcp_server raises MCPServerNotInRegistry for a name not in catalog. + + spec/36 MUST 7 -- permanent absence. Distinct from transient + MCPRegistryUnavailable. + """ + agent_root = tmp_path / "agent-absent" + agent_root.mkdir() + make_mcp_md(agent_root, [_make_mcp_spec("present-server")]) + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + with pytest.raises(MCPServerNotInRegistry): + backend.load_mcp_server("no-such-server") + + +def test_list_returns_empty_list_for_absent_mcp_md(tmp_path: Path) -> None: + """list_mcp_servers() returns [] when mcp.md does not exist. + + spec/36 MUST 7 -- absent file is NOT an error; returns empty list. + """ + agent_root = tmp_path / "no-mcp-md-agent" + agent_root.mkdir() + # No mcp.md written + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + refs = backend.list_mcp_servers() + assert refs == [] + + +def test_list_returns_empty_list_for_empty_mcp_md(tmp_path: Path) -> None: + """list_mcp_servers() returns [] when mcp.md is empty. + + spec/36 MUST 7. + """ + agent_root = tmp_path / "empty-mcp-md-agent" + agent_root.mkdir() + (agent_root / "mcp.md").write_text("", encoding="utf-8") + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + refs = backend.list_mcp_servers() + assert refs == [] + + +def test_load_raises_not_in_registry_not_unavailable_for_absent_name( + tmp_path: Path, +) -> None: + """load_mcp_server raises MCPServerNotInRegistry (permanent), not + MCPRegistryUnavailable (transient), for a name not in the catalog. + + spec/36 MUST 7 -- callers must not mistake permanent absence for + transient unavailability. + """ + agent_root = tmp_path / "agent-perm" + agent_root.mkdir() + make_mcp_md(agent_root, [_make_mcp_spec("other-server")]) + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + exc = None + try: + backend.load_mcp_server("not-here") + except MCPServerNotInRegistry as e: + exc = e + except MCPRegistryUnavailable: + pytest.fail( + "Raised MCPRegistryUnavailable (transient) instead of MCPServerNotInRegistry (permanent)" + ) + assert exc is not None, "Expected MCPServerNotInRegistry but nothing was raised" + + +# ────────────────────────────────────────────────────────────────────────────── +# MUST 8 -- Env-var resolution at load time + + +def test_env_var_resolves_at_load_mcp_server(tmp_path: Path, monkeypatch) -> None: + """load_mcp_server resolves $VAR env references at call time. + + spec/36 MUST 8 / Decision 7. monkeypatch.setenv is used per B-F5. + """ + monkeypatch.setenv("MY_API_KEY", "resolved-value-123") + + agent_root = tmp_path / "env-agent" + agent_root.mkdir() + spec = _make_mcp_spec("env-server", env={"API_KEY": "$MY_API_KEY"}) + make_mcp_md(agent_root, [spec]) + + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + loaded = backend.load_mcp_server("env-server") + + assert loaded.env["API_KEY"] == "resolved-value-123" + + +def test_list_does_not_resolve_env_vars(tmp_path: Path, monkeypatch) -> None: + """list_mcp_servers() does NOT resolve $VAR -- returns MCPServerRef metadata only. + + spec/36 MUST 8 / Decision 7 -- resolution deferred to load_mcp_server. + list_mcp_servers must not raise MCPServerConnectFailed on unset vars. + """ + # Deliberately NOT setting the env var -- list should not care. + monkeypatch.delenv("UNSET_VAR_FOR_LIST_TEST", raising=False) + + agent_root = tmp_path / "list-no-resolve-agent" + agent_root.mkdir() + spec = _make_mcp_spec("server-with-env", env={"KEY": "$UNSET_VAR_FOR_LIST_TEST"}) + make_mcp_md(agent_root, [spec]) + + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + # Must not raise MCPServerConnectFailed even though the env var is unset. + refs = backend.list_mcp_servers() + assert len(refs) == 1 + assert refs[0].name == "server-with-env" + + +def test_unresolvable_env_var_raises_connect_failed( + tmp_path: Path, monkeypatch +) -> None: + """load_mcp_server raises MCPServerConnectFailed for an unresolvable $VAR. + + spec/36 MUST 8. monkeypatch ensures the var is absent (B-F5). + """ + monkeypatch.delenv("DEFINITELY_NOT_SET_VAR_XYZ", raising=False) + + agent_root = tmp_path / "unresolvable-agent" + agent_root.mkdir() + spec = _make_mcp_spec("bad-env-server", env={"KEY": "$DEFINITELY_NOT_SET_VAR_XYZ"}) + make_mcp_md(agent_root, [spec]) + + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + with pytest.raises(MCPServerConnectFailed): + backend.load_mcp_server("bad-env-server") + + +def test_parse_mcp_md_text_resolve_env_false_preserves_dollar_var(monkeypatch) -> None: + """parse_mcp_md_text with resolve_env=False preserves $VAR strings verbatim. + + spec/36 Theme 1 (prep notes C-F1). Callers using resolve_env=False (e.g. + FilesystemMCPServerRegistryBackend.list_mcp_servers) must not get + MCPServerConnectFailed and must see the raw $VAR string. + """ + # parse_mcp_md_text is imported at the top of this file. + # Deliberately absent var -- resolve_env=False must not raise. + monkeypatch.delenv("TOTALLY_ABSENT_ENV_VAR_FOR_TEST", raising=False) + + content = dedent("""\ + # MCP servers + + ## my-server + command: echo + env: KEY=$TOTALLY_ABSENT_ENV_VAR_FOR_TEST + description: Test server + """) + specs = parse_mcp_md_text(content, resolve_env=False) + assert len(specs) == 1 + assert specs[0].env["KEY"] == "$TOTALLY_ABSENT_ENV_VAR_FOR_TEST" + + +def test_env_var_mid_session_mutation_reflected(tmp_path: Path, monkeypatch) -> None: + """load_mcp_server reflects mid-session os.environ mutations. + + spec/36 MUST 8 -- resolution at call time, not at backend construction. + Each call to load_mcp_server reads from the CURRENT os.environ. + """ + monkeypatch.setenv("DYNAMIC_VAR", "first-value") + + agent_root = tmp_path / "dynamic-agent" + agent_root.mkdir() + spec = _make_mcp_spec("dynamic-server", env={"TOKEN": "$DYNAMIC_VAR"}) + make_mcp_md(agent_root, [spec]) + + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + + first = backend.load_mcp_server("dynamic-server") + assert first.env["TOKEN"] == "first-value" + + monkeypatch.setenv("DYNAMIC_VAR", "second-value") + second = backend.load_mcp_server("dynamic-server") + assert second.env["TOKEN"] == "second-value" + + +def test_env_var_literal_value_not_mistaken_for_dollar_var(tmp_path: Path) -> None: + """A literal env value without a leading $ is not resolved as a var. + + spec/36 MUST 8 -- only $VAR-prefixed values are resolved. + """ + agent_root = tmp_path / "literal-env-agent" + agent_root.mkdir() + spec = _make_mcp_spec("literal-server", env={"KEY": "literal-value"}) + make_mcp_md(agent_root, [spec]) + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + loaded = backend.load_mcp_server("literal-server") + assert loaded.env["KEY"] == "literal-value" + + +# ────────────────────────────────────────────────────────────────────────────── +# MUST 10 -- load_all_mcp_servers consistency + + +def test_load_all_returns_empty_list_for_absent_mcp_md(tmp_path: Path) -> None: + """load_all_mcp_servers() returns [] when catalog is empty. + + spec/36 MUST 10. + """ + agent_root = tmp_path / "empty-catalog-agent" + agent_root.mkdir() + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + result = backend.load_all_mcp_servers() + assert result == [] + + +def test_load_all_returns_populated_catalog(populated_backend) -> None: + """load_all_mcp_servers() returns all specs when catalog is populated. + + spec/36 MUST 10. + """ + backend, specs = populated_backend + all_specs = backend.load_all_mcp_servers() + assert len(all_specs) == len(specs) + loaded_names = {s.name for s in all_specs} + expected_names = {s.name for s in specs} + assert loaded_names == expected_names + + +def test_load_all_ordering_parity(populated_backend) -> None: + """load_all_mcp_servers() order matches list_mcp_servers() (lexicographic). + + spec/36 MUST 10 -- consistent sort. + """ + backend, _ = populated_backend + refs = backend.list_mcp_servers() + all_specs = backend.load_all_mcp_servers() + ref_names = [r.name for r in refs] + spec_names = [s.name for s in all_specs] + assert spec_names == ref_names + + +def test_load_all_consistency_with_default_load_all(populated_backend) -> None: + """load_all_mcp_servers() output is byte-identical to _default_load_all(backend). + + spec/36 MUST 10 -- default-impl delegation parity. + The filesystem backend delegates to _default_load_all; this test pins that + the output is equivalent to calling the helper directly. + """ + backend, _ = populated_backend + via_method = backend.load_all_mcp_servers() + via_helper = _default_load_all(backend) + + assert len(via_method) == len(via_helper) + for a, b in zip(via_method, via_helper): + assert a.name == b.name + assert a.command == b.command + assert a.args == b.args + assert a.env == b.env + assert a.transport == b.transport + assert a.description == b.description + + +def test_load_all_consistency_under_list_ordering(tmp_path: Path) -> None: + """load_all_mcp_servers produces names consistent with list_mcp_servers. + + spec/36 MUST 10 -- mutation between list and load_all does not break + ordering contract (verified by checking name order matches after each). + """ + agent_root = tmp_path / "consistency-agent" + agent_root.mkdir() + specs = [ + _make_mcp_spec("zz-last"), + _make_mcp_spec("aa-first"), + _make_mcp_spec("mm-middle"), + ] + make_mcp_md(agent_root, specs) + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + + refs = backend.list_mcp_servers() + all_specs = backend.load_all_mcp_servers() + + assert [r.name for r in refs] == [s.name for s in all_specs] + assert [s.name for s in all_specs] == sorted(s.name for s in all_specs) + + +# ────────────────────────────────────────────────────────────────────────────── +# Module-level unit tests: _redact_for_error_message (MUST 4 -- PR 1 scope) + + +def test_redact_strips_url_credentials() -> None: + """_redact_for_error_message strips credentials from URLs. + + spec/36 MUST 4 -- env-var paste credential redaction at the + BackendNotRegistered error site. + """ + from atomic_agents.mcp_registry import _redact_for_error_message + + result = _redact_for_error_message("https://user:pass@catalog.example.com/api") + assert result == "https://..." + assert "user" not in result + assert "pass" not in result + + +def test_redact_truncates_long_non_url_string() -> None: + """_redact_for_error_message truncates long non-URL strings at max_len. + + spec/36 MUST 4. + """ + from atomic_agents.mcp_registry import _redact_for_error_message + + long_val = "a" * 50 + result = _redact_for_error_message(long_val, max_len=32) + assert result.endswith("...") + assert len(result) <= 35 # max_len + len("...") + + +def test_redact_preserves_short_non_url_string() -> None: + """_redact_for_error_message returns short non-URL strings unchanged. + + spec/36 MUST 4. + """ + from atomic_agents.mcp_registry import _redact_for_error_message + + result = _redact_for_error_message("filesystem") + assert result == "filesystem" + + +def test_backend_not_registered_uses_redacted_value( + monkeypatch, tmp_path: Path +) -> None: + """get_default_mcp_server_registry_backend with a URL-like backend_id + raises BackendNotRegistered with a redacted error message. + + spec/36 MUST 4 -- credential leak prevention at the operator-config factory. + """ + from atomic_agents.mcp_registry import ( + BackendNotRegistered, + get_default_mcp_server_registry_backend, + ) + + # Simulate an operator accidentally pasting a URL with credentials. + monkeypatch.setenv( + "ATOMIC_AGENTS_MCP_SERVER_REGISTRY_BACKEND", + "https://user:secret@catalog.example.com", + ) + + agent_root = tmp_path / "dummy-agent" + with pytest.raises(BackendNotRegistered) as exc_info: + get_default_mcp_server_registry_backend(agent_root, []) + + error_msg = str(exc_info.value) + assert "secret" not in error_msg + assert "user" not in error_msg + assert "https://..." in error_msg + + +# ────────────────────────────────────────────────────────────────────────────── +# P1 #5 -- Protocol declares backend_id property + + +def test_protocol_declares_backend_id_property(backend_factory) -> None: + """MCPServerRegistryBackend Protocol declares backend_id as a property. + + P1 #5 fix: backend_id was implemented on FilesystemMCPServerRegistryBackend + but missing from the Protocol surface. Verifies the attribute is accessible + and returns a non-empty string on the concrete backend. + spec/36 MUST 6(a). + """ + # Verify accessible on instance (concrete backend) + assert hasattr(backend_factory, "backend_id"), ( + "backend_id must be accessible on the backend instance" + ) + assert isinstance(backend_factory.backend_id, str) + assert backend_factory.backend_id, "backend_id must be a non-empty string" + + +def test_protocol_backend_id_introspectable_via_protocol() -> None: + """The MCPServerRegistryBackend Protocol body mentions 'backend_id'. + + P1 #5 conformance: the Protocol class must declare backend_id so static + type checkers and runtime introspection can surface it. + """ + # The Protocol is structural, but we can verify via annotations / members. + # 'backend_id' must appear in the Protocol's declared members. + members = dir(MCPServerRegistryBackend) + assert "backend_id" in members, ( + "MCPServerRegistryBackend Protocol must declare 'backend_id'" + ) + + +# ────────────────────────────────────────────────────────────────────────────── +# P2 #2 -- _redact_for_error_message DSN heuristic + + +def test_redact_strips_dsn_without_scheme() -> None: + """_redact_for_error_message catches DSN-style strings without a scheme. + + P2 #2 fix: 'user:password@host/db' has no '://' but contains credentials. + The DSN heuristic 'user:something@host' must return a redacted placeholder. + spec/36 MUST 4 -- credential redaction. + """ + from atomic_agents.mcp_registry import _redact_for_error_message + + result = _redact_for_error_message("user:secretpass@db-host/mydb") + assert "secretpass" not in result + assert result == "[redacted-connection-string]" + + +def test_redact_dsn_without_scheme_does_not_match_plain_at_sign() -> None: + """_redact_for_error_message does not over-redact simple strings with '@'. + + P2 #2: 'user@host' has an '@' but no 'colon-password' pattern, so it + should not be treated as a DSN. + """ + from atomic_agents.mcp_registry import _redact_for_error_message + + result = _redact_for_error_message("user@host") + # Should NOT be redacted as a DSN -- no colon-before-@ pattern. + assert result == "user@host" diff --git a/tests/test_mcp_server_registry_filesystem_backend.py b/tests/test_mcp_server_registry_filesystem_backend.py new file mode 100644 index 0000000..3015813 --- /dev/null +++ b/tests/test_mcp_server_registry_filesystem_backend.py @@ -0,0 +1,841 @@ +"""FilesystemMCPServerRegistryBackend-specific tests. + +Tests that exercise filesystem-specific implementation details not in the +conformance suite: + +- Path-traversal at API boundary edge cases +- Env-var resolution timing +- mcp.md parse semantics +- MCPServerRef.source field population +- Default LockBackend construction +- load_all_mcp_servers delegation to _default_load_all + +Per spec/36, prep finding B-F6, and the filesystem-specific test shape in +test_corpus_filesystem_backend.py. +""" + +from __future__ import annotations + +import os +from pathlib import Path +from textwrap import dedent +from unittest.mock import MagicMock, patch + +import pytest + +from atomic_agents.mcp_registry import ( + FilesystemMCPServerRegistryBackend, + MCPRegistryDescriptorInvalid, + MCPRegistryUnavailable, + MCPServerNotInRegistry, +) +from atomic_agents.mcp_registry.backend import _default_load_all +from atomic_agents.mcp_registry.types import ValidationResult +from atomic_agents.mcp import MCPServerSpec, parse_mcp_md_text +from atomic_agents.exceptions import MCPServerConnectFailed + + +# ────────────────────────────────────────────────────────────────────────────── +# Helpers + + +def _make_spec( + name: str = "test-server", + command: str = "echo", + args: list[str] | None = None, + env: dict[str, str] | None = None, + transport: str = "stdio", + description: str = "", +) -> MCPServerSpec: + """Build a minimal MCPServerSpec for test use.""" + return MCPServerSpec( + name=name, + command=command, + args=args or [], + env=env or {}, + transport=transport, + description=description, + ) + + +def _write_mcp_md(agent_root: Path, content: str) -> Path: + """Write raw mcp.md content to agent_root/mcp.md.""" + mcp_md = agent_root / "mcp.md" + mcp_md.write_text(content, encoding="utf-8") + return mcp_md + + +def _write_mcp_md_from_specs(agent_root: Path, specs: list[MCPServerSpec]) -> Path: + """Write a well-formed mcp.md from a list of MCPServerSpec objects.""" + lines = ["# MCP servers", ""] + for spec in specs: + lines.append(f"## {spec.name}") + lines.append(f"command: {spec.command}") + if spec.args: + lines.append(f"args: {', '.join(spec.args)}") + if spec.env: + for k, v in spec.env.items(): + lines.append(f"env: {k}={v}") + if spec.transport and spec.transport != "stdio": + lines.append(f"transport: {spec.transport}") + if spec.description: + lines.append(f"description: {spec.description}") + lines.append("") + return _write_mcp_md(agent_root, "\n".join(lines)) + + +# ────────────────────────────────────────────────────────────────────────────── +# Path-traversal at API boundary edge cases + + +def test_path_traversal_slash(tmp_path: Path) -> None: + """load_mcp_server with a slash in the name raises ValueError. + + spec/36 MUST 1 -- path-traversal refusal at API boundary. + """ + agent_root = tmp_path / "agent" + agent_root.mkdir() + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + with pytest.raises(ValueError): + backend.load_mcp_server("a/b") + + +def test_path_traversal_backslash(tmp_path: Path) -> None: + """load_mcp_server with a backslash raises ValueError. + + spec/36 MUST 1. + """ + agent_root = tmp_path / "agent" + agent_root.mkdir() + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + with pytest.raises(ValueError): + backend.load_mcp_server("a\\b") + + +def test_path_traversal_dotdot(tmp_path: Path) -> None: + """load_mcp_server with '..' raises ValueError. + + spec/36 MUST 1. + """ + agent_root = tmp_path / "agent" + agent_root.mkdir() + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + with pytest.raises(ValueError): + backend.load_mcp_server("..") + + +def test_path_traversal_leading_dot(tmp_path: Path) -> None: + """load_mcp_server with a leading '.' raises ValueError. + + spec/36 MUST 1. + """ + agent_root = tmp_path / "agent" + agent_root.mkdir() + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + with pytest.raises(ValueError): + backend.load_mcp_server(".hidden-server") + + +def test_path_traversal_empty_string(tmp_path: Path) -> None: + """load_mcp_server with an empty string raises ValueError. + + spec/36 MUST 1. + """ + agent_root = tmp_path / "agent" + agent_root.mkdir() + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + with pytest.raises(ValueError): + backend.load_mcp_server("") + + +def test_path_traversal_control_char(tmp_path: Path) -> None: + """load_mcp_server with a control character raises ValueError. + + spec/36 MUST 1. + """ + agent_root = tmp_path / "agent" + agent_root.mkdir() + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + with pytest.raises(ValueError): + backend.load_mcp_server("bad\x00name") + + +def test_path_traversal_newline(tmp_path: Path) -> None: + """load_mcp_server with a newline raises ValueError. + + spec/36 MUST 1. + """ + agent_root = tmp_path / "agent" + agent_root.mkdir() + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + with pytest.raises(ValueError): + backend.load_mcp_server("bad\nname") + + +def test_valid_alphanumeric_name_does_not_raise_on_charset(tmp_path: Path) -> None: + """load_mcp_server with a valid alphanumeric name raises MCPServerNotInRegistry + (valid name, absent from catalog), NOT ValueError. + + spec/36 MUST 1 -- valid charset passes the boundary check. + """ + agent_root = tmp_path / "agent" + agent_root.mkdir() + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + with pytest.raises(MCPServerNotInRegistry): + backend.load_mcp_server("valid-server-name123") + + +# ────────────────────────────────────────────────────────────────────────────── +# Env-var resolution timing + + +def test_resolve_env_false_preserves_dollar_var(tmp_path: Path, monkeypatch) -> None: + """parse_mcp_md_text with resolve_env=False preserves '$VAR' strings. + + spec/36 Theme 1 (prep notes C-F1). list_mcp_servers calls parse_mcp_md_text + with resolve_env=False; the raw $VAR must survive. + """ + monkeypatch.delenv("ABSENT_VAR_RESOLUTION_TEST", raising=False) + + content = dedent("""\ + # MCP servers + + ## my-server + command: npx + env: TOKEN=$ABSENT_VAR_RESOLUTION_TEST + """) + specs = parse_mcp_md_text(content, resolve_env=False) + assert len(specs) == 1 + assert specs[0].env["TOKEN"] == "$ABSENT_VAR_RESOLUTION_TEST" + + +def test_dollar_var_resolves_at_load_mcp_server_not_list( + tmp_path: Path, monkeypatch +) -> None: + """$VAR is NOT resolved at list_mcp_servers time; resolved at load_mcp_server. + + spec/36 MUST 8 / Decision 7. list must not raise even with unset vars. + """ + monkeypatch.delenv("TIMING_TEST_VAR_ABSENT", raising=False) + + agent_root = tmp_path / "timing-agent" + agent_root.mkdir() + spec = _make_spec("timing-server", env={"KEY": "$TIMING_TEST_VAR_ABSENT"}) + _write_mcp_md_from_specs(agent_root, [spec]) + + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + + # list_mcp_servers must NOT raise despite the absent var. + refs = backend.list_mcp_servers() + assert len(refs) == 1 + + # load_mcp_server MUST raise because the var is still absent. + with pytest.raises(MCPServerConnectFailed): + backend.load_mcp_server("timing-server") + + +def test_unresolvable_var_raises_connect_failed(tmp_path: Path, monkeypatch) -> None: + """load_mcp_server raises MCPServerConnectFailed for unresolvable $VAR. + + spec/36 MUST 8. monkeypatch ensures absence (B-F5). + """ + monkeypatch.delenv("UNRESOLVABLE_XYZ_VAR", raising=False) + + agent_root = tmp_path / "unresolvable-agent" + agent_root.mkdir() + spec = _make_spec("bad-server", env={"API": "$UNRESOLVABLE_XYZ_VAR"}) + _write_mcp_md_from_specs(agent_root, [spec]) + + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + with pytest.raises(MCPServerConnectFailed): + backend.load_mcp_server("bad-server") + + +def test_mid_session_env_mutation_reflected_at_load_time( + tmp_path: Path, monkeypatch +) -> None: + """load_mcp_server reads from os.environ at call time, so mid-session + mutations are visible on the next call. + + spec/36 MUST 8. + """ + monkeypatch.setenv("SESSION_VAR", "value-at-start") + + agent_root = tmp_path / "session-agent" + agent_root.mkdir() + spec = _make_spec("session-server", env={"KEY": "$SESSION_VAR"}) + _write_mcp_md_from_specs(agent_root, [spec]) + + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + + s1 = backend.load_mcp_server("session-server") + assert s1.env["KEY"] == "value-at-start" + + monkeypatch.setenv("SESSION_VAR", "value-after-change") + s2 = backend.load_mcp_server("session-server") + assert s2.env["KEY"] == "value-after-change" + + +# ────────────────────────────────────────────────────────────────────────────── +# mcp.md parse semantics + + +def test_empty_file_returns_empty_list(tmp_path: Path) -> None: + """list_mcp_servers returns [] for an empty mcp.md file. + + spec/36 MUST 7. + """ + agent_root = tmp_path / "empty-agent" + agent_root.mkdir() + _write_mcp_md(agent_root, "") + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + refs = backend.list_mcp_servers() + assert refs == [] + + +def test_missing_file_returns_empty_list(tmp_path: Path) -> None: + """list_mcp_servers returns [] when mcp.md does not exist. + + spec/36 MUST 7. + """ + agent_root = tmp_path / "missing-md-agent" + agent_root.mkdir() + # No mcp.md written. + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + refs = backend.list_mcp_servers() + assert refs == [] + + +def test_single_section_returns_one_spec(tmp_path: Path) -> None: + """A single H2 section in mcp.md produces exactly one MCPServerRef. + + spec/36 list semantics. + """ + agent_root = tmp_path / "single-section-agent" + agent_root.mkdir() + spec = _make_spec("only-server", description="The only one") + _write_mcp_md_from_specs(agent_root, [spec]) + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + refs = backend.list_mcp_servers() + assert len(refs) == 1 + assert refs[0].name == "only-server" + + +def test_multi_section_mcp_md_returns_multiple_specs(tmp_path: Path) -> None: + """Multiple H2 sections in mcp.md produce multiple MCPServerRefs. + + spec/36 list semantics. prep finding B-F6. + """ + agent_root = tmp_path / "multi-section-agent" + agent_root.mkdir() + specs = [ + _make_spec("server-one"), + _make_spec("server-two"), + _make_spec("server-three"), + ] + _write_mcp_md_from_specs(agent_root, specs) + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + refs = backend.list_mcp_servers() + assert len(refs) == 3 + names = {r.name for r in refs} + assert names == {"server-one", "server-two", "server-three"} + + +def test_malformed_section_no_command_skipped_silently(tmp_path: Path) -> None: + """A section with no 'command:' key is silently skipped. + + spec/36 / parse_mcp_md_text behavior at mcp.py _build_spec: if no command + key, the spec is None and the section is dropped with a logged warning. + """ + agent_root = tmp_path / "no-command-agent" + agent_root.mkdir() + content = dedent("""\ + # MCP servers + + ## no-command-server + description: This server has no command key + + ## good-server + command: echo + description: This one is fine + """) + _write_mcp_md(agent_root, content) + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + refs = backend.list_mcp_servers() + names = {r.name for r in refs} + # no-command-server must be silently skipped; good-server must be present. + assert "good-server" in names + assert "no-command-server" not in names + + +def test_load_all_mcp_servers_delegates_to_default_load_all(tmp_path: Path) -> None: + """load_all_mcp_servers delegates to _default_load_all. + + spec/36 MUST 10 / prep finding Theme 4. Verified by patching _default_load_all + in the backend module and asserting it was called. + """ + agent_root = tmp_path / "delegation-agent" + agent_root.mkdir() + spec = _make_spec("delegate-server") + _write_mcp_md_from_specs(agent_root, [spec]) + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + + call_count = {"n": 0} + original = _default_load_all + + def tracking_default_load_all(b): + call_count["n"] += 1 + return original(b) + + with patch( + "atomic_agents.mcp_registry.filesystem._default_load_all", + side_effect=tracking_default_load_all, + ): + result = backend.load_all_mcp_servers() + + assert call_count["n"] == 1, ( + "load_all_mcp_servers must delegate to _default_load_all" + ) + assert len(result) == 1 + assert result[0].name == "delegate-server" + + +# ────────────────────────────────────────────────────────────────────────────── +# MCPServerRef source field + + +def test_list_mcp_servers_source_field_is_mcp_md_section(tmp_path: Path) -> None: + """MCPServerRef.source is populated as 'mcp.md#section:'. + + spec/36 MCPServerRef.source field contract. prep finding B-F6. + """ + agent_root = tmp_path / "source-field-agent" + agent_root.mkdir() + spec = _make_spec("source-test-server") + _write_mcp_md_from_specs(agent_root, [spec]) + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + refs = backend.list_mcp_servers() + assert len(refs) == 1 + assert refs[0].source == "mcp.md#section:source-test-server" + + +# ────────────────────────────────────────────────────────────────────────────── +# Default LockBackend construction + + +def test_default_lock_backend_is_none_at_pr1(tmp_path: Path) -> None: + """FilesystemMCPServerRegistryBackend accepts lock_backend=None (PR 1 default). + + spec/36 -- lock_backend accepted at construction but unused until PR 3. + The constructor signature is stable so PR 3 callers can pass the kwarg + without a constructor change. + """ + agent_root = tmp_path / "lock-agent" + agent_root.mkdir() + # Passing None explicitly mirrors the default. + backend = FilesystemMCPServerRegistryBackend(agent_root, [], lock_backend=None) + assert backend is not None + + +def test_lock_backend_kwarg_accepted_without_error(tmp_path: Path) -> None: + """A non-None lock_backend kwarg is accepted and stored but not used at PR 1. + + spec/36 constructor stability -- PR 3 callers can pre-wire lock_backend. + """ + agent_root = tmp_path / "lock-kwarg-agent" + agent_root.mkdir() + mock_lock_backend = MagicMock() # MagicMock now imported at top + # Should not raise even though lock_backend is not None. + backend = FilesystemMCPServerRegistryBackend( + agent_root, [], lock_backend=mock_lock_backend + ) + assert backend is not None + + +# ────────────────────────────────────────────────────────────────────────────── +# Install/uninstall stubs (deferred to PR 3) + + +# ────────────────────────────────────────────────────────────────────────────── +# G4 -- validate() coverage (10 tests) + + +def _make_mcp_spec( + name: str = "test-server", + command: str = "echo", + args: list[str] | None = None, + env: dict[str, str] | None = None, + transport: str = "stdio", + description: str = "", +) -> MCPServerSpec: + """Build a minimal MCPServerSpec for validate() tests.""" + return MCPServerSpec( + name=name, + command=command, + args=args or [], + env=env or {}, + transport=transport, + description=description, + ) + + +def test_validate_returns_validation_result_for_valid_server(tmp_path: Path) -> None: + """Happy path: valid stdio server with a known command returns ok=True, no errors + or warnings. + + spec/36 validate() -- all checks pass. + """ + agent_root = tmp_path / "valid-agent" + agent_root.mkdir() + spec = _make_mcp_spec("my-server", command="echo") + _write_mcp_md_from_specs(agent_root, [spec]) + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + result = backend.validate("my-server") + assert result.ok is True + assert result.errors == [] + + +def test_validate_returns_not_ok_when_mcp_md_absent(tmp_path: Path) -> None: + """validate() returns ok=False when no mcp.md exists. + + spec/36 validate() -- absent file is reported as a result, not an exception. + """ + agent_root = tmp_path / "absent-md-agent" + agent_root.mkdir() + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + result = backend.validate("my-server") + assert result.ok is False + assert len(result.errors) >= 1 + + +def test_validate_returns_not_ok_when_server_name_absent_in_mcp_md( + tmp_path: Path, +) -> None: + """validate() returns ok=False when mcp.md exists but lacks the requested name. + + spec/36 validate() -- absent server in populated registry is an error. + """ + agent_root = tmp_path / "absent-name-agent" + agent_root.mkdir() + spec = _make_mcp_spec("other-server", command="echo") + _write_mcp_md_from_specs(agent_root, [spec]) + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + result = backend.validate("my-server") + assert result.ok is False + assert len(result.errors) >= 1 + # The error must mention the requested name. + combined_errors = " ".join(result.errors) + assert "my-server" in combined_errors + + +def test_validate_warns_when_command_not_on_path(tmp_path: Path) -> None: + """validate() produces a warning when the command is not on PATH. + + spec/36 validate() -- PATH check is warn-only; ok stays True. + """ + agent_root = tmp_path / "bad-cmd-agent" + agent_root.mkdir() + spec = _make_mcp_spec("bad-cmd-server", command="nonexistent_binary_xyz_unique") + _write_mcp_md_from_specs(agent_root, [spec]) + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + result = backend.validate("bad-cmd-server") + assert result.ok is True + assert len(result.warnings) >= 1 + combined_warnings = " ".join(result.warnings) + assert "nonexistent_binary_xyz_unique" in combined_warnings + + +def test_validate_passes_when_command_on_path(tmp_path: Path) -> None: + """validate() does not warn when the command is on PATH. + + spec/36 validate() -- PATH check is warn-only; a found command produces no warning. + """ + agent_root = tmp_path / "good-cmd-agent" + agent_root.mkdir() + spec = _make_mcp_spec("good-cmd-server", command="echo") + _write_mcp_md_from_specs(agent_root, [spec]) + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + result = backend.validate("good-cmd-server") + assert result.ok is True + # No PATH warning for 'echo' which is always available. + path_warnings = [w for w in result.warnings if "not found on PATH" in w] + assert path_warnings == [] + + +def test_validate_warns_on_unresolvable_env_var(tmp_path: Path, monkeypatch) -> None: + """validate() warns when a $VAR in env is not set in the current process. + + spec/36 validate() -- unset env var is warn-only; ok stays True. + """ + monkeypatch.delenv("UNSET_VAR_VALIDATE_TEST", raising=False) + agent_root = tmp_path / "unset-var-agent" + agent_root.mkdir() + spec = _make_mcp_spec( + "env-server", command="echo", env={"API_KEY": "$UNSET_VAR_VALIDATE_TEST"} + ) + _write_mcp_md_from_specs(agent_root, [spec]) + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + result = backend.validate("env-server") + assert result.ok is True + assert len(result.warnings) >= 1 + combined_warnings = " ".join(result.warnings) + assert "UNSET_VAR_VALIDATE_TEST" in combined_warnings + + +def test_validate_returns_not_ok_for_unrecognized_transport(tmp_path: Path) -> None: + """validate() returns ok=False when transport is not 'stdio'. + + spec/36 validate() -- unrecognized transport is an error (only 'stdio' in v1). + NOTE: _write_mcp_md_from_specs skips transport when it is 'stdio'; we write + raw mcp.md content here to inject a non-stdio transport value. + """ + agent_root = tmp_path / "bad-transport-agent" + agent_root.mkdir() + content = dedent("""\ + # MCP servers + + ## http-server + command: echo + transport: http + """) + _write_mcp_md(agent_root, content) + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + result = backend.validate("http-server") + assert result.ok is False + assert len(result.errors) >= 1 + combined_errors = " ".join(result.errors) + assert "transport" in combined_errors.lower() or "http" in combined_errors + + +def test_validate_returns_not_ok_for_invalid_name_at_api_boundary( + tmp_path: Path, +) -> None: + """validate() returns ValidationResult(ok=False) for path-traversal names. + + spec/36 MUST 1 + validate() spec: invalid charset is caught at the API + boundary and returned as a result, NOT raised as ValueError. + """ + agent_root = tmp_path / "invalid-name-agent" + agent_root.mkdir() + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + # "../etc/passwd" contains "/" which fails the charset check. + result = backend.validate("../etc/passwd") + assert isinstance(result, ValidationResult) + assert result.ok is False + + +def test_validate_does_not_spawn_subprocess(tmp_path: Path) -> None: + """validate() is a static check and must NOT spawn any subprocess. + + spec/36 MUST 2 analog -- validate is static only. + """ + agent_root = tmp_path / "no-spawn-agent" + agent_root.mkdir() + spec = _make_mcp_spec("no-spawn-server", command="echo") + _write_mcp_md_from_specs(agent_root, [spec]) + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + + mock_popen = MagicMock() + with patch("subprocess.Popen", mock_popen): + backend.validate("no-spawn-server") + + mock_popen.assert_not_called() + + +def test_validate_result_round_trips_through_dataclass_defaults( + tmp_path: Path, +) -> None: + """ValidationResult fields (ok, errors, warnings) are correctly typed and set. + + spec/36 ValidationResult dataclass -- both ok=True and ok=False cases. + """ + # ok=True case: valid server with known command. + agent_root = tmp_path / "roundtrip-agent" + agent_root.mkdir() + spec = _make_mcp_spec("roundtrip-server", command="echo") + _write_mcp_md_from_specs(agent_root, [spec]) + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + + ok_result = backend.validate("roundtrip-server") + assert isinstance(ok_result, ValidationResult) + assert ok_result.ok is True + assert isinstance(ok_result.errors, list) + assert isinstance(ok_result.warnings, list) + + # ok=False case: name not in registry. + fail_result = backend.validate("does-not-exist") + assert isinstance(fail_result, ValidationResult) + assert fail_result.ok is False + assert isinstance(fail_result.errors, list) + assert len(fail_result.errors) >= 1 + + +# ────────────────────────────────────────────────────────────────────────────── +# G5 -- MCPRegistryDescriptorInvalid parse-failure test + + +def test_malformed_mcp_md_raises_descriptor_invalid(tmp_path: Path) -> None: + """load_mcp_server raises MCPRegistryDescriptorInvalid when a section header + exists in mcp.md but the section is missing required fields (e.g., command:). + + spec/36 prep finding B-F6. P1 #2 fix: the H2-header scan in load_mcp_server + detects the section exists but failed to parse, raising MCPRegistryDescriptorInvalid + instead of the confusing MCPServerNotInRegistry. + """ + agent_root = tmp_path / "malformed-agent" + agent_root.mkdir() + # A section with a valid H2 header but no command: line -- parse returns None + # for this section, causing the old code to raise MCPServerNotInRegistry. + # The fix raises MCPRegistryDescriptorInvalid instead. + content = dedent("""\ + # MCP servers + + ## malformed-server + description: This section has no command key + """) + _write_mcp_md(agent_root, content) + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + with pytest.raises(MCPRegistryDescriptorInvalid): + backend.load_mcp_server("malformed-server") + + +# ────────────────────────────────────────────────────────────────────────────── +# Install/uninstall stubs (deferred to PR 3) + + +@pytest.mark.skip( + reason="install/uninstall land in PR 3 alongside LockBackend integration" +) +def test_atomic_install_collision_via_concurrent_threads(tmp_path: Path) -> None: + """Atomic install collision via concurrent threads. + + Deferred to PR 3 when FilesystemMCPServerRegistryBackend.install ships + with LockBackend integration and atomic mcp.md write semantics. + """ + pass + + +@pytest.mark.skip( + reason="install/uninstall land in PR 3 alongside LockBackend integration" +) +def test_lock_file_acquired_during_install(tmp_path: Path) -> None: + """Lock file (.mcp_registry.lock) is acquired during install. + + Deferred to PR 3 when LockBackend integration lands. + """ + pass + + +# ────────────────────────────────────────────────────────────────────────────── +# P1 #1 -- OSError MUST 7 violation regression tests + + +def test_load_raises_unavailable_on_permission_error(tmp_path: Path) -> None: + """load_mcp_server raises MCPRegistryUnavailable (transient) for PermissionError. + + P1 #1 fix: PermissionError is NOT FileNotFoundError, so it must map to + MCPRegistryUnavailable (transient), not MCPServerNotInRegistry (permanent). + spec/36 MUST 7. + """ + import stat + + agent_root = tmp_path / "perm-agent" + agent_root.mkdir() + mcp_md = agent_root / "mcp.md" + mcp_md.write_text( + "# MCP servers\n\n## perm-server\ncommand: echo\n", + encoding="utf-8", + ) + # chmod 000 to cause PermissionError on read. + mcp_md.chmod(0o000) + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + try: + with pytest.raises(MCPRegistryUnavailable): + backend.load_mcp_server("perm-server") + finally: + # Restore permissions so tmp_path cleanup can remove the file. + mcp_md.chmod(stat.S_IRUSR | stat.S_IWUSR) + + +def test_load_raises_not_in_registry_on_missing_file(tmp_path: Path) -> None: + """load_mcp_server raises MCPServerNotInRegistry (permanent) when mcp.md is absent. + + P1 #1 fix: FileNotFoundError is a permanent absence and must map to + MCPServerNotInRegistry, not MCPRegistryUnavailable. + spec/36 MUST 7. + """ + agent_root = tmp_path / "missing-file-agent" + agent_root.mkdir() + # No mcp.md written -- the file does not exist. + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + with pytest.raises(MCPServerNotInRegistry): + backend.load_mcp_server("any-server") + + +# ────────────────────────────────────────────────────────────────────────────── +# P1 #2 -- Malformed-section MCPRegistryDescriptorInvalid regression test + + +def test_load_raises_descriptor_invalid_for_malformed_section(tmp_path: Path) -> None: + """load_mcp_server raises MCPRegistryDescriptorInvalid (not MCPServerNotInRegistry) + when the section header exists but the section has no command: key. + + P1 #2 fix: the H2-header scan distinguishes 'section exists but parse failed' + from 'section not in mcp.md at all'. + spec/36 MCPRegistryDescriptorInvalid semantics. + """ + agent_root = tmp_path / "malformed-section-agent" + agent_root.mkdir() + content = dedent("""\ + # MCP servers + + ## foo + description: incomplete section, no command key + """) + _write_mcp_md(agent_root, content) + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + # Must NOT raise MCPServerNotInRegistry -- that would be misleading since + # the section 'foo' IS present in the file. + with pytest.raises(MCPRegistryDescriptorInvalid): + backend.load_mcp_server("foo") + + +# ────────────────────────────────────────────────────────────────────────────── +# P2 #1 -- list_mcp_servers skips sections with invalid names + + +def test_list_skips_sections_with_invalid_names(tmp_path: Path) -> None: + """list_mcp_servers skips sections whose names fail the charset validator. + + P2 #1 fix: invalid-name sections are skipped with a logged warning rather + than propagating uncaught ValueError from load_all_mcp_servers. + spec/36 MUST 1 / tampered-mcp.md resilience. + + Note: parse_mcp_md_text uses whitespace-splitting, so injecting a truly + invalid-charset name requires manually writing raw mcp.md content with + a name that parse_mcp_md_text passes through but _validate_server_name + rejects (e.g., names that are pure-dot or pass parse but fail the regex). + We use a name starting with '.' which is rejected by _validate_server_name. + """ + agent_root = tmp_path / "invalid-name-list-agent" + agent_root.mkdir() + # Write a mcp.md that has one good section and one section where the name + # starts with '.', which _validate_server_name rejects with a leading-dot rule. + # We write raw content because _write_mcp_md_from_specs validates names itself. + content = dedent("""\ + # MCP servers + + ## good-server + command: echo + description: This one is fine + + ## .hidden-server + command: npx + description: This name starts with a dot and should be skipped + """) + _write_mcp_md(agent_root, content) + backend = FilesystemMCPServerRegistryBackend(agent_root, []) + refs = backend.list_mcp_servers() + names = {r.name for r in refs} + assert "good-server" in names, "valid server must appear in list" + assert ".hidden-server" not in names, "dot-prefixed name must be skipped" From 2eec37d191b1cff7e1e6e958aa47508461b7bf59 Mon Sep 17 00:00:00 2001 From: Dan Powers Date: Wed, 3 Jun 2026 08:54:31 -0500 Subject: [PATCH 3/5] feat(cli): atomic-agents mcp-registry subcommand group Adds four read-only subcommands at PR 1 (install/uninstall ship at PR 3 alongside LockBackend integration): - mcp-registry list: prints a table of mounted MCP servers (name, transport, description). - mcp-registry show : prints the MCPServerSpec for a single server. CRITICAL: this command re-parses mcp.md with resolve_env=False to print the UNRESOLVED $VAR refs from the file, NOT the resolved values returned by load_mcp_server. Printing resolved values would leak secrets ($GITHUB_PAT becomes a real token in terminal scrollback, CI logs, shell capture). Operator-debugging utility is preserved (operator sees env: SECRET=$GITHUB_PAT) without the leak. - mcp-registry validate : runs the static descriptor check; prints ValidationResult. - mcp-registry refresh-capabilities: prints the current capability view. No-op on filesystem (no remote dependency); HTTP backend at PR 4 re-probes the catalog server. _cmd_mcp_registry catches MCPServerNotInRegistry, MCPRegistryAuthRequired, MCPRegistryUnavailable, MCPRegistryDescriptorInvalid, MCPServerConnectFailed (unset $VAR resolution), ValueError (invalid name), and the (OSError, PermissionError) tuple. Every exception path prints a clean "Error: ..." to stderr and returns exit code 1; no Python tracebacks escape to the operator. _resolve_mcp_registry_agent_root has three branches: --agent-root flag takes priority; ATOMIC_AGENTS_AGENT_ROOT env var second; Path.cwd() as fallback (matches the corpus subcommand precedent). CLI passes read_paths=[] to FilesystemMCPServerRegistryBackend with an inline comment documenting the intent: CLI is a read-only inspection surface and intentionally skips path-traversal validation. PR 3 install will require non-empty read_paths from agent runtime context (resolved from tools.md). tests/test_mcp_registry_cli.py: 16 tests covering all four subcommands + 3 agent_root resolver branches + 3 exception handler paths. The regression test test_cli_show_does_not_leak_resolved_env_values asserts the actual resolved env value (the secret) does NOT appear in stdout while the $VAR reference DOES (operator-debugging visibility preserved). --- atomic_agents/cli.py | 250 ++++++++++++++++++ tests/test_mcp_registry_cli.py | 463 +++++++++++++++++++++++++++++++++ 2 files changed, 713 insertions(+) create mode 100644 tests/test_mcp_registry_cli.py diff --git a/atomic_agents/cli.py b/atomic_agents/cli.py index c5a1893..9195a0a 100644 --- a/atomic_agents/cli.py +++ b/atomic_agents/cli.py @@ -375,6 +375,70 @@ def main(argv: list[str] | None = None) -> int: help="override ATOMIC_AGENTS_AGENT_ROOT (default: $ATOMIC_AGENTS_AGENT_ROOT or cwd)", ) + # ── mcp-registry subcommand group ──────────────────────────────────── + mcp_registry_cmd = sub.add_parser( + "mcp-registry", + help="Inspect mounted MCP servers (list, show, validate, refresh-capabilities)", + description=( + "Read-only inspection of the MCP server registry for an agent. " + "Uses the filesystem backend by default (reads /mcp.md). " + "Override with ATOMIC_AGENTS_MCP_SERVER_REGISTRY_BACKEND env var. " + "Install and uninstall subcommands are deferred to PR 3." + ), + ) + mcp_registry_sub = mcp_registry_cmd.add_subparsers( + dest="mcp_registry_cmd", required=True + ) + + # mcp-registry list [--agent-root PATH] + mcp_registry_list = mcp_registry_sub.add_parser( + "list", + help="List all mounted MCP servers (name, description, transport)", + ) + mcp_registry_list.add_argument( + "--agent-root", + default=None, + help="override ATOMIC_AGENTS_AGENT_ROOT (default: $ATOMIC_AGENTS_AGENT_ROOT or cwd)", + ) + + # mcp-registry show [--agent-root PATH] + mcp_registry_show = mcp_registry_sub.add_parser( + "show", + help="Show full spec for a named MCP server", + ) + mcp_registry_show.add_argument("name", help="MCP server name") + mcp_registry_show.add_argument( + "--agent-root", + default=None, + help="override ATOMIC_AGENTS_AGENT_ROOT (default: $ATOMIC_AGENTS_AGENT_ROOT or cwd)", + ) + + # mcp-registry validate [--agent-root PATH] + mcp_registry_validate = mcp_registry_sub.add_parser( + "validate", + help="Static validation of a named MCP server descriptor", + ) + mcp_registry_validate.add_argument("name", help="MCP server name") + mcp_registry_validate.add_argument( + "--agent-root", + default=None, + help="override ATOMIC_AGENTS_AGENT_ROOT (default: $ATOMIC_AGENTS_AGENT_ROOT or cwd)", + ) + + # mcp-registry refresh-capabilities [--agent-root PATH] + mcp_registry_refresh = mcp_registry_sub.add_parser( + "refresh-capabilities", + help=( + "Print current backend capabilities (filesystem backend has no remote " + "dependency; HTTP backend at PR 4 re-probes the catalog server)" + ), + ) + mcp_registry_refresh.add_argument( + "--agent-root", + default=None, + help="override ATOMIC_AGENTS_AGENT_ROOT (default: $ATOMIC_AGENTS_AGENT_ROOT or cwd)", + ) + # ── init subcommand ─────────────────────────────────────────────────── init_cmd = sub.add_parser( "init", @@ -433,6 +497,10 @@ def main(argv: list[str] | None = None) -> int: if args.cmd == "corpus": return _cmd_corpus(args) + # `mcp-registry` resolves its own agent_root from --agent-root or env var / cwd. + if args.cmd == "mcp-registry": + return _cmd_mcp_registry(args) + # `init` resolves its own agents_root from --agents-root or env var. # It does not use the agents-root / agent-name hierarchy. if args.cmd == "init": @@ -1065,5 +1133,187 @@ def _cmd_init(args) -> int: return run_init(args) +def _resolve_mcp_registry_agent_root(args) -> Path: + """Resolve agent_root for mcp-registry subcommands. + + Resolution order mirrors the corpus subcommand pattern: + 1. ``--agent-root`` CLI flag (explicit wins). + 2. ``ATOMIC_AGENTS_AGENT_ROOT`` env var. + 3. Current working directory (``Path.cwd()``). + """ + if args.agent_root is not None: + return Path(args.agent_root).expanduser().resolve() + env_val = os.environ.get("ATOMIC_AGENTS_AGENT_ROOT") + if env_val: + return Path(env_val).expanduser().resolve() + return Path.cwd() + + +def _cmd_mcp_registry(args) -> int: + """Dispatch mcp-registry subcommands. + + Instantiates the operator-pinned backend via + ``get_default_mcp_server_registry_backend(agent_root, read_paths)``, + which reads ``ATOMIC_AGENTS_MCP_SERVER_REGISTRY_BACKEND`` (default + ``"filesystem"``) so the CLI surface honours the same env-var override + as the runtime. + + Exit codes: 0 on success, 1 on any error. Errors go to stderr; + normal output to stdout. Zero LLM calls -- pure local I/O. + + Install and uninstall subcommands are deferred to PR 3. + """ + from .mcp_registry import ( + MCPRegistryAuthRequired, + MCPRegistryDescriptorInvalid, + MCPRegistryUnavailable, + MCPServerNotInRegistry, + get_default_mcp_server_registry_backend, + ) + from .exceptions import MCPServerConnectFailed + + agent_root = _resolve_mcp_registry_agent_root(args) + + # Empty read_paths means the CLI skips path-traversal validation. This is + # consistent with the read-only inspection use case (mcp-registry show/validate/ + # list); install/uninstall in PR 3 will require non-empty read_paths from agent + # runtime context. + try: + backend = get_default_mcp_server_registry_backend(agent_root, []) + except Exception as e: + print(f"Error: failed to resolve MCP registry backend: {e}", file=sys.stderr) + return 1 + + sub_cmd = args.mcp_registry_cmd + + try: + if sub_cmd == "list": + return _mcp_registry_list(backend) + elif sub_cmd == "show": + return _mcp_registry_show(backend, args.name) + elif sub_cmd == "validate": + return _mcp_registry_validate(backend, args.name) + elif sub_cmd == "refresh-capabilities": + return _mcp_registry_refresh_capabilities(backend) + except MCPServerConnectFailed as e: + print(f"Error: env var not resolved: {e}", file=sys.stderr) + return 1 + except MCPRegistryDescriptorInvalid as e: + print(f"Error: MCP server descriptor is invalid: {e}", file=sys.stderr) + return 1 + except MCPRegistryAuthRequired as e: + print( + f"Error: authentication required for MCP registry backend: {e}", + file=sys.stderr, + ) + return 1 + except MCPRegistryUnavailable as e: + print( + f"Error: MCP registry backend temporarily unavailable: {e}", + file=sys.stderr, + ) + return 1 + except MCPServerNotInRegistry as e: + print(f"Error: MCP server not found: {e}", file=sys.stderr) + return 1 + except ValueError as e: + print(f"Error: invalid server name: {e}", file=sys.stderr) + return 1 + except (OSError, PermissionError) as e: + print(f"Error: {e}", file=sys.stderr) + return 1 + return 0 + + +def _mcp_registry_list(backend) -> int: + """List all mounted MCP servers as a table: name / description / transport.""" + refs = backend.list_mcp_servers() + if not refs: + print("No MCP servers registered.") + return 0 + name_w = max(len(r.name) for r in refs) + transport_w = max(len(r.transport) for r in refs) + header = f"{'NAME':<{name_w}} {'TRANSPORT':<{transport_w}} DESCRIPTION" + print(header) + print("-" * len(header)) + for ref in refs: + desc = ref.description or "" + print(f"{ref.name:<{name_w}} {ref.transport:<{transport_w}} {desc}") + return 0 + + +def _mcp_registry_show(backend, name: str) -> int: + """Show the full MCPServerSpec for a named server as a formatted table. + + We print the unresolved $VAR refs from mcp.md, NOT the resolved values + from load_mcp_server, to avoid leaking secrets to stdout. + """ + # Call load_mcp_server first for validation: name charset check, path-traversal + # check, and confirmation that the server exists in the catalog. We discard the + # resolved spec's env (which has real secret values) and re-read raw mcp.md. + spec = backend.load_mcp_server(name) + + # Re-parse mcp.md with resolve_env=False to get unresolved $VAR refs. + # This is the safe display path: operators see the wiring ($VAR names) + # without the resolved values being emitted to stdout. + unresolved_env: dict[str, str] | None = None + mcp_md = backend._agent_root / "mcp.md" + try: + from .mcp import parse_mcp_md_text + + content = mcp_md.read_text(encoding="utf-8") + raw_specs = parse_mcp_md_text(content, resolve_env=False, read_paths=None) + for raw_spec in raw_specs: + if raw_spec.name == name: + unresolved_env = raw_spec.env + break + except Exception: + # If re-reading mcp.md fails (file gone between load and re-read), + # do NOT fall back to printing resolved values. Warn on stderr instead. + if spec.env: + print( + f"", + file=sys.stderr, + ) + + print(f"name: {spec.name}") + print(f"command: {spec.command}") + if spec.args: + print(f"args: {' '.join(spec.args)}") + display_env = unresolved_env if unresolved_env is not None else {} + if display_env: + print("env:") + for k, v in display_env.items(): + print(f" {k}: {v}") + print(f"transport: {spec.transport}") + if spec.description: + print(f"description: {spec.description}") + return 0 + + +def _mcp_registry_validate(backend, name: str) -> int: + """Run a static validation check on a named server descriptor.""" + result = backend.validate(name) + status = "OK" if result.ok else "FAIL" + print(f"validate {name!r}: {status}") + for err in result.errors: + print(f" ERROR: {err}") + for warn in result.warnings: + print(f" WARNING: {warn}") + return 0 if result.ok else 1 + + +def _mcp_registry_refresh_capabilities(backend) -> int: + """Re-probe backend capabilities and print the current view.""" + caps = backend.refresh_capabilities() + print(f"backend_id: {backend.backend_id}") + print(f"supports_install: {caps.supports_install}") + print(f"supports_uninstall: {caps.supports_uninstall}") + print(f"supports_capability_handshake: {caps.supports_capability_handshake}") + print(f"supports_audit: {caps.supports_audit}") + print(f"durable: {caps.durable}") + return 0 + + if __name__ == "__main__": sys.exit(main()) diff --git a/tests/test_mcp_registry_cli.py b/tests/test_mcp_registry_cli.py new file mode 100644 index 0000000..4f191af --- /dev/null +++ b/tests/test_mcp_registry_cli.py @@ -0,0 +1,463 @@ +"""CLI integration tests for ``atomic-agents mcp-registry`` subcommands. + +Invokes ``atomic_agents.cli.main(argv=[...])`` directly with explicit argv. +Uses ``capsys`` for stdout/stderr capture and ``tmp_path`` for a fresh +agent root on every test. Passes ``--agent-root`` to pin the backend +to the tmp directory so no test touches the real project mcp.md. + +Pattern mirrors ``tests/test_persona_cli.py``. +""" + +from __future__ import annotations + +import sys +from pathlib import Path +from textwrap import dedent +from unittest.mock import MagicMock, patch + +import pytest + +from atomic_agents.cli import main +from atomic_agents.mcp import MCPServerSpec + + +# ───────────────────────────────────────────────────────────────────────────── +# Helpers + + +def _write_mcp_md(agent_root: Path, content: str) -> Path: + """Write raw mcp.md content to agent_root/mcp.md.""" + mcp_md = agent_root / "mcp.md" + mcp_md.write_text(content, encoding="utf-8") + return mcp_md + + +def _write_valid_mcp_md(agent_root: Path, server_name: str = "my-server") -> Path: + """Write a minimal valid mcp.md with one stdio server.""" + content = dedent(f"""\ + # MCP servers + + ## {server_name} + command: echo + description: Test server for CLI tests + """) + return _write_mcp_md(agent_root, content) + + +def _run( + argv: list[str], + tmp_path: Path, + capsys, +) -> tuple[int, str, str]: + """Run ``main(argv)`` and return (exit_code, stdout, stderr).""" + code = main(argv) + captured = capsys.readouterr() + return code, captured.out, captured.err + + +# ───────────────────────────────────────────────────────────────────────────── +# _resolve_mcp_registry_agent_root -- 3 branches + + +def test_resolve_agent_root_uses_explicit_flag(tmp_path: Path, capsys) -> None: + """--agent-root flag wins over env vars and cwd.""" + agent_root = tmp_path / "explicit" + agent_root.mkdir() + code, out, _err = _run( + ["mcp-registry", "list", "--agent-root", str(agent_root)], + tmp_path, + capsys, + ) + assert code == 0 + assert "No MCP servers" in out + + +def test_resolve_agent_root_falls_back_to_env_var( + tmp_path: Path, monkeypatch, capsys +) -> None: + """When no --agent-root flag is given, ATOMIC_AGENTS_AGENT_ROOT is used.""" + agent_root = tmp_path / "from-env" + agent_root.mkdir() + monkeypatch.setenv("ATOMIC_AGENTS_AGENT_ROOT", str(agent_root)) + # Pass no --agent-root flag; backend resolves from env var. + code, out, _err = _run( + ["mcp-registry", "list"], + tmp_path, + capsys, + ) + assert code == 0 + assert "No MCP servers" in out + + +# The zero-assertion cwd-fallback test was removed (P2 #4). The test below +# exercises the same code path with real assertions via the resolver function. +def test_resolve_agent_root_falls_back_to_cwd_via_resolver( + tmp_path: Path, monkeypatch +) -> None: + """_resolve_mcp_registry_agent_root returns Path.cwd() when no flag or env var.""" + import pathlib + + monkeypatch.delenv("ATOMIC_AGENTS_AGENT_ROOT", raising=False) + from atomic_agents.cli import _resolve_mcp_registry_agent_root + + class FakeArgs: + agent_root = None + + result = _resolve_mcp_registry_agent_root(FakeArgs()) + assert isinstance(result, pathlib.Path) + # Should equal the actual cwd at test time. + assert result == pathlib.Path.cwd() + + +# ───────────────────────────────────────────────────────────────────────────── +# mcp-registry list -- 3 tests + + +def test_cli_list_empty_catalog_prints_no_servers_message( + tmp_path: Path, capsys +) -> None: + """``mcp-registry list`` on an empty agent root prints 'No MCP servers'.""" + agent_root = tmp_path / "empty-agent" + agent_root.mkdir() + code, out, _err = _run( + ["mcp-registry", "list", "--agent-root", str(agent_root)], + tmp_path, + capsys, + ) + assert code == 0 + assert "No MCP servers" in out + + +def test_cli_list_populated_catalog_prints_table(tmp_path: Path, capsys) -> None: + """``mcp-registry list`` with a populated mcp.md prints server names.""" + agent_root = tmp_path / "populated-agent" + agent_root.mkdir() + _write_valid_mcp_md(agent_root, "my-server") + code, out, _err = _run( + ["mcp-registry", "list", "--agent-root", str(agent_root)], + tmp_path, + capsys, + ) + assert code == 0 + assert "my-server" in out + + +def test_cli_list_exit_code_zero_on_success(tmp_path: Path, capsys) -> None: + """``mcp-registry list`` returns exit code 0 on success.""" + agent_root = tmp_path / "exit-code-agent" + agent_root.mkdir() + code, _out, _err = _run( + ["mcp-registry", "list", "--agent-root", str(agent_root)], + tmp_path, + capsys, + ) + assert code == 0 + + +# ───────────────────────────────────────────────────────────────────────────── +# mcp-registry show -- 3 tests + + +def test_cli_show_happy_path_prints_spec(tmp_path: Path, capsys) -> None: + """``mcp-registry show `` prints command and transport fields.""" + agent_root = tmp_path / "show-agent" + agent_root.mkdir() + _write_valid_mcp_md(agent_root, "show-server") + code, out, _err = _run( + ["mcp-registry", "show", "show-server", "--agent-root", str(agent_root)], + tmp_path, + capsys, + ) + assert code == 0 + assert "show-server" in out + assert "echo" in out + + +def test_cli_show_missing_name_prints_error_exit_1(tmp_path: Path, capsys) -> None: + """``mcp-registry show`` for an absent server prints an error and exits 1.""" + agent_root = tmp_path / "missing-agent" + agent_root.mkdir() + _write_valid_mcp_md(agent_root, "other-server") + code, _out, err = _run( + ["mcp-registry", "show", "does-not-exist", "--agent-root", str(agent_root)], + tmp_path, + capsys, + ) + assert code == 1 + assert "Error" in err or "error" in err.lower() + + +def test_cli_show_invalid_name_prints_error_exit_1(tmp_path: Path, capsys) -> None: + """``mcp-registry show`` with a path-traversal name prints an error and exits 1.""" + agent_root = tmp_path / "invalid-name-show-agent" + agent_root.mkdir() + code, _out, err = _run( + ["mcp-registry", "show", "../etc/passwd", "--agent-root", str(agent_root)], + tmp_path, + capsys, + ) + assert code == 1 + assert "Error" in err or "error" in err.lower() + + +# ───────────────────────────────────────────────────────────────────────────── +# mcp-registry validate -- 2 tests + + +def test_cli_validate_ok_prints_pass_exit_0(tmp_path: Path, capsys) -> None: + """``mcp-registry validate `` for a valid server prints OK and exits 0.""" + agent_root = tmp_path / "validate-ok-agent" + agent_root.mkdir() + _write_valid_mcp_md(agent_root, "ok-server") + code, out, _err = _run( + ["mcp-registry", "validate", "ok-server", "--agent-root", str(agent_root)], + tmp_path, + capsys, + ) + assert code == 0 + assert "OK" in out + + +def test_cli_validate_not_ok_prints_errors_exit_1(tmp_path: Path, capsys) -> None: + """``mcp-registry validate `` for an absent server exits 1 with error output.""" + agent_root = tmp_path / "validate-fail-agent" + agent_root.mkdir() + # No mcp.md -- the server is absent from the registry. + code, out, err = _run( + [ + "mcp-registry", + "validate", + "absent-server", + "--agent-root", + str(agent_root), + ], + tmp_path, + capsys, + ) + assert code == 1 + # validate prints results to stdout (per _mcp_registry_validate implementation). + assert "FAIL" in out or "ERROR" in out + + +# ───────────────────────────────────────────────────────────────────────────── +# mcp-registry refresh-capabilities -- 1 test + + +def test_cli_refresh_capabilities_prints_capability_fields( + tmp_path: Path, capsys +) -> None: + """``mcp-registry refresh-capabilities`` prints all 5 capability fields.""" + agent_root = tmp_path / "caps-agent" + agent_root.mkdir() + code, out, _err = _run( + [ + "mcp-registry", + "refresh-capabilities", + "--agent-root", + str(agent_root), + ], + tmp_path, + capsys, + ) + assert code == 0 + assert "supports_install" in out + assert "supports_uninstall" in out + assert "supports_capability_handshake" in out + assert "supports_audit" in out + assert "durable" in out + + +# ───────────────────────────────────────────────────────────────────────────── +# Exception handler coverage -- 3 tests + + +def test_cli_handles_unknown_backend_id_with_redacted_url( + tmp_path: Path, monkeypatch, capsys +) -> None: + """When ATOMIC_AGENTS_MCP_SERVER_REGISTRY_BACKEND is a URL, stderr shows the + redacted form (scheme://...) and exit code is 1. + + spec/36 credential-redaction in error messages. + """ + agent_root = tmp_path / "bad-backend-agent" + agent_root.mkdir() + monkeypatch.setenv( + "ATOMIC_AGENTS_MCP_SERVER_REGISTRY_BACKEND", + "https://user:pass@catalog/", + ) + code, _out, err = _run( + ["mcp-registry", "list", "--agent-root", str(agent_root)], + tmp_path, + capsys, + ) + assert code == 1 + assert "Error" in err + # Credentials must NOT appear; only the redacted form (https://...) may. + assert "user:pass" not in err + assert "https://..." in err + + +def test_cli_handles_descriptor_invalid_cleanly(tmp_path: Path, capsys) -> None: + """When load_mcp_server raises MCPRegistryDescriptorInvalid, the CLI prints a + clean error to stderr and exits 1 (no uncaught Python traceback). + + spec/36 G8a -- MCPRegistryDescriptorInvalid must be caught in _cmd_mcp_registry. + """ + from atomic_agents.mcp_registry import MCPRegistryDescriptorInvalid + + agent_root = tmp_path / "descriptor-invalid-agent" + agent_root.mkdir() + _write_valid_mcp_md(agent_root, "broken-server") + + # Patch the backend's load_mcp_server to simulate a descriptor-parse failure. + with patch( + "atomic_agents.mcp_registry.filesystem.FilesystemMCPServerRegistryBackend.load_mcp_server", + side_effect=MCPRegistryDescriptorInvalid("mcp.md at /x could not be parsed"), + ): + code, _out, err = _run( + [ + "mcp-registry", + "show", + "broken-server", + "--agent-root", + str(agent_root), + ], + tmp_path, + capsys, + ) + assert code == 1 + assert "Error" in err + # Must NOT be an uncaught traceback (no "Traceback (most recent call last)"). + assert "Traceback" not in err + + +def test_cli_handles_unavailable_backend_cleanly(tmp_path: Path, capsys) -> None: + """When list_mcp_servers raises MCPRegistryUnavailable, the CLI prints a clean + error to stderr and exits 1. + + spec/36 MCPRegistryUnavailable exception handling. + """ + from atomic_agents.mcp_registry import MCPRegistryUnavailable + + agent_root = tmp_path / "unavailable-agent" + agent_root.mkdir() + + with patch( + "atomic_agents.mcp_registry.filesystem.FilesystemMCPServerRegistryBackend.list_mcp_servers", + side_effect=MCPRegistryUnavailable("backend temporarily unavailable"), + ): + code, _out, err = _run( + ["mcp-registry", "list", "--agent-root", str(agent_root)], + tmp_path, + capsys, + ) + assert code == 1 + assert "Error" in err + assert "Traceback" not in err + + +# ───────────────────────────────────────────────────────────────────────────── +# P0 regression -- mcp-registry show must not leak resolved env values + + +def test_cli_show_does_not_leak_resolved_env_values( + tmp_path: Path, monkeypatch, capsys +) -> None: + """mcp-registry show MUST NOT print resolved secret values to stdout. + + P0 fix: _mcp_registry_show previously printed spec.env which contained + the resolved values from load_mcp_server. It must instead re-parse mcp.md + with resolve_env=False and print the raw $VAR references. + + spec/36 secret-leak prevention. + """ + monkeypatch.setenv("TESTSECRET", "actual-secret-value-NEVER-PRINT") + + agent_root = tmp_path / "secret-agent" + agent_root.mkdir() + content = dedent("""\ + # MCP servers + + ## secret-server + command: echo + env: SECRET=$TESTSECRET + description: Server with a secret env var + """) + _write_mcp_md(agent_root, content) + + code, out, _err = _run( + ["mcp-registry", "show", "secret-server", "--agent-root", str(agent_root)], + tmp_path, + capsys, + ) + assert code == 0 + # The resolved secret value must NOT appear anywhere in stdout. + assert "actual-secret-value-NEVER-PRINT" not in out, ( + "Resolved secret value must never be printed to stdout" + ) + # The raw $VAR reference MUST appear so operators can see the wiring. + assert "$TESTSECRET" in out, ( + "Raw $VAR reference must appear in stdout so operator sees the wiring" + ) + + +# ───────────────────────────────────────────────────────────────────────────── +# P1 #4 regression -- MCPServerConnectFailed escape from _cmd_mcp_registry + + +def test_cli_show_handles_unset_env_var_cleanly( + tmp_path: Path, monkeypatch, capsys +) -> None: + """mcp-registry show exits 1 with a clean error when a required env var is unset. + + P1 #4 fix: MCPServerConnectFailed was not caught in _cmd_mcp_registry's + exception chain, causing an uncaught traceback. The fix adds a handler + that prints a clean error to stderr and exits 1. + + spec/36 MCPServerConnectFailed handling. + """ + monkeypatch.delenv("UNSET_CLI_VAR", raising=False) + + agent_root = tmp_path / "unset-env-agent" + agent_root.mkdir() + content = dedent("""\ + # MCP servers + + ## token-server + command: echo + env: TOKEN=$UNSET_CLI_VAR + description: Server with an unset env var + """) + _write_mcp_md(agent_root, content) + + code, _out, err = _run( + ["mcp-registry", "show", "token-server", "--agent-root", str(agent_root)], + tmp_path, + capsys, + ) + assert code == 1, "CLI must exit 1 when env var is unset" + # Must be a clean error message, not a Python traceback. + assert "Traceback" not in err, "Must not show a Python traceback" + assert "Error" in err or "error" in err.lower(), ( + "stderr must contain a human-readable error message" + ) + + +# ───────────────────────────────────────────────────────────────────────────── +# P1 #3 regression -- empty read_paths comment exists in _cmd_mcp_registry + + +def test_cli_uses_empty_read_paths_comment_present() -> None: + """The inline comment explaining empty read_paths is present in cli.py. + + P1 #3: the CLI uses read_paths=[] for inspection-only commands. The comment + explains this is intentional (consistent with read-only inspection use case) + and documents that PR 3 install/uninstall will require non-empty read_paths. + """ + import inspect + from atomic_agents.cli import _cmd_mcp_registry + + source = inspect.getsource(_cmd_mcp_registry) + assert "Empty read_paths" in source, ( + "_cmd_mcp_registry must contain the inline comment explaining empty read_paths" + ) From 6888235aafc98162ff4bbc967ae69061dea6c5da Mon Sep 17 00:00:00 2001 From: Dan Powers Date: Wed, 3 Jun 2026 08:54:53 -0500 Subject: [PATCH 4/5] docs(mcp_registry): spec/36 DRAFT + CHANGELOG (#201 PR 1 of 5) docs/spec/36-mcp-server-registry-backend.md ships as DRAFT at 696 lines with 10 normative MUSTs in the Implementer Contract (placed before the Shipping plan section per spec/34 ordering): 1. Name charset validation at API boundary ([a-zA-Z0-9_.+@-]+ minus leading-dot per the cross-Protocol path-traversal pattern). 2. Side-effect-free construction (no network, subprocess, or file open at __init__). 3. Capability honesty including the static-vs-dynamic distinction novel to this Protocol (filesystem static; HTTP dynamic per tier 1/2/3 capability negotiation at PR 4). 4. URL credential redaction across operator-facing error paths; _redact_for_error_message catches both ://-scheme URLs and DSN-style user:password@host connection strings without scheme. 5. Cross-agent isolation at storage layer (filesystem per agent_root; HTTP per agent_scope query param; lock file path distinct from the agent's main .lock). 6. backend_id stability + close() idempotency. 7. Transient-vs-permanent failure honesty (MCPRegistryUnavailable for PermissionError + I/O errors; MCPServerNotInRegistry for FileNotFoundError + ENOENT). 8. Env-var resolution semantics on MCPServerSpec at load_mcp_server time (not at parse time); the helper is module-level in atomic_agents/mcp.py and shared with the filesystem backend. 9. Install/uninstall atomicity (filesystem uses LockBackend lease per spec/21; HTTP relies on catalog server transactional storage). 10. load_all_mcp_servers consistency (output semantically equivalent to [load_mcp_server(ref.name) for ref in list_mcp_servers()]). The spec re-languages "catalog of available servers" to "mounted server source" so the per-agent_scope filtering model is unambiguous. HTTP backend tier-1/2/3 capability negotiation via GET /capabilities and OPTIONS /mcp-servers + the AgentProfile.mcp_servers_resolved sibling field for substrate-agnostic audit (spec/24 D1 addendum) ship at PR 2-4. 5-PR shipping plan in the spec body: PR 1 Protocol scaffolding + DRAFT spec + filesystem read paths + CLI read-only subcommands. PR 2 agent.py wiring + AgentProfile sibling field + IRON RULE regression suite + doctor coherence check. PR 3 filesystem install/uninstall + LockBackend integration + CLI install/uninstall. PR 4 HTTPMCPServerRegistryBackend read paths + tier capability negotiation + httpx via new [http] optional extra in pyproject.toml. PR 5 HTTP install/uninstall + spec/36 LOCK at 10 MUSTs + v1.0.0 release tag (twelve of twelve backend protocols shipped). CHANGELOG [Unreleased] entry covers the PR 1 deliverables + review army findings + fixes. Closes the office-hours / plan-eng-review / plan-subagent / /ship review pipeline for PR 1. Cross-model triple-confirmed P0 secret leak in mcp-registry show fixed before push. Co-Authored-By: Claude Opus 4.7 --- CHANGELOG.md | 2 + docs/spec/36-mcp-server-registry-backend.md | 696 ++++++++++++++++++++ 2 files changed, 698 insertions(+) create mode 100644 docs/spec/36-mcp-server-registry-backend.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 8027d51..a9b38df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -51,6 +51,8 @@ CHANGELOG entry. ### Added +- **MCPServerRegistryBackend Protocol scaffolding + FilesystemMCPServerRegistryBackend reference impl + DRAFT spec/36 + `atomic-agents mcp-registry` CLI** ([#201](https://github.com/dep0we/atomic-agents-stack/issues/201) -- MCPServerRegistryBackend arc **PR 1 of 5**, the v1.0 closer arc). Operators with a fleet of agents stop hand-syncing `mcp.md` files across N agents. The Protocol seals the per-agent mounted-server-source seam so SaaS / org-internal / public-registry futures arrive via the same env-var pin pattern used by the eleven prior backend protocols. New `atomic_agents/mcp_registry/` package with `__init__.py` (`register_mcp_server_registry_backend` / `get_mcp_server_registry_backend` / `list_mcp_server_registry_backends` + `get_default_mcp_server_registry_backend` factory + `_redact_for_error_message` helper with DSN heuristic for non-scheme connection strings), `types.py` (`MCPServerRef` frozen dataclass with `to_dict`/`from_dict` roundtrip, `MCPServerRegistryCapabilities` with 5 capability flags including the static-vs-dynamic distinction novel to this Protocol, `ValidationResult`), `backend.py` (`@runtime_checkable` `MCPServerRegistryBackend` Protocol with 8 methods + `@property backend_id` + `@property capabilities`, 6 exception classes, `_default_load_all(backend)` module-level helper that backends MAY override for performance), and `filesystem.py` (`FilesystemMCPServerRegistryBackend(agent_root, read_paths, *, lock_backend=None)` reading `/mcp.md`; install/uninstall raise `NotImplementedError` at PR 1 with `supports_install=False` static class default per the capability-honesty contract (MUST 3); install/uninstall ship at PR 3 alongside `LockBackend` integration). `parse_mcp_md_text` and `_build_spec` in `atomic_agents/mcp.py` gain an additive `resolve_env: bool = True` keyword-only parameter; the `_flush()` closure threads the parameter through to `_build_spec`; a new module-level `_resolve_env_vars(env, server_name)` helper is extracted from `_build_spec`'s inline resolution block and is now shared with `FilesystemMCPServerRegistryBackend.load_mcp_server` so the spec/19-canonical `MCPServerConnectFailed` message shape stays identical across both call paths. The public `parse_mcp_md` at `mcp.py:410` does NOT receive the parameter; it always resolves at parse time and the docstring documents the API asymmetry (callers needing lazy resolution call `parse_mcp_md_text(..., resolve_env=False)` directly). Two hidden callers preserved: `profile/types.py:336` (`AgentProfile.from_dict`) and `profile/filesystem.py:374` (`FilesystemAgentProfileBackend.load_profile`) each gain an inline comment documenting the `resolve_env=True` invariant their `AgentProfile.mcp_servers` consumers depend on. CLI gains the `atomic-agents mcp-registry` subcommand group with four read-only subcommands at PR 1 (install/uninstall ship in PR 3): `list` prints a table of mounted servers, `show ` prints the spec with `$VAR` env refs UNRESOLVED (re-parses mcp.md with `resolve_env=False` to avoid leaking resolved secrets to stdout per the cross-model-confirmed P0 from /ship's review army), `validate ` runs the static descriptor check, and `refresh-capabilities` prints the runtime capability view (no-op on filesystem; HTTP backend at PR 4 re-probes the catalog server). `docs/spec/36-mcp-server-registry-backend.md` ships as DRAFT at 696 lines with 10 normative MUSTs in the Implementer Contract (placed before Shipping plan per spec/34 ordering): 1 name charset validation at API boundary, 2 side-effect-free construction, 3 capability honesty including the static-vs-dynamic distinction, 4 URL credential redaction, 5 cross-agent isolation at storage layer, 6 backend_id stability + close() idempotency, 7 transient-vs-permanent failure honesty (MCPRegistryUnavailable vs MCPServerNotInRegistry distinction), 8 env-var resolution semantics on MCPServerSpec at load_mcp_server time, 9 install/uninstall atomicity (filesystem uses LockBackend lease, HTTP relies on catalog server transactional storage), 10 `load_all_mcp_servers` consistency (output semantically equivalent to `[load_mcp_server(ref.name) for ref in list_mcp_servers()]`). The spec re-languages "catalog of available servers" to "mounted server source" per Codex's outside-voice finding so the per-`agent_scope` filtering model is unambiguous; HTTP backend tier-1/2/3 capability negotiation via `GET /capabilities` and `OPTIONS /mcp-servers` ships at PR 4. `AgentProfile.mcp_servers_resolved` sibling field for the substrate-agnostic audit capture (spec/24 D1 addendum) ships at PR 2 alongside the wiring. Tests: 3090 to 3101 passing, 52 skipped, zero regressions across the full suite. 100 new MCP-registry tests across 3 files (`tests/test_mcp_server_registry_conformance.py` 42 tests covering MUSTs 1-3, 5-8, 10 parametrized across registered backends with HTTP joining at PR 4; `tests/test_mcp_server_registry_filesystem_backend.py` 32 tests + 2 skipped (install/uninstall stubs reserved for PR 3 with `pytest.mark.skip(reason="land in PR 3")` markers); `tests/test_mcp_registry_cli.py` 16 tests covering all four CLI subcommands + exception handler paths including the secret-leak regression test). Pre-impl review: 5-stream parallel Sonnet plan-subagent pass (Protocol surface, conformance test plan, filesystem correctness, parse_mcp_md_text refactor risk, spec/36 coherence) caught 5 P0 + 15 P1 + 15 P2 = 35 findings BEFORE any code shipped (9 times the CorpusBackend PR 4 prep pass's 4 findings, tracking the v1.0-closer-arc complexity). /ship review army: 6 parallel reviewers (4 specialists: testing, maintainability, security, performance + Claude adversarial subagent + Codex adversarial via codex exec) surfaced 30 findings post-implementation including the cross-model triple-confirmed P0 (`mcp-registry show` leaked resolved `$VAR` env values to stdout, found independently by Codex + the security specialist + Claude adversarial) plus 5 P1 (OSError misclassified as permanent-absence violating MUST 7, malformed-section silent drop, CLI `read_paths=[]` skipping path-traversal validation, `MCPServerConnectFailed` escaping the CLI exception handler with a Python traceback, `backend_id` property absent from the Protocol declaration despite being implemented on the class) plus 8 mechanical P2 AUTO-FIXes (section names not validated before listing, `_redact_for_error_message` missing the DSN heuristic for non-scheme connection strings, vacuous test assertions, hardcoded `/tmp/dummy-agent` path, 6 late imports moved to top of file, unused Protocol import in filesystem.py, misleading "Re-probe" CLI help text on filesystem backend). All P0 + P1 + critical P2s addressed inline before push; 15 regression tests added covering each fix including `test_cli_show_does_not_leak_resolved_env_values` that asserts a resolved env value (the actual GitHub token) does NOT appear in stdout while the unresolved `$VAR` reference DOES (operator-debugging utility preserved without the leak). After PR 4 of 5 of #201 lands, `atomic-agents-stack` hits v1.0 with twelve of twelve backend protocols shipped; PR 1 extends the post-#285-revert /ship streak to 8. + - **`atomic-agents init` wizard PR 2 of 2: researcher + writer templates + Add-to-it recovery merge + 8 polish items** ([#94](https://github.com/dep0we/atomic-agents-stack/issues/94) -- init-wizard arc **PR 2 of 2**, arc closer). Operators now have three starter templates instead of one: `advisor` (general-purpose, Cautious autonomy), `researcher` (curiosity-first investigator with research integrity layer 1 ON, raw/ source intake, $1.50/day cost cap for multi-source synthesis, Cautious preset), and `writer` (voice-first content agent with Sonnet primary + Haiku fallback, drafts/ + revisions/ write paths, Cautious preset). Each template declares its operating mode explicitly per spec/01 (advisor and writer reactive; researcher hybrid for sustained multi-session investigations). The advisor template gains an `## Operating mode` section so all three templates conform to spec/01's mode-declaration requirement. Re-running `atomic-agents init` on an existing agent now offers a third recovery branch: **Add to it** (in addition to Overwrite and Cancel). The collision prompt in `_check_collision` offers three options: overwrite, add_to_it, cancel. Choosing add_to_it triggers section detection via the ATX h2 state machine (`_extract_h2_headers`, `_detect_sections`): code fences, HTML comments, and YAML frontmatter are skipped to avoid false positives; setext-style headings (underline) fail closed. Detection failure routes the operator back to overwrite/cancel via a plain-English message. On detection success, `_compute_merged_content` performs a file-level section-aware merge for every file in `TEMPLATE_SECTION_SCHEMA[template_name]`: the preamble before the first h2 is preserved verbatim; orphan h2 sections (operator-authored, not in schema) are preserved verbatim in their original position; schema h2 sections merge with operator preamble + existing h3 subsections preserved in order + new template h3 subsections appended at end. A unified diff preview (`_render_diff_preview`) is shown with CRLF and UTF-8 BOM normalization so Windows-originating vaults do not show every line as changed; if zero files changed the preview exits early with "up to date" and skips the Confirm prompt. On confirmation, each file is committed via `_io.atomic_write` (tmp + fsync + rename) directly into `agent_dir` in sorted relpath order -- no staging directory. A mid-commit KeyboardInterrupt prints a summary of written vs pending files before re-raising so the operator knows exactly what landed. Operator-authored memory notes (under `memory/` except `INDEX.md`), journal entries, log files, and raw documents are never touched; schema-owned scaffolding files (`memory/INDEX.md`, `wiki/INDEX.md`) ARE rewritten because they are template-owned routing/structure files. Missing template-owned files trigger a backfill path (label `[new file]` in the diff preview). 8 polish items from PR 1 adversarial review land in this PR: `DEFAULT_AGENTS_ROOT` is now lazy-computed in `get_agents_root()` so test monkeypatches of `HOME` work correctly; `_doctor_handoff` splits its try/except around `run_doctor` and `render_human` so a render-phase failure surfaces the exit code rather than masquerading as inconclusive; `_from_template` gains a symmetric length check before the regex match so a too-long name produces the correct error message; `_test_call` extracts an `_types(mod, *names)` helper for the exception catalog so the dispatch reads cleanly and tolerates missing SDK installs; `_translate_oserror` adds a specific message for `errno.ENOENT` ("the folder disappeared between collision check and overwrite") so operators get actionable guidance instead of a generic write failure; `_walk_traversable` converts from recursion to iterative deque with a `MAX_TEMPLATE_DEPTH=16` cap to defend future deep template trees; backup and staging directory timestamps now include microseconds (`%Y%m%dT%H%M%S_%fZ`) so two simultaneous overwrites within the same second produce distinct names; the persona-backend warning prints a credential-redacted URL via a new `redact_url_credentials` helper (drops `user:pass@` from netloc while keeping scheme + host + path visible, fixing the original advisor-warning pattern that hid the host entirely). PR 1 adversarial Round 2 deferred a `KeyboardInterrupt` cleanup gap (the existing `except Exception` did not catch KeyboardInterrupt because it inherits from BaseException, not Exception); PR 2 restructures all wizard cleanup paths to `except BaseException` with explicit `KeyboardInterrupt` re-raise after cleanup, plus an outermost handler in `run_init` that prints `"Canceled."` and returns exit code 130 (when KI lands mid-`_commit_merges`, the per-commit summary already printed the written vs pending file list). `--from-template ` and `--list-templates` are now fully CI-friendly: the non-TTY guard and the API key pre-flight both carve out these paths (per amended spec/35 MUST 2, 7, and 11), so a CI build can scaffold an agent without an interactive terminal and without the operator setting `ANTHROPIC_API_KEY` at build time (doctor catches the missing key later at first run). `_cmd_list_templates` enumerates all three templates with one-line descriptions. The action class glosses in spec/28 amend to explicitly classify web search HTTP GETs as `read_only` (they do not change external state), so researcher templates use the same Cautious preset as advisor without paying judge_required overhead on every search query. spec/35 grows to 15 normative MUSTs with MUST 5 updated to distinguish operator-authored data files from schema-owned scaffolding files (memory/INDEX.md and wiki/INDEX.md are template-owned and ARE rewritten; operator notes, journal, log, raw are not) and a new MUST 15 documenting the section-detection algorithm and file-level merge contract. New constants in `atomic_agents/init/constants.py`: `TEMPLATE_PRESET_DEFAULTS` per-template preset map, `TEMPLATE_SECTION_SCHEMA` per-template per-file h2 header schema (the source of truth that Add-to-it section detection validates against), `MAX_TEMPLATE_DEPTH`, three new MSG_ constants for the new recovery flow paths, and the `redact_url_credentials` helper. New `atomic_agents/init/templates/researcher/` (7 files, 360 LOC) and `atomic_agents/init/templates/writer/` (7 files, 352 LOC) trees use the same 12 locked template variables as advisor with template-specific section schemas. 59 net new tests across `test_init_cli.py` (--list-templates enumerates 3; researcher and writer choices accepted; unknown still rejected), `test_init_wizard.py` (section detection state machine; Add-to-it dispatch + detection; detection-failure fallback to cancel; file-level merge success + failure + KI-safe; _split_sections/_join_sections round-trip; _compute_merged_content preamble preservation + orphan h2 position + h3 preservation; CRLF + BOM normalization; all 8 polish items; per-template preset dispatch), `test_init_templates.py` (researcher + writer schema conformance; structural conformance per spec/01; ALL template variables used; zero em dashes per template; Operating mode sections present), and `test_init_smoke.py` (researcher and writer e2e happy paths; --from-template works without API key per the P3 carve-out). Test suite: 2939 + 50 skipped to 3051 collected, zero regressions. Pre-impl prep (4 parallel subagents) caught 13 SEVERE + 15 HIGH + 17 MEDIUM + 13 LOW = 58 findings across the brief BEFORE any code shipped (exceeding PR 1 of #94's 53 findings in a smaller scope), including 6 SEVERE locks emerged from cross-corroboration (known_templates not expanded with cli.py choices, `_default_template_vars` hardcoding the advisor preset for all templates, Operating mode section missing from IDENTITY.md across all templates, KeyboardInterrupt bypassing the cleanup block, TEMPLATE_SECTION_SCHEMA values not locked in the brief, spec/35 MUST 5 + MUST 15 text not drafted in the brief). Three architectural decisions surfaced via AskUserQuestion gates: web search action class (locked to read_only with spec/28 amendment), writer template model default (Sonnet primary + Haiku fallback with Opus upgrade path documented inline), and --from-template carve-out from MUST 7 (no API key required for scaffold; CI-friendly end-to-end). With PR 2 of the arc landing, Issue #94 closes: the half-day home-user deploy is now an under-10-minute first-run experience with three starter shapes plus a non-destructive recovery flow for re-running on existing agents. - **`atomic-agents init` wizard** ([#94](https://github.com/dep0we/atomic-agents-stack/issues/94) -- init-wizard arc **PR 1 of 2**). Operators can now run `atomic-agents init ` and have a callable home-user agent in under 10 minutes, including time spent thinking about what the agent should be. The wizard walks seven structured questions (name, mission, scope in/out, autonomy, voice, communication preferences, hard refusals), composes `persona/{IDENTITY,SOUL,USER}.md` + `tools.md` + `model.md` + `memory/INDEX.md` + `wiki/INDEX.md` deterministically from the answers, creates empty `journal/` and `log/` directories, and ends with a `doctor` health check on the new agent followed by an opt-in test call against the configured LLM. `--from-template advisor` skips the interview and scaffolds a Caldwell-shaped starter agent in under 30 seconds. `--list-templates` enumerates available templates. Re-running `init` on an existing agent name offers Overwrite (atomic backup+restore: rename existing to `.bak.`, write fresh, rmtree backup on success, restore on failure) or Cancel (default; pressing Enter is a no-op exit). The wizard refuses non-interactive terminals on the interactive Q&A path with a plain-English pointer to `--from-template`; `--from-template ` and `--list-templates` work in CI without a terminal. The opt-in test call catches `anthropic.RateLimitError`, `anthropic.AuthenticationError`, `anthropic.APIConnectionError` / `httpx.ConnectError`, `AtomicAgentsError`, and generic `Exception` with operator-friendly messages; every exception path exits status 0 (the scaffold succeeded; the call is best-effort). The wizard warns before any file write when `ATOMIC_AGENTS_PERSONA_BACKEND_URL` is set non-empty (the case where wizard output diverges from PersonaBackend's view); decline exits 0 with zero files written. ANTHROPIC_API_KEY pre-flight uses `_llm._get_key` directly so operators with the key in macOS Keychain or `~/.config/atomic_agents/keys.json` are not false-negatived. Closes the half-day deploy: the brief's seven pain points compress from approximately 4-5 hours total to approximately 5-10 minutes, with `mcp.md` configuration (pain 5) the only one explicitly deferred (`doctor` skips cleanly when absent). `rich` adopted as the canonical operator-facing CLI rendering library (documented in spec/35 as the rendering primitive future arcs migrate `doctor`, `bundle`, and `corpus` output to). spec/35 ships with 14 normative MUSTs that the implementation honors and that the adversarial review army verifies. New `atomic_agents/init/` package with `wizard.py` (the interactive flow), `constants.py` (the single source of truth for action class vocabulary, template variable names, error messages, reserved names, and the `agent_name` regex), and `templates/advisor/` (seven `.md` files using `string.Template` `${var}` substitution via `safe_substitute`). cli.py edits are bounded to one lazy import (inside `_cmd_init`, matching the existing pattern at `_cmd_doctor` and `_cmd_persona`), one `sub.add_parser("init", ...)` block with arguments, one dispatch case in the doctor/persona/corpus early branch, plus two Usage / Subcommands docstring lines. 50 net new tests across 5 files (`test_init_cli.py` argparse + dispatch, `test_init_wizard.py` Q1-Q7 + Q4 preset/customize + non-TTY + persona-backend warning + collision recovery + OSError translation + template substitution + agents_root single-resolution, `test_init_templates.py` advisor structure + locked variable conformance, `test_init_smoke.py` end-to-end with mocked LLM, `test_init_wheel_install.py` opt-in wheel build + install verification gated by `RUN_WHEEL_INSTALL_TESTS=1`). Test suite: 2889 + 48 skipped to 2939 + 50 skipped, zero regressions. Pre-impl prep (4 parallel subagents) caught 8 SEVERE + 21 HIGH + 16 MEDIUM + 8 LOW findings across the brief BEFORE any code shipped, including the Q4 action-class vocabulary mismatch (the brief used shorthand `audit`/`judge` where spec/28 defines `allow_with_audit`/`judge_required`), the hatchling force-include misconfiguration (templates auto-include via existing `packages = ["atomic_agents"]`; the brief's "add a force-include line" was a no-op or build break), the pre-flight resolver chain incompleteness (Keychain and `keys.json` operators got false-negative on env-var-only check), the USER.md "Things to avoid" section missing (Q7 originally routed only to tools.md; now renders to both with surface-appropriate phrasing per the persona-vs-enforcement separation), and the overwrite atomicity gap (`rm -rf` then write is not crash-safe; backup+restore preserves operator work). diff --git a/docs/spec/36-mcp-server-registry-backend.md b/docs/spec/36-mcp-server-registry-backend.md new file mode 100644 index 0000000..3601fbe --- /dev/null +++ b/docs/spec/36-mcp-server-registry-backend.md @@ -0,0 +1,696 @@ +# spec/36: MCPServerRegistryBackend Protocol + +> **Status:** DRAFT. Not locked until PR 5. Lock happens when HTTP write paths ship and the conformance suite covers all 10 MUSTs across both reference implementations. + +--- + +## Origin + +Carved out from [#64](https://github.com/dep0we/atomic-agents-stack/issues/64) (`ToolRegistryBackend`) per spec/25 Decision 3. Filed as [#201](https://github.com/dep0we/atomic-agents-stack/issues/201) before #64 PR 4 merged. The twelfth and final backend protocol for v1.0. + +**Current arc state (2026-06-01 EOD):** Eleven of twelve backend protocols shipped. CorpusBackend (#65) arc closed at PR #316 (`e91286c`); only MCPServerRegistryBackend remains for v1.0. The seven-clean-ship streak in the post-#285-revert period (#286, #293, #294, #297, #298, #304, #316) holds; PR 1 of this arc extends the streak to 8. + +**Cross-links:** +- spec/19. MCP integration. `MCPServerSpec` dataclass shape unchanged. `MCPClientPool` keeps subprocess lifecycle. +- spec/21. LockBackend. `LockBackend.acquire(name, timeout)` signature used in filesystem install/uninstall. +- spec/24. AgentProfileBackend. `mcp_md_raw` snapshot pattern unchanged; `mcp_servers_resolved` sibling field added (Decision 9). +- spec/25. ToolRegistryBackend. Decision 3 carve-out is the origin of this arc. +- spec/32. PolicyBackend. `_policy_backend_was_explicit` delegate-threading precedent. +- spec/33. PersonaBackend. Most recently locked spec; Implementer Contract structure used as template. +- spec/34. CorpusBackend. `@property capabilities` declaration and 9-MUST contract used as template. + +--- + +## Shipping plan (5 PRs, revised per Decision 6) + +- **PR 1.** Protocol scaffolding + dataclasses + `FilesystemMCPServerRegistryBackend` reference impl + spec/36 DRAFT + ~60 conformance and filesystem-specific tests + `atomic-agents mcp-registry list/show/validate/refresh-capabilities` CLI (read-only subcommands). +- **PR 2.** `agent.py:_load_config()` wiring + per-runner kwargs on `OutcomeRunner`/`EvalRunner`/`DreamRunner` + `delegate.py` explicit-only threading + `AgentProfile.mcp_servers_resolved` sibling field (spec/24 addendum) + IRON RULE byte-identical regression suite + `doctor.check_mcp_server_registry_backend` coherence check + ~35 tests. Filesystem-only at this PR; the audit-snapshot pattern is proven before HTTP introduces additional risk. +- **PR 3.** Filesystem CLI install/uninstall subcommands + `_io.atomic_write` + `LockBackend` lease integration + ~15 tests. Filesystem-only write path; the LockBackend acquisition pattern is proven before HTTP introduces wire-format atomicity concerns. +- **PR 4.** `HTTPMCPServerRegistryBackend` reference impl (read-only mode: `list/load/load_all/validate`) + tier-1/2/3 capability negotiation + lazy probe with `probe_failure_cache_s` + fail-closed wiring + auth (bearer token via env var) + URL credential redaction + ~31 tests. HTTP backend arrives with the audit pattern and LockBackend pattern already proven. +- **PR 5.** HTTP install/uninstall (write path with capability gating) + tier-3 audit capability advertisement + spec/36 LOCKED at 10 MUSTs + arc-closer CHANGELOG + status flip to "Twelve of twelve backend protocols shipped" across CLAUDE.md, README.md, ROADMAP.md + v1.0 RELEASE candidate. + +--- + +## Overview + +`MCPServerRegistryBackend` is the **twelfth** open Protocol in the protocol-pattern series (Memory, LLM, Judge, Lock, Log, AgentProfile, ToolRegistry, Mandate, Policy, Persona, Corpus, **MCPServerRegistry**). It abstracts the set of MCP servers this agent is configured to use behind a Protocol so the framework's core stays small and alternate catalog substrates (HTTP-backed, SaaS tenant catalog, org-internal registry) drop in without forking. + +`AtomicAgent` exposes `agent.mcp_server_registry_backend: MCPServerRegistryBackend`. The existing `MCPClientPool` (`atomic_agents/mcp.py:126`) stays as the runtime subprocess-lifecycle class. The two seams compose: + +``` +backend.list_mcp_servers() -> list[MCPServerRef] # this agent's mounted servers (metadata) +backend.load_mcp_server(name) -> MCPServerSpec # one mounted server (full spec) +backend.load_all_mcp_servers() -> list[MCPServerSpec] # all this agent's mounted specs (bulk) +agent.mcp_pool = MCPClientPool(specs, agents_root) # subprocess lifecycle (spec/19, unchanged) +``` + +The Protocol is **not** a generic MCP-server-management API and is **not** an org-wide catalog browser. It is the minimal contract the framework needs to keep the markdown-as-config aesthetic (CLAUDE.md §7) and the audit trail invariant (CLAUDE.md §5) intact while making SaaS, org, and public-registry futures real at v1.0. + +**Backwards-compatibility promise.** Existing `/mcp.md` files work unchanged. Filesystem backend wraps the existing `parse_mcp_md()` semantics. All 166 existing `AtomicAgent(...)` construction sites observe byte-identical behavior when no backend is configured. Wiring lands in PR 2. + +--- + +## Why MCP servers need a Protocol seam + +Today, an atomic agent's MCP server set is declared in `/mcp.md` per spec/19. The `parse_mcp_md()` function at `atomic_agents/mcp.py:410` walks the file and produces a `list[MCPServerSpec]`; `MCPClientPool(_specs, agents_root)` consumes the list, manages subprocess lifecycle (connect_all in try, disconnect_all in finally), and registers MCP-provided tools into the agent's `ToolRegistry`. This works on a single host where each operator hand-maintains their agent's `mcp.md`. + +It breaks the moment the deployment shape becomes: + +- **SaaS multi-tenant catalog.** A hosted dashboard publishes a vetted set of MCP servers per tenant; each tenant's agents subscribe. The framework needs a catalog API, not a per-agent file walk. +- **Org-internal vetted registry.** A company posts a manifest of approved MCP servers with mandatory audit logging, capability flags, and allow/deny semantics. Agents subscribe over HTTP. The framework needs to consume the catalog without owning storage. +- **Public registry / package-registry-style discovery.** The MCP ecosystem is publicly discussing a "server registry" protocol analogous to npm or PyPI for MCP servers. Agents discover available servers over HTTP. The framework needs a catalog Protocol that survives upstream protocol evolution. +- **Cross-agent install audit.** "Operator A enabled server X for tenant T at 2026-04-01" needs a catalog-level audit trail. Today's per-agent `mcp.md` doesn't have one. + +The Protocol is the **mounted server source** seam for MCP: it returns the set of MCP servers THIS AGENT uses, scoped per-agent via `agent_root` (filesystem) or `agent_scope` (HTTP / SaaS). The catalog-server's view of "what's available org-wide" lives upstream of the framework in the catalog server's own admin tooling; the framework's HTTP backend consumes only the agent's mounted subset. + +--- + +## Module layout + +``` +atomic_agents/mcp_registry/ +├── __init__.py # registry: register_mcp_server_registry_backend / +│ # get_mcp_server_registry_backend / +│ # list_mcp_server_registry_backends + +│ # get_default_mcp_server_registry_backend factory + +│ # _redact_for_error_message helper +├── types.py # canonical types: MCPServerRef, MCPServerRegistryCapabilities, +│ # ValidationResult +├── backend.py # MCPServerRegistryBackend Protocol contract + exception classes + +│ # _default_load_all helper +├── filesystem.py # FilesystemMCPServerRegistryBackend reference implementation +└── http.py # HTTPMCPServerRegistryBackend reference implementation + + # tier negotiation +``` + +Package name `mcp_registry` (not `mcp` since spec/19 owns that; not `registry` since spec/25 owns that). The split into `types.py` separate from `backend.py` matches the precedent across `profile/`, `logs/`, `registry/`, `corpus/`, `persona/`. + +Per the project's `register_backend_placement_convention` learning (2026-05-29): `register_mcp_server_registry_backend` lives in `__init__.py` alongside the factory and redaction helper, matching the dominant pattern (6 of 10 backends including `locks/`, `logs/`, `profile/`, `registry/`, `judge/`, and `corpus/`). + +--- + +## Load-bearing design decisions + +These decisions are surfaced by reading the existing MCP code surface (`atomic_agents/mcp.py:84` for `MCPServerSpec`; `agent.py:2337` for the discovery overlay; `parse_mcp_md` for env-var resolution and path-traversal validation) and pressure-tested against the recurring trap shapes from the 11 prior arcs. Each has a Why clause that future contributors and alternate backend authors need before changing the shape. + +### Decision 1: `MCPServerRegistryBackend` owns the mounted-server-source layer; `MCPClientPool` keeps subprocess lifecycle + +`atomic_agents/mcp.py:MCPClientPool` (line 126) is the per-agent subprocess pool used during `call()` (connect_all in try; disconnect_all in finally). It stays. `MCPServerRegistryBackend` lives one level above: it is the **mounted server source** (what MCP servers THIS AGENT uses) that produces `MCPServerSpec` instances which then get consumed by the pool at agent construction. + +**Per-agent scoping is structural, not optional.** Filesystem backend is scoped to `agent_root` (one `mcp.md` per agent). HTTP backend is scoped to `agent_scope` (every wire request includes `?agent_scope=`; the catalog server filters to the mounted subset for that scope server-side). A backend that returns the org-wide "what's available to install" surface from `list_mcp_servers()` is non-conformant; that is the catalog server's own admin tooling, not the framework's responsibility. + +**Why:** spec/19's `connect_all` / `disconnect_all` discipline is locked. The `finally`-block teardown invariant prevents process leaks; replacing it would touch every code path that spawns or tears down a subprocess. Mounted-server-source and lifecycle are different concerns; merging them violates the layers-compose principle (CLAUDE.md §3) and inflates the Protocol surface with subprocess-management methods that filesystem-backed sources don't need. The pattern parallels `ToolRegistryBackend` (spec/25): the Protocol covers which tools this agent uses, not the in-memory invocation machinery. + +**Framework-level behavior on catalog unreachable:** `_load_config()` MUST re-raise `MCPRegistryUnavailable` if `load_all_mcp_servers()` fails at agent construction (fail-closed per Decision 1). No silent fallback to a different backend; no soft-degrade to empty pool. Operator's explicit backend pin is respected. The operator-facing error message names the resolved backend with credentials redacted per MUST 4. + +### Decision 2: `mcp.md` is the filesystem reference's file format, NOT the universal Protocol interface + +Today's `/mcp.md` (spec/19) stays unchanged. Future `HTTPMCPServerRegistryBackend` / `SaaSMCPServerRegistryBackend` / `OrgRegistryMCPServerRegistryBackend` backends do NOT need an `mcp.md` file. They use a CLI (`atomic-agents mcp-registry install`) or a dashboard. The mcp.md format is preserved as the filesystem reference's documented file format, not as the Protocol's universal interface. + +**Why:** the catalog is what's available; the file is one of many possible storage shapes. Locking mcp.md as the universal interface would force a SaaS deployment to invent a file just to satisfy the Protocol. Same shape as `ToolRegistryBackend` (filesystem reads `/tools/.{md,py}`; SQLite reads rows; HTTP reads JSON). Markdown-as-config (CLAUDE.md §7) is a per-backend aesthetic, not a Protocol-wide constraint. + +### Decision 3: Two reference implementations, filesystem and HTTP-client + +The Protocol arc ships two reference backends: + +- **`FilesystemMCPServerRegistryBackend(agent_root, read_paths)`:** wraps the existing `parse_mcp_md()` semantics; provides atomic install/uninstall via `_io.atomic_write` of `mcp.md`. +- **`HTTPMCPServerRegistryBackend(catalog_url, agent_scope, *, auth_token=None, request_timeout_s=10)`:** talks JSON over HTTPS to a catalog server; adapts to multiple server tiers via capability negotiation (Decision 4). + +**Why:** matches the README's existing v1 promise. The HTTP-client reference makes "behind an HTTP service" concrete for the first time. SQLite was considered as the second ref impl (matches the 11-arc precedent) but rejected because MCP catalogs in production are HTTP-shaped, not SQLite-shaped. The three non-filesystem futures (SaaS tenant catalog, org-internal vetted registry, public package registry) are all HTTP. Shipping SQLite would have been precedent-following without serving the actual user shape. Per `feedback_atomic_agents_best_not_cheapest`: default to BEST. HTTP is BEST here. + +### Decision 4: HTTP backend uses tier-negotiated capability handshake; ONE implementation adapts to multiple server tiers + +The framework's HTTP backend works against three documented catalog-server shapes: + +- **Tier 1: Read-only public catalog.** Server implements `GET /mcp-servers` and `GET /mcp-servers/` (REQUIRED). `GET /mcp-servers//validate` is OPTIONAL; if absent, HTTP backend returns `ValidationResult(ok=False, errors=["catalog server does not implement /validate; cannot statically validate"], warnings=[])`. Auth optional. Use case: a company posts a manifest of vetted MCP servers behind nginx; tenants read it; install lives elsewhere. +- **Tier 2: Read-write authenticated catalog.** Tier 1 plus `POST /mcp-servers` and `DELETE /mcp-servers/`. Bearer-token auth required. Per-tenant scoped storage via the `agent_scope` constructor argument. Use case: SaaS deployment where each tenant has their own catalog; framework's CLI is the canonical install surface. +- **Tier 3: Admin catalog with capability advertisement.** Tier 2 plus `GET /capabilities` (advertises audit logging, custom capability flags, server-side validation policies). Use case: enterprise deployments with compliance requirements. + +The framework ships ONE `HTTPMCPServerRegistryBackend` implementation that probes capabilities at first non-construction call (lazy; per MUST 2 in the Implementer Contract). The probe is side-effect-free and uses HTTP-standard method-discovery semantics. + +**Capability probe sequence (deterministic, side-effect-free):** + +1. `GET /capabilities`: if 200, parse server tier from response body. Expected JSON shape: `{"tier": , "supports_install": bool, "supports_uninstall": bool, "supports_audit": bool, "wire_version": ""}`. This is authoritative; values from this endpoint win. +2. If `GET /capabilities` returns 404 (server doesn't implement the endpoint), fall back to method discovery via `OPTIONS /mcp-servers`. The HTTP-standard `Allow` response header lists supported methods. Tier inference: `Allow: GET` only means tier 1 (read-only); `Allow: GET, POST, DELETE` means tier 2 (read-write). Tier 3 is only detectable via the explicit `GET /capabilities` endpoint; absence of `/capabilities` implies the server is at most tier 2. +3. If `OPTIONS /mcp-servers` returns 404 or 405 (server doesn't implement OPTIONS), default to **tier 1 (read-only)**. Conservative fallback; the operator can run `atomic-agents mcp-registry refresh-capabilities` after confirming the catalog server supports more. +4. If `GET /capabilities` OR `OPTIONS /mcp-servers` returns 5xx or fails with a network error, raise `MCPRegistryUnavailable`. Backend caches the failure for `probe_failure_cache_s` seconds before re-probing on the next call; explicit `.refresh_capabilities()` always bypasses the cache. +5. If `GET /capabilities` returns 401 (auth required and `auth_token` absent), raise `MCPRegistryAuthRequired`. Caching applies. + +The backend's `capabilities` property returns the **runtime view** reflecting what the connected server actually allows. Static class capabilities (`supports_install=True` as the class default) are distinct from runtime capabilities (`supports_install` may report False if the catalog server is tier 1). + +**Mid-session tier regression (cached capability stale).** If a tier-2 server later regresses to tier 1 (e.g., admin disables writes), the backend's cached capability is stale. Behavior contract: a stale `supports_install=True` followed by a `POST /mcp-servers` that returns 405 from the now-tier-1 server triggers an inline re-probe (one extra round-trip), updates the cached capabilities, and raises `NotImplementedError` to the caller (consistent with how a statically-False capability behaves). No silent retry; the operator-facing error message names the tier change explicitly. + +**Why:** the wire-format-divergence risk vs. upstream MCP ecosystem registry-protocol discussions becomes manageable when the framework's HTTP backend can adapt across multiple server shapes. When upstream MCP ships a registry protocol, it slots in as another tier (tier 4+) without breaking tiers 1-3. spec/36 v1.0 documents the spectrum; future tiers are additive, not revisions. This is the structural escape hatch, not just a soft mitigation. + +### Decision 5: Unified install path across all backends, with capability flags that evolve as methods land + +Both reference implementations target `supports_install=True` as the **eventual static class default**, but the flag flips True only at the PR where the method ships. Capability honesty (MUST 3) is preserved at every PR by aligning the static class default with the method's implementation state: + +| Backend | PR 1 | PR 2 | PR 3 | PR 4 | PR 5 | +|---|---|---|---|---|---| +| `FilesystemMCPServerRegistryBackend.capabilities.supports_install` | False (no install method yet) | False (still no method) | **True (method lands)** | True | True | +| `HTTPMCPServerRegistryBackend.capabilities.supports_install` | n/a | n/a | n/a | **False static class default** (read-only mode at PR 4) | **True static class default** (dynamic per tier) | + +``` +atomic-agents mcp-registry install github # works on filesystem from PR 3 onward +atomic-agents mcp-registry install github # works on HTTP tier 2+ from PR 5 onward +``` + +**Why staggered class defaults:** MUST 3 (capability honesty) says if `supports_install=True` is reported, calling `install()` must not raise `NotImplementedError`. Shipping `supports_install=True` at PR 1 while `install()` is unimplemented would be a capability lie; the conformance suite would have to `@pytest.mark.skip` MUST 9 tests to mask it. Instead: the static class default flips True at the PR where the method first works. The conformance suite runs all 10 MUST tests at every PR; MUST 9 tests trivially pass at PR 1-2 (capability False; conformance asserts the absent-method path raises `NotImplementedError`) and become meaningful at PR 3. No skip masks; no capability lies in any PR's snapshot. + +**Why:** same operator UX across every deployment shape. The framework's CLI is the canonical install surface. The README's "same agent definitions, same call flow, different backends" promise extends to operator commands at v1.0. Per `feedback_no_scaling_down_in_planning`, the full vision is the target. + +### Decision 6: Static-vs-dynamic capability flags + +Capability flags on `MCPServerRegistryCapabilities` distinguish two concepts: + +- **Static (class-level):** what the backend's class is capable of doing. `FilesystemMCPServerRegistryBackend` and `HTTPMCPServerRegistryBackend` both static-support install and uninstall. +- **Dynamic (runtime):** what the connected resource actually allows right now. For filesystem, static equals dynamic (no remote dependency). For HTTP, dynamic depends on the catalog server's tier; the backend negotiates and reflects the truth. + +**Why:** filesystem backend has no remote dependency, so its `capabilities` property is a constant. HTTP backend's runtime depends on the catalog server's response to the capability probe; static-only capability flags would lie. The capability honesty MUST (#3 in the Implementer Contract) requires claim-vs-behavior parity, which means HTTP backend's `capabilities` property MUST reflect the runtime, not the static class default. This is novel relative to the 11 prior arcs (which all had static-equals-dynamic capabilities). The Implementer Contract documents it explicitly so future Postgres / SaaS / Redis backends know which kind of capability flags they own. + +### Decision 7: Env-var references resolve at `load_mcp_server` time on ALL backends + +Today's `parse_mcp_md()` resolves `$VAR` references in `env:` lines at parse time, against the framework's process environment. The resolved values land in `MCPServerSpec.env`. Under the Protocol: + +- All backends MUST resolve `$VAR` references at `load_mcp_server(name)` time, against the client process environment. +- HTTP catalog servers MAY store unresolved `$VAR` strings (the wire format permits them); the framework's HTTP client resolves at materialization. +- Filesystem backend preserves today's behavior but moves the resolution from `parse_mcp_md` time to `load_mcp_server` time. + +**Why:** substrate-agnostic security. An HTTP catalog server returning resolved values from its OWN process env would silently leak secrets across deployment boundaries (the server's `$GITHUB_PAT` is not the client's `$GITHUB_PAT`). Client-side resolution is the only secure default. + +**Implementation note (spec/19 addendum):** the existing `parse_mcp_md_text()` at `atomic_agents/mcp.py:432` resolves `$VAR` references inline inside `_build_spec()` at `mcp.py:558-566`. PR 1 adds an optional `resolve_env: bool = True` parameter to `parse_mcp_md_text` (and `_build_spec`); existing callers (`parse_mcp_md`, `doctor.py:678`) keep the True default and observe byte-identical behavior. `FilesystemMCPServerRegistryBackend` calls with `resolve_env=False` so spec values come back with unresolved `$VAR` strings, then performs resolution itself inside `load_mcp_server(name)`. spec/19 gets an addendum note pointing to spec/36's resolution-timing contract; no normative change to spec/19's existing "resolved at parse time" claim for direct `parse_mcp_md` callers. + +### Decision 8: Path-traversal validation moves inside `load_mcp_server` on the filesystem backend + +Today `parse_mcp_md(read_paths)` validates path-shape args at parse time (per spec/19 §"Path-traversal best-effort"). Under the Protocol: + +- Filesystem backend captures `read_paths` at construction (`FilesystemMCPServerRegistryBackend(agent_root, read_paths)`). +- `load_mcp_server(name)` applies the path-traversal check at materialization time. +- The Protocol surface `load_mcp_server(name) -> MCPServerSpec` does NOT take a `read_paths` parameter; per-backend security policy can vary. + +**Why:** the universal Protocol surface stays clean. `read_paths` is a filesystem-only concept (HTTP catalog server's responses arrive as text, with no filesystem-path semantics; the catalog server is trusted to have validated its own content). Leaking `read_paths` into the Protocol would force HTTP / SaaS backends to fake a value they don't have. + +### Decision 9: `AgentProfile` gains a sibling `mcp_servers_resolved` field; `mcp_md_raw` stays for backward-compat (spec/24 D1 addendum) + +The existing per-agent snapshot discipline (`AgentProfile.mcp_md_raw` captures the unresolved mcp.md text at snapshot time, per spec/24 Decision 1) stays unchanged for filesystem backend. For HTTP / SaaS backends, `mcp_md_raw` would be empty/`None` and the audit snapshot would NOT record what specs the agent actually ran with. That breaks the framework's audit-as-structural invariant (CLAUDE.md §5). + +**Fix (spec/24 addendum, additive):** AgentProfile gains a sibling field: + +```python +mcp_servers_resolved: list[MCPServerSpec] = field(default_factory=list) +``` + +`agent.py:_load_config()` populates this field with the materialized MCPServerSpec list (the same list passed to `MCPClientPool`) at agent construction, BEFORE the pool consumes them. For filesystem: both fields populate; the audit trail has redundant information (text and parsed specs). For HTTP: only `mcp_servers_resolved` populates; the audit trail still answers "what specs did this agent run with at time T?" + +**Why:** the framework promises "audit trail is structural" (CLAUDE.md §5) and "same agent definitions, same call flow, same audit trail, different backends" (README throughline). A sibling field substrate-agnostic-by-construction is the minimal additive fix. spec/24 LOCK gets a small addendum; the existing `mcp_md_raw` field stays for backward-compat so legacy callers see byte-identical behavior on filesystem. + +--- + +## Canonical types + +All types are defined in `atomic_agents/mcp_registry/types.py`. + +### `MCPServerRef`: the unit of catalog listing + +```python +@dataclass(frozen=True) +class MCPServerRef: + name: str # operator-chosen short name (matches MCPServerSpec.name) + description: str = "" # operator-readable note (defaults to empty per MCPServerSpec parity) + transport: str = "stdio" # catalog filtering by transport + version: str | None = None # reserved (matches ToolRegistry Decision 4) + source: str = "" # backend-specific origin marker +``` + +`MCPServerRef.to_dict()` / `from_dict()` round-trip is byte-shape preserving for every field. The Ref carries metadata only; it does NOT include `command` / `args` / `env` (those are part of the materialized `MCPServerSpec` from spec/19, returned by `load_mcp_server(name)`). This lazy/eager distinction matches `ToolRegistryBackend` Decision 5. + +**Projection from `MCPServerSpec`:** `install(spec) -> MCPServerRef` constructs the returned Ref by projecting `name`, `description`, `transport` from the input `MCPServerSpec`; `version` defaults to None; `source` is set by the backend (e.g., filesystem returns `source=f"mcp.md#section:{name}"`; HTTP returns `source=f"{catalog_url}/mcp-servers/{name}"`). The projection is mechanical; the conformance suite asserts the round-trip. + +### `MCPServerRegistryCapabilities` + +```python +@dataclass(frozen=True) +class MCPServerRegistryCapabilities: + supports_install: bool # filesystem: static True (at PR 3+); HTTP: dynamic per tier + supports_uninstall: bool # filesystem: static True (at PR 3+); HTTP: dynamic per tier + supports_capability_handshake: bool # True only on HTTP backend (Decision 4) + supports_audit: bool # reserved; tier-3 HTTP servers flip True; both ref impls return False at v1.0 + durable: bool # True on both ref impls + # durable=False is reserved for future in-memory test-fixture backends +``` + +Conformance tests assert claim-vs-behavior parity (per Implementer Contract MUST 3). Mirrors `ProfileCapabilities` / `LogCapabilities` / `CorpusCapabilities` precedent. + +### `ValidationResult` + +Same shape as `ToolRegistryBackend` (spec/25 §"Canonical types"): + +```python +@dataclass(frozen=True) +class ValidationResult: + ok: bool # equivalent to `not errors` + errors: list[str] # server unusable + warnings: list[str] # server usable but flagged +``` + +--- + +## `MCPServerRegistryBackend` Protocol surface + +```python +@runtime_checkable +class MCPServerRegistryBackend(Protocol): + @property + def backend_id(self) -> str: ... + + # Core discovery (always implemented) + def list_mcp_servers(self) -> list[MCPServerRef]: ... + def load_mcp_server(self, name: str) -> MCPServerSpec: ... # MCPServerSpec from spec/19 + def load_all_mcp_servers(self) -> list[MCPServerSpec]: ... # bulk; default impl iterates list+load + def validate(self, name: str) -> ValidationResult: ... + + # Capability-gated lifecycle + def install(self, spec: MCPServerSpec) -> MCPServerRef: ... + def uninstall(self, name: str) -> None: ... + + @property + def capabilities(self) -> MCPServerRegistryCapabilities: ... + + def refresh_capabilities(self) -> MCPServerRegistryCapabilities: ... + def close(self) -> None: ... +``` + +**Property vs method:** `capabilities` is a `@property` (read like an attribute: `backend.capabilities.supports_install`); `refresh_capabilities()` is a method (call to trigger re-probe: `backend.refresh_capabilities()`). This matches CorpusBackend at `corpus/backend.py:70-71`. All design call sites and test assertions use the property syntax `backend.capabilities.supports_X`, NOT `backend.capabilities().supports_X`. + +`load_all_mcp_servers()` exists for the agent-construction hot path (avoids N+1 HTTP round-trips on materialization). Default impl in `mcp_registry/backend.py`: + +```python +def _default_load_all(backend: MCPServerRegistryBackend) -> list[MCPServerSpec]: + return [backend.load_mcp_server(ref.name) for ref in backend.list_mcp_servers()] +``` + +Backends MAY override for performance; HTTP backend overrides with a single bulk GET (`GET /mcp-servers?expand=spec`). Backends overriding MUST preserve consistency: `load_all_mcp_servers()` output is semantically equivalent to the default-impl iteration (MUST 10). + +`refresh_capabilities()` is on the Protocol surface (not HTTP-backend-specific) so the CLI does not duck-check. Filesystem implementation: returns the cached static `MCPServerRegistryCapabilities` instance (no-op refresh; static capabilities don't change). HTTP implementation: re-runs the capability probe sequence (bypassing any cache) and returns the updated runtime view. + + + +### `list_mcp_servers` semantics + +- Returns `[]` (NOT raise) when catalog is empty or backing resource is absent. +- Lexicographic order by `name`. Database / HTTP backends MUST sort consistently. +- Cheap by construction: no subprocess spawn, no handler import, no MCP server connection. + +### `load_mcp_server` semantics + +- Returns a fully-populated `MCPServerSpec` (the spec/19 dataclass) with all 6 fields populated. +- MUST raise `MCPServerNotInRegistry` when `name` is absent from the catalog. +- MUST validate `name` against path-traversal at the API boundary BEFORE any backend access. +- MUST resolve `$VAR` env-var references against the client process environment (per Decision 7). +- Unresolvable `$VAR` reference raises `MCPServerConnectFailed` (matches spec/19's existing exception shape; the registry layer does not invent a new exception class for env-var failures). + +### `validate` semantics + +- Static check; does NOT spawn the MCP server subprocess (matches `ToolRegistryBackend` Decision 6). +- Filesystem implementation checks: descriptor parses; `command` exists on PATH (best-effort `shutil.which`); `$VAR` references resolve; `transport` value is recognized. +- HTTP implementation: calls `GET /mcp-servers//validate`; relays the server's verdict. +- Returns `ValidationResult(ok, errors, warnings)`. MUST NOT raise on missing server (returns `ValidationResult(ok=False, errors=["server 'X' not in registry"], warnings=[])` instead). + +### `install` / `uninstall` semantics + +Unlike `ToolRegistryBackend.install(source: str, version: str | None = None)` which takes a package source string for PyPI/git discovery, `MCPServerRegistryBackend.install(spec: MCPServerSpec)` takes the full spec because MCP servers don't have package-index sources at v1.0. + +- Both MAY raise `NotImplementedError` when the runtime capability flag (`capabilities.supports_install` / `supports_uninstall`) reports False. Filesystem always allows (at PR 3+); HTTP depends on tier. +- `install(spec)` MUST be atomic at the server-name level (per MUST 9). Filesystem: atomic edit of `mcp.md` via `_io.atomic_write` with collision detection. HTTP: `POST /mcp-servers` returning 409 on collision; backend translates to `MCPServerAlreadyInstalled`. +- `uninstall(name)` MUST be idempotent. Uninstalling a name that doesn't exist is a no-op (no exception). Matches `ToolRegistryBackend` uninstall precedent. +- Both MUST validate `name` against path-traversal at the API boundary. + +### `capabilities` semantics + +- Returns `MCPServerRegistryCapabilities` reflecting the **runtime** view. +- Filesystem: static (constant across the backend's lifetime). +- HTTP: dynamic; reflects connected catalog server's tier. Lazy probe at first non-construction call; explicit `.refresh_capabilities()` method available for operators who want to re-probe after a server upgrade. + +### `close` semantics + +- Idempotent (matches MUST 6). +- Filesystem: no-op (no resources to release). +- HTTP: closes the underlying `httpx.Client()`. + +--- + +## Reference implementations + +### `FilesystemMCPServerRegistryBackend` + +Ships at PR 1 (read paths) and PR 3 (install/uninstall). + +```python +FilesystemMCPServerRegistryBackend( + agent_root: Path, + read_paths: list[Path], + *, + lock_backend: LockBackend | None = None, +) +``` + +- `agent_root` is the agent's directory. mcp.md lives at `/mcp.md`. +- `agent_root` MAY not exist at construction (matches `FilesystemToolRegistryBackend` precedent). `list_mcp_servers()` returns `[]` for missing / empty mcp.md. +- `read_paths` is the list of paths the agent declares it may read (from `tools.md`). Used by `load_mcp_server(name)` to apply path-traversal validation (per Decision 8). +- `lock_backend` (per MUST 9): if absent, defaults to `FilesystemLockBackend(agent_root / ".mcp_registry.lock")`. The default lock file `.mcp_registry.lock` is **distinct** from the agent's main `.lock` (which the runtime acquires inside `agent.call()` for cost-cap and run-id serialization); the two locks never overlap and cannot deadlock even on non-reentrant `LockBackend` implementations. Operators passing a custom `lock_backend` MUST scope it to `.mcp_registry.lock` (or an equivalent registry-specific resource), NOT the agent's main lock; the docstring on the constructor parameter calls this out explicitly. +- Reuses `parse_mcp_md_text()` from `atomic_agents/mcp.py:432` with the new `resolve_env=False` parameter (per Decision 7's implementation note); the backend resolves `$VAR` references itself at `load_mcp_server(name)` time. +- `name` validated against path-traversal at API boundary: refuses `/`, `\\`, `..`, leading `.`, control chars, newlines. + +**`install(spec)` atomicity (PR 3):** + +1. `lock_backend.acquire("mcp_registry", timeout=30)` (per spec/21's `acquire(name="", timeout=0.0)` signature at `locks/backend.py:82`). +2. Read current mcp.md. +3. Parse with `parse_mcp_md_text(content, resolve_env=False)`. +4. Check name collision; if present, release lock and raise `MCPServerAlreadyInstalled`. +5. Append new `## ` section to the markdown. +6. `_io.atomic_write` the merged content back (temp + fsync + rename + parent dir fsync). +7. Release lock via the returned `LockHandle.release()` (the spec/21 idiom). + +Crash between steps 1 and 6 leaves the original mcp.md intact; crash between 6 and 7 leaves the new content with a stale lock (lease expiry per spec/21's `LockLost` discipline handles recovery). + +**`uninstall(name)` atomicity (PR 3):** same shape; idempotent. Missing-name case completes steps 1-3, observes the name is absent, releases the lock, returns without raising. + +**`validate(name)`:** runs descriptor parses; `command` exists on PATH (warn if absent; do not fail validation since the agent's PATH at run time may differ); `transport` value recognized; `$VAR` refs resolve against current `os.environ` (warn if not). + +**Capabilities (PR 1/2):** `supports_install=False, supports_uninstall=False, supports_capability_handshake=False, supports_audit=False, durable=True`. Flips to `supports_install=True, supports_uninstall=True` at PR 3 when the methods land. Static; no runtime probe required. + +**`list_mcp_servers()`** MUST call `parse_mcp_md_text(content, resolve_env=False, read_paths=None)` with `read_paths=None` explicitly, NOT `self._read_paths`. Passing `self._read_paths` at list time triggers `validate_mcp_server_args` at parse time and violates Decision 8's "validation at `load_mcp_server` boundary" invariant (prep notes Theme 3). + +**`load_mcp_server(name)`** MUST call `validate_mcp_server_args(spec, self._read_paths)` from `mcp.py:589` after materializing the MCPServerSpec but before returning. Do NOT reimplement the check inline. + +**`load_all_mcp_servers()`** at PR 1 delegates to `_default_load_all(self)` (NOT a custom loop). MUST 10 conformance asserts set equality AND ordering parity; a custom loop could silently drift on sort order (prep notes Theme 4). + +### `HTTPMCPServerRegistryBackend` + +Ships at PR 4 (read-only) and PR 5 (write paths). + +```python +HTTPMCPServerRegistryBackend( + catalog_url: str, + agent_scope: str, + *, + auth_token: str | None = None, + request_timeout_s: float = 10.0, + probe_failure_cache_s: float = 60.0, +) +``` + +`request_timeout_s` (default 10s) is the per-request HTTP timeout (how long to wait for one response). `probe_failure_cache_s` (default 60s) is the failure-cache timeout (how long after a probe failure before re-probing). The two are deliberately separate: a 10s request timeout makes sense for one HTTP call; a 10s failure cache would cause thrashing on sustained catalog outages (30+ probe attempts in 5 minutes). With 60s failure cache, sustained outage produces approximately 5 probes per 5 minutes, not approximately 30. + +Constructed from the URL family via `make_http_mcp_server_registry_backend_from_url(url)` honoring the `https://[:port]/?agent_scope=` shape (default `agent_scope="default"` when the query param is absent). + +- **Side-effect-free construction.** `__init__` does NOT call the network. Capability probe is lazy (first non-construction call) per MUST 2. +- **Capability negotiation** (Decision 4): first non-construction call triggers `_probe_capabilities()`. The full probe sequence is specified under Decision 4; this section does not re-specify it. +- **Wire format (documented in spec/36 §"HTTP wire format" added at PR 4):** + - `GET /mcp-servers?agent_scope=` returns the mounted server set for this agent_scope as `{"servers": [{"name": ..., "description": ..., "transport": ..., "version": null, "source": ""}, ...]}`. Catalog servers MUST filter server-side by `agent_scope`; returning the org-wide catalog from this endpoint is non-conformant. + - `GET /mcp-servers?agent_scope=&expand=spec` returns the bulk-expansion form for `load_all_mcp_servers()`: `{"servers": [, ...]}`. Single round trip for N servers; eliminates N+1 cost at agent construction. + - `GET /mcp-servers/?agent_scope=` returns the full MCPServerSpec as JSON for the single mounted server. + - `GET /mcp-servers//validate?agent_scope=` returns `{"ok": bool, "errors": [...], "warnings": [...]}`. + - `POST /mcp-servers?agent_scope=` accepts the MCPServerSpec as JSON; returns 201 with the created Ref on success, 409 on collision. + - `DELETE /mcp-servers/?agent_scope=` returns 204 on success or absence (idempotent). Unmounts from the specified scope only. +- **Auth:** if `auth_token` provided, every request includes `Authorization: Bearer `. If absent, requests go unauthenticated. Catalog servers requiring auth respond 401; backend translates to `MCPRegistryAuthRequired`. +- **Transient failure:** network errors (DNS, connection refused, timeout) raise `MCPRegistryUnavailable`. HTTP 5xx responses also raise `MCPRegistryUnavailable`. HTTP 404 on `/mcp-servers/` raises `MCPServerNotInRegistry` (per MUST 7). +- **URL credential redaction:** if `catalog_url` contains embedded credentials (`https://user:pass@host/`), they are stripped from error messages (per MUST 4; matches the spec/22 / spec/24 / spec/25 / spec/32 / spec/34 precedent). +- **Cross-tenant isolation:** every request includes `?agent_scope=`; the catalog server filters by scope at the SQL / storage layer. Scope is hardcoded from the constructor; `install()` does NOT accept a scope parameter (per MUST 5; same shape as `SQLiteToolRegistryBackend`). +- **HTTP client library:** `httpx` via a new optional extra `[http]` in `pyproject.toml`. The HTTP backend lazy-imports `httpx` at first use; if absent, raises `ImportError` with a clear message: `"HTTPMCPServerRegistryBackend requires 'httpx'. Install with: pip install atomic-agents-stack[http]"`. Mirrors how `[redis]` is shipped for `RedisLockBackend`. + +**Capabilities:** dynamic per tier. Default class-level at PR 4: `supports_install=False, supports_uninstall=False, supports_capability_handshake=True, supports_audit=False, durable=True`. At PR 5: `supports_install=True, supports_uninstall=True` (dynamic per tier at runtime). Runtime values may differ from class defaults based on tier negotiation. + +--- + +## Exception surface + +All exceptions live in `atomic_agents/exceptions.py` and are re-exported from `atomic_agents.mcp_registry` for ergonomic access (per the PersonaBackend D-PI-1 precedent). + +- `MCPServerNotInRegistry`: `load_mcp_server(name)` called with an unknown name; HTTP 404. Distinct from `MCPServerConnectFailed` (spec/19's runtime subprocess failure). +- `MCPServerAlreadyInstalled`: `install(spec)` collided on server name; HTTP 409. +- `MCPRegistryUnavailable`: transient failure (network, file lock contention, server 5xx). Operators retry; framework does NOT auto-retry. +- `MCPRegistryAuthRequired`: HTTP 401 without `auth_token`. Operators set the env var or constructor kwarg. +- `MCPRegistryDescriptorInvalid`: mcp.md parse failure (filesystem); HTTP response body invalid JSON (HTTP). +- `BackendNotRegistered`: operator-pinned `backend_id` isn't in the registry. Matches every prior arc's `BackendNotRegistered` shape. +- `ValueError`: invalid server name (path separator, empty, parent-dir token, leading `.`, control chars). +- `NotImplementedError`: capability-gated method on a backend that doesn't support it at runtime. +- `MCPServerConnectFailed`: re-raised from `load_mcp_server` when env-var resolution fails (matches existing spec/19 exception; not a new exception class). + +--- + +## Registry + +```python +from atomic_agents.mcp_registry import ( + register_mcp_server_registry_backend, + get_mcp_server_registry_backend, + list_mcp_server_registry_backends, +) + +register_mcp_server_registry_backend("filesystem", FilesystemMCPServerRegistryBackend) +register_mcp_server_registry_backend("http", HTTPMCPServerRegistryBackend) +cls = get_mcp_server_registry_backend("filesystem") # returns FilesystemMCPServerRegistryBackend +backend = cls(agent_root, read_paths) # caller instantiates with scope +ids = list_mcp_server_registry_backends() # ["filesystem", "http"] +``` + +The registry stores **classes**, not instances (matches `ProfileBackend` + `LogBackend` + `LockBackend` + `ToolRegistryBackend` + `CorpusBackend` registries). Default `"filesystem"` and `"http"` registrations happen at import time inside `atomic_agents/mcp_registry/__init__.py`. + +--- + +## Operator surface + +MCP server registry backend choice is a **deployment-level** decision (the whole framework instance picks "filesystem" or "http" or a future SaaS backend), not an agent-author-level decision. Contrast with `mcp.md` (per-agent, declarative). + +There is no `mcp_registry.md` markdown config: the catalog backend is the layer that LOADS the catalog; circular. + +The operator surface exposes the choice via TWO paths (parallel to `LogBackend` / `LockBackend` / `ProfileBackend` / `ToolRegistryBackend` / `CorpusBackend`): + +1. **Constructor kwarg.** Programmatic operators pass `AtomicAgent(..., mcp_server_registry_backend=HTTPMCPServerRegistryBackend(...))` to bypass env-var resolution entirely. + +2. **Environment variables:** + - `ATOMIC_AGENTS_MCP_SERVER_REGISTRY_BACKEND`: backend id (default `filesystem`). Recognized: `filesystem`, `http`. + - `ATOMIC_AGENTS_MCP_SERVER_REGISTRY_BACKEND_URL`: connection string for non-filesystem backends. HTTP shape: `https://catalog.example.com/?agent_scope=`. + - `ATOMIC_AGENTS_MCP_SERVER_REGISTRY_AUTH_TOKEN`: bearer token for HTTP (optional; absence means unauthenticated requests). + +**Credential safety:** `get_default_mcp_server_registry_backend` sanitizes the BACKEND env var before echoing in error messages (strips anything following `://` and truncates at 32 chars). Mirrors the redaction helper from `logs/__init__.py:316` and `profile/__init__.py:_redact_for_error_message`. + +The constructor kwarg ALWAYS wins. Operator-config layering: env vars are deployment-level; the kwarg is per-agent-construction. + +**Per-runner threading.** Mirrors the established pattern from spec/22 / 24 / 25 / 32 / 33 / 34: + +- `OutcomeRunner(..., mcp_server_registry_backend=...)` threads to `_outcome_iteration_loop`. +- `EvalRunner(..., mcp_server_registry_backend=...)` threads to `_eval_iteration_loop`. +- `DreamRunner(..., mcp_server_registry_backend=...)` stores as `self._mcp_server_registry_backend` for API parity. +- `delegate.py` threads the backend via the `_mcp_server_registry_backend_was_explicit` flag pattern (mirrors `PersonaBackend` D-ER-2 at `agent.py:401` and `CorpusBackend` at `agent.py:431`). The catalog is per-agent semantic context, not fleet-scoped, so delegates only inherit when explicitly threaded; default-resolved backends do not leak the coordinator's `agent_root` or `catalog_url` to delegates. + +--- + +## CLI surface + +`atomic-agents mcp-registry` subcommands (zero LLM calls; env-var-aware): + +- `atomic-agents mcp-registry list`: calls `list_mcp_servers()`, prints names, descriptions, and transport in a table. +- `atomic-agents mcp-registry show `: calls `load_mcp_server(name)`, prints the full MCPServerSpec. +- `atomic-agents mcp-registry validate `: calls `validate(name)`, prints the ValidationResult. +- `atomic-agents mcp-registry install --command --args --env --description `: builds an MCPServerSpec from CLI flags, calls `install(spec)`. Ships in PR 3 for filesystem; PR 5 for HTTP. +- `atomic-agents mcp-registry uninstall `: calls `uninstall(name)`. Ships in PR 3 for filesystem; PR 5 for HTTP. +- `atomic-agents mcp-registry refresh-capabilities`: for HTTP backends; re-runs the capability probe and reports the current tier. + +CLI catches `MCPRegistryUnavailable`, `MCPServerNotInRegistry`, `MCPServerAlreadyInstalled`, and `MCPRegistryAuthRequired` cleanly with operator-readable error messages. + +--- + +## Doctor coherence check + +`doctor.check_mcp_server_registry_backend` (PR 2 scope) validates operator-config coherence with a PASS / WARN / FAIL ladder. Doctor uses `backend.validate(name)` (not `parse_mcp_md` directly) when checking server-level validity. + +- **PASS:** backend resolves cleanly; `list_mcp_servers()` returns without raising; capability snapshot matches the env var (e.g., `=http` resolves to `HTTPMCPServerRegistryBackend`). +- **WARN:** transient probe failure (e.g., HTTP backend can't reach catalog server right now but the env var is correctly set); operator-facing message names the catalog_url with credentials redacted. +- **FAIL:** unknown backend_id in env var; missing required env var (URL absent when backend is HTTP); auth required but no token provided. + +Doctor output includes a capability snapshot (current `capabilities` view) so operators see at a glance which tier their HTTP catalog server is at. + +--- + +## Reserved at lock + +Items with reserved capability flags that ship unsupported at v1.0: + +- **Tier 4+ capability flags.** Future upstream MCP ecosystem registry protocols slot in additively. spec/36 v1.0 documents tiers 1-3; future arcs add tier-4 negotiation. +- **SQLite-backed local catalog.** Reserved for a future arc if the single-host multi-agent use case emerges with real operator demand. The Protocol seam supports it; no v1.0 work. +- **OAuth flows for HTTP catalog auth.** Operators pre-resolve tokens into the `AUTH_TOKEN` env var. Matches spec/19's v1 OAuth deferral. +- **Audit log surfacing.** Tier 3 catalog servers may advertise `supports_audit=True`; the framework's HTTP backend exposes the capability flag but does not consume the audit endpoint in v1.0. Reserved for a future arc. + +--- + +## Out of scope + +- **Postgres / vector-backed catalog.** Deferred to a future arc if operator demand justifies it. Mirrors the `PgvectorCorpusBackend` deferral pattern from spec/34 where `PgvectorCorpusBackend` is deferred to the coordinated #258 Postgres-adapter family release. +- **Subprocess sandboxing.** spec/28's judge layer covers runtime safety (pre-action validation, REVISE / ESCALATE). The registry covers catalog only. +- **HTTP transport for MCP servers themselves.** Still stdio-only per spec/19's v1. The HTTP added here is for the *catalog server*, not the *MCP server's transport*. +- **MCP resource subscriptions / prompt templates.** Deferred per spec/19's existing reservations. +- **Cross-process catalog locking.** The HTTP catalog server owns transactionality at the storage layer; the framework does NOT add a separate LockBackend dependency for HTTP install/uninstall (relies on the server's own consistency). +- **`supports_skills_catalog`.** Not relevant for MCP server registry. The skill catalog reservation lives on `ToolRegistryCapabilities` per spec/25 Decision 2. + +--- + +## Implementer contract (10 normative MUSTs) + +Backends implementing `MCPServerRegistryBackend` MUST satisfy all 10 MUSTs. The conformance suite at `tests/test_mcp_server_registry_conformance.py` parametrizes across registered backends and asserts each MUST. + +The MUST count is 10 because the static-vs-dynamic capability distinction (Decision 6) is novel relative to the 11 prior arcs (which all had static-equals-dynamic capabilities) and requires two additional MUSTs (MUST 3 extended to cover the static/dynamic distinction; MUST 10 added for `load_all_mcp_servers()` consistency) beyond the 8-MUST range of prior arcs. + +**MUST 1: Name charset validation at API boundary.** Server names MUST match `[a-zA-Z0-9_.+@-]+`. Validation MUST happen BEFORE any backend access (disk read, HTTP call, parse). Path-traversal tokens (`..`, `/`, `\\`, control chars, newlines, leading dot) MUST raise `ValueError` at the API boundary. Matches the locked charset across PolicyBackend, PersonaBackend, CorpusBackend. + +**MUST 2: Side-effect-free construction.** `__init__` MUST NOT call the network, spawn subprocesses, open file handles for the catalog, or do any other side-effecting work. Construction is cheap and synchronous. First non-construction call may trigger lazy probing. This preserves byte-identical pre-#201 behavior for all existing `AtomicAgent(...)` construction sites when no backend is configured. + +**MUST 3: Capability honesty.** `capabilities` (the `@property`) MUST reflect what the backend actually supports. Conformance suite enforces: if `supports_install=True` is reported, `install(spec)` MUST NOT raise `NotImplementedError`. Static-vs-dynamic capability distinction (Decision 6) is documented in each backend's `capabilities` property docstring; HTTP-style backends MUST distinguish static class capability (constant) from runtime capability (probe-dependent). + +**MUST 4: URL credential redaction.** Operator-facing error messages (including `BackendNotRegistered`, `MCPRegistryUnavailable`, `MCPRegistryAuthRequired`) MUST redact embedded credentials in URLs. The `_redact_for_error_message` helper from `mcp_registry/__init__.py` is the canonical implementation; backends use it for any error path that surfaces a URL. + +**MUST 5: Cross-agent isolation at storage layer.** Filesystem backends scope by `agent_root` (one mcp.md per agent). HTTP / SQLite / SaaS backends scope by `agent_scope` (column / query param / URL path segment). The scope is hardcoded from the constructor; `install` / `uninstall` / `list` / `load` / `validate` ALL filter by scope. No cross-scope reads or writes regardless of backend. + +**MUST 6: `backend_id` stability and `close()` idempotency.** + +- (a) `backend_id` is a stable identifier (constant across the backend's lifetime; matches the registry key used in `register_mcp_server_registry_backend`). +- (b) `close()` is idempotent: calling it twice does NOT raise; calling it before any other method is a no-op. + +**MUST 7: Transient-vs-permanent failure honesty.** Backends MUST distinguish "catalog unreachable" (network error, file lock contention, server 5xx) from "server not in catalog" (404, missing mcp.md section). The former raises `MCPRegistryUnavailable`; the latter raises `MCPServerNotInRegistry`. Operators rely on this distinction for retry decisions; conflating them breaks the operator surface. + +**MUST 8: Env-var resolution semantics on MCPServerSpec.** `$VAR` references in `MCPServerSpec.env` values MUST be resolved against the client process environment at `load_mcp_server(name)` time. Unresolvable references MUST raise `MCPServerConnectFailed` (the existing spec/19 exception; not a new exception class). This applies to ALL backends regardless of storage substrate: HTTP catalog servers MAY store unresolved `$VAR` strings, and the framework's HTTP client MUST resolve at materialization. + +**MUST 9: Install / uninstall atomicity.** Concurrent `install(spec)` calls for the same server name MUST produce exactly one winner; the others MUST raise `MCPServerAlreadyInstalled`. Filesystem implementations MUST acquire a `LockBackend` (spec/21) lease around the read-modify-write critical section in `install` and `uninstall` (read mcp.md, parse, check name collision, write back); the `_io.atomic_write` discipline ensures crash-safety on the individual write, while the lock serializes concurrent callers. The lock acquisition uses the spec/21 signature: `lock_handle = lock_backend.acquire("mcp_registry", timeout=30)`; release via `lock_handle.release()`. HTTP implementations rely on the catalog server's transactional storage and translate HTTP 409 to `MCPServerAlreadyInstalled`. `uninstall(name)` MUST be idempotent (no exception when the name is absent). + +**MUST 10: `load_all_mcp_servers()` consistency.** The output of `load_all_mcp_servers()` MUST be semantically equivalent to `[load_mcp_server(ref.name) for ref in list_mcp_servers()]` for any given backend state. Backends MAY optimize the bulk implementation (HTTP backend uses single bulk GET via `?expand=spec`; SQLite backend can use a single SELECT) but MUST preserve the equivalence guarantee. The conformance suite asserts: for every registered backend, `set(load_all_mcp_servers())` equals `set([load_mcp_server(ref.name) for ref in list_mcp_servers()])` across populated and empty catalog states. + +**Framework-level invariant (NOT a per-backend MUST):** `agent.py:_load_config()` MUST populate `AgentProfile.mcp_servers_resolved` with the materialized MCPServerSpec list (via `load_all_mcp_servers()`) BEFORE `MCPClientPool` consumes it. This is enforced at the framework integration layer in PR 2, not in each backend. Per Decision 9; preserves the audit-trail-is-structural invariant (CLAUDE.md §5) across all backends. + +--- + +## Shipping plan detail + +### PR 1: Protocol scaffolding + filesystem read paths + DRAFT spec + +**Code:** +- `atomic_agents/mcp_registry/__init__.py`: register / get / list functions + `get_default_mcp_server_registry_backend` factory + `_redact_for_error_message` helper. +- `atomic_agents/mcp_registry/types.py`: `MCPServerRef`, `MCPServerRegistryCapabilities`, `ValidationResult`. +- `atomic_agents/mcp_registry/backend.py`: `MCPServerRegistryBackend` Protocol + exception classes + `_default_load_all(backend)` convenience helper. +- `atomic_agents/mcp_registry/filesystem.py`: `FilesystemMCPServerRegistryBackend` constructor + read paths (`list_mcp_servers`, `load_mcp_server`, `load_all_mcp_servers`, `validate`, `capabilities`, `refresh_capabilities`, `close`). Install/uninstall ship in PR 3 alongside the LockBackend integration. +- `atomic_agents/mcp.py`: additive `resolve_env: bool = True` parameter on `parse_mcp_md_text` and `_build_spec`. Backward-compatible (existing callers see byte-identical behavior with default True). + +**Spec:** `docs/spec/36-mcp-server-registry-backend.md` (this file, DRAFT). + +**Tests:** +- `tests/test_mcp_server_registry_conformance.py`: ~40 conformance tests parametrized across registered backends (filesystem only at this PR; HTTP joins in PR 4). Covers MUSTs 1, 2, 3, 5, 6, 7, 8, 10 (install/uninstall MUST 9 covered in PR 3 when those land). +- `tests/test_mcp_server_registry_filesystem_backend.py`: ~23 filesystem-specific tests (path-traversal at API boundary; env-var resolution at load time with `resolve_env=False` parse and lazy resolution; mcp.md parse semantics; empty / missing mcp.md returns `[]`; `load_all_mcp_servers` default impl consistency; malformed mcp.md raises `MCPRegistryDescriptorInvalid`; multi-section mcp.md returns multiple specs; `MCPServerRef.source` field equals `"mcp.md#section:"`). + +**CLI:** `atomic-agents mcp-registry list / show / validate / refresh-capabilities` (read-only subcommands). + +**No agent.py wiring yet.** Wiring lands in PR 2 alongside the audit/profile sibling field. + +**Expected test count delta:** +60. Total test count after PR 1: approximately 2997. + +### PR 2: agent.py wiring + audit/profile sibling field + IRON RULE regression + doctor + +**Code:** +- `atomic_agents/agent.py:_load_config()`: resolve backend from env vars and constructor kwarg; default to `FilesystemMCPServerRegistryBackend(agent_root, read_paths)`. Call `backend.load_all_mcp_servers()` to materialize the spec list; populate `AgentProfile.mcp_servers_resolved` BEFORE `MCPClientPool` consumes the specs. Re-raise `MCPRegistryUnavailable` on probe failure (fail-closed per Decision 1; framework-level invariant). +- `atomic_agents/profile/types.py`: `AgentProfile.mcp_servers_resolved: list[MCPServerSpec] = field(default_factory=list)` (Decision 9 sibling field). +- `docs/spec/24-agent-profile-backend.md`: D1 addendum documenting the sibling field; `mcp_md_raw` stays as filesystem-backend backward-compat. +- `OutcomeRunner` / `EvalRunner` / `DreamRunner` per-runner kwargs. +- `delegate.py` explicit-only threading via `_mcp_server_registry_backend_was_explicit` flag. +- `doctor.py`: add `check_mcp_server_registry_backend` with PASS/WARN/FAIL ladder + capability snapshot + URL credential redaction. + +**Expected test count delta:** +35. Total after PR 2: approximately 3032. + +### PR 3: Filesystem CLI install/uninstall + LockBackend integration + +**Code:** +- `atomic_agents/mcp_registry/filesystem.py`: add `install(spec)` and `uninstall(name)` with `LockBackend` lease and atomic mcp.md edits. Lock acquisition uses the spec/21 signature `acquire("mcp_registry", timeout=30)`; release via the returned `LockHandle.release()`. Filesystem capability flags flip to `supports_install=True, supports_uninstall=True`. +- `atomic_agents/cli.py`: `atomic-agents mcp-registry install / uninstall` subcommands (filesystem-only at this PR; HTTP install/uninstall land in PR 5). + +**Spec:** `docs/spec/36-mcp-server-registry-backend.md`: add §"Install / uninstall semantics" and §"LockBackend integration" subsections. + +**Expected test count delta:** +15. Total after PR 3: approximately 3047. + +### PR 4: HTTP-client reference impl + tier negotiation + fail-closed + auth + bulk + +**Code:** +- `atomic_agents/mcp_registry/http.py`: `HTTPMCPServerRegistryBackend` (read-only mode: `list / load / load_all / validate / refresh_capabilities`) with tier-1/2/3 capability negotiation + lazy probe + `probe_failure_cache_s` (default 60s) + URL credential redaction + bulk endpoint (`GET /mcp-servers?expand=spec`) overriding `_default_load_all`. Install/uninstall stubs raise `NotImplementedError` at this PR (write path lands in PR 5). +- `pyproject.toml`: add `[project.optional-dependencies]` entry: `http = ["httpx>=0.27"]`. Matches the existing `redis` / `openai` / `validation` extras pattern. + +**Spec:** `docs/spec/36-mcp-server-registry-backend.md`: add §"HTTP wire format" + §"Tier negotiation" + §"Capability handshake" + §"Per-scope filtering" (catalog server MUST filter by `agent_scope` server-side; non-conformant catalog servers that return org-wide listings are out of spec). + +**Expected test count delta:** +31. Total after PR 4: approximately 3078. + +### PR 5: HTTP install/uninstall + tier-3 audit + spec/36 LOCKED + v1.0 RELEASE candidate + +**Code:** +- `atomic_agents/mcp_registry/http.py`: add `install / uninstall` write paths (POST and DELETE) with capability gating (dynamic `supports_install` based on tier negotiation); mid-session tier regression handling (inline re-probe + `NotImplementedError` on 405 from a previously-tier-2-now-tier-1 server). +- `atomic_agents/cli.py`: wire HTTP install/uninstall into the existing `atomic-agents mcp-registry install / uninstall` subcommands. + +**Spec:** `docs/spec/36-mcp-server-registry-backend.md` LOCKED at 10 MUSTs. + +**Doc updates:** +- `CHANGELOG.md` `[Unreleased]`: arc-closer entry covering the full 5-PR series. +- `README.md` backend protocols table: flip `MCPServerRegistryBackend` row from `Planned` to `Shipped`; flip Status block from "Eleven of twelve" to "Twelve of twelve". +- `CLAUDE.md`: add 12th backend lock-paragraph. +- `~/ObsidianVault/Atomic Agents/ROADMAP.md`: flip MCPServerRegistry row; append the Tier 2 backend-protocol scaling roadmap closer. + +**Expected test count delta:** +10. Total after PR 5: approximately 3088. + +**After merge:** +- `/land-and-deploy` verification on main-branch CI green on merge commit. +- v1.0 RELEASE tag cut as `v1.0.0`. +- PyPI release fires (`uv build` + `uv publish`). +- GitHub Release notes pulled from CHANGELOG.md `[v1.0.0]` entry. + +--- + +## Deferred decisions + +Items 1-3 from the design doc (package name `mcp_registry`, bearer header for HTTP auth, snake_case for JSON keys) are resolved per the decisions above. Remaining open questions: + +**4. Tier-4 trigger.** What upstream signal should we watch to start a v1.1 tier-4 arc? File a tracking issue at PR 4 LOCK for "v1.1 spec/36 tier-4 / upstream MCP registry protocol"; watch MCP spec discussions and pin the issue to that thread. Tracked in successor issue at PR 4. + +**5. `refresh_capabilities` ergonomics.** Should the HTTP backend auto-refresh capabilities on `MCPRegistryUnavailable` retry, or strictly on explicit operator call? Explicit only at v1.0; auto-refresh is a v1.1 ergonomics arc once operator behavior is observed. Tracked in successor issue at PR 4. + +--- + +## Success criteria + +- All 5 PRs ship through formal `/ship` Skill end-to-end (extending the streak from 7 to 12 consecutive clean ships post-#285-revert). +- `/land-and-deploy` verification on each PR's merge commit with main-branch CI green. +- Test count grows approximately 151 total across the arc (PR 1: +60, PR 2: +35, PR 3: +15, PR 4: +31, PR 5: +10). Final count approximately 3088 tests collected. +- spec/36 LOCK at PR 5 passes adversarial review (Opus subagent, 2-5 rounds per CLAUDE.md §11). +- v1.0 RELEASE cuts after PR 5 lands. CHANGELOG `[Unreleased]` converts to `[v1.0.0]`. PyPI publishes. + +--- + +## References + +- `docs/spec/19-mcp-integration.md`: `MCPServerSpec` dataclass shape, `MCPClientPool` subprocess lifecycle, `parse_mcp_md_text` function signature, `validate_mcp_server_args` helper, env-var resolution at parse time. +- `docs/spec/20-memory-backend.md`: MemoryBackend Protocol; the template the protocol-pattern series follows. +- `docs/spec/21-lock-backend.md`: `LockBackend.acquire(name="", timeout=0.0)` signature; `LockHandle.release()` idiom; `LockLost` lease-expiry discipline. +- `docs/spec/24-agent-profile-backend.md`: `mcp_md_raw` snapshot pattern (unchanged); `mcp_servers_resolved` sibling field added (spec/24 D1 addendum). Implementer Contract shape. +- `docs/spec/25-tool-registry-backend.md`: Decision 3 carve-out is the origin of this arc. `ToolRegistryBackend.install` signature comparison. `ValidationResult` shape. Protocol surface parallels. +- `docs/spec/27-doctor.md`: PASS/WARN/FAIL ladder shape. +- `docs/spec/32-policy-backend.md`: `_policy_backend_was_explicit` delegate-threading precedent. +- `docs/spec/33-persona-backend.md`: Most recently locked Protocol spec; Implementer Contract (8 MUSTs) used as template. Exception placement (D-PI-1). `delegate.py` threading rationale (D-ER-2). +- `docs/spec/34-corpus-backend.md`: `@property capabilities` declaration at `corpus/backend.py:70-71`. 9-MUST contract structure used as template. `_redact_for_error_message` helper pattern. `Out of scope` table format. `References` section format. +- `atomic_agents/mcp.py:84`: `MCPServerSpec` dataclass. +- `atomic_agents/mcp.py:126`: `MCPClientPool` (subprocess lifecycle; stays unchanged). +- `atomic_agents/mcp.py:410`: `parse_mcp_md` function (always resolves env vars; no `resolve_env` knob). +- `atomic_agents/mcp.py:432`: `parse_mcp_md_text` function (gains `resolve_env: bool = True` at PR 1). +- `atomic_agents/mcp.py:558-566`: `_build_spec` env-var resolution block (extracted to `_resolve_env_vars` helper at PR 1). +- `atomic_agents/mcp.py:589`: `validate_mcp_server_args(spec, agent_read_paths)` helper (reused by `load_mcp_server`; do NOT reimplement inline). +- `atomic_agents/locks/backend.py:82`: `LockBackend.acquire` signature. +- `atomic_agents/_io.py:42`: `atomic_write` primitive. +- `atomic_agents/corpus/backend.py:70-71`: `@property capabilities` declaration used as the precedent for this Protocol. +- `agent.py:2337`: MCP discovery overlay. +- `~/.gstack/projects/dep0we-atomic-agents-stack/dep0we-main-design-20260601-165020.md`: Full architectural rationale. APPROVED (3 adversarial rounds, 9/10 quality, zero P0/P1 remaining). 10 MUSTs, 5-PR shipping plan, `mcp_servers_resolved` sibling field on AgentProfile, bulk Protocol method, fail-closed catalog wiring, LockBackend API corrected to spec/21 signature, `probe_failure_cache_s` (60s) distinct from `request_timeout_s` (10s). +- `~/.gstack/projects/dep0we-atomic-agents-stack/dep0we-main-pr1-prep-notes-20260602-114910.md`: PR 1 implementation prep notes. 35 findings (5 P0, 15 P1, 15 P2) from 5 parallel Sonnet review streams. +- `~/.gstack/projects/dep0we-atomic-agents-stack/dep0we-main-eng-review-test-plan-20260601-174020.md`: Test plan (~151 tests across 5 PRs). +- [#201](https://github.com/dep0we/atomic-agents-stack/issues/201): The umbrella issue for this arc. From 0c57a410b997679d5d0d320a0fb2043caaa51a6d Mon Sep 17 00:00:00 2001 From: Dan Powers Date: Wed, 3 Jun 2026 08:59:38 -0500 Subject: [PATCH 5/5] docs: post-ship documentation update for MCPServerRegistryBackend PR 1 of 5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit README.md: test count 2937→3153 (3101 passing + 52 skipped); spec list gains spec/35 (RFC) + spec/36 (DRAFT, PR 1 of 5); RFC/DRAFT count 2→4; "remaining two protocols" corrected to "remaining protocol" (CorpusBackend shipped). CLAUDE.md: test count refresh to 3,153 (2026-06-03). docs/spec/19-mcp.md: cross-link to spec/36 added in the Cross-links header. Co-Authored-By: Claude Sonnet 4.6 --- CLAUDE.md | 4 ++-- README.md | 14 ++++++++------ docs/spec/19-mcp.md | 2 +- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 80fc7db..c6b4d2b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -207,7 +207,7 @@ uv run pytest # full suite uv run pytest tests/test_.py -v # one module ``` -Run `uv run pytest --collect-only -q | tail -1` for the live test count (last refresh: 2,937 tests collected, 2026-06-01). New backend protocols add ~25 conformance + ~10 impl-specific tests. New features ship with tests. Migration-shaped PRs need parameterized fixture tests across the backend protocol — the conformance suite is what keeps the protocol honest. +Run `uv run pytest --collect-only -q | tail -1` for the live test count (last refresh: 3,153 tests collected, 2026-06-03). New backend protocols add ~25 conformance + ~10 impl-specific tests. New features ship with tests. Migration-shaped PRs need parameterized fixture tests across the backend protocol — the conformance suite is what keeps the protocol honest. ### Releases + SemVer @@ -341,7 +341,7 @@ These are not forbidden forever — they're explicitly deferred with rationale. ## Status -**v0.13.0, alpha, PUBLIC.** Core runtime stable. Test suite: run `uv run pytest --collect-only -q | tail -1` for the live count (last refresh: 2,937 tests collected, 2026-06-01). Capability-gated skips fall into four buckets — ToolRegistry conformance (filesystem-shape + `supports_uninstall=False` variants), AgentProfile (skill-content + filesystem-shape on SQLite), cross-process Redis (require real Redis instead of fakeredis), and judge-conformance dispatch (LLM-only + PolicyJudge concurrent-evaluate). Full CI runs against `uv sync --extra dev --extra openai --extra validation --extra redis`. **Eleven backend protocols shipped**: +**v0.13.0, alpha, PUBLIC.** Core runtime stable. Test suite: run `uv run pytest --collect-only -q | tail -1` for the live count (last refresh: 3,153 tests collected, 2026-06-03). Capability-gated skips fall into four buckets — ToolRegistry conformance (filesystem-shape + `supports_uninstall=False` variants), AgentProfile (skill-content + filesystem-shape on SQLite), cross-process Redis (require real Redis instead of fakeredis), and judge-conformance dispatch (LLM-only + PolicyJudge concurrent-evaluate). Full CI runs against `uv sync --extra dev --extra openai --extra validation --extra redis`. **Eleven backend protocols shipped**: - **MemoryBackend** (PR #57) — filesystem reference impl + conformance suite. - **LLMBackend** (#87) — Anthropic + OpenAI + Moonshot reference impls, registered at framework import; conformance suite parametrizes across all three. diff --git a/README.md b/README.md index b45bef8..118a4da 100644 --- a/README.md +++ b/README.md @@ -127,7 +127,7 @@ This is the slot in the AI-agent-tooling landscape `atomic-agents-stack` occupie | **Audit trail** | JSONL per run with `parent_run_id` rollups; helper + delegate + tool + capture lines all link back | Dashboards in Letta UI / cloud | Mem0 dashboards | LangSmith (hosted) | Build it | | **Cost guardrails** | First-class — daily / monthly caps, threshold warnings, fallback action, `critical=True` override, tree-cap across delegates | Per their pricing model | Per their pricing model | Not built into core OSS | Build it | | **Multi-agent coordination** | Role × project cascade defined in spec/06 | Multi-agent shared memory blocks | Agent-shared memory pools | LangGraph: graph-based orchestration (more flexible) | Build it | -| **Numbered, locked spec** | 31 locked docs in `docs/spec/` (+ 2 RFCs) | API + concept docs | API + concept docs | API reference + concept docs | None | +| **Numbered, locked spec** | 31 locked docs in `docs/spec/` (+ 4 RFCs/DRAFTs in progress) | API + concept docs | API + concept docs | API reference + concept docs | None | | **Reference runtime** | Python, macOS / Linux primary | Python (server) + multi-language clients | Python (OSS) + multi-language clients | Python + JavaScript | Whatever | **Where the alternatives win:** @@ -141,7 +141,7 @@ This is the slot in the AI-agent-tooling landscape `atomic-agents-stack` occupie - **Markdown-source-of-truth, human-editable.** Operators can edit persona / tools / memory from any text editor or Obsidian without a vendor app. - **No required server.** The framework is "files + Python." A complete agent runs on a laptop with zero infrastructure. -- **Spec-level file layout.** 31 numbered docs lock the contract (plus 2 RFCs in progress); conformance is testable; alternate implementations are possible. +- **Spec-level file layout.** 31 numbered docs lock the contract (plus 4 RFCs/DRAFTs in progress); conformance is testable; alternate implementations are possible. - **Crash-safe writes by default.** `temp file + fsync + rename + parent-dir fsync` for every mutation; an interrupted run leaves recoverable artifacts, not corruption. - **Cost story is structural, not bolted on.** Daily / monthly caps + tree-cap for delegations + per-call cost reservation for helper batches + a `critical=True` override that's part of the API, not a per-vendor workaround. @@ -181,6 +181,8 @@ Start at [`docs/README.md`](docs/README.md) for the spec entry point. The locked - [32 — Policy backend protocol](docs/spec/32-policy-backend.md) — fleet-wide `policy.md`; cost-cap MIN composition + allowlist enforcement - [33 — PersonaBackend Protocol](docs/spec/33-persona-backend.md) — persona ownership, snapshot/restore, `persona.link.md` format - [34 — CorpusBackend Protocol](docs/spec/34-corpus-backend.md) — wiki/raw corpus protocol; filesystem + SQLite (FTS5) reference impls; GB-scale indexed full-text search +- [35 — init wizard](docs/spec/35-init-wizard.md) — `atomic-agents init` on-ramp; template scaffolding + Add-to-it merge; CI-friendly `--from-template` (RFC) +- [36 — MCPServerRegistryBackend Protocol](docs/spec/36-mcp-server-registry-backend.md) — MCP server catalog + install/audit; `FilesystemMCPServerRegistryBackend` reference impl; `atomic-agents mcp-registry` CLI (DRAFT, PR 1 of 5) Each spec doc is locked when the implementation matches and tests pass. Spec changes that imply implementation changes get filed as GitHub issues. **Spec docs separate shipped behavior from explicit future / deferred boundaries** — sections that describe behavior not yet implemented are explicitly marked as such, not silently aspirational. @@ -205,7 +207,7 @@ The framework is moving toward swappable backends layer by layer. The shape: a P | `CorpusBackend` | ✅ Shipped | Filesystem + SQLite (FTS5) reference impls; per-agent `wiki/` + `raw/`; `render_index_summary(corpus)` Protocol method; closes the GB-scale wiki cliff via O(log N) indexed full-text query | [`spec/34`](docs/spec/34-corpus-backend.md) | | `MCPServerRegistryBackend` | Planned | Catalog + install/audit for MCP servers (MCP equivalent of ToolRegistry) | [`#201`](https://github.com/dep0we/atomic-agents-stack/issues/201) | -**v1 direction:** a home user runs filesystem-everything today. An organization runs the same agent definitions over Postgres / Redis / SQLite-Datadog / behind an HTTP service once the remaining two protocols ship. v1.0 closes when MCPServerRegistry lands + its conformance suite pins the contract. See [`docs/architecture.md`](docs/architecture.md) for the mental model, [`docs/TENSIONS.md`](docs/TENSIONS.md) for architectural tensions this scaling story has to survive, and [`ROADMAP.md`](ROADMAP.md) for the full backlog beyond v1.0. +**v1 direction:** a home user runs filesystem-everything today. An organization runs the same agent definitions over Postgres / Redis / SQLite-Datadog / behind an HTTP service once the remaining protocol ships. v1.0 closes when MCPServerRegistry lands + its conformance suite pins the contract. See [`docs/architecture.md`](docs/architecture.md) for the mental model, [`docs/TENSIONS.md`](docs/TENSIONS.md) for architectural tensions this scaling story has to survive, and [`ROADMAP.md`](ROADMAP.md) for the full backlog beyond v1.0. --- @@ -280,8 +282,8 @@ Same pattern for OpenAI (`atomic-agents-openai`) and Moonshot (`atomic-agents-mo ## Repository structure - `atomic_agents/` — the Python package (runtime in `agent.py`; backend protocols in `memory/`, `_llm.py`, `_locks.py`, `_costs.py`, etc.; CLI in `cli.py`; preflight in `doctor.py`) -- `tests/` 2937 tests collected (2889 passing + 48 skipped), Python 3.11 + 3.12 matrix -- `docs/` — [spec entry point](docs/README.md), [`architecture.md`](docs/architecture.md), [`spec/`](docs/spec/) (31 locked docs + 2 RFCs), [`deployment/`](docs/deployment/) (8 operator runbooks), [`samples/caldwell/`](docs/samples/caldwell/) (complete worked example), [`GOVERNANCE.md`](docs/GOVERNANCE.md), [`TENSIONS.md`](docs/TENSIONS.md), [`methodology.md`](docs/methodology.md) +- `tests/` 3153 tests collected (3101 passing + 52 skipped), Python 3.11 + 3.12 matrix +- `docs/` — [spec entry point](docs/README.md), [`architecture.md`](docs/architecture.md), [`spec/`](docs/spec/) (31 locked docs + 4 RFCs/DRAFTs), [`deployment/`](docs/deployment/) (8 operator runbooks), [`samples/caldwell/`](docs/samples/caldwell/) (complete worked example), [`GOVERNANCE.md`](docs/GOVERNANCE.md), [`TENSIONS.md`](docs/TENSIONS.md), [`methodology.md`](docs/methodology.md) - `extras/` — operational templates (Claude Code skill wrappers, macOS LaunchAgent plists, cron examples) --- @@ -311,4 +313,4 @@ Before opening a PR, read [`CLAUDE.md`](CLAUDE.md) (the project's design ethos a ## Status -**v0.13.0, alpha.** Core runtime stable. 2937 tests collected (2889 passing + 48 skipped) on Python 3.11 / 3.12. Eleven of twelve backend protocols shipped (see the backend protocols table above); `MCPServerRegistryBackend` planned. The surface stabilizes at v1.0. Pre-1.0 — Minor releases may contain breaking changes (see [`docs/deployment/versioning.md`](docs/deployment/versioning.md)). Single-maintainer project; reference implementation anyone can use, fork, or extend. +**v0.13.0, alpha.** Core runtime stable. 3153 tests collected (3101 passing + 52 skipped) on Python 3.11 / 3.12. Eleven of twelve backend protocols shipped (see the backend protocols table above); `MCPServerRegistryBackend` planned. The surface stabilizes at v1.0. Pre-1.0 — Minor releases may contain breaking changes (see [`docs/deployment/versioning.md`](docs/deployment/versioning.md)). Single-maintainer project; reference implementation anyone can use, fork, or extend. diff --git a/docs/spec/19-mcp.md b/docs/spec/19-mcp.md index e8094b6..8dda367 100644 --- a/docs/spec/19-mcp.md +++ b/docs/spec/19-mcp.md @@ -1,7 +1,7 @@ # spec/19 — MCP (Model Context Protocol) Client Support > Status: **implemented** (PR feat/mcp-support; fixes in PR fix/mcp-review-findings) -> Cross-links: spec/17 (custom tools — MCP composes with this), [MCP official spec](https://spec.modelcontextprotocol.io/) +> Cross-links: spec/17 (custom tools — MCP composes with this), [MCP official spec](https://spec.modelcontextprotocol.io/), [spec/36](36-mcp-server-registry-backend.md) (MCPServerRegistryBackend — catalog + install/audit for MCP servers, the MCP equivalent of the ToolRegistry pattern; DRAFT, tracking [#201](https://github.com/dep0we/atomic-agents-stack/issues/201)) ## Why MCP