From ac40f03f644132cad4e6eb0b9137daec0b90c7b3 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 17 Apr 2026 07:04:34 +0000 Subject: [PATCH 01/10] feat: add Markdown MCP docs generator (Docusaurus- and pdoc-compatible) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces `scripts/generate_mcp_markdown.py` (exposed via `poe mcp-docs-md`) which introspects the MCP server with `fastmcp inspect` and renders a small set of Markdown files under `docs/mcp-generated/`: - index.md — server overview + counts + TOC - tools.md — one H2 per tool with a GFM parameters table and collapsible input/output JSON schemas - resources.md — concrete resources and resource templates - prompts.md — prompts and their arguments Formatting is modeled on `mcpdocs-gen` (evaluated in PR #1013) but emitted as plain CommonMark + GFM + YAML front-matter +
`<details>` blocks, so the pages render correctly in both Docusaurus and `pdoc` without MDX-only components. Every tool/resource/prompt has a stable slug anchor for deep-linking. --- .gitignore | 3 + docs/CONTRIBUTING.md | 26 +++ pyproject.toml | 1 + scripts/generate_mcp_markdown.py | 370 +++++++++++++++++++++++++++++++ 4 files changed, 400 insertions(+) create mode 100755 scripts/generate_mcp_markdown.py diff --git a/.gitignore b/.gitignore index 5db1a35e8..b867eb5f0 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,9 @@ viztracer_report.json # Packaged docs docs/*.zip +# Generated MCP server docs (regenerate via `poe mcp-docs-md`) +docs/mcp-generated/ + # Misc .DS_Store diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index dd9a2fc53..ab7619dec 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -143,3 +143,29 @@ poe mcp-serve-sse # Server-Sent Events transport on localhost:8000 poe mcp-inspect # Show all available MCP tools and their schemas ``` + +### Generating Markdown docs for the MCP Server + +The repo ships a small script that introspects the MCP server via +`fastmcp inspect` and emits a Markdown documentation site under +`docs/mcp-generated/` (git-ignored). The output is designed to be both +Docusaurus-hostable and `pdoc`-compatible — plain CommonMark with YAML +front-matter, no MDX-only components. + +```bash +uv sync --group dev +poe mcp-docs-md +``` + +Four files are produced: + +- `index.md` — server overview (name, version, instructions, counts) +- `tools.md` — one section per tool with a parameters table and collapsible + input/output JSON schemas +- `resources.md` — concrete resources + resource templates +- `prompts.md` — prompts and their arguments + +Each tool/resource/prompt has a stable slug anchor (e.g. +`tools.md#list_connectors`) so the pages can be deep-linked from Slack, issues, +or other docs. Regenerate after any change to MCP tool signatures, descriptions, +or schemas. 
The underlying script is at `scripts/generate_mcp_markdown.py`. diff --git a/pyproject.toml b/pyproject.toml index f3ff6419f..334f5a3eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -174,6 +174,7 @@ mcp-serve-http = { cmd = "python -c \"from airbyte.mcp.server import app; app.ru mcp-serve-sse = { cmd = "python -c \"from airbyte.mcp.server import app; app.run(transport='sse', host='127.0.0.1', port=8000)\"", help = "Start the MCP server with SSE transport" } mcp-inspect = { cmd = "fastmcp inspect airbyte/mcp/server.py:app", help = "Inspect MCP tools and resources (supports --tools, --health, etc.)" } mcp-tool-test = { cmd = "python -m fastmcp_extensions.utils.test_tool --app airbyte.mcp.server:app", help = "Test MCP tools directly with JSON arguments: poe mcp-tool-test ''" } +mcp-docs-md = { cmd = "python scripts/generate_mcp_markdown.py", help = "Generate Markdown docs for the MCP server into docs/mcp-generated/ (Docusaurus- and pdoc-compatible)" } # Claude Code MCP Testing Tasks [tool.poe.tasks.test-my-tools] diff --git a/scripts/generate_mcp_markdown.py b/scripts/generate_mcp_markdown.py new file mode 100755 index 000000000..8633ebafe --- /dev/null +++ b/scripts/generate_mcp_markdown.py @@ -0,0 +1,370 @@ +#!/usr/bin/env python3 +# Copyright (c) 2026 Airbyte, Inc., all rights reserved. +"""Generate Markdown documentation for the PyAirbyte MCP server. + +Runs `fastmcp inspect` against `airbyte.mcp.server:app` to obtain the full +FastMCP protocol surface (tools, resources, resource templates, prompts) as a +JSON report, then renders it into a small set of Markdown files under +`docs/mcp-generated/`. + +The output is designed to be: + +- **Docusaurus-hostable**: each file starts with YAML front-matter (`title`, + `sidebar_label`, `description`); the body is plain CommonMark + GFM tables + + `
` blocks for collapsible JSON schemas. No MDX-only + components are used. +- **`pdoc3`-compatible**: standard Markdown that renders correctly alongside + the existing `pdoc3` output in `docs/generated/` without any special config. +- **Deep-linkable**: every tool/resource/prompt name is an H2 with a stable + slug anchor (e.g. `tools.md#list_connectors`). + +Formatting is deliberately modeled on the +[`mcpdocs-gen`](https://github.com/smytsyk/mcpdocs) static HTML output — same +sections, same per-tool shape (description → parameters table → JSON schema) — +but emitted as Markdown rather than HTML so it can slot into an existing docs +site. + +Usage: + +``` +uv run python scripts/generate_mcp_markdown.py [--output docs/mcp-generated] +``` + +Or via the project's poe task: + +``` +poe mcp-docs-md +``` +""" + +from __future__ import annotations + +import argparse +import json +import shutil +import subprocess +import sys +import tempfile +from pathlib import Path +from typing import Any + + +DEFAULT_OUTPUT = Path("docs/mcp-generated") +DEFAULT_SERVER_SPEC = "airbyte/mcp/server.py:app" + + +def _run_fastmcp_inspect(server_spec: str, report_path: Path) -> dict[str, Any]: + """Invoke `fastmcp inspect` and return the parsed JSON report.""" + fastmcp_bin = shutil.which("fastmcp") + if fastmcp_bin is None: + raise RuntimeError( + "`fastmcp` CLI not found on PATH. Install project dev deps first " + "(e.g. `uv sync --group dev`) and re-run from the repo root." 
+ ) + subprocess.run( + [ + fastmcp_bin, + "inspect", + server_spec, + "--format", + "fastmcp", + "--output", + str(report_path), + ], + check=True, + ) + return json.loads(report_path.read_text()) + + +def _fmt_type(schema: dict[str, Any]) -> str: + """Render a JSON-schema fragment as a short, human-readable type string.""" + for key in ("anyOf", "oneOf"): + if key in schema: + return " | ".join(_fmt_type(s) for s in schema[key]) + if "enum" in schema: + return "enum(" + ", ".join(repr(v) for v in schema["enum"]) + ")" + t = schema.get("type") + if t == "array": + items = schema.get("items", {}) + return f"array<{_fmt_type(items)}>" if items else "array" + if isinstance(t, list): + return " | ".join(str(x) for x in t) + return str(t) if t else "any" + + +def _escape_table_cell(value: str) -> str: + """Make a string safe to embed in a single GFM table cell.""" + return value.replace("|", "\\|").replace("\n", " ").strip() + + +def _fmt_default(schema: dict[str, Any]) -> str: + """Render a schema's `default` value as a compact Markdown code span.""" + if "default" not in schema: + return "—" + default = schema["default"] + if default is None: + return "`null`" + return f"`{json.dumps(default)}`" + + +def _frontmatter(title: str, sidebar_label: str, description: str) -> str: + """Build a YAML front-matter block for a Docusaurus page.""" + esc_desc = description.replace("\n", " ").replace('"', '\\"').strip() + return ( + "---\n" + f"title: {title}\n" + f"sidebar_label: {sidebar_label}\n" + f'description: "{esc_desc}"\n' + "---\n\n" + ) + + +def _json_block(label: str, obj: Any) -> str: # noqa: ANN401 + """Render an object inside a collapsible `
` JSON code block.""" + return ( + f"
\n{label}\n\n" + "```json\n" + json.dumps(obj, indent=2) + "\n```\n\n
\n\n" + ) + + +def _render_parameters_table(input_schema: dict[str, Any]) -> str: + """Render a GFM parameters table for a tool's `input_schema`.""" + properties = input_schema.get("properties") or {} + if not properties: + return "_No parameters._\n\n" + required = set(input_schema.get("required") or []) + lines = [ + "| Name | Type | Required | Default | Description |", + "| --- | --- | --- | --- | --- |", + ] + for name, prop in properties.items(): + desc = _escape_table_cell(prop.get("description", "")) + # Union types contain literal `|` chars which break GFM table rendering + # even inside backticks in some parsers; escape defensively. + type_cell = _fmt_type(prop).replace("|", "\\|") + lines.append( + f"| `{name}` | `{type_cell}` | " + f"{'yes' if name in required else 'no'} | " + f"{_fmt_default(prop)} | {desc} |" + ) + return "\n".join(lines) + "\n\n" + + +def _render_tool(tool: dict[str, Any]) -> str: + """Render a single tool as a Markdown section.""" + name = tool["name"] + parts: list[str] = [f"## `{name}` {{#{name}}}\n\n"] + if description := tool.get("description"): + parts.append(description.strip() + "\n\n") + if tags := tool.get("tags"): + parts.append("**Tags:** " + ", ".join(f"`{t}`" for t in tags) + "\n\n") + parts.extend( + [ + "### Parameters\n\n", + _render_parameters_table(tool.get("input_schema") or {}), + ] + ) + if input_schema := tool.get("input_schema"): + parts.append(_json_block("Show input JSON schema", input_schema)) + if output_schema := tool.get("output_schema"): + parts.append(_json_block("Show output JSON schema", output_schema)) + return "".join(parts) + + +def _render_resource(resource: dict[str, Any]) -> str: + """Render a single resource as a Markdown section.""" + name = resource["name"] + parts: list[str] = [f"## `{name}` {{#{name}}}\n\n"] + if description := resource.get("description"): + parts.append(description.strip() + "\n\n") + meta_lines: list[str] = [] + if uri := resource.get("uri"): + meta_lines.append(f"- 
**URI:** `{uri}`") + if uri_template := resource.get("uri_template"): + meta_lines.append(f"- **URI template:** `{uri_template}`") + if mime := resource.get("mime_type"): + meta_lines.append(f"- **MIME type:** `{mime}`") + if tags := resource.get("tags"): + meta_lines.append("- **Tags:** " + ", ".join(f"`{t}`" for t in tags)) + if meta_lines: + parts.append("\n".join(meta_lines) + "\n\n") + return "".join(parts) + + +def _render_prompt(prompt: dict[str, Any]) -> str: + """Render a single prompt as a Markdown section.""" + name = prompt["name"] + parts: list[str] = [f"## `{name}` {{#{name}}}\n\n"] + if description := prompt.get("description"): + parts.append(description.strip() + "\n\n") + args = prompt.get("arguments") or [] + if args: + parts.extend( + [ + "### Arguments\n\n", + "| Name | Required | Description |\n| --- | --- | --- |\n", + ] + ) + for arg in args: + desc = _escape_table_cell(arg.get("description", "")) + parts.append( + f"| `{arg['name']}` | " f"{'yes' if arg.get('required') else 'no'} | " f"{desc} |\n" + ) + parts.append("\n") + else: + parts.append("_No arguments._\n\n") + return "".join(parts) + + +def _render_index(report: dict[str, Any]) -> str: + """Render the top-level overview page.""" + server = report.get("server") or {} + server_name = server.get("name", "mcp-server") + out = _frontmatter( + title=f"{server_name} — MCP server", + sidebar_label="Overview", + description=(server.get("instructions") or "").splitlines()[0] + or f"Auto-generated docs for the {server_name} MCP server.", + ) + out += f"# `{server_name}`\n\n" + if version := server.get("version"): + out += f"**Version:** `{version}` \n" + if proto := server.get("protocol_version") or server.get("fastmcp_version"): + out += f"**FastMCP version:** `{proto}` \n" + out += "\n" + if instructions := server.get("instructions"): + out += instructions.strip() + "\n\n" + out += "## Contents\n\n" + counts = { + "tools": len(report.get("tools") or []), + "resources": 
(len(report.get("resources") or []) + len(report.get("templates") or [])), + "prompts": len(report.get("prompts") or []), + } + out += ( + f"- [Tools]({'./tools'}) — {counts['tools']}\n" + f"- [Resources]({'./resources'}) — {counts['resources']}\n" + f"- [Prompts]({'./prompts'}) — {counts['prompts']}\n\n" + ) + out += ( + "> These pages are generated from the live `fastmcp inspect` report. " + "Regenerate with `poe mcp-docs-md`.\n" + ) + return out + + +def _render_tools_page(report: dict[str, Any]) -> str: + """Render the tools page.""" + tools = report.get("tools") or [] + out = _frontmatter( + title="Tools", + sidebar_label="Tools", + description=f"All {len(tools)} MCP tools exposed by this server.", + ) + out += "# Tools\n\n" + if not tools: + out += "_No tools are exposed by this server._\n" + return out + out += f"This server exposes **{len(tools)}** tool(s).\n\n" + out += "**Index:** " + out += ", ".join(f"[`{t['name']}`](#{t['name']})" for t in tools) + "\n\n" + for tool in tools: + out += _render_tool(tool) + return out + + +def _render_resources_page(report: dict[str, Any]) -> str: + """Render the resources + resource-templates page.""" + resources = report.get("resources") or [] + templates = report.get("templates") or [] + total = len(resources) + len(templates) + out = _frontmatter( + title="Resources", + sidebar_label="Resources", + description=f"All {total} MCP resource(s) and resource template(s).", + ) + out += "# Resources\n\n" + if not resources and not templates: + out += "_No resources or resource templates are exposed by this server._\n" + return out + if resources: + out += f"## Concrete resources ({len(resources)})\n\n" + for resource in resources: + out += _render_resource(resource) + if templates: + out += f"## Resource templates ({len(templates)})\n\n" + for template in templates: + out += _render_resource(template) + return out + + +def _render_prompts_page(report: dict[str, Any]) -> str: + """Render the prompts page.""" + prompts = 
report.get("prompts") or [] + out = _frontmatter( + title="Prompts", + sidebar_label="Prompts", + description=f"All {len(prompts)} MCP prompt(s).", + ) + out += "# Prompts\n\n" + if not prompts: + out += "_No prompts are exposed by this server._\n" + return out + out += f"This server exposes **{len(prompts)}** prompt(s).\n\n" + for prompt in prompts: + out += _render_prompt(prompt) + return out + + +def generate(server_spec: str, output: Path) -> None: + """Run `fastmcp inspect`, render Markdown, and write files to `output/`.""" + with tempfile.TemporaryDirectory() as tmp: + report_path = Path(tmp) / "mcp-inspect.json" + print(f"Running `fastmcp inspect {server_spec}`...") + report = _run_fastmcp_inspect(server_spec, report_path) + + if output.exists(): + shutil.rmtree(output) + output.mkdir(parents=True, exist_ok=True) + + pages: dict[str, str] = { + "index.md": _render_index(report), + "tools.md": _render_tools_page(report), + "resources.md": _render_resources_page(report), + "prompts.md": _render_prompts_page(report), + } + for name, content in pages.items(): + (output / name).write_text(content) + print(f" wrote {output / name}") + + print( + f"Done. {len(report.get('tools') or [])} tool(s), " + f"{len(report.get('resources') or []) + len(report.get('templates') or [])} " + f"resource(s), {len(report.get('prompts') or [])} prompt(s) documented." + ) + + +def main() -> int: + """CLI entrypoint for the Markdown MCP docs generator.""" + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--server-spec", + default=DEFAULT_SERVER_SPEC, + help=("FastMCP server spec to inspect, e.g. 
" f"'{DEFAULT_SERVER_SPEC}' (default)."), + ) + parser.add_argument( + "--output", + type=Path, + default=DEFAULT_OUTPUT, + help=f"Output directory for generated Markdown (default: {DEFAULT_OUTPUT}).", + ) + args = parser.parse_args() + try: + generate(server_spec=args.server_spec, output=args.output) + except (subprocess.CalledProcessError, RuntimeError) as ex: + print(f"MCP docs generation failed: {ex}", file=sys.stderr) + return 1 + return 0 + + +if __name__ == "__main__": + sys.exit(main()) From 214bec67f90076e25a565d3d62a65c064346baa1 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 17 Apr 2026 07:11:46 +0000 Subject: [PATCH 02/10] chore(mcp-docs-md): address CodeRabbit nits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - UTF-8 encoding on file I/O (Windows locale safety) - json.dumps(v) instead of repr(v) for enum values (JSON-shaped output) - Split 'FastMCP version' into 'Protocol version' + 'FastMCP version' lines - Fix `{'./tools'}` no-op f-string → explicit `./tools.md` links - Add minimal safety guard refusing to rmtree '/', HOME, or CWD --- scripts/generate_mcp_markdown.py | 53 +++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/scripts/generate_mcp_markdown.py b/scripts/generate_mcp_markdown.py index 8633ebafe..e9b077ae0 100755 --- a/scripts/generate_mcp_markdown.py +++ b/scripts/generate_mcp_markdown.py @@ -73,7 +73,7 @@ def _run_fastmcp_inspect(server_spec: str, report_path: Path) -> dict[str, Any]: ], check=True, ) - return json.loads(report_path.read_text()) + return json.loads(report_path.read_text(encoding="utf-8")) def _fmt_type(schema: dict[str, Any]) -> str: @@ -82,7 +82,7 @@ def _fmt_type(schema: dict[str, Any]) -> str: if key in schema: return " | ".join(_fmt_type(s) for s in schema[key]) if "enum" in schema: - return "enum(" + ", ".join(repr(v) for v in schema["enum"]) + ")" + return 
"enum(" + ", ".join(json.dumps(v) for v in schema["enum"]) + ")" t = schema.get("type") if t == "array": items = schema.get("items", {}) @@ -207,9 +207,8 @@ def _render_prompt(prompt: dict[str, Any]) -> str: ) for arg in args: desc = _escape_table_cell(arg.get("description", "")) - parts.append( - f"| `{arg['name']}` | " f"{'yes' if arg.get('required') else 'no'} | " f"{desc} |\n" - ) + required = "yes" if arg.get("required") else "no" + parts.append(f"| `{arg['name']}` | {required} | {desc} |\n") parts.append("\n") else: parts.append("_No arguments._\n\n") @@ -229,8 +228,10 @@ def _render_index(report: dict[str, Any]) -> str: out += f"# `{server_name}`\n\n" if version := server.get("version"): out += f"**Version:** `{version}` \n" - if proto := server.get("protocol_version") or server.get("fastmcp_version"): - out += f"**FastMCP version:** `{proto}` \n" + if proto := server.get("protocol_version"): + out += f"**Protocol version:** `{proto}` \n" + if fastmcp_version := server.get("fastmcp_version"): + out += f"**FastMCP version:** `{fastmcp_version}` \n" out += "\n" if instructions := server.get("instructions"): out += instructions.strip() + "\n\n" @@ -241,9 +242,9 @@ def _render_index(report: dict[str, Any]) -> str: "prompts": len(report.get("prompts") or []), } out += ( - f"- [Tools]({'./tools'}) — {counts['tools']}\n" - f"- [Resources]({'./resources'}) — {counts['resources']}\n" - f"- [Prompts]({'./prompts'}) — {counts['prompts']}\n\n" + f"- [Tools](./tools.md) — {counts['tools']}\n" + f"- [Resources](./resources.md) — {counts['resources']}\n" + f"- [Prompts](./prompts.md) — {counts['prompts']}\n\n" ) out += ( "> These pages are generated from the live `fastmcp inspect` report. " @@ -315,6 +316,30 @@ def _render_prompts_page(report: dict[str, Any]) -> str: return out +# Paths we refuse to `rmtree` even if the user passes them as --output, to +# avoid cases like `--output /` or `--output $HOME` accidentally nuking data. 
+_FORBIDDEN_OUTPUT_PATHS = frozenset( + { + Path("/"), + Path.home(), + Path.cwd(), + } +) + + +def _prepare_output_dir(output: Path) -> None: + """Reset (or create) an output directory, with a minimal safety guard.""" + resolved = output.resolve() + if resolved in {p.resolve() for p in _FORBIDDEN_OUTPUT_PATHS}: + raise RuntimeError( + f"Refusing to rmtree suspicious output path {resolved}. " + "Pass --output pointing at a dedicated subdirectory." + ) + if output.exists(): + shutil.rmtree(output) + output.mkdir(parents=True, exist_ok=True) + + def generate(server_spec: str, output: Path) -> None: """Run `fastmcp inspect`, render Markdown, and write files to `output/`.""" with tempfile.TemporaryDirectory() as tmp: @@ -322,9 +347,7 @@ def generate(server_spec: str, output: Path) -> None: print(f"Running `fastmcp inspect {server_spec}`...") report = _run_fastmcp_inspect(server_spec, report_path) - if output.exists(): - shutil.rmtree(output) - output.mkdir(parents=True, exist_ok=True) + _prepare_output_dir(output) pages: dict[str, str] = { "index.md": _render_index(report), @@ -333,7 +356,7 @@ def generate(server_spec: str, output: Path) -> None: "prompts.md": _render_prompts_page(report), } for name, content in pages.items(): - (output / name).write_text(content) + (output / name).write_text(content, encoding="utf-8") print(f" wrote {output / name}") print( @@ -349,7 +372,7 @@ def main() -> int: parser.add_argument( "--server-spec", default=DEFAULT_SERVER_SPEC, - help=("FastMCP server spec to inspect, e.g. " f"'{DEFAULT_SERVER_SPEC}' (default)."), + help=f"FastMCP server spec to inspect, e.g. 
'{DEFAULT_SERVER_SPEC}' (default).", ) parser.add_argument( "--output", From 62af4956ce79fc0c64f6c9064c8c5d7f7e14f8ea Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 17 Apr 2026 07:35:41 +0000 Subject: [PATCH 03/10] refactor(mcp-docs-md): group output by module + pdoc3 include --- airbyte/mcp/cloud.py | 5 +- airbyte/mcp/local.py | 5 +- airbyte/mcp/prompts.py | 2 + airbyte/mcp/registry.py | 5 +- scripts/generate_mcp_markdown.py | 325 +++++++++++++++++++++---------- 5 files changed, 239 insertions(+), 103 deletions(-) diff --git a/airbyte/mcp/cloud.py b/airbyte/mcp/cloud.py index 594bee0db..52859def9 100644 --- a/airbyte/mcp/cloud.py +++ b/airbyte/mcp/cloud.py @@ -1,5 +1,8 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. -"""Airbyte Cloud MCP operations.""" +"""Airbyte Cloud MCP operations. + +.. include:: ../../docs/mcp-generated/cloud.md +""" from pathlib import Path from typing import Annotated, Any, Literal, cast diff --git a/airbyte/mcp/local.py b/airbyte/mcp/local.py index d9bbb801f..4b27b121d 100644 --- a/airbyte/mcp/local.py +++ b/airbyte/mcp/local.py @@ -1,5 +1,8 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. -"""Local MCP operations.""" +"""Local MCP operations. + +.. include:: ../../docs/mcp-generated/local.md +""" import sys import traceback diff --git a/airbyte/mcp/prompts.py b/airbyte/mcp/prompts.py index 47993aef6..37767a895 100644 --- a/airbyte/mcp/prompts.py +++ b/airbyte/mcp/prompts.py @@ -3,6 +3,8 @@ This module defines prompts that can be invoked by MCP clients to perform common workflows. + +.. include:: ../../docs/mcp-generated/prompts.md """ from __future__ import annotations diff --git a/airbyte/mcp/registry.py b/airbyte/mcp/registry.py index 77b44f061..999c6b0e6 100644 --- a/airbyte/mcp/registry.py +++ b/airbyte/mcp/registry.py @@ -1,5 +1,8 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
-"""Airbyte Cloud MCP operations.""" +"""Airbyte connector registry MCP operations. + +.. include:: ../../docs/mcp-generated/registry.md +""" # Note: Deferred type evaluation must be avoided due to FastMCP/Pydantic needing # types to be available at import time for tool registration. diff --git a/scripts/generate_mcp_markdown.py b/scripts/generate_mcp_markdown.py index e9b077ae0..344e4f495 100755 --- a/scripts/generate_mcp_markdown.py +++ b/scripts/generate_mcp_markdown.py @@ -4,25 +4,51 @@ Runs `fastmcp inspect` against `airbyte.mcp.server:app` to obtain the full FastMCP protocol surface (tools, resources, resource templates, prompts) as a -JSON report, then renders it into a small set of Markdown files under -`docs/mcp-generated/`. +JSON report, then renders it into one Markdown file **per MCP module** under +`docs/mcp-generated/`, plus an `index.md` overview. + +The per-module grouping uses the `mcp_module` annotation that +`fastmcp_extensions.mcp_tool` attaches to every registered tool (derived from +the Python file the tool is defined in — e.g. tools in `airbyte/mcp/cloud.py` +get `mcp_module="cloud"`). Prompts and resources fall back to `meta.mcp_module` +when present, and otherwise to an import-based lookup against +`fastmcp_extensions.decorators._REGISTERED_*`; anything still unresolved lands +in `misc.md`. + +Inside each module file, content is grouped by primitive with L2 headings: + +``` +# airbyte.mcp.cloud + +## Tools +### `deploy_source_to_cloud` +... +## Prompts +### `some_prompt` +... +## Resources +### `some_resource` +... +``` The output is designed to be: +- **`pdoc`/`pdoc3`-includable**: each `.md` is a self-contained body + intended to be spliced into the corresponding Python module's docstring via + pdoc's `.. include::` directive, so the generated tool docs render inline on + the module's pdoc page. 
- **Docusaurus-hostable**: each file starts with YAML front-matter (`title`, `sidebar_label`, `description`); the body is plain CommonMark + GFM tables + `
` blocks for collapsible JSON schemas. No MDX-only components are used. -- **`pdoc3`-compatible**: standard Markdown that renders correctly alongside - the existing `pdoc3` output in `docs/generated/` without any special config. -- **Deep-linkable**: every tool/resource/prompt name is an H2 with a stable - slug anchor (e.g. `tools.md#list_connectors`). +- **Deep-linkable**: every tool/resource/prompt name is an H3 with a stable + slug anchor (e.g. `cloud.md#deploy_source_to_cloud`). -Formatting is deliberately modeled on the +Formatting is modeled on the [`mcpdocs-gen`](https://github.com/smytsyk/mcpdocs) static HTML output — same -sections, same per-tool shape (description → parameters table → JSON schema) — -but emitted as Markdown rather than HTML so it can slot into an existing docs -site. +per-tool shape (description → parameters table → JSON schema) — but emitted as +Markdown, one file per MCP module, so the output can slot into both +`pdoc`-rendered per-module pages and an external Docusaurus site. Usage: @@ -40,17 +66,20 @@ from __future__ import annotations import argparse +import importlib import json import shutil import subprocess import sys import tempfile +from collections import OrderedDict from pathlib import Path from typing import Any DEFAULT_OUTPUT = Path("docs/mcp-generated") DEFAULT_SERVER_SPEC = "airbyte/mcp/server.py:app" +MISC_MODULE = "misc" def _run_fastmcp_inspect(server_spec: str, report_path: Path) -> dict[str, Any]: @@ -76,6 +105,66 @@ def _run_fastmcp_inspect(server_spec: str, report_path: Path) -> dict[str, Any]: return json.loads(report_path.read_text(encoding="utf-8")) +def _resolve_extra_module_map(server_spec: str) -> dict[str, str]: + """Best-effort import-based lookup of `mcp_module` for prompts/resources. + + `fastmcp_extensions`'s `mcp_tool` decorator embeds `mcp_module` in the MCP + tool `annotations` dict, which the inspect JSON surfaces directly. 
But + `mcp_prompt` and `mcp_resource` store `mcp_module` on the library's + internal `_REGISTERED_*` lists only — it is not re-emitted as an MCP + annotation, so it doesn't appear in the inspect JSON. + + To still recover that information, we import the server module and read + those internal lists. If that fails (not a `fastmcp_extensions`-based + server, import errors, etc.), we silently return an empty map and the + caller falls back to `MISC_MODULE`. + + Returns a map of `name/uri -> mcp_module` covering both prompts and + resources. + """ + file_part = server_spec.split(":", 1)[0] + module_name = file_part.removesuffix(".py").replace("/", ".") + mapping: dict[str, str] = {} + try: + importlib.import_module(module_name) + # Import private lists from fastmcp_extensions: these are the only + # place `mcp_module` is recorded for prompts/resources, so we accept + # the private-name coupling. + from fastmcp_extensions.decorators import ( # noqa: PLC0415 + _REGISTERED_PROMPTS, # noqa: PLC2701 + _REGISTERED_RESOURCES, # noqa: PLC2701 + ) + except Exception: + return mapping + for _fn, ann in _REGISTERED_PROMPTS: + if name := ann.get("name"): + mapping[name] = ann.get("mcp_module") or MISC_MODULE + for _fn, ann in _REGISTERED_RESOURCES: + mcp_module = ann.get("mcp_module") or MISC_MODULE + if uri := ann.get("uri"): + mapping[uri] = mcp_module + # FastMCP exposes the URI stem as the resource `name` in inspect + # output; index by that too so lookup by either key works. 
+ mapping[uri.rsplit("/", 1)[-1]] = mcp_module + return mapping + + +def _get_module(item: dict[str, Any], fallback_map: dict[str, str]) -> str: + """Extract the `mcp_module` for a tool / resource / prompt.""" + annotations = item.get("annotations") or {} + if mcp_module := annotations.get("mcp_module"): + return str(mcp_module) + meta = item.get("meta") or {} + if mcp_module := meta.get("mcp_module"): + return str(mcp_module) + name = item.get("name") + uri = item.get("uri") or item.get("uri_template") + for key in (name, uri): + if key and key in fallback_map: + return fallback_map[key] + return MISC_MODULE + + def _fmt_type(schema: dict[str, Any]) -> str: """Render a JSON-schema fragment as a short, human-readable type string.""" for key in ("anyOf", "oneOf"): @@ -151,16 +240,18 @@ def _render_parameters_table(input_schema: dict[str, Any]) -> str: def _render_tool(tool: dict[str, Any]) -> str: - """Render a single tool as a Markdown section.""" + """Render a single tool as L3 under its module's `## Tools` section.""" name = tool["name"] - parts: list[str] = [f"## `{name}` {{#{name}}}\n\n"] + # HTML anchor + heading (instead of Pandoc `{#...}` attr syntax, which + # renders as literal text in pdoc3's markdown processor). 
+ parts: list[str] = [f'\n\n### `{name}`\n\n'] if description := tool.get("description"): parts.append(description.strip() + "\n\n") if tags := tool.get("tags"): parts.append("**Tags:** " + ", ".join(f"`{t}`" for t in tags) + "\n\n") parts.extend( [ - "### Parameters\n\n", + "#### Parameters\n\n", _render_parameters_table(tool.get("input_schema") or {}), ] ) @@ -172,9 +263,9 @@ def _render_tool(tool: dict[str, Any]) -> str: def _render_resource(resource: dict[str, Any]) -> str: - """Render a single resource as a Markdown section.""" + """Render a single resource as L3 under its module's `## Resources` section.""" name = resource["name"] - parts: list[str] = [f"## `{name}` {{#{name}}}\n\n"] + parts: list[str] = [f"### `{name}` {{#{name}}}\n\n"] if description := resource.get("description"): parts.append(description.strip() + "\n\n") meta_lines: list[str] = [] @@ -192,16 +283,16 @@ def _render_resource(resource: dict[str, Any]) -> str: def _render_prompt(prompt: dict[str, Any]) -> str: - """Render a single prompt as a Markdown section.""" + """Render a single prompt as L3 under its module's `## Prompts` section.""" name = prompt["name"] - parts: list[str] = [f"## `{name}` {{#{name}}}\n\n"] + parts: list[str] = [f"### `{name}` {{#{name}}}\n\n"] if description := prompt.get("description"): parts.append(description.strip() + "\n\n") args = prompt.get("arguments") or [] if args: parts.extend( [ - "### Arguments\n\n", + "#### Arguments\n\n", "| Name | Required | Description |\n| --- | --- | --- |\n", ] ) @@ -215,7 +306,94 @@ def _render_prompt(prompt: dict[str, Any]) -> str: return "".join(parts) -def _render_index(report: dict[str, Any]) -> str: +# ----------------------------------------------------------------------------- +# Bucketing + per-module pages +# ----------------------------------------------------------------------------- + + +class _ModuleBucket: + """Accumulator for a single mcp_module's tools / prompts / resources.""" + + def __init__(self, name: str) -> 
None: + """Create an empty bucket for the given mcp_module name.""" + self.name = name + self.tools: list[dict[str, Any]] = [] + self.prompts: list[dict[str, Any]] = [] + self.resources: list[dict[str, Any]] = [] # concrete + templates + + @property + def total(self) -> int: + """Total count of MCP primitives in this bucket.""" + return len(self.tools) + len(self.prompts) + len(self.resources) + + +def _bucket_by_module( + report: dict[str, Any], + fallback_map: dict[str, str], +) -> OrderedDict[str, _ModuleBucket]: + """Group report items by mcp_module, preserving first-seen order.""" + buckets: OrderedDict[str, _ModuleBucket] = OrderedDict() + + def get(mcp_module: str) -> _ModuleBucket: + if mcp_module not in buckets: + buckets[mcp_module] = _ModuleBucket(mcp_module) + return buckets[mcp_module] + + for tool in report.get("tools") or []: + get(_get_module(tool, fallback_map)).tools.append(tool) + for prompt in report.get("prompts") or []: + get(_get_module(prompt, fallback_map)).prompts.append(prompt) + for resource in report.get("resources") or []: + get(_get_module(resource, fallback_map)).resources.append(resource) + for template in report.get("templates") or []: + get(_get_module(template, fallback_map)).resources.append(template) + + return buckets + + +def _render_module_page(bucket: _ModuleBucket, server_name: str) -> str: + """Render a single `.md` page with L2 Tools/Prompts/Resources sections. + + No YAML front-matter is emitted on module pages: these files are consumed + by pdoc3 via the `.. include::` directive (pdoc's Markdown renderer does + not strip front-matter and would emit it as body text). Docusaurus infers + the page title from the first H1, which we always emit here. 
+ """ + parts: list[str] = [ + f"# `{bucket.name}` module\n\n", + ( + f"MCP primitives registered by the `{bucket.name}` module " + f"of the `{server_name}` server: " + f"**{len(bucket.tools)}** tool(s), " + f"**{len(bucket.prompts)}** prompt(s), " + f"**{len(bucket.resources)}** resource(s).\n\n" + ), + ] + if bucket.tools: + parts.extend( + [ + f"## Tools ({len(bucket.tools)})\n\n", + ( + "**Index:** " + + ", ".join(f"[`{t['name']}`](#{t['name']})" for t in bucket.tools) + + "\n\n" + ), + ] + ) + parts.extend(_render_tool(tool) for tool in bucket.tools) + if bucket.prompts: + parts.append(f"## Prompts ({len(bucket.prompts)})\n\n") + parts.extend(_render_prompt(prompt) for prompt in bucket.prompts) + if bucket.resources: + parts.append(f"## Resources ({len(bucket.resources)})\n\n") + parts.extend(_render_resource(resource) for resource in bucket.resources) + return "".join(parts) + + +def _render_index( + report: dict[str, Any], + buckets: OrderedDict[str, _ModuleBucket], +) -> str: """Render the top-level overview page.""" server = report.get("server") or {} server_name = server.get("name", "mcp-server") @@ -235,17 +413,24 @@ def _render_index(report: dict[str, Any]) -> str: out += "\n" if instructions := server.get("instructions"): out += instructions.strip() + "\n\n" - out += "## Contents\n\n" - counts = { - "tools": len(report.get("tools") or []), - "resources": (len(report.get("resources") or []) + len(report.get("templates") or [])), - "prompts": len(report.get("prompts") or []), - } + total_tools = sum(len(b.tools) for b in buckets.values()) + total_prompts = sum(len(b.prompts) for b in buckets.values()) + total_resources = sum(len(b.resources) for b in buckets.values()) out += ( - f"- [Tools](./tools.md) — {counts['tools']}\n" - f"- [Resources](./resources.md) — {counts['resources']}\n" - f"- [Prompts](./prompts.md) — {counts['prompts']}\n\n" + "## Totals\n\n" + f"- **Tools:** {total_tools}\n" + f"- **Prompts:** {total_prompts}\n" + f"- **Resources:** 
{total_resources}\n\n" ) + out += "## Modules\n\n" + out += "| Module | Tools | Prompts | Resources |\n" + out += "| --- | ---: | ---: | ---: |\n" + for name, bucket in buckets.items(): + out += ( + f"| [`{name}`](./{name}.md) | {len(bucket.tools)} | " + f"{len(bucket.prompts)} | {len(bucket.resources)} |\n" + ) + out += "\n" out += ( "> These pages are generated from the live `fastmcp inspect` report. " "Regenerate with `poe mcp-docs-md`.\n" @@ -253,69 +438,6 @@ def _render_index(report: dict[str, Any]) -> str: return out -def _render_tools_page(report: dict[str, Any]) -> str: - """Render the tools page.""" - tools = report.get("tools") or [] - out = _frontmatter( - title="Tools", - sidebar_label="Tools", - description=f"All {len(tools)} MCP tools exposed by this server.", - ) - out += "# Tools\n\n" - if not tools: - out += "_No tools are exposed by this server._\n" - return out - out += f"This server exposes **{len(tools)}** tool(s).\n\n" - out += "**Index:** " - out += ", ".join(f"[`{t['name']}`](#{t['name']})" for t in tools) + "\n\n" - for tool in tools: - out += _render_tool(tool) - return out - - -def _render_resources_page(report: dict[str, Any]) -> str: - """Render the resources + resource-templates page.""" - resources = report.get("resources") or [] - templates = report.get("templates") or [] - total = len(resources) + len(templates) - out = _frontmatter( - title="Resources", - sidebar_label="Resources", - description=f"All {total} MCP resource(s) and resource template(s).", - ) - out += "# Resources\n\n" - if not resources and not templates: - out += "_No resources or resource templates are exposed by this server._\n" - return out - if resources: - out += f"## Concrete resources ({len(resources)})\n\n" - for resource in resources: - out += _render_resource(resource) - if templates: - out += f"## Resource templates ({len(templates)})\n\n" - for template in templates: - out += _render_resource(template) - return out - - -def _render_prompts_page(report: 
dict[str, Any]) -> str: - """Render the prompts page.""" - prompts = report.get("prompts") or [] - out = _frontmatter( - title="Prompts", - sidebar_label="Prompts", - description=f"All {len(prompts)} MCP prompt(s).", - ) - out += "# Prompts\n\n" - if not prompts: - out += "_No prompts are exposed by this server._\n" - return out - out += f"This server exposes **{len(prompts)}** prompt(s).\n\n" - for prompt in prompts: - out += _render_prompt(prompt) - return out - - # Paths we refuse to `rmtree` even if the user passes them as --output, to # avoid cases like `--output /` or `--output $HOME` accidentally nuking data. _FORBIDDEN_OUTPUT_PATHS = frozenset( @@ -347,22 +469,25 @@ def generate(server_spec: str, output: Path) -> None: print(f"Running `fastmcp inspect {server_spec}`...") report = _run_fastmcp_inspect(server_spec, report_path) + fallback_map = _resolve_extra_module_map(server_spec) + buckets = _bucket_by_module(report, fallback_map) + _prepare_output_dir(output) - pages: dict[str, str] = { - "index.md": _render_index(report), - "tools.md": _render_tools_page(report), - "resources.md": _render_resources_page(report), - "prompts.md": _render_prompts_page(report), - } + server_name = (report.get("server") or {}).get("name", "mcp-server") + pages: dict[str, str] = {"index.md": _render_index(report, buckets)} + for name, bucket in buckets.items(): + pages[f"{name}.md"] = _render_module_page(bucket, server_name) + for name, content in pages.items(): (output / name).write_text(content, encoding="utf-8") print(f" wrote {output / name}") print( - f"Done. {len(report.get('tools') or [])} tool(s), " - f"{len(report.get('resources') or []) + len(report.get('templates') or [])} " - f"resource(s), {len(report.get('prompts') or [])} prompt(s) documented." + f"Done. 
{len(buckets)} module(s) documented — " + f"{sum(len(b.tools) for b in buckets.values())} tool(s), " + f"{sum(len(b.resources) for b in buckets.values())} resource(s), " + f"{sum(len(b.prompts) for b in buckets.values())} prompt(s)." ) From 20e22a45c47b964ed6bf3523dc6550ecf0591628 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 17 Apr 2026 08:13:54 +0000 Subject: [PATCH 04/10] fix(mcp-docs-md): plain-text headings + __all__=[] + pdoc toc depth=3 - Strip backticks from H1/H2/H3 in generated markdown; pdoc's TOC extractor was emitting an unbalanced tag in the sidebar that leaked through the page as monospace rendering. - Add __all__ = [] to airbyte/mcp/{cloud,local,registry,prompts}.py so pdoc hides the redundant Python-side tool declarations and uses the markdown include as the single source of truth on the page. - Bump pdoc markdown toc depth from 2 to 3 so per-tool H3 anchors show up in the left-nav. --- airbyte/mcp/cloud.py | 6 ++++++ airbyte/mcp/local.py | 6 ++++++ airbyte/mcp/prompts.py | 7 +++++++ airbyte/mcp/registry.py | 6 ++++++ docs/generate.py | 9 +++++++++ scripts/generate_mcp_markdown.py | 16 ++++++++++------ 6 files changed, 44 insertions(+), 6 deletions(-) diff --git a/airbyte/mcp/cloud.py b/airbyte/mcp/cloud.py index 52859def9..f8bb3bb7e 100644 --- a/airbyte/mcp/cloud.py +++ b/airbyte/mcp/cloud.py @@ -4,6 +4,12 @@ .. include:: ../../docs/mcp-generated/cloud.md """ +# No public Python API — MCP primitives are registered via decorators and +# documented via the generated Markdown include above. Setting `__all__` to an +# empty list tells pdoc (and other doc tools) not to surface the individual +# tool / helper definitions as a redundant "API Documentation" list. 
+__all__: list[str] = [] + from pathlib import Path from typing import Annotated, Any, Literal, cast diff --git a/airbyte/mcp/local.py b/airbyte/mcp/local.py index 4b27b121d..1f8ac752d 100644 --- a/airbyte/mcp/local.py +++ b/airbyte/mcp/local.py @@ -4,6 +4,12 @@ .. include:: ../../docs/mcp-generated/local.md """ +# No public Python API — MCP primitives are registered via decorators and +# documented via the generated Markdown include above. Setting `__all__` to an +# empty list tells pdoc (and other doc tools) not to surface the individual +# tool / helper definitions as a redundant "API Documentation" list. +__all__: list[str] = [] + import sys import traceback from itertools import islice diff --git a/airbyte/mcp/prompts.py b/airbyte/mcp/prompts.py index 37767a895..449f25f26 100644 --- a/airbyte/mcp/prompts.py +++ b/airbyte/mcp/prompts.py @@ -15,6 +15,13 @@ from pydantic import Field +# No public Python API — MCP primitives are registered via decorators and +# documented via the generated Markdown include above. Setting `__all__` to an +# empty list tells pdoc (and other doc tools) not to surface the individual +# tool / helper definitions as a redundant "API Documentation" list. +__all__: list[str] = [] + + if TYPE_CHECKING: from fastmcp import FastMCP diff --git a/airbyte/mcp/registry.py b/airbyte/mcp/registry.py index 999c6b0e6..e8ec451b4 100644 --- a/airbyte/mcp/registry.py +++ b/airbyte/mcp/registry.py @@ -4,6 +4,12 @@ .. include:: ../../docs/mcp-generated/registry.md """ +# No public Python API — MCP primitives are registered via decorators and +# documented via the generated Markdown include above. Setting `__all__` to an +# empty list tells pdoc (and other doc tools) not to surface the individual +# tool / helper definitions as a redundant "API Documentation" list. +__all__: list[str] = [] + # Note: Deferred type evaluation must be avoided due to FastMCP/Pydantic needing # types to be available at import time for tool registration. 
import contextlib diff --git a/docs/generate.py b/docs/generate.py index a3bfa91b9..d9d530a28 100755 --- a/docs/generate.py +++ b/docs/generate.py @@ -14,6 +14,7 @@ import shutil import pdoc +import pdoc.render_helpers def run() -> None: @@ -24,6 +25,14 @@ def run() -> None: if pathlib.Path("docs/generated").exists(): shutil.rmtree("docs/generated") + # pdoc's default sidebar TOC depth is 2 (H1 + H2 only), which hides the + # per-tool H3 anchors produced by our MCP Markdown generator. Bump to 3 so + # individual tools / prompts / resources show up in the left nav. This + # monkey-patches the module-level `markdown_extensions` dict because pdoc + # 16's `configure()` does not expose markdown extension options. + # pyrefly: ignore[unsupported-operation] + pdoc.render_helpers.markdown_extensions["toc"] = {"depth": 3} + pdoc.render.configure( template_directory=pathlib.Path("docs/templates"), show_source=True, diff --git a/scripts/generate_mcp_markdown.py b/scripts/generate_mcp_markdown.py index 344e4f495..f94c646b0 100755 --- a/scripts/generate_mcp_markdown.py +++ b/scripts/generate_mcp_markdown.py @@ -242,9 +242,10 @@ def _render_parameters_table(input_schema: dict[str, Any]) -> str: def _render_tool(tool: dict[str, Any]) -> str: """Render a single tool as L3 under its module's `## Tools` section.""" name = tool["name"] - # HTML anchor + heading (instead of Pandoc `{#...}` attr syntax, which - # renders as literal text in pdoc3's markdown processor). - parts: list[str] = [f'\n\n### `{name}`\n\n'] + # Plain text in the heading (no backticks) so pdoc's TOC extractor + # produces a clean sidebar nav entry. The HTML anchor above the heading + # is what we deep-link to. 
+ parts: list[str] = [f'\n\n### {name}\n\n'] if description := tool.get("description"): parts.append(description.strip() + "\n\n") if tags := tool.get("tags"): @@ -265,7 +266,7 @@ def _render_tool(tool: dict[str, Any]) -> str: def _render_resource(resource: dict[str, Any]) -> str: """Render a single resource as L3 under its module's `## Resources` section.""" name = resource["name"] - parts: list[str] = [f"### `{name}` {{#{name}}}\n\n"] + parts: list[str] = [f'\n\n### {name}\n\n'] if description := resource.get("description"): parts.append(description.strip() + "\n\n") meta_lines: list[str] = [] @@ -285,7 +286,7 @@ def _render_resource(resource: dict[str, Any]) -> str: def _render_prompt(prompt: dict[str, Any]) -> str: """Render a single prompt as L3 under its module's `## Prompts` section.""" name = prompt["name"] - parts: list[str] = [f"### `{name}` {{#{name}}}\n\n"] + parts: list[str] = [f'\n\n### {name}\n\n'] if description := prompt.get("description"): parts.append(description.strip() + "\n\n") args = prompt.get("arguments") or [] @@ -359,8 +360,11 @@ def _render_module_page(bucket: _ModuleBucket, server_name: str) -> str: not strip front-matter and would emit it as body text). Docusaurus infers the page title from the first H1, which we always emit here. """ + # Headings are plain text (no backticks) so pdoc's TOC extractor yields + # clean nav entries; cosmetic backticks inside headings produced + # unbalanced `` tags in the generated TOC HTML. 
parts: list[str] = [ - f"# `{bucket.name}` module\n\n", + f"# {bucket.name} module\n\n", ( f"MCP primitives registered by the `{bucket.name}` module " f"of the `{server_name}` server: " From dbd39e7a52615896cf33af1a2f5ae34d7171bb02 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 17 Apr 2026 08:41:16 +0000 Subject: [PATCH 05/10] fix(mcp-docs-md): indent nested TOC levels, drop inline tool index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - custom.css: progressively indent H3-and-deeper entries in pdoc's left sidebar TOC so per-tool anchors visually nest under the 'Tools (N)' H2 and the ' module' H1. pdoc's default layout.css uses a single indent step for all non-top-level entries, which made H2 and H3 render at the same depth. - generate_mcp_markdown.py: drop the inline 'Index: tool_a, tool_b, …' row from module pages. The left nav now lists every tool under its section, so the inline list was redundant. --- docs/templates/custom.css | 16 ++++++++++++++++ scripts/generate_mcp_markdown.py | 14 ++++---------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/docs/templates/custom.css b/docs/templates/custom.css index 541900423..17a1d7d0f 100644 --- a/docs/templates/custom.css +++ b/docs/templates/custom.css @@ -174,6 +174,22 @@ nav a:hover { color: var(--link-hover) !important; } +/* + * Progressively indent nested TOC levels in the sidebar. + * + * pdoc's default layout.css indents *all* non-top-level nav items by a single + * (pad + indent) step, which makes H2 and H3 entries render at the same visual + * depth. When the generated MCP Markdown uses H3 headings per-tool nested + * under an H2 "Tools" heading, we want the tool names to appear visibly + * nested under the section heading in the left nav. 
+ */ +nav.pdoc > div > ul > li > ul > li > ul > li > a { + padding-left: calc(var(--pad) + (var(--indent) * 2)) !important; +} +nav.pdoc > div > ul > li > ul > li > ul > li > ul > li > a { + padding-left: calc(var(--pad) + (var(--indent) * 3)) !important; +} + /* Style badges and labels */ .badge { background-color: var(--color-green-40); diff --git a/scripts/generate_mcp_markdown.py b/scripts/generate_mcp_markdown.py index f94c646b0..80a31580e 100755 --- a/scripts/generate_mcp_markdown.py +++ b/scripts/generate_mcp_markdown.py @@ -374,16 +374,10 @@ def _render_module_page(bucket: _ModuleBucket, server_name: str) -> str: ), ] if bucket.tools: - parts.extend( - [ - f"## Tools ({len(bucket.tools)})\n\n", - ( - "**Index:** " - + ", ".join(f"[`{t['name']}`](#{t['name']})" for t in bucket.tools) - + "\n\n" - ), - ] - ) + # The left-nav sidebar already lists every tool under this H2 via the + # TOC, so we intentionally omit the inline "Index: …" row that we used + # to emit here. + parts.append(f"## Tools ({len(bucket.tools)})\n\n") parts.extend(_render_tool(tool) for tool in bucket.tools) if bucket.prompts: parts.append(f"## Prompts ({len(bucket.prompts)})\n\n") From fcababf7fad5656451dccaf8fb27d2676bca8e2d Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 17 Apr 2026 08:47:31 +0000 Subject: [PATCH 06/10] fix(mcp-docs-md): address Copilot + Devin review comments - scripts/generate_mcp_markdown.py: refresh module docstring to match current behavior (per-module output, no front-matter on module pages, DEFAULT_SERVER_SPEC is a .py path not a dotted module). - scripts/generate_mcp_markdown.py: guard _render_index against empty instructions (splitlines()[0] raised IndexError). - scripts/generate_mcp_markdown.py: tighten _prepare_output_dir to require --output to be strictly inside the current working directory (rejects /, ~, .., and arbitrary absolute paths outside the repo). 
- docs/generate.py: regenerate docs/mcp-generated/ before pdoc so .. include:: directives resolve on a clean checkout (docs/mcp-generated is git-ignored). Falls back to a warning if generation fails. - docs/CONTRIBUTING.md: describe actual per-module output layout (index.md + cloud/local/registry/prompts/misc) and deep-link shape. --- docs/CONTRIBUTING.md | 41 +++++++++++-------- docs/generate.py | 48 ++++++++++++++++++++++ scripts/generate_mcp_markdown.py | 69 ++++++++++++++++++-------------- 3 files changed, 110 insertions(+), 48 deletions(-) diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index ab7619dec..0455e0c17 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -146,26 +146,33 @@ poe mcp-inspect # Show all available MCP tools and their schemas ### Generating Markdown docs for the MCP Server -The repo ships a small script that introspects the MCP server via -`fastmcp inspect` and emits a Markdown documentation site under -`docs/mcp-generated/` (git-ignored). The output is designed to be both -Docusaurus-hostable and `pdoc`-compatible — plain CommonMark with YAML -front-matter, no MDX-only components. +The repo ships a small script (`scripts/generate_mcp_markdown.py`) that +introspects the MCP server via `fastmcp inspect` and emits a Markdown +documentation site under `docs/mcp-generated/` (git-ignored). The output is +plain CommonMark with no MDX-only components, so it is both Docusaurus-hostable +and consumable by `pdoc` — the four `airbyte.mcp.{cloud,local,registry,prompts}` +modules pull their respective generated file in via pdoc's `.. include::` +directive, so `poe docs-generate` surfaces the generated tool docs on each +module's pdoc page alongside the regular `docs/generated/` output. 
```bash uv sync --group dev poe mcp-docs-md ``` -Four files are produced: - -- `index.md` — server overview (name, version, instructions, counts) -- `tools.md` — one section per tool with a parameters table and collapsible - input/output JSON schemas -- `resources.md` — concrete resources + resource templates -- `prompts.md` — prompts and their arguments - -Each tool/resource/prompt has a stable slug anchor (e.g. -`tools.md#list_connectors`) so the pages can be deep-linked from Slack, issues, -or other docs. Regenerate after any change to MCP tool signatures, descriptions, -or schemas. The underlying script is at `scripts/generate_mcp_markdown.py`. +One Markdown file is produced per MCP module, plus an `index.md`. For the +PyAirbyte server that is: + +- `index.md` — server overview (name, version, instructions, totals, module table) +- `cloud.md` — tools registered by `airbyte.mcp.cloud` +- `local.md` — tools registered by `airbyte.mcp.local` +- `registry.md` — tools registered by `airbyte.mcp.registry` +- `prompts.md` — prompts registered by `airbyte.mcp.prompts` +- `misc.md` — anything without an `mcp_module` annotation (currently just the + `server_info` resource) + +Inside each module page, primitives are grouped by kind (`## Tools`, +`## Prompts`, `## Resources`), and each primitive has an HTML anchor +(``) above its H3 so links like +`cloud.md#deploy_source_to_cloud` resolve in both pdoc and Docusaurus. +Regenerate after any change to MCP tool signatures, descriptions, or schemas. diff --git a/docs/generate.py b/docs/generate.py index d9d530a28..6820f47de 100755 --- a/docs/generate.py +++ b/docs/generate.py @@ -12,15 +12,63 @@ import pathlib import shutil +import sys import pdoc import pdoc.render_helpers +def _regenerate_mcp_markdown() -> None: + """Regenerate `docs/mcp-generated/` before pdoc runs. + + The `airbyte.mcp.{cloud,local,registry,prompts}` modules pull the + per-module Markdown files from `docs/mcp-generated/` via pdoc's + `.. 
include::` directive. That directory is git-ignored, so on a clean + checkout pdoc would fail to resolve the include unless we regenerate it + here. Running the generator from inside `docs-generate` makes the full + docs build reproducible from a fresh clone (and matches the standalone + `poe mcp-docs-md` task). + + If generation fails (e.g. `fastmcp` is not installed, or the MCP server + import fails), we print a warning and continue: pdoc will still build, + and the include directive will just surface the missing file. + """ + script = pathlib.Path(__file__).parent.parent / "scripts" / "generate_mcp_markdown.py" + if not script.exists(): + print(f"[docs-generate] MCP markdown generator not found at {script}; skipping.") + return + # Import-and-call rather than subprocess so we share the current venv and + # surface tracebacks directly. The generator resolves paths relative to + # cwd, which `poe docs-generate` runs from the repo root. + sys.path.insert(0, str(script.parent)) + try: + from generate_mcp_markdown import ( # noqa: PLC0415 + DEFAULT_OUTPUT, + DEFAULT_SERVER_SPEC, + generate, + ) + + print("[docs-generate] Regenerating docs/mcp-generated/ ...") + generate(server_spec=DEFAULT_SERVER_SPEC, output=DEFAULT_OUTPUT) + except Exception as ex: + print( + f"[docs-generate] WARNING: failed to regenerate MCP Markdown docs: {ex}. " + "pdoc will continue, but module pages may show missing include warnings.", + file=sys.stderr, + ) + finally: + sys.path.pop(0) + + def run() -> None: """Generate docs for all public modules in PyAirbyte and save them to docs/generated.""" public_modules = ["airbyte", "airbyte/cli/pyab.py"] + # Regenerate MCP Markdown first so the `.. include::` directives in the + # MCP module docstrings resolve on a clean checkout (docs/mcp-generated/ + # is git-ignored). 
+ _regenerate_mcp_markdown() + # recursively delete the docs/generated folder if it exists if pathlib.Path("docs/generated").exists(): shutil.rmtree("docs/generated") diff --git a/scripts/generate_mcp_markdown.py b/scripts/generate_mcp_markdown.py index 80a31580e..35bd99db6 100755 --- a/scripts/generate_mcp_markdown.py +++ b/scripts/generate_mcp_markdown.py @@ -2,9 +2,10 @@ # Copyright (c) 2026 Airbyte, Inc., all rights reserved. """Generate Markdown documentation for the PyAirbyte MCP server. -Runs `fastmcp inspect` against `airbyte.mcp.server:app` to obtain the full -FastMCP protocol surface (tools, resources, resource templates, prompts) as a -JSON report, then renders it into one Markdown file **per MCP module** under +Runs `fastmcp inspect` against the default `airbyte/mcp/server.py:app` spec +(override with `--server-spec`) to obtain the full FastMCP protocol surface +(tools, resources, resource templates, prompts) as a JSON report, then +renders it into one Markdown file **per MCP module** under `docs/mcp-generated/`, plus an `index.md` overview. The per-module grouping uses the `mcp_module` annotation that @@ -18,16 +19,16 @@ Inside each module file, content is grouped by primitive with L2 headings: ``` -# airbyte.mcp.cloud +# cloud module -## Tools -### `deploy_source_to_cloud` +## Tools (35) +### deploy_source_to_cloud ... -## Prompts -### `some_prompt` +## Prompts (N) +### some_prompt ... -## Resources -### `some_resource` +## Resources (N) +### some_resource ... ``` @@ -36,13 +37,17 @@ - **`pdoc`/`pdoc3`-includable**: each `.md` is a self-contained body intended to be spliced into the corresponding Python module's docstring via pdoc's `.. include::` directive, so the generated tool docs render inline on - the module's pdoc page. -- **Docusaurus-hostable**: each file starts with YAML front-matter (`title`, - `sidebar_label`, `description`); the body is plain CommonMark + GFM tables + + the module's pdoc page. 
Per-module pages intentionally emit **no** YAML + front-matter (pdoc's Markdown renderer would surface it as body text); + only `index.md` carries front-matter. +- **Docusaurus-hostable**: `index.md` starts with YAML front-matter (`title`, + `sidebar_label`, `description`); module pages rely on Docusaurus' + first-H1-as-title inference. The body is plain CommonMark + GFM tables + `
` blocks for collapsible JSON schemas. No MDX-only components are used. -- **Deep-linkable**: every tool/resource/prompt name is an H3 with a stable - slug anchor (e.g. `cloud.md#deploy_source_to_cloud`). +- **Deep-linkable**: every tool/resource/prompt name gets an HTML anchor + (``) above its H3, so links like + `cloud.md#deploy_source_to_cloud` resolve in both pdoc and Docusaurus. Formatting is modeled on the [`mcpdocs-gen`](https://github.com/smytsyk/mcpdocs) static HTML output — same @@ -395,10 +400,12 @@ def _render_index( """Render the top-level overview page.""" server = report.get("server") or {} server_name = server.get("name", "mcp-server") + # `splitlines()` on an empty string returns `[]`, so we can't index [0]. + first_instruction_line = next(iter((server.get("instructions") or "").splitlines()), "") out = _frontmatter( title=f"{server_name} — MCP server", sidebar_label="Overview", - description=(server.get("instructions") or "").splitlines()[0] + description=first_instruction_line or f"Auto-generated docs for the {server_name} MCP server.", ) out += f"# `{server_name}`\n\n" @@ -436,24 +443,24 @@ def _render_index( return out -# Paths we refuse to `rmtree` even if the user passes them as --output, to -# avoid cases like `--output /` or `--output $HOME` accidentally nuking data. -_FORBIDDEN_OUTPUT_PATHS = frozenset( - { - Path("/"), - Path.home(), - Path.cwd(), - } -) - - def _prepare_output_dir(output: Path) -> None: - """Reset (or create) an output directory, with a minimal safety guard.""" + """Reset (or create) an output directory, with a strict safety guard. + + The script unconditionally `rmtree`s `output` before regenerating, so we + need to be careful about what callers can point `--output` at. 
We require + the resolved output path to live **strictly inside** the current working + directory (typically the repo root) — this rules out `/`, `$HOME`, + `--output ..`, and any absolute path outside the repo, while still + letting the default `docs/mcp-generated/` work. The cwd itself is also + rejected so we never nuke the whole repo. + """ resolved = output.resolve() - if resolved in {p.resolve() for p in _FORBIDDEN_OUTPUT_PATHS}: + cwd = Path.cwd().resolve() + if resolved == cwd or not resolved.is_relative_to(cwd): raise RuntimeError( - f"Refusing to rmtree suspicious output path {resolved}. " - "Pass --output pointing at a dedicated subdirectory." + f"Refusing to rmtree output path {resolved}: must be a dedicated " + f"subdirectory strictly inside the current working directory " + f"({cwd}). Pass --output pointing at e.g. `docs/mcp-generated`." ) if output.exists(): shutil.rmtree(output) From 2cf273960a29b5f901102b1968ffab292e6469ef Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 17 Apr 2026 08:49:19 +0000 Subject: [PATCH 07/10] fix(docs-generate): load MCP markdown generator via importlib to satisfy deptry The previous static 'from generate_mcp_markdown import ...' triggered deptry's DEP001 rule (the script lives under scripts/ which is not on sys.path, so deptry treated it as a missing external dependency). Use importlib.util.spec_from_file_location to load the module from its on-disk path instead. --- docs/generate.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/docs/generate.py b/docs/generate.py index 6820f47de..8990ebfb3 100755 --- a/docs/generate.py +++ b/docs/generate.py @@ -10,6 +10,7 @@ from __future__ import annotations +import importlib.util import pathlib import shutil import sys @@ -32,32 +33,35 @@ def _regenerate_mcp_markdown() -> None: If generation fails (e.g. 
`fastmcp` is not installed, or the MCP server import fails), we print a warning and continue: pdoc will still build, and the include directive will just surface the missing file. + + We load the generator via `importlib.util` from its on-disk path rather + than a plain `from generate_mcp_markdown import ...`: the generator + lives under `scripts/` (not on `sys.path`), and a static import would + also trip `deptry` into flagging `generate_mcp_markdown` as a missing + external dependency. """ script = pathlib.Path(__file__).parent.parent / "scripts" / "generate_mcp_markdown.py" if not script.exists(): print(f"[docs-generate] MCP markdown generator not found at {script}; skipping.") return - # Import-and-call rather than subprocess so we share the current venv and - # surface tracebacks directly. The generator resolves paths relative to - # cwd, which `poe docs-generate` runs from the repo root. - sys.path.insert(0, str(script.parent)) try: - from generate_mcp_markdown import ( # noqa: PLC0415 - DEFAULT_OUTPUT, - DEFAULT_SERVER_SPEC, - generate, - ) - + spec = importlib.util.spec_from_file_location("_mcp_markdown_gen", script) + if spec is None or spec.loader is None: + msg = f"Could not load spec for {script}" + raise RuntimeError(msg) # noqa: TRY301 + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) print("[docs-generate] Regenerating docs/mcp-generated/ ...") - generate(server_spec=DEFAULT_SERVER_SPEC, output=DEFAULT_OUTPUT) + module.generate( + server_spec=module.DEFAULT_SERVER_SPEC, + output=module.DEFAULT_OUTPUT, + ) except Exception as ex: print( f"[docs-generate] WARNING: failed to regenerate MCP Markdown docs: {ex}. 
" "pdoc will continue, but module pages may show missing include warnings.", file=sys.stderr, ) - finally: - sys.path.pop(0) def run() -> None: From dffeaefc62bc308ccae9cca96404624f813e2930 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 17 Apr 2026 09:19:48 +0000 Subject: [PATCH 08/10] feat(mcp-docs-md): render MCP tool-annotation hints + alpha-sort primitives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every tool / prompt / resource is now rendered in a stable alphabetical order inside each module page (case-insensitive sort by name/uri), and the 'misc' catch-all module is pinned last in the module table. Module order on the index is alphabetical. For each tool we now surface MCP tool-annotation hints as inline-code badges right below the H3 — 'read-only', 'destructive', 'idempotent', 'open-world'. Hints are only rendered when explicitly True, so a tool like 'list_cloud_workspaces' shows '`read-only` · `idempotent` · `open-world`' while 'permanently_delete_cloud_connection' shows '`destructive` · `open-world`'. An optional human-readable 'annotations.title' override (distinct from the top-level title) is also surfaced when present. --- scripts/generate_mcp_markdown.py | 56 +++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/scripts/generate_mcp_markdown.py b/scripts/generate_mcp_markdown.py index 35bd99db6..11b88d5df 100755 --- a/scripts/generate_mcp_markdown.py +++ b/scripts/generate_mcp_markdown.py @@ -221,6 +221,41 @@ def _json_block(label: str, obj: Any) -> str: # noqa: ANN401 ) +# MCP tool annotation hints (per the MCP spec): we render a badge for every +# hint whose value is `True`, using a stable, human-readable label. The four +# standardised hints come from +# https://modelcontextprotocol.io/specification/server/tools#tool-annotations. 
+_HINT_LABELS: dict[str, str] = { + "readOnlyHint": "read-only", + "destructiveHint": "destructive", + "idempotentHint": "idempotent", + "openWorldHint": "open-world", +} + + +def _render_hint_badges(annotations: dict[str, Any] | None) -> str: + """Render MCP tool-annotation hints as inline `code` badges. + + Only hints whose value is explicitly `True` are rendered — an unset or + `False` hint is omitted. The MCP spec treats hints as advisory, so + "absence" and "false" are equivalent for documentation purposes. + + Also surfaces the optional human-readable `annotations.title` (distinct + from the top-level `title` field) when present, so e.g. + `annotations.title == "Deploy a source to Airbyte Cloud"` shows up in + the rendered doc. + """ + if not annotations: + return "" + lines: list[str] = [] + badges = [f"`{label}`" for key, label in _HINT_LABELS.items() if annotations.get(key) is True] + if badges: + lines.append("**Hints:** " + " · ".join(badges)) + if title := annotations.get("title"): + lines.append(f"**Title:** {title}") + return ("\n\n".join(lines) + "\n\n") if lines else "" + + def _render_parameters_table(input_schema: dict[str, Any]) -> str: """Render a GFM parameters table for a tool's `input_schema`.""" properties = input_schema.get("properties") or {} @@ -251,6 +286,7 @@ def _render_tool(tool: dict[str, Any]) -> str: # produces a clean sidebar nav entry. The HTML anchor above the heading # is what we deep-link to. 
parts: list[str] = [f'\n\n### {name}\n\n'] + parts.append(_render_hint_badges(tool.get("annotations"))) if description := tool.get("description"): parts.append(description.strip() + "\n\n") if tags := tool.get("tags"): @@ -354,7 +390,25 @@ def get(mcp_module: str) -> _ModuleBucket: for template in report.get("templates") or []: get(_get_module(template, fallback_map)).resources.append(template) - return buckets + # Alpha-sort each bucket's primitives (case-insensitive) so the rendered + # pages, left-nav entries, and deep-link IDs are in a stable, predictable + # order across regenerations instead of reflecting server registration + # order (which is effectively arbitrary). + def _sort_key(item: dict[str, Any]) -> str: + return str(item.get("name") or item.get("uri") or "").lower() + + for bucket in buckets.values(): + bucket.tools.sort(key=_sort_key) + bucket.prompts.sort(key=_sort_key) + bucket.resources.sort(key=_sort_key) + + # Also sort the module-level ordering so `index.md`'s module table and the + # order of files on disk are alphabetical (the `misc` bucket, which is a + # catch-all, is always pinned last). + sorted_buckets: OrderedDict[str, _ModuleBucket] = OrderedDict() + for name in sorted(buckets, key=lambda n: (n == MISC_MODULE, n.lower())): + sorted_buckets[name] = buckets[name] + return sorted_buckets def _render_module_page(bucket: _ModuleBucket, server_name: str) -> str: From cbb13644a9171d9412b8e478d569d6bfbc9740d2 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 17 Apr 2026 09:26:39 +0000 Subject: [PATCH 09/10] fix(mcp-docs-md): timeout fastmcp inspect, guard private-name iteration, anchor output path to repo root Addresses three CodeRabbit findings on commit dffeaef: 1. `_run_fastmcp_inspect` now passes `timeout=120` to `subprocess.run` and translates `TimeoutExpired` into an actionable `RuntimeError`. 
Previously a hung `fastmcp inspect` (blocking import, stalled network I/O during tool registration, etc.) would make `poe docs-generate` / `poe mcp-docs-md` hang indefinitely rather than fail loudly in CI. 2. `_resolve_extra_module_map` now iterates the private `fastmcp_extensions.decorators._REGISTERED_{PROMPTS,RESOURCES}` tuples *inside* the same `try`/`except Exception` that imports them. Previously any shape drift in those private tuples (third element added, `ann` becoming a dataclass, etc.) would escape the guard and abort doc generation — now the function falls back to an empty map exactly as its docstring promises. 3. `_prepare_output_dir` is now anchored to the repo root (derived from `__file__`), not `Path.cwd()`. `DEFAULT_OUTPUT` is a repo-relative path, so anchoring to cwd meant running `poe mcp-docs-md` from inside `docs/` (or anywhere other than the repo root) would silently write into the wrong directory while still passing the strict `is_relative_to(cwd)` guard. A new `_resolve_output_dir` helper encapsulates the relative-to-repo-root resolution; the existing safety guard semantics are preserved (repo root itself is rejected, absolute paths outside the repo root are rejected). --- scripts/generate_mcp_markdown.py | 113 +++++++++++++++++++++---------- 1 file changed, 77 insertions(+), 36 deletions(-) diff --git a/scripts/generate_mcp_markdown.py b/scripts/generate_mcp_markdown.py index 11b88d5df..e245c2abf 100755 --- a/scripts/generate_mcp_markdown.py +++ b/scripts/generate_mcp_markdown.py @@ -85,6 +85,19 @@ DEFAULT_OUTPUT = Path("docs/mcp-generated") DEFAULT_SERVER_SPEC = "airbyte/mcp/server.py:app" MISC_MODULE = "misc" +# Upper bound on how long `fastmcp inspect` may take before we fail the build. +# 120s is generous: local runs finish in ~10s, but CI / cold caches occasionally +# spend longer on the server-module import (e.g. re-resolving wheels).
Anything +# beyond this almost certainly indicates a hang (blocking import, stalled +# network I/O during registration) rather than real work, so failing fast is +# preferable to an indefinitely stuck `poe docs-generate` / `poe mcp-docs-md`. +_FASTMCP_INSPECT_TIMEOUT_SEC = 120 +# Repo root anchor for path-safety checks. `__file__` is always the on-disk +# location of this script, so `parent.parent` reliably points at the repo root +# regardless of the caller's `cwd`. We use this instead of `Path.cwd()` when +# resolving repo-relative defaults like `DEFAULT_OUTPUT` so that +# `poe mcp-docs-md` works even when invoked from a subdirectory. +_REPO_ROOT = Path(__file__).resolve().parent.parent def _run_fastmcp_inspect(server_spec: str, report_path: Path) -> dict[str, Any]: @@ -95,18 +108,28 @@ def _run_fastmcp_inspect(server_spec: str, report_path: Path) -> dict[str, Any]: "`fastmcp` CLI not found on PATH. Install project dev deps first " "(e.g. `uv sync --group dev`) and re-run from the repo root." ) - subprocess.run( - [ - fastmcp_bin, - "inspect", - server_spec, - "--format", - "fastmcp", - "--output", - str(report_path), - ], - check=True, - ) + try: + subprocess.run( + [ + fastmcp_bin, + "inspect", + server_spec, + "--format", + "fastmcp", + "--output", + str(report_path), + ], + check=True, + timeout=_FASTMCP_INSPECT_TIMEOUT_SEC, + ) + except subprocess.TimeoutExpired as ex: + msg = ( + f"`fastmcp inspect {server_spec}` timed out after " + f"{_FASTMCP_INSPECT_TIMEOUT_SEC}s. The server module likely hangs " + "on import (blocking network I/O during tool registration?). " + "Re-run with the server imported manually to investigate." 
+ ) + raise RuntimeError(msg) from ex return json.loads(report_path.read_text(encoding="utf-8")) @@ -130,6 +153,11 @@ def _resolve_extra_module_map(server_spec: str) -> dict[str, str]: file_part = server_spec.split(":", 1)[0] module_name = file_part.removesuffix(".py").replace("/", ".") mapping: dict[str, str] = {} + # The iteration sits inside the same `try` as the import so any shape + # drift in the private `_REGISTERED_*` tuples (e.g. an added third element, + # or `ann` becoming a dataclass instead of a dict) falls back to an empty + # mapping — preserving this helper's documented best-effort semantics — + # rather than aborting doc generation. try: importlib.import_module(module_name) # Import private lists from fastmcp_extensions: these are the only @@ -139,18 +167,20 @@ def _resolve_extra_module_map(server_spec: str) -> dict[str, str]: _REGISTERED_PROMPTS, # noqa: PLC2701 _REGISTERED_RESOURCES, # noqa: PLC2701 ) + + for _fn, ann in _REGISTERED_PROMPTS: + if name := ann.get("name"): + mapping[name] = ann.get("mcp_module") or MISC_MODULE + for _fn, ann in _REGISTERED_RESOURCES: + mcp_module = ann.get("mcp_module") or MISC_MODULE + if uri := ann.get("uri"): + mapping[uri] = mcp_module + # FastMCP exposes the URI stem as the resource `name` in + # inspect output; index by that too so lookup by either key + # works. + mapping[uri.rsplit("/", 1)[-1]] = mcp_module except Exception: - return mapping - for _fn, ann in _REGISTERED_PROMPTS: - if name := ann.get("name"): - mapping[name] = ann.get("mcp_module") or MISC_MODULE - for _fn, ann in _REGISTERED_RESOURCES: - mcp_module = ann.get("mcp_module") or MISC_MODULE - if uri := ann.get("uri"): - mapping[uri] = mcp_module - # FastMCP exposes the URI stem as the resource `name` in inspect - # output; index by that too so lookup by either key works. 
- mapping[uri.rsplit("/", 1)[-1]] = mcp_module + return {} return mapping @@ -497,28 +527,39 @@ def _render_index( return out +def _resolve_output_dir(output: Path) -> Path: + """Resolve an `--output` path against the repo root when it's relative. + + `DEFAULT_OUTPUT` is a repo-relative path, so anchoring relative inputs to + `_REPO_ROOT` (rather than `Path.cwd()`) means `poe mcp-docs-md` works + regardless of where the task is invoked from — a contributor running the + task from inside `docs/` still writes to `/docs/mcp-generated/`. + Absolute paths are honoured as-given (the safety guard below still + rejects any absolute path that escapes the repo root). + """ + return (output if output.is_absolute() else _REPO_ROOT / output).resolve() + + def _prepare_output_dir(output: Path) -> None: """Reset (or create) an output directory, with a strict safety guard. The script unconditionally `rmtree`s `output` before regenerating, so we need to be careful about what callers can point `--output` at. We require - the resolved output path to live **strictly inside** the current working - directory (typically the repo root) — this rules out `/`, `$HOME`, - `--output ..`, and any absolute path outside the repo, while still - letting the default `docs/mcp-generated/` work. The cwd itself is also - rejected so we never nuke the whole repo. + the resolved output path to live **strictly inside** the repo root — this + rules out `/`, `$HOME`, `--output ..`, and any absolute path outside the + repo, while still letting the default `docs/mcp-generated/` work. The + repo root itself is also rejected so we never nuke the whole repo. 
""" - resolved = output.resolve() - cwd = Path.cwd().resolve() - if resolved == cwd or not resolved.is_relative_to(cwd): + resolved = _resolve_output_dir(output) + if resolved == _REPO_ROOT or not resolved.is_relative_to(_REPO_ROOT): raise RuntimeError( f"Refusing to rmtree output path {resolved}: must be a dedicated " - f"subdirectory strictly inside the current working directory " - f"({cwd}). Pass --output pointing at e.g. `docs/mcp-generated`." + f"subdirectory strictly inside the repo root ({_REPO_ROOT}). " + f"Pass --output pointing at e.g. `docs/mcp-generated`." ) - if output.exists(): - shutil.rmtree(output) - output.mkdir(parents=True, exist_ok=True) + if resolved.exists(): + shutil.rmtree(resolved) + resolved.mkdir(parents=True, exist_ok=True) def generate(server_spec: str, output: Path) -> None: From 41d092cac2f33b607b04524553b91fbc79e1d7e5 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 17 Apr 2026 09:31:38 +0000 Subject: [PATCH 10/10] fix(mcp-docs-md): use resolved output dir for file writes Follow-up to cbb1364. Devin Review caught that `_prepare_output_dir` was resolving paths against `_REPO_ROOT` for mkdir/rmtree while the caller in `generate()` still used the raw (cwd-relative) `output` for `write_text`, so running from a subdirectory would prepare `/docs/mcp-generated/` but then try to write to `/docs/mcp-generated/` (which doesn't exist) and raise `FileNotFoundError`. `_prepare_output_dir` now returns the resolved absolute path, and `generate()` routes all subsequent file writes through it, so the two always agree regardless of where the task is invoked from. 
--- scripts/generate_mcp_markdown.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/scripts/generate_mcp_markdown.py b/scripts/generate_mcp_markdown.py index e245c2abf..82d675b11 100755 --- a/scripts/generate_mcp_markdown.py +++ b/scripts/generate_mcp_markdown.py @@ -540,7 +540,7 @@ def _resolve_output_dir(output: Path) -> Path: return (output if output.is_absolute() else _REPO_ROOT / output).resolve() -def _prepare_output_dir(output: Path) -> None: +def _prepare_output_dir(output: Path) -> Path: """Reset (or create) an output directory, with a strict safety guard. The script unconditionally `rmtree`s `output` before regenerating, so we @@ -549,6 +549,12 @@ def _prepare_output_dir(output: Path) -> None: rules out `/`, `$HOME`, `--output ..`, and any absolute path outside the repo, while still letting the default `docs/mcp-generated/` work. The repo root itself is also rejected so we never nuke the whole repo. + + Returns the *resolved* (absolute, repo-root-anchored) path so callers can + use a single canonical location for subsequent file writes — avoids a + footgun where preparing a resolved dir but writing via the raw `output` + would silently target a different, non-existent path when cwd differs + from the repo root. 
""" resolved = _resolve_output_dir(output) if resolved == _REPO_ROOT or not resolved.is_relative_to(_REPO_ROOT): @@ -560,6 +566,7 @@ def _prepare_output_dir(output: Path) -> None: if resolved.exists(): shutil.rmtree(resolved) resolved.mkdir(parents=True, exist_ok=True) + return resolved def generate(server_spec: str, output: Path) -> None: @@ -572,7 +579,10 @@ def generate(server_spec: str, output: Path) -> None: fallback_map = _resolve_extra_module_map(server_spec) buckets = _bucket_by_module(report, fallback_map) - _prepare_output_dir(output) + # Use the resolved path returned by `_prepare_output_dir` for subsequent + # writes: when called from a subdirectory, the raw `output` is + # cwd-relative and would target a non-existent directory. + resolved_output = _prepare_output_dir(output) server_name = (report.get("server") or {}).get("name", "mcp-server") pages: dict[str, str] = {"index.md": _render_index(report, buckets)} @@ -580,8 +590,8 @@ def generate(server_spec: str, output: Path) -> None: pages[f"{name}.md"] = _render_module_page(bucket, server_name) for name, content in pages.items(): - (output / name).write_text(content, encoding="utf-8") - print(f" wrote {output / name}") + (resolved_output / name).write_text(content, encoding="utf-8") + print(f" wrote {resolved_output / name}") print( f"Done. {len(buckets)} module(s) documented — "