From ac40f03f644132cad4e6eb0b9137daec0b90c7b3 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Fri, 17 Apr 2026 07:04:34 +0000
Subject: [PATCH 01/10] feat: add Markdown MCP docs generator (Docusaurus- and
 pdoc-compatible)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduces `scripts/generate_mcp_markdown.py` (exposed via `poe mcp-docs-md`)
which introspects the MCP server with `fastmcp inspect` and renders a small
set of Markdown files under `docs/mcp-generated/`:

- index.md — server overview + counts + TOC
- tools.md — one H2 per tool with a GFM parameters table and collapsible
  input/output JSON schemas
- resources.md — concrete resources and resource templates
- prompts.md — prompts and their arguments

Formatting is modeled on `mcpdocs-gen` (evaluated in PR #1013) but emitted
as plain CommonMark + GFM + YAML front-matter + `<details>` blocks, so the
pages render in both Docusaurus and `pdoc` without MDX-only components.
Every tool/resource/prompt heading carries an explicit `{#name}` heading ID
so it can be deep-linked (e.g. `tools.md#list_connectors`).
---
 .gitignore                       |   3 +
 docs/CONTRIBUTING.md             |  26 +++
 pyproject.toml                   |   1 +
 scripts/generate_mcp_markdown.py | 370 +++++++++++++++++++++++++++++++
 4 files changed, 400 insertions(+)
 create mode 100755 scripts/generate_mcp_markdown.py
diff --git a/.gitignore b/.gitignore
index 5db1a35e8..b867eb5f0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,6 +11,9 @@ viztracer_report.json
 # Packaged docs
 docs/*.zip
 
+# Generated MCP server docs (regenerate via `poe mcp-docs-md`)
+docs/mcp-generated/
+
 # Misc
 .DS_Store
 
diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md
index dd9a2fc53..ab7619dec 100644
--- a/docs/CONTRIBUTING.md
+++ b/docs/CONTRIBUTING.md
@@ -143,3 +143,29 @@ poe mcp-serve-sse      # Server-Sent Events transport on localhost:8000
 
 poe mcp-inspect        # Show all available MCP tools and their schemas
 ```
+
+### Generating Markdown docs for the MCP Server
+
+The repo ships a small script that introspects the MCP server via
+`fastmcp inspect` and emits a Markdown documentation site under
+`docs/mcp-generated/` (git-ignored). The output is designed to be both
+Docusaurus-hostable and `pdoc`-compatible — plain CommonMark with YAML
+front-matter, no MDX-only components.
+
+```bash
+uv sync --group dev
+poe mcp-docs-md
+```
+
+Four files are produced:
+
+- `index.md` — server overview (name, version, instructions, counts)
+- `tools.md` — one section per tool with a parameters table and collapsible
+  input/output JSON schemas
+- `resources.md` — concrete resources + resource templates
+- `prompts.md` — prompts and their arguments
+
+Each tool/resource/prompt has a stable slug anchor (e.g.
+`tools.md#list_connectors`) so the pages can be deep-linked from Slack, issues,
+or other docs. Regenerate after any change to MCP tool signatures, descriptions,
+or schemas. The underlying script is at `scripts/generate_mcp_markdown.py`.
diff --git a/pyproject.toml b/pyproject.toml
index f3ff6419f..334f5a3eb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -174,6 +174,7 @@ mcp-serve-http = { cmd = "python -c \"from airbyte.mcp.server import app; app.ru
 mcp-serve-sse = { cmd = "python -c \"from airbyte.mcp.server import app; app.run(transport='sse', host='127.0.0.1', port=8000)\"", help = "Start the MCP server with SSE transport" }
 mcp-inspect = { cmd = "fastmcp inspect airbyte/mcp/server.py:app", help = "Inspect MCP tools and resources (supports --tools, --health, etc.)" }
 mcp-tool-test = { cmd = "python -m fastmcp_extensions.utils.test_tool --app airbyte.mcp.server:app", help = "Test MCP tools directly with JSON arguments: poe mcp-tool-test <tool_name> '<json_args>'" }
+mcp-docs-md = { cmd = "python scripts/generate_mcp_markdown.py", help = "Generate Markdown docs for the MCP server into docs/mcp-generated/ (Docusaurus- and pdoc-compatible)" }
 
 # Claude Code MCP Testing Tasks
 [tool.poe.tasks.test-my-tools]
diff --git a/scripts/generate_mcp_markdown.py b/scripts/generate_mcp_markdown.py
new file mode 100755
index 000000000..8633ebafe
--- /dev/null
+++ b/scripts/generate_mcp_markdown.py
@@ -0,0 +1,370 @@
+#!/usr/bin/env python3
+# Copyright (c) 2026 Airbyte, Inc., all rights reserved.
+"""Generate Markdown documentation for the PyAirbyte MCP server.
+
+Runs `fastmcp inspect` against `airbyte.mcp.server:app` to obtain the full
+FastMCP protocol surface (tools, resources, resource templates, prompts) as a
+JSON report, then renders it into a small set of Markdown files under
+`docs/mcp-generated/`.
+
+The output is designed to be:
+
+- **Docusaurus-hostable**: each file starts with YAML front-matter (`title`,
+  `sidebar_label`, `description`); the body is plain CommonMark + GFM tables +
+  `<details><summary>` blocks for collapsible JSON schemas. No MDX-only
+  components are used.
+- **`pdoc3`-compatible**: standard Markdown that renders correctly alongside
+  the existing `pdoc3` output in `docs/generated/` without any special config.
+- **Deep-linkable**: every tool/resource/prompt name is an H2 with a stable
+  slug anchor (e.g. `tools.md#list_connectors`).
+
+Formatting is deliberately modeled on the
+[`mcpdocs-gen`](https://github.com/smytsyk/mcpdocs) static HTML output — same
+sections, same per-tool shape (description → parameters table → JSON schema) —
+but emitted as Markdown rather than HTML so it can slot into an existing docs
+site.
+
+Usage:
+
+```
+uv run python scripts/generate_mcp_markdown.py [--output docs/mcp-generated]
+```
+
+Or via the project's poe task:
+
+```
+poe mcp-docs-md
+```
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import shutil
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+from typing import Any
+
+
+DEFAULT_OUTPUT = Path("docs/mcp-generated")
+DEFAULT_SERVER_SPEC = "airbyte/mcp/server.py:app"
+
+
+def _run_fastmcp_inspect(server_spec: str, report_path: Path) -> dict[str, Any]:
+    """Invoke `fastmcp inspect` and return the parsed JSON report."""
+    fastmcp_bin = shutil.which("fastmcp")
+    if fastmcp_bin is None:
+        raise RuntimeError(
+            "`fastmcp` CLI not found on PATH. Install project dev deps first "
+            "(e.g. `uv sync --group dev`) and re-run from the repo root."
+        )
+    subprocess.run(
+        [
+            fastmcp_bin,
+            "inspect",
+            server_spec,
+            "--format",
+            "fastmcp",
+            "--output",
+            str(report_path),
+        ],
+        check=True,
+    )
+    return json.loads(report_path.read_text())
+
+
+def _fmt_type(schema: dict[str, Any]) -> str:
+    """Render a JSON-schema fragment as a short, human-readable type string."""
+    for key in ("anyOf", "oneOf"):
+        if key in schema:
+            return " | ".join(_fmt_type(s) for s in schema[key])
+    if "enum" in schema:
+        return "enum(" + ", ".join(repr(v) for v in schema["enum"]) + ")"
+    t = schema.get("type")
+    if t == "array":
+        items = schema.get("items", {})
+        return f"array<{_fmt_type(items)}>" if items else "array"
+    if isinstance(t, list):
+        return " | ".join(str(x) for x in t)
+    return str(t) if t else "any"
+
+
+def _escape_table_cell(value: str) -> str:
+    """Make a string safe to embed in a single GFM table cell."""
+    return value.replace("|", "\\|").replace("\n", " ").strip()
+
+
+def _fmt_default(schema: dict[str, Any]) -> str:
+    """Render a schema's `default` value as a compact Markdown code span."""
+    if "default" not in schema:
+        return "—"
+    default = schema["default"]
+    if default is None:
+        return "`null`"
+    return f"`{json.dumps(default)}`"
+
+
+def _frontmatter(title: str, sidebar_label: str, description: str) -> str:
+    """Build a YAML front-matter block for a Docusaurus page."""
+    esc_desc = description.replace("\n", " ").replace('"', '\\"').strip()
+    return (
+        "---\n"
+        f"title: {title}\n"
+        f"sidebar_label: {sidebar_label}\n"
+        f'description: "{esc_desc}"\n'
+        "---\n\n"
+    )
+
+
+def _json_block(label: str, obj: Any) -> str:  # noqa: ANN401
+    """Render an object inside a collapsible `<details>` JSON code block."""
+    return (
+        f"<details>\n<summary>{label}</summary>\n\n"
+        "```json\n" + json.dumps(obj, indent=2) + "\n```\n\n</details>\n\n"
+    )
+
+
+def _render_parameters_table(input_schema: dict[str, Any]) -> str:
+    """Render a GFM parameters table for a tool's `input_schema`."""
+    properties = input_schema.get("properties") or {}
+    if not properties:
+        return "_No parameters._\n\n"
+    required = set(input_schema.get("required") or [])
+    lines = [
+        "| Name | Type | Required | Default | Description |",
+        "| --- | --- | --- | --- | --- |",
+    ]
+    for name, prop in properties.items():
+        desc = _escape_table_cell(prop.get("description", ""))
+        # Union types contain literal `|` chars which break GFM table rendering
+        # even inside backticks in some parsers; escape defensively.
+        type_cell = _fmt_type(prop).replace("|", "\\|")
+        lines.append(
+            f"| `{name}` | `{type_cell}` | "
+            f"{'yes' if name in required else 'no'} | "
+            f"{_fmt_default(prop)} | {desc} |"
+        )
+    return "\n".join(lines) + "\n\n"
+
+
+def _render_tool(tool: dict[str, Any]) -> str:
+    """Render a single tool as a Markdown section."""
+    name = tool["name"]
+    parts: list[str] = [f"## `{name}` {{#{name}}}\n\n"]
+    if description := tool.get("description"):
+        parts.append(description.strip() + "\n\n")
+    if tags := tool.get("tags"):
+        parts.append("**Tags:** " + ", ".join(f"`{t}`" for t in tags) + "\n\n")
+    parts.extend(
+        [
+            "### Parameters\n\n",
+            _render_parameters_table(tool.get("input_schema") or {}),
+        ]
+    )
+    if input_schema := tool.get("input_schema"):
+        parts.append(_json_block("Show input JSON schema", input_schema))
+    if output_schema := tool.get("output_schema"):
+        parts.append(_json_block("Show output JSON schema", output_schema))
+    return "".join(parts)
+
+
+def _render_resource(resource: dict[str, Any]) -> str:
+    """Render a single resource as a Markdown section."""
+    name = resource["name"]
+    parts: list[str] = [f"## `{name}` {{#{name}}}\n\n"]
+    if description := resource.get("description"):
+        parts.append(description.strip() + "\n\n")
+    meta_lines: list[str] = []
+    if uri := resource.get("uri"):
+        meta_lines.append(f"- **URI:** `{uri}`")
+    if uri_template := resource.get("uri_template"):
+        meta_lines.append(f"- **URI template:** `{uri_template}`")
+    if mime := resource.get("mime_type"):
+        meta_lines.append(f"- **MIME type:** `{mime}`")
+    if tags := resource.get("tags"):
+        meta_lines.append("- **Tags:** " + ", ".join(f"`{t}`" for t in tags))
+    if meta_lines:
+        parts.append("\n".join(meta_lines) + "\n\n")
+    return "".join(parts)
+
+
+def _render_prompt(prompt: dict[str, Any]) -> str:
+    """Render a single prompt as a Markdown section."""
+    name = prompt["name"]
+    parts: list[str] = [f"## `{name}` {{#{name}}}\n\n"]
+    if description := prompt.get("description"):
+        parts.append(description.strip() + "\n\n")
+    args = prompt.get("arguments") or []
+    if args:
+        parts.extend(
+            [
+                "### Arguments\n\n",
+                "| Name | Required | Description |\n| --- | --- | --- |\n",
+            ]
+        )
+        for arg in args:
+            desc = _escape_table_cell(arg.get("description", ""))
+            parts.append(
+                f"| `{arg['name']}` | " f"{'yes' if arg.get('required') else 'no'} | " f"{desc} |\n"
+            )
+        parts.append("\n")
+    else:
+        parts.append("_No arguments._\n\n")
+    return "".join(parts)
+
+
+def _render_index(report: dict[str, Any]) -> str:
+    """Render the top-level overview page."""
+    server = report.get("server") or {}
+    server_name = server.get("name", "mcp-server")
+    out = _frontmatter(
+        title=f"{server_name} — MCP server",
+        sidebar_label="Overview",
+        description=(server.get("instructions") or "").splitlines()[0]
+        or f"Auto-generated docs for the {server_name} MCP server.",
+    )
+    out += f"# `{server_name}`\n\n"
+    if version := server.get("version"):
+        out += f"**Version:** `{version}`  \n"
+    if proto := server.get("protocol_version") or server.get("fastmcp_version"):
+        out += f"**FastMCP version:** `{proto}`  \n"
+    out += "\n"
+    if instructions := server.get("instructions"):
+        out += instructions.strip() + "\n\n"
+    out += "## Contents\n\n"
+    counts = {
+        "tools": len(report.get("tools") or []),
+        "resources": (len(report.get("resources") or []) + len(report.get("templates") or [])),
+        "prompts": len(report.get("prompts") or []),
+    }
+    out += (
+        f"- [Tools]({'./tools'}) — {counts['tools']}\n"
+        f"- [Resources]({'./resources'}) — {counts['resources']}\n"
+        f"- [Prompts]({'./prompts'}) — {counts['prompts']}\n\n"
+    )
+    out += (
+        "> These pages are generated from the live `fastmcp inspect` report. "
+        "Regenerate with `poe mcp-docs-md`.\n"
+    )
+    return out
+
+
+def _render_tools_page(report: dict[str, Any]) -> str:
+    """Render the tools page."""
+    tools = report.get("tools") or []
+    out = _frontmatter(
+        title="Tools",
+        sidebar_label="Tools",
+        description=f"All {len(tools)} MCP tools exposed by this server.",
+    )
+    out += "# Tools\n\n"
+    if not tools:
+        out += "_No tools are exposed by this server._\n"
+        return out
+    out += f"This server exposes **{len(tools)}** tool(s).\n\n"
+    out += "**Index:** "
+    out += ", ".join(f"[`{t['name']}`](#{t['name']})" for t in tools) + "\n\n"
+    for tool in tools:
+        out += _render_tool(tool)
+    return out
+
+
+def _render_resources_page(report: dict[str, Any]) -> str:
+    """Render the resources + resource-templates page."""
+    resources = report.get("resources") or []
+    templates = report.get("templates") or []
+    total = len(resources) + len(templates)
+    out = _frontmatter(
+        title="Resources",
+        sidebar_label="Resources",
+        description=f"All {total} MCP resource(s) and resource template(s).",
+    )
+    out += "# Resources\n\n"
+    if not resources and not templates:
+        out += "_No resources or resource templates are exposed by this server._\n"
+        return out
+    if resources:
+        out += f"## Concrete resources ({len(resources)})\n\n"
+        for resource in resources:
+            out += _render_resource(resource)
+    if templates:
+        out += f"## Resource templates ({len(templates)})\n\n"
+        for template in templates:
+            out += _render_resource(template)
+    return out
+
+
+def _render_prompts_page(report: dict[str, Any]) -> str:
+    """Render the prompts page."""
+    prompts = report.get("prompts") or []
+    out = _frontmatter(
+        title="Prompts",
+        sidebar_label="Prompts",
+        description=f"All {len(prompts)} MCP prompt(s).",
+    )
+    out += "# Prompts\n\n"
+    if not prompts:
+        out += "_No prompts are exposed by this server._\n"
+        return out
+    out += f"This server exposes **{len(prompts)}** prompt(s).\n\n"
+    for prompt in prompts:
+        out += _render_prompt(prompt)
+    return out
+
+
+def generate(server_spec: str, output: Path) -> None:
+    """Run `fastmcp inspect`, render Markdown, and write files to `output/`."""
+    with tempfile.TemporaryDirectory() as tmp:
+        report_path = Path(tmp) / "mcp-inspect.json"
+        print(f"Running `fastmcp inspect {server_spec}`...")
+        report = _run_fastmcp_inspect(server_spec, report_path)
+
+    if output.exists():
+        shutil.rmtree(output)
+    output.mkdir(parents=True, exist_ok=True)
+
+    pages: dict[str, str] = {
+        "index.md": _render_index(report),
+        "tools.md": _render_tools_page(report),
+        "resources.md": _render_resources_page(report),
+        "prompts.md": _render_prompts_page(report),
+    }
+    for name, content in pages.items():
+        (output / name).write_text(content)
+        print(f"  wrote {output / name}")
+
+    print(
+        f"Done. {len(report.get('tools') or [])} tool(s), "
+        f"{len(report.get('resources') or []) + len(report.get('templates') or [])} "
+        f"resource(s), {len(report.get('prompts') or [])} prompt(s) documented."
+    )
+
+
+def main() -> int:
+    """CLI entrypoint for the Markdown MCP docs generator."""
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--server-spec",
+        default=DEFAULT_SERVER_SPEC,
+        help=("FastMCP server spec to inspect, e.g. " f"'{DEFAULT_SERVER_SPEC}' (default)."),
+    )
+    parser.add_argument(
+        "--output",
+        type=Path,
+        default=DEFAULT_OUTPUT,
+        help=f"Output directory for generated Markdown (default: {DEFAULT_OUTPUT}).",
+    )
+    args = parser.parse_args()
+    try:
+        generate(server_spec=args.server_spec, output=args.output)
+    except (subprocess.CalledProcessError, RuntimeError) as ex:
+        print(f"MCP docs generation failed: {ex}", file=sys.stderr)
+        return 1
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())

From 214bec67f90076e25a565d3d62a65c064346baa1 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Fri, 17 Apr 2026 07:11:46 +0000
Subject: [PATCH 02/10] chore(mcp-docs-md): address CodeRabbit nits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- UTF-8 encoding on file I/O (Windows locale safety)
- json.dumps(v) instead of repr(v) for enum values (JSON-shaped output)
- Split 'FastMCP version' into 'Protocol version' + 'FastMCP version' lines
- Fix `{'./tools'}` no-op f-string → explicit `./tools.md` links
- Add minimal safety guard refusing to rmtree '/', HOME, or CWD
- Merge implicitly concatenated f-string fragments (prompt table row,
  `--server-spec` help text) into single f-strings
---
 scripts/generate_mcp_markdown.py | 53 +++++++++++++++++++++++---------
 1 file changed, 38 insertions(+), 15 deletions(-)

diff --git a/scripts/generate_mcp_markdown.py b/scripts/generate_mcp_markdown.py
index 8633ebafe..e9b077ae0 100755
--- a/scripts/generate_mcp_markdown.py
+++ b/scripts/generate_mcp_markdown.py
@@ -73,7 +73,7 @@ def _run_fastmcp_inspect(server_spec: str, report_path: Path) -> dict[str, Any]:
         ],
         check=True,
     )
-    return json.loads(report_path.read_text())
+    return json.loads(report_path.read_text(encoding="utf-8"))
 
 
 def _fmt_type(schema: dict[str, Any]) -> str:
@@ -82,7 +82,7 @@ def _fmt_type(schema: dict[str, Any]) -> str:
         if key in schema:
             return " | ".join(_fmt_type(s) for s in schema[key])
     if "enum" in schema:
-        return "enum(" + ", ".join(repr(v) for v in schema["enum"]) + ")"
+        return "enum(" + ", ".join(json.dumps(v) for v in schema["enum"]) + ")"
     t = schema.get("type")
     if t == "array":
         items = schema.get("items", {})
@@ -207,9 +207,8 @@ def _render_prompt(prompt: dict[str, Any]) -> str:
         )
         for arg in args:
             desc = _escape_table_cell(arg.get("description", ""))
-            parts.append(
-                f"| `{arg['name']}` | " f"{'yes' if arg.get('required') else 'no'} | " f"{desc} |\n"
-            )
+            required = "yes" if arg.get("required") else "no"
+            parts.append(f"| `{arg['name']}` | {required} | {desc} |\n")
         parts.append("\n")
     else:
         parts.append("_No arguments._\n\n")
@@ -229,8 +228,10 @@ def _render_index(report: dict[str, Any]) -> str:
     out += f"# `{server_name}`\n\n"
     if version := server.get("version"):
         out += f"**Version:** `{version}`  \n"
-    if proto := server.get("protocol_version") or server.get("fastmcp_version"):
-        out += f"**FastMCP version:** `{proto}`  \n"
+    if proto := server.get("protocol_version"):
+        out += f"**Protocol version:** `{proto}`  \n"
+    if fastmcp_version := server.get("fastmcp_version"):
+        out += f"**FastMCP version:** `{fastmcp_version}`  \n"
     out += "\n"
     if instructions := server.get("instructions"):
         out += instructions.strip() + "\n\n"
@@ -241,9 +242,9 @@ def _render_index(report: dict[str, Any]) -> str:
         "prompts": len(report.get("prompts") or []),
     }
     out += (
-        f"- [Tools]({'./tools'}) — {counts['tools']}\n"
-        f"- [Resources]({'./resources'}) — {counts['resources']}\n"
-        f"- [Prompts]({'./prompts'}) — {counts['prompts']}\n\n"
+        f"- [Tools](./tools.md) — {counts['tools']}\n"
+        f"- [Resources](./resources.md) — {counts['resources']}\n"
+        f"- [Prompts](./prompts.md) — {counts['prompts']}\n\n"
     )
     out += (
         "> These pages are generated from the live `fastmcp inspect` report. "
@@ -315,6 +316,30 @@ def _render_prompts_page(report: dict[str, Any]) -> str:
     return out
 
 
+# Paths we refuse to `rmtree` even if the user passes them as --output, to
+# avoid cases like `--output /` or `--output $HOME` accidentally nuking data.
+_FORBIDDEN_OUTPUT_PATHS = frozenset(
+    {
+        Path("/"),
+        Path.home(),
+        Path.cwd(),
+    }
+)
+
+
+def _prepare_output_dir(output: Path) -> None:
+    """Reset (or create) an output directory, with a minimal safety guard."""
+    resolved = output.resolve()
+    if resolved in {p.resolve() for p in _FORBIDDEN_OUTPUT_PATHS}:
+        raise RuntimeError(
+            f"Refusing to rmtree suspicious output path {resolved}. "
+            "Pass --output pointing at a dedicated subdirectory."
+        )
+    if output.exists():
+        shutil.rmtree(output)
+    output.mkdir(parents=True, exist_ok=True)
+
+
 def generate(server_spec: str, output: Path) -> None:
     """Run `fastmcp inspect`, render Markdown, and write files to `output/`."""
     with tempfile.TemporaryDirectory() as tmp:
@@ -322,9 +347,7 @@ def generate(server_spec: str, output: Path) -> None:
         print(f"Running `fastmcp inspect {server_spec}`...")
         report = _run_fastmcp_inspect(server_spec, report_path)
 
-    if output.exists():
-        shutil.rmtree(output)
-    output.mkdir(parents=True, exist_ok=True)
+    _prepare_output_dir(output)
 
     pages: dict[str, str] = {
         "index.md": _render_index(report),
@@ -333,7 +356,7 @@ def generate(server_spec: str, output: Path) -> None:
         "prompts.md": _render_prompts_page(report),
     }
     for name, content in pages.items():
-        (output / name).write_text(content)
+        (output / name).write_text(content, encoding="utf-8")
         print(f"  wrote {output / name}")
 
     print(
@@ -349,7 +372,7 @@ def main() -> int:
     parser.add_argument(
         "--server-spec",
         default=DEFAULT_SERVER_SPEC,
-        help=("FastMCP server spec to inspect, e.g. " f"'{DEFAULT_SERVER_SPEC}' (default)."),
+        help=f"FastMCP server spec to inspect, e.g. '{DEFAULT_SERVER_SPEC}' (default).",
     )
     parser.add_argument(
         "--output",

From 62af4956ce79fc0c64f6c9064c8c5d7f7e14f8ea Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Fri, 17 Apr 2026 07:35:41 +0000
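Subject: [PATCH 03/10] refactor(mcp-docs-md): group output by module + pdoc3
 include

Render one Markdown file per MCP module under `docs/mcp-generated/`
(`cloud.md`, `local.md`, `registry.md`, `prompts.md`, ...) plus an
`index.md` overview. Items are grouped by the `mcp_module` annotation,
falling back to `meta.mcp_module`, then to an import-based lookup against
`fastmcp_extensions.decorators._REGISTERED_*`, and finally to `misc.md`.

- Add `.. include::` directives to the `airbyte/mcp/cloud.py`, `local.py`,
  `prompts.py` and `registry.py` module docstrings so pdoc splices the
  generated page into each module's documentation
- Correct the `airbyte/mcp/registry.py` docstring, which duplicated the
  cloud module's summary line
- Demote per-item headings from H2 to H3 (nested under `## Tools` /
  `## Prompts` / `## Resources`) and give tools an HTML `<a id>` anchor,
  since pdoc3 renders `{#...}` heading attributes as literal text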
---
 airbyte/mcp/cloud.py             |   5 +-
 airbyte/mcp/local.py             |   5 +-
 airbyte/mcp/prompts.py           |   2 +
 airbyte/mcp/registry.py          |   5 +-
 scripts/generate_mcp_markdown.py | 325 +++++++++++++++++++++----------
 5 files changed, 239 insertions(+), 103 deletions(-)

diff --git a/airbyte/mcp/cloud.py b/airbyte/mcp/cloud.py
index 594bee0db..52859def9 100644
--- a/airbyte/mcp/cloud.py
+++ b/airbyte/mcp/cloud.py
@@ -1,5 +1,8 @@
 # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
-"""Airbyte Cloud MCP operations."""
+"""Airbyte Cloud MCP operations.
+
+.. include:: ../../docs/mcp-generated/cloud.md
+"""
 
 from pathlib import Path
 from typing import Annotated, Any, Literal, cast
diff --git a/airbyte/mcp/local.py b/airbyte/mcp/local.py
index d9bbb801f..4b27b121d 100644
--- a/airbyte/mcp/local.py
+++ b/airbyte/mcp/local.py
@@ -1,5 +1,8 @@
 # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
-"""Local MCP operations."""
+"""Local MCP operations.
+
+.. include:: ../../docs/mcp-generated/local.md
+"""
 
 import sys
 import traceback
diff --git a/airbyte/mcp/prompts.py b/airbyte/mcp/prompts.py
index 47993aef6..37767a895 100644
--- a/airbyte/mcp/prompts.py
+++ b/airbyte/mcp/prompts.py
@@ -3,6 +3,8 @@
 
 This module defines prompts that can be invoked by MCP clients to perform
 common workflows.
+
+.. include:: ../../docs/mcp-generated/prompts.md
 """
 
 from __future__ import annotations
diff --git a/airbyte/mcp/registry.py b/airbyte/mcp/registry.py
index 77b44f061..999c6b0e6 100644
--- a/airbyte/mcp/registry.py
+++ b/airbyte/mcp/registry.py
@@ -1,5 +1,8 @@
 # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
-"""Airbyte Cloud MCP operations."""
+"""Airbyte connector registry MCP operations.
+
+.. include:: ../../docs/mcp-generated/registry.md
+"""
 
 # Note: Deferred type evaluation must be avoided due to FastMCP/Pydantic needing
 # types to be available at import time for tool registration.
diff --git a/scripts/generate_mcp_markdown.py b/scripts/generate_mcp_markdown.py
index e9b077ae0..344e4f495 100755
--- a/scripts/generate_mcp_markdown.py
+++ b/scripts/generate_mcp_markdown.py
@@ -4,25 +4,51 @@
 
 Runs `fastmcp inspect` against `airbyte.mcp.server:app` to obtain the full
 FastMCP protocol surface (tools, resources, resource templates, prompts) as a
-JSON report, then renders it into a small set of Markdown files under
-`docs/mcp-generated/`.
+JSON report, then renders it into one Markdown file **per MCP module** under
+`docs/mcp-generated/`, plus an `index.md` overview.
+
+The per-module grouping uses the `mcp_module` annotation that
+`fastmcp_extensions.mcp_tool` attaches to every registered tool (derived from
+the Python file the tool is defined in — e.g. tools in `airbyte/mcp/cloud.py`
+get `mcp_module="cloud"`). Prompts and resources fall back to `meta.mcp_module`
+when present, and otherwise to an import-based lookup against
+`fastmcp_extensions.decorators._REGISTERED_*`; anything still unresolved lands
+in `misc.md`.
+
+Inside each module file, content is grouped by primitive with L2 headings:
+
+```
+# airbyte.mcp.cloud
+
+## Tools
+### `deploy_source_to_cloud`
+...
+## Prompts
+### `some_prompt`
+...
+## Resources
+### `some_resource`
+...
+```
 
 The output is designed to be:
 
+- **`pdoc`/`pdoc3`-includable**: each `<module>.md` is a self-contained body
+  intended to be spliced into the corresponding Python module's docstring via
+  pdoc's `.. include::` directive, so the generated tool docs render inline on
+  the module's pdoc page.
 - **Docusaurus-hostable**: each file starts with YAML front-matter (`title`,
   `sidebar_label`, `description`); the body is plain CommonMark + GFM tables +
   `<details><summary>` blocks for collapsible JSON schemas. No MDX-only
   components are used.
-- **`pdoc3`-compatible**: standard Markdown that renders correctly alongside
-  the existing `pdoc3` output in `docs/generated/` without any special config.
-- **Deep-linkable**: every tool/resource/prompt name is an H2 with a stable
-  slug anchor (e.g. `tools.md#list_connectors`).
+- **Deep-linkable**: every tool/resource/prompt name is an H3 with a stable
+  slug anchor (e.g. `cloud.md#deploy_source_to_cloud`).
 
-Formatting is deliberately modeled on the
+Formatting is modeled on the
 [`mcpdocs-gen`](https://github.com/smytsyk/mcpdocs) static HTML output — same
-sections, same per-tool shape (description → parameters table → JSON schema) —
-but emitted as Markdown rather than HTML so it can slot into an existing docs
-site.
+per-tool shape (description → parameters table → JSON schema) — but emitted as
+Markdown, one file per MCP module, so the output can slot into both
+`pdoc`-rendered per-module pages and an external Docusaurus site.
 
 Usage:
 
@@ -40,17 +66,20 @@
 from __future__ import annotations
 
 import argparse
+import importlib
 import json
 import shutil
 import subprocess
 import sys
 import tempfile
+from collections import OrderedDict
 from pathlib import Path
 from typing import Any
 
 
 DEFAULT_OUTPUT = Path("docs/mcp-generated")
 DEFAULT_SERVER_SPEC = "airbyte/mcp/server.py:app"
+MISC_MODULE = "misc"
 
 
 def _run_fastmcp_inspect(server_spec: str, report_path: Path) -> dict[str, Any]:
@@ -76,6 +105,66 @@ def _run_fastmcp_inspect(server_spec: str, report_path: Path) -> dict[str, Any]:
     return json.loads(report_path.read_text(encoding="utf-8"))
 
 
+def _resolve_extra_module_map(server_spec: str) -> dict[str, str]:
+    """Best-effort import-based lookup of `mcp_module` for prompts/resources.
+
+    `fastmcp_extensions`'s `mcp_tool` decorator embeds `mcp_module` in the MCP
+    tool `annotations` dict, which the inspect JSON surfaces directly. But
+    `mcp_prompt` and `mcp_resource` store `mcp_module` on the library's
+    internal `_REGISTERED_*` lists only — it is not re-emitted as an MCP
+    annotation, so it doesn't appear in the inspect JSON.
+
+    To still recover that information, we import the server module and read
+    those internal lists. If that fails (not a `fastmcp_extensions`-based
+    server, import errors, etc.), we silently return an empty map and the
+    caller falls back to `MISC_MODULE`.
+
+    Returns a map of `name/uri -> mcp_module` covering both prompts and
+    resources.
+    """
+    file_part = server_spec.split(":", 1)[0]
+    module_name = file_part.removesuffix(".py").replace("/", ".")
+    mapping: dict[str, str] = {}
+    try:
+        importlib.import_module(module_name)
+        # Import private lists from fastmcp_extensions: these are the only
+        # place `mcp_module` is recorded for prompts/resources, so we accept
+        # the private-name coupling.
+        from fastmcp_extensions.decorators import (  # noqa: PLC0415
+            _REGISTERED_PROMPTS,  # noqa: PLC2701
+            _REGISTERED_RESOURCES,  # noqa: PLC2701
+        )
+    except Exception:
+        return mapping
+    for _fn, ann in _REGISTERED_PROMPTS:
+        if name := ann.get("name"):
+            mapping[name] = ann.get("mcp_module") or MISC_MODULE
+    for _fn, ann in _REGISTERED_RESOURCES:
+        mcp_module = ann.get("mcp_module") or MISC_MODULE
+        if uri := ann.get("uri"):
+            mapping[uri] = mcp_module
+            # FastMCP exposes the URI stem as the resource `name` in inspect
+            # output; index by that too so lookup by either key works.
+            mapping[uri.rsplit("/", 1)[-1]] = mcp_module
+    return mapping
+
+
+def _get_module(item: dict[str, Any], fallback_map: dict[str, str]) -> str:
+    """Extract the `mcp_module` for a tool / resource / prompt."""
+    annotations = item.get("annotations") or {}
+    if mcp_module := annotations.get("mcp_module"):
+        return str(mcp_module)
+    meta = item.get("meta") or {}
+    if mcp_module := meta.get("mcp_module"):
+        return str(mcp_module)
+    name = item.get("name")
+    uri = item.get("uri") or item.get("uri_template")
+    for key in (name, uri):
+        if key and key in fallback_map:
+            return fallback_map[key]
+    return MISC_MODULE
+
+
 def _fmt_type(schema: dict[str, Any]) -> str:
     """Render a JSON-schema fragment as a short, human-readable type string."""
     for key in ("anyOf", "oneOf"):
@@ -151,16 +240,18 @@ def _render_parameters_table(input_schema: dict[str, Any]) -> str:
 
 
 def _render_tool(tool: dict[str, Any]) -> str:
-    """Render a single tool as a Markdown section."""
+    """Render a single tool as L3 under its module's `## Tools` section."""
     name = tool["name"]
-    parts: list[str] = [f"## `{name}` {{#{name}}}\n\n"]
+    # HTML anchor + heading (instead of Pandoc `{#...}` attr syntax, which
+    # renders as literal text in pdoc3's markdown processor).
+    parts: list[str] = [f'<a id="{name}"></a>\n\n### `{name}`\n\n']
     if description := tool.get("description"):
         parts.append(description.strip() + "\n\n")
     if tags := tool.get("tags"):
         parts.append("**Tags:** " + ", ".join(f"`{t}`" for t in tags) + "\n\n")
     parts.extend(
         [
-            "### Parameters\n\n",
+            "#### Parameters\n\n",
             _render_parameters_table(tool.get("input_schema") or {}),
         ]
     )
@@ -172,9 +263,9 @@ def _render_tool(tool: dict[str, Any]) -> str:
 
 
 def _render_resource(resource: dict[str, Any]) -> str:
-    """Render a single resource as a Markdown section."""
+    """Render a single resource as L3 under its module's `## Resources` section."""
     name = resource["name"]
-    parts: list[str] = [f"## `{name}` {{#{name}}}\n\n"]
+    parts: list[str] = [f"### `{name}` {{#{name}}}\n\n"]
     if description := resource.get("description"):
         parts.append(description.strip() + "\n\n")
     meta_lines: list[str] = []
@@ -192,16 +283,16 @@ def _render_resource(resource: dict[str, Any]) -> str:
 
 
 def _render_prompt(prompt: dict[str, Any]) -> str:
-    """Render a single prompt as a Markdown section."""
+    """Render a single prompt as L3 under its module's `## Prompts` section."""
     name = prompt["name"]
-    parts: list[str] = [f"## `{name}` {{#{name}}}\n\n"]
+    parts: list[str] = [f"### `{name}` {{#{name}}}\n\n"]
     if description := prompt.get("description"):
         parts.append(description.strip() + "\n\n")
     args = prompt.get("arguments") or []
     if args:
         parts.extend(
             [
-                "### Arguments\n\n",
+                "#### Arguments\n\n",
                 "| Name | Required | Description |\n| --- | --- | --- |\n",
             ]
         )
@@ -215,7 +306,94 @@ def _render_prompt(prompt: dict[str, Any]) -> str:
     return "".join(parts)
 
 
-def _render_index(report: dict[str, Any]) -> str:
+# -----------------------------------------------------------------------------
+# Bucketing + per-module pages
+# -----------------------------------------------------------------------------
+
+
+class _ModuleBucket:
+    """Accumulator for a single mcp_module's tools / prompts / resources."""
+
+    def __init__(self, name: str) -> None:
+        """Create an empty bucket for the given mcp_module name."""
+        self.name = name
+        self.tools: list[dict[str, Any]] = []
+        self.prompts: list[dict[str, Any]] = []
+        self.resources: list[dict[str, Any]] = []  # concrete + templates
+
+    @property
+    def total(self) -> int:
+        """Total count of MCP primitives in this bucket."""
+        return len(self.tools) + len(self.prompts) + len(self.resources)
+
+
+def _bucket_by_module(
+    report: dict[str, Any],
+    fallback_map: dict[str, str],
+) -> OrderedDict[str, _ModuleBucket]:
+    """Group report items by mcp_module, preserving first-seen order."""
+    buckets: OrderedDict[str, _ModuleBucket] = OrderedDict()
+
+    def get(mcp_module: str) -> _ModuleBucket:
+        if mcp_module not in buckets:
+            buckets[mcp_module] = _ModuleBucket(mcp_module)
+        return buckets[mcp_module]
+
+    for tool in report.get("tools") or []:
+        get(_get_module(tool, fallback_map)).tools.append(tool)
+    for prompt in report.get("prompts") or []:
+        get(_get_module(prompt, fallback_map)).prompts.append(prompt)
+    for resource in report.get("resources") or []:
+        get(_get_module(resource, fallback_map)).resources.append(resource)
+    for template in report.get("templates") or []:
+        get(_get_module(template, fallback_map)).resources.append(template)
+
+    return buckets
+
+
+def _render_module_page(bucket: _ModuleBucket, server_name: str) -> str:
+    """Render a single `<module>.md` page with L2 Tools/Prompts/Resources sections.
+
+    No YAML front-matter is emitted on module pages: these files are consumed
+    by pdoc3 via the `.. include::` directive (pdoc's Markdown renderer does
+    not strip front-matter and would emit it as body text). Docusaurus infers
+    the page title from the first H1, which we always emit here.
+    """
+    parts: list[str] = [
+        f"# `{bucket.name}` module\n\n",
+        (
+            f"MCP primitives registered by the `{bucket.name}` module "
+            f"of the `{server_name}` server: "
+            f"**{len(bucket.tools)}** tool(s), "
+            f"**{len(bucket.prompts)}** prompt(s), "
+            f"**{len(bucket.resources)}** resource(s).\n\n"
+        ),
+    ]
+    if bucket.tools:
+        parts.extend(
+            [
+                f"## Tools ({len(bucket.tools)})\n\n",
+                (
+                    "**Index:** "
+                    + ", ".join(f"[`{t['name']}`](#{t['name']})" for t in bucket.tools)
+                    + "\n\n"
+                ),
+            ]
+        )
+        parts.extend(_render_tool(tool) for tool in bucket.tools)
+    if bucket.prompts:
+        parts.append(f"## Prompts ({len(bucket.prompts)})\n\n")
+        parts.extend(_render_prompt(prompt) for prompt in bucket.prompts)
+    if bucket.resources:
+        parts.append(f"## Resources ({len(bucket.resources)})\n\n")
+        parts.extend(_render_resource(resource) for resource in bucket.resources)
+    return "".join(parts)
+
+
+def _render_index(
+    report: dict[str, Any],
+    buckets: OrderedDict[str, _ModuleBucket],
+) -> str:
     """Render the top-level overview page."""
     server = report.get("server") or {}
     server_name = server.get("name", "mcp-server")
@@ -235,17 +413,24 @@ def _render_index(report: dict[str, Any]) -> str:
     out += "\n"
     if instructions := server.get("instructions"):
         out += instructions.strip() + "\n\n"
-    out += "## Contents\n\n"
-    counts = {
-        "tools": len(report.get("tools") or []),
-        "resources": (len(report.get("resources") or []) + len(report.get("templates") or [])),
-        "prompts": len(report.get("prompts") or []),
-    }
+    total_tools = sum(len(b.tools) for b in buckets.values())
+    total_prompts = sum(len(b.prompts) for b in buckets.values())
+    total_resources = sum(len(b.resources) for b in buckets.values())
     out += (
-        f"- [Tools](./tools.md) — {counts['tools']}\n"
-        f"- [Resources](./resources.md) — {counts['resources']}\n"
-        f"- [Prompts](./prompts.md) — {counts['prompts']}\n\n"
+        "## Totals\n\n"
+        f"- **Tools:** {total_tools}\n"
+        f"- **Prompts:** {total_prompts}\n"
+        f"- **Resources:** {total_resources}\n\n"
     )
+    out += "## Modules\n\n"
+    out += "| Module | Tools | Prompts | Resources |\n"
+    out += "| --- | ---: | ---: | ---: |\n"
+    for name, bucket in buckets.items():
+        out += (
+            f"| [`{name}`](./{name}.md) | {len(bucket.tools)} | "
+            f"{len(bucket.prompts)} | {len(bucket.resources)} |\n"
+        )
+    out += "\n"
     out += (
         "> These pages are generated from the live `fastmcp inspect` report. "
         "Regenerate with `poe mcp-docs-md`.\n"
@@ -253,69 +438,6 @@ def _render_index(report: dict[str, Any]) -> str:
     return out
 
 
-def _render_tools_page(report: dict[str, Any]) -> str:
-    """Render the tools page."""
-    tools = report.get("tools") or []
-    out = _frontmatter(
-        title="Tools",
-        sidebar_label="Tools",
-        description=f"All {len(tools)} MCP tools exposed by this server.",
-    )
-    out += "# Tools\n\n"
-    if not tools:
-        out += "_No tools are exposed by this server._\n"
-        return out
-    out += f"This server exposes **{len(tools)}** tool(s).\n\n"
-    out += "**Index:** "
-    out += ", ".join(f"[`{t['name']}`](#{t['name']})" for t in tools) + "\n\n"
-    for tool in tools:
-        out += _render_tool(tool)
-    return out
-
-
-def _render_resources_page(report: dict[str, Any]) -> str:
-    """Render the resources + resource-templates page."""
-    resources = report.get("resources") or []
-    templates = report.get("templates") or []
-    total = len(resources) + len(templates)
-    out = _frontmatter(
-        title="Resources",
-        sidebar_label="Resources",
-        description=f"All {total} MCP resource(s) and resource template(s).",
-    )
-    out += "# Resources\n\n"
-    if not resources and not templates:
-        out += "_No resources or resource templates are exposed by this server._\n"
-        return out
-    if resources:
-        out += f"## Concrete resources ({len(resources)})\n\n"
-        for resource in resources:
-            out += _render_resource(resource)
-    if templates:
-        out += f"## Resource templates ({len(templates)})\n\n"
-        for template in templates:
-            out += _render_resource(template)
-    return out
-
-
-def _render_prompts_page(report: dict[str, Any]) -> str:
-    """Render the prompts page."""
-    prompts = report.get("prompts") or []
-    out = _frontmatter(
-        title="Prompts",
-        sidebar_label="Prompts",
-        description=f"All {len(prompts)} MCP prompt(s).",
-    )
-    out += "# Prompts\n\n"
-    if not prompts:
-        out += "_No prompts are exposed by this server._\n"
-        return out
-    out += f"This server exposes **{len(prompts)}** prompt(s).\n\n"
-    for prompt in prompts:
-        out += _render_prompt(prompt)
-    return out
-
-
 # Paths we refuse to `rmtree` even if the user passes them as --output, to
 # avoid cases like `--output /` or `--output $HOME` accidentally nuking data.
 _FORBIDDEN_OUTPUT_PATHS = frozenset(
@@ -347,22 +469,25 @@ def generate(server_spec: str, output: Path) -> None:
         print(f"Running `fastmcp inspect {server_spec}`...")
         report = _run_fastmcp_inspect(server_spec, report_path)
 
+    fallback_map = _resolve_extra_module_map(server_spec)
+    buckets = _bucket_by_module(report, fallback_map)
+
     _prepare_output_dir(output)
 
-    pages: dict[str, str] = {
-        "index.md": _render_index(report),
-        "tools.md": _render_tools_page(report),
-        "resources.md": _render_resources_page(report),
-        "prompts.md": _render_prompts_page(report),
-    }
+    server_name = (report.get("server") or {}).get("name", "mcp-server")
+    pages: dict[str, str] = {"index.md": _render_index(report, buckets)}
+    for name, bucket in buckets.items():
+        pages[f"{name}.md"] = _render_module_page(bucket, server_name)
+
     for name, content in pages.items():
         (output / name).write_text(content, encoding="utf-8")
         print(f"  wrote {output / name}")
 
     print(
-        f"Done. {len(report.get('tools') or [])} tool(s), "
-        f"{len(report.get('resources') or []) + len(report.get('templates') or [])} "
-        f"resource(s), {len(report.get('prompts') or [])} prompt(s) documented."
+        f"Done. {len(buckets)} module(s) documented — "
+        f"{sum(len(b.tools) for b in buckets.values())} tool(s), "
+        f"{sum(len(b.resources) for b in buckets.values())} resource(s), "
+        f"{sum(len(b.prompts) for b in buckets.values())} prompt(s)."
     )
 
 

From 20e22a45c47b964ed6bf3523dc6550ecf0591628 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Fri, 17 Apr 2026 08:13:54 +0000
Subject: [PATCH 04/10] fix(mcp-docs-md): plain-text headings + __all__=[] +
 pdoc toc depth=3

- Strip backticks from H1/H2/H3 in generated markdown; pdoc's TOC
  extractor was emitting an unbalanced <code> tag in the sidebar, whose
  monospace styling leaked into the rest of the page.
- Add __all__ = [] to airbyte/mcp/{cloud,local,registry,prompts}.py so
  pdoc hides the redundant Python-side tool declarations and uses the
  markdown include as the single source of truth on the page.
- Bump pdoc markdown toc depth from 2 to 3 so per-tool H3 anchors show
  up in the left-nav.
---
 airbyte/mcp/cloud.py             |  6 ++++++
 airbyte/mcp/local.py             |  6 ++++++
 airbyte/mcp/prompts.py           |  7 +++++++
 airbyte/mcp/registry.py          |  6 ++++++
 docs/generate.py                 |  9 +++++++++
 scripts/generate_mcp_markdown.py | 16 ++++++++++------
 6 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/airbyte/mcp/cloud.py b/airbyte/mcp/cloud.py
index 52859def9..f8bb3bb7e 100644
--- a/airbyte/mcp/cloud.py
+++ b/airbyte/mcp/cloud.py
@@ -4,6 +4,12 @@
 .. include:: ../../docs/mcp-generated/cloud.md
 """
 
+# No public Python API — MCP primitives are registered via decorators and
+# documented via the generated Markdown include above. Setting `__all__` to an
+# empty list tells pdoc (and other doc tools) not to surface the individual
+# tool / helper definitions as a redundant "API Documentation" list.
+__all__: list[str] = []
+
 from pathlib import Path
 from typing import Annotated, Any, Literal, cast
 
diff --git a/airbyte/mcp/local.py b/airbyte/mcp/local.py
index 4b27b121d..1f8ac752d 100644
--- a/airbyte/mcp/local.py
+++ b/airbyte/mcp/local.py
@@ -4,6 +4,12 @@
 .. include:: ../../docs/mcp-generated/local.md
 """
 
+# No public Python API — MCP primitives are registered via decorators and
+# documented via the generated Markdown include above. Setting `__all__` to an
+# empty list tells pdoc (and other doc tools) not to surface the individual
+# tool / helper definitions as a redundant "API Documentation" list.
+__all__: list[str] = []
+
 import sys
 import traceback
 from itertools import islice
diff --git a/airbyte/mcp/prompts.py b/airbyte/mcp/prompts.py
index 37767a895..449f25f26 100644
--- a/airbyte/mcp/prompts.py
+++ b/airbyte/mcp/prompts.py
@@ -15,6 +15,13 @@
 from pydantic import Field
 
 
+# No public Python API — MCP primitives are registered via decorators and
+# documented via the generated Markdown include above. Setting `__all__` to an
+# empty list tells pdoc (and other doc tools) not to surface the individual
+# tool / helper definitions as a redundant "API Documentation" list.
+__all__: list[str] = []
+
+
 if TYPE_CHECKING:
     from fastmcp import FastMCP
 
diff --git a/airbyte/mcp/registry.py b/airbyte/mcp/registry.py
index 999c6b0e6..e8ec451b4 100644
--- a/airbyte/mcp/registry.py
+++ b/airbyte/mcp/registry.py
@@ -4,6 +4,12 @@
 .. include:: ../../docs/mcp-generated/registry.md
 """
 
+# No public Python API — MCP primitives are registered via decorators and
+# documented via the generated Markdown include above. Setting `__all__` to an
+# empty list tells pdoc (and other doc tools) not to surface the individual
+# tool / helper definitions as a redundant "API Documentation" list.
+__all__: list[str] = []
+
 # Note: Deferred type evaluation must be avoided due to FastMCP/Pydantic needing
 # types to be available at import time for tool registration.
 import contextlib
diff --git a/docs/generate.py b/docs/generate.py
index a3bfa91b9..d9d530a28 100755
--- a/docs/generate.py
+++ b/docs/generate.py
@@ -14,6 +14,7 @@
 import shutil
 
 import pdoc
+import pdoc.render_helpers
 
 
 def run() -> None:
@@ -24,6 +25,14 @@ def run() -> None:
     if pathlib.Path("docs/generated").exists():
         shutil.rmtree("docs/generated")
 
+    # pdoc's default sidebar TOC depth is 2 (H1 + H2 only), which hides the
+    # per-tool H3 anchors produced by our MCP Markdown generator. Bump to 3 so
+    # individual tools / prompts / resources show up in the left nav. This
+    # monkey-patches the module-level `markdown_extensions` dict because pdoc
+    # 16's `configure()` does not expose markdown extension options.
+    # pyrefly: ignore[unsupported-operation]
+    pdoc.render_helpers.markdown_extensions["toc"] = {"depth": 3}
+
     pdoc.render.configure(
         template_directory=pathlib.Path("docs/templates"),
         show_source=True,
diff --git a/scripts/generate_mcp_markdown.py b/scripts/generate_mcp_markdown.py
index 344e4f495..f94c646b0 100755
--- a/scripts/generate_mcp_markdown.py
+++ b/scripts/generate_mcp_markdown.py
@@ -242,9 +242,10 @@ def _render_parameters_table(input_schema: dict[str, Any]) -> str:
 def _render_tool(tool: dict[str, Any]) -> str:
     """Render a single tool as L3 under its module's `## Tools` section."""
     name = tool["name"]
-    # HTML anchor + heading (instead of Pandoc `{#...}` attr syntax, which
-    # renders as literal text in pdoc3's markdown processor).
-    parts: list[str] = [f'<a id="{name}"></a>\n\n### `{name}`\n\n']
+    # Plain text in the heading (no backticks) so pdoc's TOC extractor
+    # produces a clean sidebar nav entry. The HTML anchor above the heading
+    # is what we deep-link to.
+    parts: list[str] = [f'<a id="{name}"></a>\n\n### {name}\n\n']
     if description := tool.get("description"):
         parts.append(description.strip() + "\n\n")
     if tags := tool.get("tags"):
@@ -265,7 +266,7 @@ def _render_tool(tool: dict[str, Any]) -> str:
 def _render_resource(resource: dict[str, Any]) -> str:
     """Render a single resource as L3 under its module's `## Resources` section."""
     name = resource["name"]
-    parts: list[str] = [f"### `{name}` {{#{name}}}\n\n"]
+    parts: list[str] = [f'<a id="{name}"></a>\n\n### {name}\n\n']
     if description := resource.get("description"):
         parts.append(description.strip() + "\n\n")
     meta_lines: list[str] = []
@@ -285,7 +286,7 @@ def _render_resource(resource: dict[str, Any]) -> str:
 def _render_prompt(prompt: dict[str, Any]) -> str:
     """Render a single prompt as L3 under its module's `## Prompts` section."""
     name = prompt["name"]
-    parts: list[str] = [f"### `{name}` {{#{name}}}\n\n"]
+    parts: list[str] = [f'<a id="{name}"></a>\n\n### {name}\n\n']
     if description := prompt.get("description"):
         parts.append(description.strip() + "\n\n")
     args = prompt.get("arguments") or []
@@ -359,8 +360,11 @@ def _render_module_page(bucket: _ModuleBucket, server_name: str) -> str:
     not strip front-matter and would emit it as body text). Docusaurus infers
     the page title from the first H1, which we always emit here.
     """
+    # Headings are plain text (no backticks) so pdoc's TOC extractor yields
+    # clean nav entries; cosmetic backticks inside headings produced
+    # unbalanced `<code>` tags in the generated TOC HTML.
     parts: list[str] = [
-        f"# `{bucket.name}` module\n\n",
+        f"# {bucket.name} module\n\n",
         (
             f"MCP primitives registered by the `{bucket.name}` module "
             f"of the `{server_name}` server: "

From dbd39e7a52615896cf33af1a2f5ae34d7171bb02 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Fri, 17 Apr 2026 08:41:16 +0000
Subject: [PATCH 05/10] fix(mcp-docs-md): indent nested TOC levels, drop inline
 tool index
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- custom.css: progressively indent H3-and-deeper entries in pdoc's left
  sidebar TOC so per-tool anchors visually nest under the 'Tools (N)'
  H2 and the '<module> module' H1. pdoc's default layout.css uses a
  single indent step for all non-top-level entries, which made H2 and
  H3 render at the same depth.
- generate_mcp_markdown.py: drop the inline 'Index: tool_a, tool_b, …'
  row from module pages. The left nav now lists every tool under its
  section, so the inline list was redundant.
---
 docs/templates/custom.css        | 16 ++++++++++++++++
 scripts/generate_mcp_markdown.py | 14 ++++----------
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/docs/templates/custom.css b/docs/templates/custom.css
index 541900423..17a1d7d0f 100644
--- a/docs/templates/custom.css
+++ b/docs/templates/custom.css
@@ -174,6 +174,22 @@ nav a:hover {
   color: var(--link-hover) !important;
 }
 
+/*
+ * Progressively indent nested TOC levels in the sidebar.
+ *
+ * pdoc's default layout.css indents *all* non-top-level nav items by a single
+ * (pad + indent) step, which makes H2 and H3 entries render at the same visual
+ * depth. When the generated MCP Markdown uses H3 headings per-tool nested
+ * under an H2 "Tools" heading, we want the tool names to appear visibly
+ * nested under the section heading in the left nav.
+ */
+nav.pdoc > div > ul > li > ul > li > ul > li > a {
+  padding-left: calc(var(--pad) + (var(--indent) * 2)) !important;
+}
+nav.pdoc > div > ul > li > ul > li > ul > li > ul > li > a {
+  padding-left: calc(var(--pad) + (var(--indent) * 3)) !important;
+}
+
 /* Style badges and labels */
 .badge {
   background-color: var(--color-green-40);
diff --git a/scripts/generate_mcp_markdown.py b/scripts/generate_mcp_markdown.py
index f94c646b0..80a31580e 100755
--- a/scripts/generate_mcp_markdown.py
+++ b/scripts/generate_mcp_markdown.py
@@ -374,16 +374,10 @@ def _render_module_page(bucket: _ModuleBucket, server_name: str) -> str:
         ),
     ]
     if bucket.tools:
-        parts.extend(
-            [
-                f"## Tools ({len(bucket.tools)})\n\n",
-                (
-                    "**Index:** "
-                    + ", ".join(f"[`{t['name']}`](#{t['name']})" for t in bucket.tools)
-                    + "\n\n"
-                ),
-            ]
-        )
+        # The left-nav sidebar already lists every tool under this H2 via the
+        # TOC, so we intentionally omit the inline "Index: …" row that we used
+        # to emit here.
+        parts.append(f"## Tools ({len(bucket.tools)})\n\n")
         parts.extend(_render_tool(tool) for tool in bucket.tools)
     if bucket.prompts:
         parts.append(f"## Prompts ({len(bucket.prompts)})\n\n")

From fcababf7fad5656451dccaf8fb27d2676bca8e2d Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Fri, 17 Apr 2026 08:47:31 +0000
Subject: [PATCH 06/10] fix(mcp-docs-md): address Copilot + Devin review
 comments

- scripts/generate_mcp_markdown.py: refresh module docstring to match
  current behavior (per-module output, no front-matter on module pages,
  DEFAULT_SERVER_SPEC is a .py path not a dotted module).
- scripts/generate_mcp_markdown.py: guard _render_index against empty or
  missing instructions (`"".splitlines()[0]` raised IndexError).
- scripts/generate_mcp_markdown.py: tighten _prepare_output_dir to
  require --output to be strictly inside the current working directory
  (rejects /, ~, .., the cwd itself, and arbitrary absolute paths
  outside the repo).
- docs/generate.py: regenerate docs/mcp-generated/ before pdoc so
  .. include:: directives resolve on a clean checkout (docs/mcp-generated
  is git-ignored). Falls back to a warning if generation fails.
- docs/CONTRIBUTING.md: describe actual per-module output layout
  (index.md + cloud/local/registry/prompts/misc) and deep-link shape.
---
 docs/CONTRIBUTING.md             | 41 +++++++++++--------
 docs/generate.py                 | 48 ++++++++++++++++++++++
 scripts/generate_mcp_markdown.py | 69 ++++++++++++++++++--------------
 3 files changed, 110 insertions(+), 48 deletions(-)

diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md
index ab7619dec..0455e0c17 100644
--- a/docs/CONTRIBUTING.md
+++ b/docs/CONTRIBUTING.md
@@ -146,26 +146,33 @@ poe mcp-inspect        # Show all available MCP tools and their schemas
 
 ### Generating Markdown docs for the MCP Server
 
-The repo ships a small script that introspects the MCP server via
-`fastmcp inspect` and emits a Markdown documentation site under
-`docs/mcp-generated/` (git-ignored). The output is designed to be both
-Docusaurus-hostable and `pdoc`-compatible — plain CommonMark with YAML
-front-matter, no MDX-only components.
+The repo ships a small script (`scripts/generate_mcp_markdown.py`) that
+introspects the MCP server via `fastmcp inspect` and emits a Markdown
+documentation site under `docs/mcp-generated/` (git-ignored). The output is
+plain CommonMark with no MDX-only components, so it is both Docusaurus-hostable
+and consumable by `pdoc` — the four `airbyte.mcp.{cloud,local,registry,prompts}`
+modules pull their respective generated file in via pdoc's `.. include::`
+directive, so `poe docs-generate` surfaces the generated tool docs on each
+module's pdoc page alongside the regular `docs/generated/` output.
 
 ```bash
 uv sync --group dev
 poe mcp-docs-md
 ```
 
-Four files are produced:
-
-- `index.md` — server overview (name, version, instructions, counts)
-- `tools.md` — one section per tool with a parameters table and collapsible
-  input/output JSON schemas
-- `resources.md` — concrete resources + resource templates
-- `prompts.md` — prompts and their arguments
-
-Each tool/resource/prompt has a stable slug anchor (e.g.
-`tools.md#list_connectors`) so the pages can be deep-linked from Slack, issues,
-or other docs. Regenerate after any change to MCP tool signatures, descriptions,
-or schemas. The underlying script is at `scripts/generate_mcp_markdown.py`.
+One Markdown file is produced per MCP module, plus an `index.md`. For the
+PyAirbyte server that is:
+
+- `index.md` — server overview (name, version, instructions, totals, module table)
+- `cloud.md` — tools registered by `airbyte.mcp.cloud`
+- `local.md` — tools registered by `airbyte.mcp.local`
+- `registry.md` — tools registered by `airbyte.mcp.registry`
+- `prompts.md` — prompts registered by `airbyte.mcp.prompts`
+- `misc.md` — anything without an `mcp_module` annotation (currently just the
+  `server_info` resource)
+
+Inside each module page, primitives are grouped by kind (`## Tools`,
+`## Prompts`, `## Resources`), and each primitive has an HTML anchor
+(`<a id="name"></a>`) above its H3 so links like
+`cloud.md#deploy_source_to_cloud` resolve in both pdoc and Docusaurus.
+Regenerate after any change to MCP tool signatures, descriptions, or schemas.
diff --git a/docs/generate.py b/docs/generate.py
index d9d530a28..6820f47de 100755
--- a/docs/generate.py
+++ b/docs/generate.py
@@ -12,15 +12,63 @@
 
 import pathlib
 import shutil
+import sys
 
 import pdoc
 import pdoc.render_helpers
 
 
+def _regenerate_mcp_markdown() -> None:
+    """Regenerate `docs/mcp-generated/` before pdoc runs.
+
+    The `airbyte.mcp.{cloud,local,registry,prompts}` modules pull the
+    per-module Markdown files from `docs/mcp-generated/` via pdoc's
+    `.. include::` directive. That directory is git-ignored, so on a clean
+    checkout pdoc would fail to resolve the include unless we regenerate it
+    here. Running the generator from inside `docs-generate` makes the full
+    docs build reproducible from a fresh clone (and matches the standalone
+    `poe mcp-docs-md` task).
+
+    If generation fails (e.g. `fastmcp` is not installed, or the MCP server
+    import fails), we print a warning and continue: pdoc will still build,
+    and the include directive will just surface the missing file.
+    """
+    script = pathlib.Path(__file__).parent.parent / "scripts" / "generate_mcp_markdown.py"
+    if not script.exists():
+        print(f"[docs-generate] MCP markdown generator not found at {script}; skipping.")
+        return
+    # Import-and-call rather than subprocess so we share the current venv and
+    # surface tracebacks directly. The generator resolves paths relative to
+    # cwd, which `poe docs-generate` runs from the repo root.
+    sys.path.insert(0, str(script.parent))
+    try:
+        from generate_mcp_markdown import (  # noqa: PLC0415
+            DEFAULT_OUTPUT,
+            DEFAULT_SERVER_SPEC,
+            generate,
+        )
+
+        print("[docs-generate] Regenerating docs/mcp-generated/ ...")
+        generate(server_spec=DEFAULT_SERVER_SPEC, output=DEFAULT_OUTPUT)
+    except Exception as ex:
+        print(
+            f"[docs-generate] WARNING: failed to regenerate MCP Markdown docs: {ex}. "
+            "pdoc will continue, but module pages may show missing include warnings.",
+            file=sys.stderr,
+        )
+    finally:
+        sys.path.pop(0)
+
+
 def run() -> None:
     """Generate docs for all public modules in PyAirbyte and save them to docs/generated."""
     public_modules = ["airbyte", "airbyte/cli/pyab.py"]
 
+    # Regenerate MCP Markdown first so the `.. include::` directives in the
+    # MCP module docstrings resolve on a clean checkout (docs/mcp-generated/
+    # is git-ignored).
+    _regenerate_mcp_markdown()
+
     # recursively delete the docs/generated folder if it exists
     if pathlib.Path("docs/generated").exists():
         shutil.rmtree("docs/generated")
diff --git a/scripts/generate_mcp_markdown.py b/scripts/generate_mcp_markdown.py
index 80a31580e..35bd99db6 100755
--- a/scripts/generate_mcp_markdown.py
+++ b/scripts/generate_mcp_markdown.py
@@ -2,9 +2,10 @@
 # Copyright (c) 2026 Airbyte, Inc., all rights reserved.
 """Generate Markdown documentation for the PyAirbyte MCP server.
 
-Runs `fastmcp inspect` against `airbyte.mcp.server:app` to obtain the full
-FastMCP protocol surface (tools, resources, resource templates, prompts) as a
-JSON report, then renders it into one Markdown file **per MCP module** under
+Runs `fastmcp inspect` against the default `airbyte/mcp/server.py:app` spec
+(override with `--server-spec`) to obtain the full FastMCP protocol surface
+(tools, resources, resource templates, prompts) as a JSON report, then
+renders it into one Markdown file **per MCP module** under
 `docs/mcp-generated/`, plus an `index.md` overview.
 
 The per-module grouping uses the `mcp_module` annotation that
@@ -18,16 +19,16 @@
 Inside each module file, content is grouped by primitive with L2 headings:
 
 ```
-# airbyte.mcp.cloud
+# cloud module
 
-## Tools
-### `deploy_source_to_cloud`
+## Tools (35)
+### deploy_source_to_cloud
 ...
-## Prompts
-### `some_prompt`
+## Prompts (N)
+### some_prompt
 ...
-## Resources
-### `some_resource`
+## Resources (N)
+### some_resource
 ...
 ```
 
@@ -36,13 +37,17 @@
 - **`pdoc`/`pdoc3`-includable**: each `<module>.md` is a self-contained body
   intended to be spliced into the corresponding Python module's docstring via
   pdoc's `.. include::` directive, so the generated tool docs render inline on
-  the module's pdoc page.
-- **Docusaurus-hostable**: each file starts with YAML front-matter (`title`,
-  `sidebar_label`, `description`); the body is plain CommonMark + GFM tables +
+  the module's pdoc page. Per-module pages intentionally emit **no** YAML
+  front-matter (pdoc's Markdown renderer would surface it as body text);
+  only `index.md` carries front-matter.
+- **Docusaurus-hostable**: `index.md` starts with YAML front-matter (`title`,
+  `sidebar_label`, `description`); module pages rely on Docusaurus'
+  first-H1-as-title inference. The body is plain CommonMark + GFM tables +
   `<details><summary>` blocks for collapsible JSON schemas. No MDX-only
   components are used.
-- **Deep-linkable**: every tool/resource/prompt name is an H3 with a stable
-  slug anchor (e.g. `cloud.md#deploy_source_to_cloud`).
+- **Deep-linkable**: every tool/resource/prompt name gets an HTML anchor
+  (`<a id="name"></a>`) above its H3, so links like
+  `cloud.md#deploy_source_to_cloud` resolve in both pdoc and Docusaurus.
 
 Formatting is modeled on the
 [`mcpdocs-gen`](https://github.com/smytsyk/mcpdocs) static HTML output — same
@@ -395,10 +400,12 @@ def _render_index(
     """Render the top-level overview page."""
     server = report.get("server") or {}
     server_name = server.get("name", "mcp-server")
+    # `splitlines()` on an empty string returns `[]`, so we can't index [0].
+    first_instruction_line = next(iter((server.get("instructions") or "").splitlines()), "")
     out = _frontmatter(
         title=f"{server_name} — MCP server",
         sidebar_label="Overview",
-        description=(server.get("instructions") or "").splitlines()[0]
+        description=first_instruction_line
         or f"Auto-generated docs for the {server_name} MCP server.",
     )
     out += f"# `{server_name}`\n\n"
@@ -436,24 +443,24 @@ def _render_index(
     return out
 
 
-# Paths we refuse to `rmtree` even if the user passes them as --output, to
-# avoid cases like `--output /` or `--output $HOME` accidentally nuking data.
-_FORBIDDEN_OUTPUT_PATHS = frozenset(
-    {
-        Path("/"),
-        Path.home(),
-        Path.cwd(),
-    }
-)
-
-
 def _prepare_output_dir(output: Path) -> None:
-    """Reset (or create) an output directory, with a minimal safety guard."""
+    """Reset (or create) an output directory, with a strict safety guard.
+
+    The script unconditionally `rmtree`s `output` before regenerating, so we
+    need to be careful about what callers can point `--output` at. We require
+    the resolved output path to live **strictly inside** the current working
+    directory (typically the repo root) — this rules out `/`, `$HOME`,
+    `--output ..`, and any absolute path outside the repo, while still
+    letting the default `docs/mcp-generated/` work. The cwd itself is also
+    rejected so we never nuke the whole repo.
+    """
     resolved = output.resolve()
-    if resolved in {p.resolve() for p in _FORBIDDEN_OUTPUT_PATHS}:
+    cwd = Path.cwd().resolve()
+    if resolved == cwd or not resolved.is_relative_to(cwd):
         raise RuntimeError(
-            f"Refusing to rmtree suspicious output path {resolved}. "
-            "Pass --output pointing at a dedicated subdirectory."
+            f"Refusing to rmtree output path {resolved}: must be a dedicated "
+            f"subdirectory strictly inside the current working directory "
+            f"({cwd}). Pass --output pointing at e.g. `docs/mcp-generated`."
         )
     if output.exists():
         shutil.rmtree(output)

From 2cf273960a29b5f901102b1968ffab292e6469ef Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Fri, 17 Apr 2026 08:49:19 +0000
Subject: [PATCH 07/10] fix(docs-generate): load MCP markdown generator via
 importlib to satisfy deptry

The previous static 'from generate_mcp_markdown import ...' triggered
deptry's DEP001 rule (the script lives under scripts/ which is not on
sys.path, so deptry treated it as a missing external dependency). Use
importlib.util.spec_from_file_location to load the module from its
on-disk path instead.
---
 docs/generate.py | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/docs/generate.py b/docs/generate.py
index 6820f47de..8990ebfb3 100755
--- a/docs/generate.py
+++ b/docs/generate.py
@@ -10,6 +10,7 @@
 
 from __future__ import annotations
 
+import importlib.util
 import pathlib
 import shutil
 import sys
@@ -32,32 +33,35 @@ def _regenerate_mcp_markdown() -> None:
     If generation fails (e.g. `fastmcp` is not installed, or the MCP server
     import fails), we print a warning and continue: pdoc will still build,
     and the include directive will just surface the missing file.
+
+    We load the generator via `importlib.util` from its on-disk path rather
+    than a plain `from generate_mcp_markdown import ...`: the generator
+    lives under `scripts/` (not on `sys.path`), and a static import would
+    also trip `deptry` into flagging `generate_mcp_markdown` as a missing
+    external dependency.
     """
     script = pathlib.Path(__file__).parent.parent / "scripts" / "generate_mcp_markdown.py"
     if not script.exists():
         print(f"[docs-generate] MCP markdown generator not found at {script}; skipping.")
         return
-    # Import-and-call rather than subprocess so we share the current venv and
-    # surface tracebacks directly. The generator resolves paths relative to
-    # cwd, which `poe docs-generate` runs from the repo root.
-    sys.path.insert(0, str(script.parent))
     try:
-        from generate_mcp_markdown import (  # noqa: PLC0415
-            DEFAULT_OUTPUT,
-            DEFAULT_SERVER_SPEC,
-            generate,
-        )
-
+        spec = importlib.util.spec_from_file_location("_mcp_markdown_gen", script)
+        if spec is None or spec.loader is None:
+            msg = f"Could not load spec for {script}"
+            raise RuntimeError(msg)  # noqa: TRY301
+        module = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(module)
         print("[docs-generate] Regenerating docs/mcp-generated/ ...")
-        generate(server_spec=DEFAULT_SERVER_SPEC, output=DEFAULT_OUTPUT)
+        module.generate(
+            server_spec=module.DEFAULT_SERVER_SPEC,
+            output=module.DEFAULT_OUTPUT,
+        )
     except Exception as ex:
         print(
             f"[docs-generate] WARNING: failed to regenerate MCP Markdown docs: {ex}. "
             "pdoc will continue, but module pages may show missing include warnings.",
             file=sys.stderr,
         )
-    finally:
-        sys.path.pop(0)
 
 
 def run() -> None:

From dffeaefc62bc308ccae9cca96404624f813e2930 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Fri, 17 Apr 2026 09:19:48 +0000
Subject: [PATCH 08/10] feat(mcp-docs-md): render MCP tool-annotation hints +
 alpha-sort primitives
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Every tool / prompt / resource is now rendered in a stable alphabetical
order inside each module page (case-insensitive sort by name/uri), and
the 'misc' catch-all module is pinned last in the module table. Module
order on the index is alphabetical.

For each tool we now surface MCP tool-annotation hints as inline-code
badges right below the H3 — 'read-only', 'destructive', 'idempotent',
'open-world'. Hints are only rendered when explicitly True, so a tool
like 'list_cloud_workspaces' shows '`read-only` · `idempotent` ·
`open-world`' while 'permanently_delete_cloud_connection' shows
'`destructive` · `open-world`'. An optional human-readable
'annotations.title' override (distinct from the top-level title) is
also surfaced when present.
---
 scripts/generate_mcp_markdown.py | 56 +++++++++++++++++++++++++++++++-
 1 file changed, 55 insertions(+), 1 deletion(-)

diff --git a/scripts/generate_mcp_markdown.py b/scripts/generate_mcp_markdown.py
index 35bd99db6..11b88d5df 100755
--- a/scripts/generate_mcp_markdown.py
+++ b/scripts/generate_mcp_markdown.py
@@ -221,6 +221,41 @@ def _json_block(label: str, obj: Any) -> str:  # noqa: ANN401
     )
 
 
+# MCP tool annotation hints (per the MCP spec): we render a badge for every
+# hint whose value is `True`, using a stable, human-readable label. The four
+# standardised hints come from
+# https://modelcontextprotocol.io/specification/server/tools#tool-annotations.
+_HINT_LABELS: dict[str, str] = {
+    "readOnlyHint": "read-only",
+    "destructiveHint": "destructive",
+    "idempotentHint": "idempotent",
+    "openWorldHint": "open-world",
+}
+
+
+def _render_hint_badges(annotations: dict[str, Any] | None) -> str:
+    """Render MCP tool-annotation hints as inline `code` badges.
+
+    Only hints whose value is explicitly `True` are rendered — an unset or
+    `False` hint is omitted. The MCP spec treats hints as advisory, so
+    "absence" and "false" are equivalent for documentation purposes.
+
+    Also surfaces the optional human-readable `annotations.title` (distinct
+    from the top-level `title` field) when present, so e.g.
+    `annotations.title == "Deploy a source to Airbyte Cloud"` shows up in
+    the rendered doc.
+    """
+    if not annotations:
+        return ""
+    lines: list[str] = []
+    badges = [f"`{label}`" for key, label in _HINT_LABELS.items() if annotations.get(key) is True]
+    if badges:
+        lines.append("**Hints:** " + " · ".join(badges))
+    if title := annotations.get("title"):
+        lines.append(f"**Title:** {title}")
+    return ("\n\n".join(lines) + "\n\n") if lines else ""
+
+
 def _render_parameters_table(input_schema: dict[str, Any]) -> str:
     """Render a GFM parameters table for a tool's `input_schema`."""
     properties = input_schema.get("properties") or {}
@@ -251,6 +286,7 @@ def _render_tool(tool: dict[str, Any]) -> str:
     # produces a clean sidebar nav entry. The HTML anchor above the heading
     # is what we deep-link to.
     parts: list[str] = [f'<a id="{name}"></a>\n\n### {name}\n\n']
+    parts.append(_render_hint_badges(tool.get("annotations")))
     if description := tool.get("description"):
         parts.append(description.strip() + "\n\n")
     if tags := tool.get("tags"):
@@ -354,7 +390,25 @@ def get(mcp_module: str) -> _ModuleBucket:
     for template in report.get("templates") or []:
         get(_get_module(template, fallback_map)).resources.append(template)
 
-    return buckets
+    # Alpha-sort each bucket's primitives (case-insensitive) so the rendered
+    # pages, left-nav entries, and deep-link IDs are in a stable, predictable
+    # order across regenerations instead of reflecting server registration
+    # order (which is effectively arbitrary).
+    def _sort_key(item: dict[str, Any]) -> str:
+        return str(item.get("name") or item.get("uri") or "").lower()
+
+    for bucket in buckets.values():
+        bucket.tools.sort(key=_sort_key)
+        bucket.prompts.sort(key=_sort_key)
+        bucket.resources.sort(key=_sort_key)
+
+    # Also sort the module-level ordering so `index.md`'s module table and the
+    # order of files on disk are alphabetical (the `misc` bucket, which is a
+    # catch-all, is always pinned last).
+    sorted_buckets: OrderedDict[str, _ModuleBucket] = OrderedDict()
+    for name in sorted(buckets, key=lambda n: (n == MISC_MODULE, n.lower())):
+        sorted_buckets[name] = buckets[name]
+    return sorted_buckets
 
 
 def _render_module_page(bucket: _ModuleBucket, server_name: str) -> str:

From cbb13644a9171d9412b8e478d569d6bfbc9740d2 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Fri, 17 Apr 2026 09:26:39 +0000
Subject: [PATCH 09/10] fix(mcp-docs-md): timeout fastmcp inspect, guard
 private-name iteration, anchor output path to repo root

Addresses three CodeRabbit findings on commit dffeaef:

1. `_run_fastmcp_inspect` now passes `timeout=120` to `subprocess.run`
   and translates `TimeoutExpired` into an actionable `RuntimeError`.
   Previously a hung `fastmcp inspect` (blocking import, stalled
   network I/O during tool registration, etc.) would make
   `poe docs-generate` / `poe mcp-docs-md` hang indefinitely rather
   than fail loudly in CI.

2. `_resolve_extra_module_map` now iterates the private
   `fastmcp_extensions.decorators._REGISTERED_{PROMPTS,RESOURCES}`
   tuples *inside* the same `try`/`except Exception` that imports
   them. Previously any shape drift in those private tuples (third
   element added, `ann` becoming a dataclass, etc.) would escape the
   guard and abort doc generation -- now the function falls back to an
   empty map exactly as its docstring promises.

3. `_prepare_output_dir` is now anchored to the repo root (derived
   from `__file__`), not `Path.cwd()`. `DEFAULT_OUTPUT` is a
   repo-relative path, so anchoring to cwd meant running
   `poe mcp-docs-md` from inside `docs/` (or anywhere other than the
   repo root) would silently write into the wrong directory while
   still passing the strict `is_relative_to(cwd)` guard. A new
   `_resolve_output_dir` helper encapsulates the relative-to-repo-root
   resolution; the existing safety guard semantics are preserved
   (repo root itself is rejected, absolute paths outside the repo
   root are rejected).
---
 scripts/generate_mcp_markdown.py | 113 +++++++++++++++++++++----------
 1 file changed, 77 insertions(+), 36 deletions(-)

diff --git a/scripts/generate_mcp_markdown.py b/scripts/generate_mcp_markdown.py
index 11b88d5df..e245c2abf 100755
--- a/scripts/generate_mcp_markdown.py
+++ b/scripts/generate_mcp_markdown.py
@@ -85,6 +85,19 @@
 DEFAULT_OUTPUT = Path("docs/mcp-generated")
 DEFAULT_SERVER_SPEC = "airbyte/mcp/server.py:app"
 MISC_MODULE = "misc"
+# Upper bound on how long `fastmcp inspect` may take before we fail the build.
+# 120s is generous: local runs finish in ~10s, but CI / cold caches occasionally
+# spend longer on the server-module import (e.g. re-resolving wheels). Anything
+# beyond this almost certainly indicates a hang (blocking import, stalled
+# network I/O during registration) rather than real work, so failing fast is
+# preferable to an indefinitely stuck `poe docs-generate` / `poe mcp-docs-md`.
+_FASTMCP_INSPECT_TIMEOUT_SEC = 120
+# Repo root anchor for path-safety checks. `__file__` is always the on-disk
+# location of this script, so `parent.parent` reliably points at the repo root
+# regardless of the caller's `cwd`. We use this instead of `Path.cwd()` when
+# resolving repo-relative defaults like `DEFAULT_OUTPUT` so that
+# `poe mcp-docs-md` works even when invoked from a subdirectory.
+_REPO_ROOT = Path(__file__).resolve().parent.parent
 
 
 def _run_fastmcp_inspect(server_spec: str, report_path: Path) -> dict[str, Any]:
@@ -95,18 +108,28 @@ def _run_fastmcp_inspect(server_spec: str, report_path: Path) -> dict[str, Any]:
             "`fastmcp` CLI not found on PATH. Install project dev deps first "
             "(e.g. `uv sync --group dev`) and re-run from the repo root."
         )
-    subprocess.run(
-        [
-            fastmcp_bin,
-            "inspect",
-            server_spec,
-            "--format",
-            "fastmcp",
-            "--output",
-            str(report_path),
-        ],
-        check=True,
-    )
+    try:
+        subprocess.run(
+            [
+                fastmcp_bin,
+                "inspect",
+                server_spec,
+                "--format",
+                "fastmcp",
+                "--output",
+                str(report_path),
+            ],
+            check=True,
+            timeout=_FASTMCP_INSPECT_TIMEOUT_SEC,
+        )
+    except subprocess.TimeoutExpired as ex:
+        msg = (
+            f"`fastmcp inspect {server_spec}` timed out after "
+            f"{_FASTMCP_INSPECT_TIMEOUT_SEC}s. The server module likely hangs "
+            "on import (blocking network I/O during tool registration?). "
+            "Re-run with the server imported manually to investigate."
+        )
+        raise RuntimeError(msg) from ex
     return json.loads(report_path.read_text(encoding="utf-8"))
 
 
@@ -130,6 +153,11 @@ def _resolve_extra_module_map(server_spec: str) -> dict[str, str]:
     file_part = server_spec.split(":", 1)[0]
     module_name = file_part.removesuffix(".py").replace("/", ".")
     mapping: dict[str, str] = {}
+    # The iteration sits inside the same `try` as the import so any shape
+    # drift in the private `_REGISTERED_*` tuples (e.g. an added third element,
+    # or `ann` becoming a dataclass instead of a dict) falls back to an empty
+    # mapping — preserving this helper's documented best-effort semantics —
+    # rather than aborting doc generation.
     try:
         importlib.import_module(module_name)
         # Import private lists from fastmcp_extensions: these are the only
@@ -139,18 +167,20 @@ def _resolve_extra_module_map(server_spec: str) -> dict[str, str]:
             _REGISTERED_PROMPTS,  # noqa: PLC2701
             _REGISTERED_RESOURCES,  # noqa: PLC2701
         )
+
+        for _fn, ann in _REGISTERED_PROMPTS:
+            if name := ann.get("name"):
+                mapping[name] = ann.get("mcp_module") or MISC_MODULE
+        for _fn, ann in _REGISTERED_RESOURCES:
+            mcp_module = ann.get("mcp_module") or MISC_MODULE
+            if uri := ann.get("uri"):
+                mapping[uri] = mcp_module
+                # FastMCP exposes the URI stem as the resource `name` in
+                # inspect output; index by that too so lookup by either key
+                # works.
+                mapping[uri.rsplit("/", 1)[-1]] = mcp_module
     except Exception:
-        return mapping
-    for _fn, ann in _REGISTERED_PROMPTS:
-        if name := ann.get("name"):
-            mapping[name] = ann.get("mcp_module") or MISC_MODULE
-    for _fn, ann in _REGISTERED_RESOURCES:
-        mcp_module = ann.get("mcp_module") or MISC_MODULE
-        if uri := ann.get("uri"):
-            mapping[uri] = mcp_module
-            # FastMCP exposes the URI stem as the resource `name` in inspect
-            # output; index by that too so lookup by either key works.
-            mapping[uri.rsplit("/", 1)[-1]] = mcp_module
+        return {}
     return mapping
 
 
@@ -497,28 +527,39 @@ def _render_index(
     return out
 
 
+def _resolve_output_dir(output: Path) -> Path:
+    """Resolve an `--output` path against the repo root when it's relative.
+
+    `DEFAULT_OUTPUT` is a repo-relative path, so anchoring relative inputs to
+    `_REPO_ROOT` (rather than `Path.cwd()`) means `poe mcp-docs-md` works
+    regardless of where the task is invoked from — a contributor running the
+    task from inside `docs/` still writes to `<repo>/docs/mcp-generated/`.
+    Absolute paths are honoured as-given (the safety guard below still
+    rejects any absolute path that escapes the repo root).
+    """
+    return (output if output.is_absolute() else _REPO_ROOT / output).resolve()
+
+
 def _prepare_output_dir(output: Path) -> None:
     """Reset (or create) an output directory, with a strict safety guard.
 
     The script unconditionally `rmtree`s `output` before regenerating, so we
     need to be careful about what callers can point `--output` at. We require
-    the resolved output path to live **strictly inside** the current working
-    directory (typically the repo root) — this rules out `/`, `$HOME`,
-    `--output ..`, and any absolute path outside the repo, while still
-    letting the default `docs/mcp-generated/` work. The cwd itself is also
-    rejected so we never nuke the whole repo.
+    the resolved output path to live **strictly inside** the repo root — this
+    rules out `/`, `$HOME`, `--output ..`, and any absolute path outside the
+    repo, while still letting the default `docs/mcp-generated/` work. The
+    repo root itself is also rejected so we never nuke the whole repo.
     """
-    resolved = output.resolve()
-    cwd = Path.cwd().resolve()
-    if resolved == cwd or not resolved.is_relative_to(cwd):
+    resolved = _resolve_output_dir(output)
+    if resolved == _REPO_ROOT or not resolved.is_relative_to(_REPO_ROOT):
         raise RuntimeError(
             f"Refusing to rmtree output path {resolved}: must be a dedicated "
-            f"subdirectory strictly inside the current working directory "
-            f"({cwd}). Pass --output pointing at e.g. `docs/mcp-generated`."
+            f"subdirectory strictly inside the repo root ({_REPO_ROOT}). "
+            f"Pass --output pointing at e.g. `docs/mcp-generated`."
         )
-    if output.exists():
-        shutil.rmtree(output)
-    output.mkdir(parents=True, exist_ok=True)
+    if resolved.exists():
+        shutil.rmtree(resolved)
+    resolved.mkdir(parents=True, exist_ok=True)
 
 
 def generate(server_spec: str, output: Path) -> None:

From 41d092cac2f33b607b04524553b91fbc79e1d7e5 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Fri, 17 Apr 2026 09:31:38 +0000
Subject: [PATCH 10/10] fix(mcp-docs-md): use resolved output dir for file
 writes

Follow-up to cbb1364. Devin Review caught that `_prepare_output_dir`
was resolving paths against `_REPO_ROOT` for mkdir/rmtree while the
caller in `generate()` still used the raw (cwd-relative) `output` for
`write_text`, so running from a subdirectory would prepare
`<repo>/docs/mcp-generated/` but then try to write to
`<cwd>/docs/mcp-generated/` (which doesn't exist) and raise
`FileNotFoundError`.

`_prepare_output_dir` now returns the resolved absolute path, and
`generate()` routes all subsequent file writes through it, so the two
always agree regardless of where the task is invoked from.
---
 scripts/generate_mcp_markdown.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/scripts/generate_mcp_markdown.py b/scripts/generate_mcp_markdown.py
index e245c2abf..82d675b11 100755
--- a/scripts/generate_mcp_markdown.py
+++ b/scripts/generate_mcp_markdown.py
@@ -540,7 +540,7 @@ def _resolve_output_dir(output: Path) -> Path:
     return (output if output.is_absolute() else _REPO_ROOT / output).resolve()
 
 
-def _prepare_output_dir(output: Path) -> None:
+def _prepare_output_dir(output: Path) -> Path:
     """Reset (or create) an output directory, with a strict safety guard.
 
     The script unconditionally `rmtree`s `output` before regenerating, so we
@@ -549,6 +549,12 @@ def _prepare_output_dir(output: Path) -> None:
     rules out `/`, `$HOME`, `--output ..`, and any absolute path outside the
     repo, while still letting the default `docs/mcp-generated/` work. The
     repo root itself is also rejected so we never nuke the whole repo.
+
+    Returns the *resolved* (absolute, repo-root-anchored) path so callers can
+    use a single canonical location for subsequent file writes — avoids a
+    footgun where preparing a resolved dir but writing via the raw `output`
+    would silently target a different, non-existent path when cwd differs
+    from the repo root.
     """
     resolved = _resolve_output_dir(output)
     if resolved == _REPO_ROOT or not resolved.is_relative_to(_REPO_ROOT):
@@ -560,6 +566,7 @@ def _prepare_output_dir(output: Path) -> None:
     if resolved.exists():
         shutil.rmtree(resolved)
     resolved.mkdir(parents=True, exist_ok=True)
+    return resolved
 
 
 def generate(server_spec: str, output: Path) -> None:
@@ -572,7 +579,10 @@ def generate(server_spec: str, output: Path) -> None:
     fallback_map = _resolve_extra_module_map(server_spec)
     buckets = _bucket_by_module(report, fallback_map)
 
-    _prepare_output_dir(output)
+    # Use the resolved path returned by `_prepare_output_dir` for subsequent
+    # writes: when called from a subdirectory, the raw `output` is
+    # cwd-relative and would target a non-existent directory.
+    resolved_output = _prepare_output_dir(output)
 
     server_name = (report.get("server") or {}).get("name", "mcp-server")
     pages: dict[str, str] = {"index.md": _render_index(report, buckets)}
@@ -580,8 +590,8 @@ def generate(server_spec: str, output: Path) -> None:
         pages[f"{name}.md"] = _render_module_page(bucket, server_name)
 
     for name, content in pages.items():
-        (output / name).write_text(content, encoding="utf-8")
-        print(f"  wrote {output / name}")
+        (resolved_output / name).write_text(content, encoding="utf-8")
+        print(f"  wrote {resolved_output / name}")
 
     print(
         f"Done. {len(buckets)} module(s) documented — "