From c31cce680c64b83cfa4828fe3e6ba76aa87141ab Mon Sep 17 00:00:00 2001 From: Quentin Ambard Date: Thu, 2 Apr 2026 11:33:32 +0200 Subject: [PATCH 1/5] Fix MCP server crash on request cancellation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a client cancels a long-running MCP request, there's a race condition between the cancellation and normal response paths: 1. Client cancels request → RequestResponder.cancel() sends error response and sets _completed = True 2. Middleware catches CancelledError and returns a ToolResult 3. MCP SDK tries to call message.respond(response) 4. Crash: assert not self._completed fails Fix: Re-raise CancelledError instead of returning a result, allowing the MCP SDK's cancellation handler to properly manage the response lifecycle. See: https://github.com/modelcontextprotocol/python-sdk/pull/1153 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../databricks_mcp_server/middleware.py | 27 +++++++------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/databricks-mcp-server/databricks_mcp_server/middleware.py b/databricks-mcp-server/databricks_mcp_server/middleware.py index 44ae008a..7ef93fd7 100644 --- a/databricks-mcp-server/databricks_mcp_server/middleware.py +++ b/databricks-mcp-server/databricks_mcp_server/middleware.py @@ -4,7 +4,7 @@ Provides cross-cutting concerns like timeout and error handling for all MCP tool calls. """ -import asyncio +import anyio import json import logging import traceback @@ -74,26 +74,17 @@ async def on_call_tool( ] ) - except asyncio.CancelledError: + except anyio.get_cancelled_exc_class(): + # Re-raise CancelledError so MCP SDK's handler catches it and skips + # calling message.respond(). If we return a result here, the SDK will + # try to respond, but the request may already be marked as responded + # by the cancellation handler, causing an AssertionError crash. + # See: https://github.com/modelcontextprotocol/python-sdk/pull/1153 logger.warning( - "Tool '%s' was cancelled. Returning structured result.", + "Tool '%s' was cancelled. Re-raising to let MCP SDK handle cleanup.", tool_name, ) - return ToolResult( - content=[ - TextContent( - type="text", - text=json.dumps( - { - "error": True, - "error_type": "cancelled", - "tool": tool_name, - "message": "Operation was cancelled by the client", - } - ), - ) - ] - ) + raise except Exception as e: # Log the full traceback for debugging From 53b2b5e04f2562da778cd5cfd962e348827f923a Mon Sep 17 00:00:00 2001 From: Quentin Ambard Date: Thu, 2 Apr 2026 11:52:17 +0200 Subject: [PATCH 2/5] Add structured_content to error responses for MCP SDK validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When tools have an outputSchema (auto-generated from return type like Dict[str, Any]), MCP SDK requires structured_content in all responses. The middleware was returning ToolResult without structured_content for error cases (timeout, exceptions), causing validation errors: "Output validation error: outputSchema defined but no structured output returned" Fix: Include structured_content with the same error data in all error responses. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../databricks_mcp_server/middleware.py | 59 ++++++++----------- 1 file changed, 24 insertions(+), 35 deletions(-) diff --git a/databricks-mcp-server/databricks_mcp_server/middleware.py b/databricks-mcp-server/databricks_mcp_server/middleware.py index 7ef93fd7..fb910dac 100644 --- a/databricks-mcp-server/databricks_mcp_server/middleware.py +++ b/databricks-mcp-server/databricks_mcp_server/middleware.py @@ -53,25 +53,20 @@ async def on_call_tool( "Tool '%s' timed out. Returning structured result.", tool_name, ) + error_data = { + "error": True, + "error_type": "timeout", + "tool": tool_name, + "message": str(e) or "Operation timed out", + "action_required": ( + "Operation may still be in progress. " + "Do NOT retry the same call. " + "Use the appropriate get/status tool to check current state." + ), + } return ToolResult( - content=[ - TextContent( - type="text", - text=json.dumps( - { - "error": True, - "error_type": "timeout", - "tool": tool_name, - "message": str(e) or "Operation timed out", - "action_required": ( - "Operation may still be in progress. " - "Do NOT retry the same call. " - "Use the appropriate get/status tool to check current state." - ), - } - ), - ) - ] + content=[TextContent(type="text", text=json.dumps(error_data))], + structured_content=error_data, ) except anyio.get_cancelled_exc_class(): @@ -95,22 +90,16 @@ async def on_call_tool( traceback.format_exc(), ) - # Return a structured error response - error_message = str(e) - error_type = type(e).__name__ - + # Return a structured error response with both content and structured_content. + # structured_content is required when tools have an outputSchema defined + # (which fastmcp auto-generates from return type annotations like Dict[str, Any]). + error_data = { + "error": True, + "error_type": type(e).__name__, + "tool": tool_name, + "message": str(e), + } return ToolResult( - content=[ - TextContent( - type="text", - text=json.dumps( - { - "error": True, - "error_type": error_type, - "tool": tool_name, - "message": error_message, - } - ), - ) - ] + content=[TextContent(type="text", text=json.dumps(error_data))], + structured_content=error_data, ) From 1f4b1a94ac7aed6e0d1cbf21d86d449632b331cd Mon Sep 17 00:00:00 2001 From: Quentin Ambard Date: Thu, 2 Apr 2026 12:53:21 +0200 Subject: [PATCH 3/5] Fix structured_content not populated for tools with return type annotations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FastMCP auto-generates outputSchema from return type annotations (e.g., -> Dict[str, Any]) but doesn't populate structured_content in ToolResult. MCP SDK validation then fails: "outputSchema defined but no structured output" Fix: Intercept successful results and populate structured_content from JSON text content when missing. Only modifies results when: 1. structured_content is missing 2. There's exactly one TextContent item 3. The text is valid JSON that parses to a dict 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../databricks_mcp_server/middleware.py | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/databricks-mcp-server/databricks_mcp_server/middleware.py b/databricks-mcp-server/databricks_mcp_server/middleware.py index fb910dac..6ba3cc47 100644 --- a/databricks-mcp-server/databricks_mcp_server/middleware.py +++ b/databricks-mcp-server/databricks_mcp_server/middleware.py @@ -44,7 +44,27 @@ async def on_call_tool( arguments = context.message.arguments try: - return await call_next(context) + result = await call_next(context) + + # Fix for FastMCP not populating structured_content automatically. + # When a tool has a return type annotation (e.g., -> Dict[str, Any]), + # FastMCP generates an outputSchema but doesn't set structured_content. + # MCP SDK then fails validation: "outputSchema defined but no structured output" + # We fix this by parsing the JSON text content and setting structured_content. + if result and not result.structured_content and result.content: + if len(result.content) == 1 and isinstance(result.content[0], TextContent): + try: + parsed = json.loads(result.content[0].text) + if isinstance(parsed, dict): + # Create new ToolResult with structured_content populated + result = ToolResult( + content=result.content, + structured_content=parsed, + ) + except (json.JSONDecodeError, TypeError): + pass # Not valid JSON, leave as-is + + return result except TimeoutError as e: # In Python 3.11+, asyncio.TimeoutError is an alias for TimeoutError, From 76006ebccfdcdb636f9b0e3e56e6d6a77d2437dc Mon Sep 17 00:00:00 2001 From: Quentin Ambard Date: Thu, 2 Apr 2026 13:18:24 +0200 Subject: [PATCH 4/5] fix(mcp): apply async wrapper on all platforms to prevent cancellation crashes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The asyncio.to_thread() wrapper was only applied on Windows, but it's needed on ALL platforms to enable proper cancellation handling. Without this fix, when a sync tool runs longer than the client timeout: 1. Client sends cancellation 2. Sync tool blocks event loop, can't receive CancelledError 3. Tool eventually returns, but MCP SDK already responded to cancel 4. AssertionError: "Request already responded to" → server crashes This was discovered when uploading 7,375 files triggered a timeout, crashing the MCP server on macOS. Extends the fix from PR #411 which added CancelledError handling in middleware - that fix only works when cancellation can propagate, which requires async execution via to_thread(). 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../databricks_mcp_server/server.py | 29 +++++++++++++------ 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/databricks-mcp-server/databricks_mcp_server/server.py b/databricks-mcp-server/databricks_mcp_server/server.py index d1810f2a..4ee150aa 100644 --- a/databricks-mcp-server/databricks_mcp_server/server.py +++ b/databricks-mcp-server/databricks_mcp_server/server.py @@ -72,15 +72,23 @@ async def async_wrapper(**kwargs): _patch_subprocess_stdin() -def _patch_tool_decorator_for_windows(): - """Wrap sync tool functions in asyncio.to_thread() on Windows. +def _patch_tool_decorator_for_async(): + """Wrap sync tool functions in asyncio.to_thread() on all platforms. FastMCP's FunctionTool.run() calls sync functions directly on the asyncio - event loop thread, which blocks the stdio transport's I/O tasks. On Windows - with ProactorEventLoop this causes a deadlock where all MCP tools hang. + event loop thread, which blocks the stdio transport's I/O tasks. This causes: + + 1. On Windows with ProactorEventLoop: deadlock where all MCP tools hang. + + 2. On ALL platforms: cancellation race conditions. When the MCP client + cancels a request (e.g., timeout), the event loop can't propagate the + CancelledError to blocking sync code. The sync function eventually + returns, but the MCP SDK has already responded to the cancellation, + causing "Request already responded to" assertion errors and crashes. This patch intercepts @mcp.tool registration to wrap sync functions so they - run in a thread pool, yielding control back to the event loop for I/O. + run in a thread pool, yielding control back to the event loop for I/O and + enabling proper cancellation handling via anyio's task cancellation. """ original_tool = mcp.tool @@ -132,11 +140,14 @@ async def _noop_lifespan(*args, **kwargs): # Register middleware (see middleware.py for details on each) mcp.add_middleware(TimeoutHandlingMiddleware()) -# Apply async wrapper on Windows to prevent event loop deadlocks. +# Apply async wrapper on ALL platforms to: +# 1. Prevent event loop deadlocks (critical on Windows) +# 2. Enable proper cancellation handling (critical on all platforms) +# Without this, sync tools block the event loop, preventing CancelledError +# propagation and causing "Request already responded to" crashes. # TODO: FastMCP 3.x automatically wraps sync functions in asyncio.to_thread(). -# Test if this Windows-specific patch is still needed with FastMCP 3.x. -if sys.platform == "win32": - _patch_tool_decorator_for_windows() +# Test if this patch is still needed with FastMCP 3.x. +_patch_tool_decorator_for_async() # Import and register all tools (side-effect imports: each module registers @mcp.tool decorators) from .tools import ( # noqa: F401, E402 From c300deddfb4adf348d9d766b9baf080e3c93a996 Mon Sep 17 00:00:00 2001 From: Quentin Ambard Date: Thu, 2 Apr 2026 13:51:24 +0200 Subject: [PATCH 5/5] Fix: don't set structured_content on error responses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setting structured_content causes MCP SDK to validate it against the tool's outputSchema. For error responses, the error dict {"error": True, ...} doesn't match the expected return type (e.g., Union[str, List[Dict]]), causing "Output validation error: 'result' is a required property". Fix: Only set structured_content for successful responses, not errors. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../databricks_mcp_server/middleware.py | 46 +++++++++---------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/databricks-mcp-server/databricks_mcp_server/middleware.py b/databricks-mcp-server/databricks_mcp_server/middleware.py index 6ba3cc47..71514694 100644 --- a/databricks-mcp-server/databricks_mcp_server/middleware.py +++ b/databricks-mcp-server/databricks_mcp_server/middleware.py @@ -73,20 +73,20 @@ async def on_call_tool( "Tool '%s' timed out. Returning structured result.", tool_name, ) - error_data = { - "error": True, - "error_type": "timeout", - "tool": tool_name, - "message": str(e) or "Operation timed out", - "action_required": ( - "Operation may still be in progress. " - "Do NOT retry the same call. " - "Use the appropriate get/status tool to check current state." - ), - } + # Don't set structured_content for errors - it would be validated against + # the tool's outputSchema and fail (error dict doesn't match expected type) return ToolResult( - content=[TextContent(type="text", text=json.dumps(error_data))], - structured_content=error_data, + content=[TextContent(type="text", text=json.dumps({ + "error": True, + "error_type": "timeout", + "tool": tool_name, + "message": str(e) or "Operation timed out", + "action_required": ( + "Operation may still be in progress. " + "Do NOT retry the same call. " + "Use the appropriate get/status tool to check current state." + ), + }))] ) except anyio.get_cancelled_exc_class(): @@ -110,16 +110,14 @@ async def on_call_tool( traceback.format_exc(), ) - # Return a structured error response with both content and structured_content. - # structured_content is required when tools have an outputSchema defined - # (which fastmcp auto-generates from return type annotations like Dict[str, Any]). - error_data = { - "error": True, - "error_type": type(e).__name__, - "tool": tool_name, - "message": str(e), - } + # Return error as text content only - don't set structured_content. + # Setting structured_content would cause MCP SDK to validate it against + # the tool's outputSchema, which fails (error dict doesn't match expected type). return ToolResult( - content=[TextContent(type="text", text=json.dumps(error_data))], - structured_content=error_data, + content=[TextContent(type="text", text=json.dumps({ + "error": True, + "error_type": type(e).__name__, + "tool": tool_name, + "message": str(e), + }))] )