diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..5abc2df --- /dev/null +++ b/.gitattributes @@ -0,0 +1,3 @@ +# Ensure diff test data uses LF line endings on all platforms +*.diff eol=lf +tests/test_data/** eol=lf diff --git a/docs/design.md b/docs/design.md index 1dd73fc..b58072c 100644 --- a/docs/design.md +++ b/docs/design.md @@ -10,6 +10,8 @@ MCP server that chunks large diff files for efficient LLM navigation. Uses file- Diff File → Canonicalize Path → Hash Content → Cache Check → Parse → Filter → Chunk → Index → Tools ``` +The server uses FastMCP with module-level `@mcp.tool()` decorated sync functions (not a class). Resources use `@mcp.resource()`. Error handling is automatic: `ValueError` raised in any tool function is caught by FastMCP and returned as `CallToolResult(isError=True)`. All tools have `annotations={"readOnlyHint": True}` and `structured_output=False` (to prevent `outputSchema` generation). + ## Tools ### load_diff (Optional) @@ -156,14 +158,15 @@ class DiffStats: - File existence validation - Diff format verification - Graceful handling of malformed sections -- Clear error messages for invalid patterns +- Actionable error messages that guide LLMs to self-correct +- `ValueError` automatically surfaces as `isError=True` through FastMCP ## Project Structure ``` src/ ├── main.py # CLI entry point -├── server.py # MCP server (DiffChunkServer) +├── server.py # MCP server (FastMCP module-level tools) ├── tools.py # MCP tools (DiffChunkTools) ├── models.py # Data models ├── parser.py # Diff parsing (DiffParser) @@ -172,7 +175,12 @@ src/ ## Resources -- `diffchunk://current` - Overview of loaded diffs via MCP resource protocol +- `diffchunk://current` - Overview of loaded diffs via `@mcp.resource("diffchunk://current")` decorator + +### File Matching + +- Pattern matching (glob) is case-insensitive, matching macOS/Windows filesystem behavior +- Both `find_chunks_for_files` and `get_file_diff` use case-insensitive comparison ## Performance diff --git a/pyproject.toml b/pyproject.toml index 959c7ee..81ccadd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "diffchunk" -version = "0.1.8" +version = "0.1.9" description = "MCP server for navigating large diff files with intelligent chunking" readme = "README.md" requires-python = ">=3.10" @@ -25,7 +25,7 @@ classifiers = [ ] dependencies = [ "click>=8.2.1", - "mcp>=1.10.0", + "mcp>=1.7.0,<2.0.0", "chardet>=4.0.0", ] diff --git a/scripts/pre-push b/scripts/pre-push index c2f769a..b981200 100755 --- a/scripts/pre-push +++ b/scripts/pre-push @@ -1,41 +1,17 @@ -#!/bin/bash +#!/bin/sh +# Pre-push hook - runs the same checks as CI to catch issues early -# Comprehensive code quality check script -# Can be used manually or copied to .git/hooks/pre-push +set -e -set -e # Exit on any error +echo "Running pre-push checks..." -echo "🚀 Running pre-push checks..." -echo "================================" +echo "Checking ruff lint..." +uv run ruff check src/ -# Function to run a check and report results -run_check() { - local name="$1" - local command="$2" - - echo "🔍 Running $name..." - if eval "$command"; then - echo "✅ $name passed!" - else - echo "❌ $name failed!" - echo "Please fix the issues before pushing." - exit 1 - fi - echo "" -} +echo "Checking code formatting..." +uv run ruff format --check src/ -# 1. Code formatting check -run_check "Ruff format check" "uv run ruff format --check" +echo "Running tests..." +uv run pytest tests/ -x -q -# 2. Linting check -run_check "Ruff linting" "uv run ruff check" - -# 3. Type checking -run_check "MyPy type checking" "uv run mypy src/" - -# 4. Test suite -run_check "Test suite" "uv run pytest -x -q" - -echo "🎉 All pre-push checks passed!" -echo "✅ Code is ready to push" -echo "================================" \ No newline at end of file +echo "All checks passed." diff --git a/src/__init__.py b/src/__init__.py index a5de9a2..17f5314 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -1,3 +1,3 @@ """diffchunk - MCP server for navigating large diff files.""" -__version__ = "0.1.0" +__version__ = "0.1.9" diff --git a/src/chunker.py b/src/chunker.py index 94a5ec6..fb58f10 100644 --- a/src/chunker.py +++ b/src/chunker.py @@ -20,8 +20,13 @@ def chunk_diff( skip_generated: bool = True, include_patterns: List[str] | None = None, exclude_patterns: List[str] | None = None, + max_chunk_lines: int | None = None, ) -> None: """Chunk a diff file into the session.""" + if max_chunk_lines is None: + max_chunk_lines = self.max_chunk_lines + elif not isinstance(max_chunk_lines, int) or max_chunk_lines <= 0: + raise ValueError("max_chunk_lines must be a positive integer") chunk_number = 1 current_chunk_lines = 0 current_chunk_content: List[str] = [] @@ -55,7 +60,7 @@ def chunk_diff( file_name = files[-1] # Check if this file needs to be split - if content_lines > self.max_chunk_lines: + if content_lines > max_chunk_lines: # Save current chunk if it has content if current_chunk_content: self._save_chunk( @@ -73,7 +78,9 @@ def chunk_diff( current_chunk_file_line_counts = {} # Split the large file - file_chunks = self._split_large_file(files, content, content_lines) + file_chunks = self._split_large_file( + files, content, content_lines, max_chunk_lines + ) parent_file = files[0] if len(files) == 1 else f"{len(files)} files" for sub_index, (sub_files, sub_content, sub_lines) in enumerate( @@ -95,7 +102,7 @@ def chunk_diff( # Check if we need to start a new chunk if ( current_chunk_content - and current_chunk_lines + content_lines > self.max_chunk_lines + and current_chunk_lines + content_lines > max_chunk_lines ): # Save current chunk self._save_chunk( @@ -172,10 +179,10 @@ def _save_chunk( session.add_chunk(chunk) def _split_large_file( - self, files: List[str], content: str, file_line_count: int + self, files: List[str], content: str, file_line_count: int, max_chunk_lines: int ) -> List[Tuple[List[str], str, int]]: """Split a large file's diff content at hunk boundaries.""" - if file_line_count <= self.max_chunk_lines: + if file_line_count <= max_chunk_lines: return [(files, content, file_line_count)] # Pattern to match hunk headers like @@ -1,4 +1,6 @@ @@ -192,7 +199,7 @@ def _split_large_file( # Be very aggressive about staying under the limit target_chunk_size = max( - self.max_chunk_lines * 0.8, 200 + max_chunk_lines * 0.8, 200 ) # 80% of limit or 200 lines minimum for i, line in enumerate(lines): @@ -236,10 +243,7 @@ def _split_large_file( current_chunk_line_count += 1 # STRICT enforcement: split immediately if we exceed limit - if ( - current_chunk_line_count + len(file_header_lines) - >= self.max_chunk_lines - ): + if current_chunk_line_count + len(file_header_lines) >= max_chunk_lines: # Find the last hunk header in current chunk to split there last_hunk_idx = None for j in range(len(current_chunk_lines) - 1, -1, -1): diff --git a/src/main.py b/src/main.py index 802ceb3..6322fd4 100644 --- a/src/main.py +++ b/src/main.py @@ -4,7 +4,8 @@ import asyncio import sys from importlib.metadata import version -from .server import DiffChunkServer + +from .server import mcp def main(): @@ -35,12 +36,17 @@ def main(): try: print( - f"Starting diffchunk MCP server v{version('diffchunk')}...", file=sys.stderr + f"Starting diffchunk MCP server v{version('diffchunk')}...", + file=sys.stderr, + flush=True, + ) + print( + "Server ready - waiting for MCP client connection", + file=sys.stderr, + flush=True, ) - print("Server ready - waiting for MCP client connection", file=sys.stderr) - server = DiffChunkServer() - asyncio.run(server.run()) - except KeyboardInterrupt: + mcp.run() + except (KeyboardInterrupt, asyncio.CancelledError): print("Server shutdown requested", file=sys.stderr) sys.exit(0) except Exception as e: diff --git a/src/models.py b/src/models.py index d0e26a9..bf0b6de 100644 --- a/src/models.py +++ b/src/models.py @@ -103,7 +103,7 @@ def find_chunks_for_files(self, pattern: str) -> List[int]: matching_chunks = set() for file_path, chunk_numbers in self.file_to_chunks.items(): - if fnmatch.fnmatch(file_path, pattern): + if fnmatch.fnmatch(file_path.lower(), pattern.lower()): matching_chunks.update(chunk_numbers) return sorted(matching_chunks) diff --git a/src/parser.py b/src/parser.py index cb0fa4d..9f915b7 100644 --- a/src/parser.py +++ b/src/parser.py @@ -1,9 +1,12 @@ """Diff file parsing functionality.""" import fnmatch +import logging import re from typing import List, Tuple, Iterator +logger = logging.getLogger("diffchunk") + class DiffParser: """Parser for unified diff files.""" @@ -135,6 +138,12 @@ def _read_diff_file(self, file_path: str) -> List[str]: encoding = ( result.get("encoding") if result.get("confidence", 0) > 0.7 else "utf-8" ) + logger.debug( + "Detected encoding %s (confidence %.1f) for %s", + encoding, + result.get("confidence", 0), + file_path, + ) try: with open(file_path, "r", encoding=encoding) as f: diff --git a/src/server.py b/src/server.py index e9ea1aa..617d630 100644 --- a/src/server.py +++ b/src/server.py @@ -1,216 +1,154 @@ """MCP server implementation for diffchunk.""" -from mcp.server import InitializationOptions, NotificationOptions -from mcp.server import Server -from mcp.types import Resource, Tool, TextContent import json -from typing import Any, Sequence +import logging +import os +import sys +from typing import Annotated, Optional -from .tools import DiffChunkTools +from mcp.server.fastmcp import FastMCP +from mcp.types import ToolAnnotations +from pydantic import Field +from .tools import DiffChunkTools -class DiffChunkServer: - """MCP server for diffchunk functionality.""" - - def __init__(self): - self.app = Server("diffchunk") - self.tools = DiffChunkTools() - self._setup_handlers() - - def _setup_handlers(self): - """Set up MCP handlers.""" - - @self.app.list_resources() - async def handle_list_resources() -> list[Resource]: - """List available resources.""" - return [ - Resource( - uri="diffchunk://current", # type: ignore - name="Current Diff Overview", - description="Overview of all currently loaded diff files", - mimeType="application/json", - ) - ] - - @self.app.read_resource() - async def handle_read_resource(uri: str) -> str: - """Read a resource.""" - if uri == "diffchunk://current": - overview = self.tools.get_current_overview() - return json.dumps(overview, indent=2) - else: - raise ValueError(f"Unknown resource: {uri}") - - @self.app.list_tools() - async def handle_list_tools() -> list[Tool]: - """List available tools.""" - return [ - Tool( - name="load_diff", - description="Parse and load a diff file with custom chunking settings. Use this tool ONLY when you need non-default settings (custom chunk sizes, filtering patterns). Otherwise, use list_chunks, get_chunk, or find_chunks_for_files which auto-load with optimal defaults. CRITICAL: You must use an absolute directory path - relative paths will fail. The diff file will be too large for direct reading, so you MUST use diffchunk tools for navigation. When using tracking documents for analysis, remember to clean up tracking state before presenting final results.", - inputSchema={ - "type": "object", - "properties": { - "absolute_file_path": { - "type": "string", - "description": "Absolute path to the diff file to load", - }, - "max_chunk_lines": { - "type": "integer", - "description": "Maximum lines per chunk", - "default": 4000, - }, - "skip_trivial": { - "type": "boolean", - "description": "Skip whitespace-only changes", - "default": True, - }, - "skip_generated": { - "type": "boolean", - "description": "Skip generated files and build artifacts", - "default": True, - }, - "include_patterns": { - "type": "string", - "description": "Comma-separated glob patterns for files to include", - }, - "exclude_patterns": { - "type": "string", - "description": "Comma-separated glob patterns for files to exclude", - }, - }, - "required": ["absolute_file_path"], - }, - ), - Tool( - name="list_chunks", - description="Get an overview of all chunks in a diff file with file mappings and summaries. Auto-loads the diff file with optimal defaults if not already loaded. Use this as your first step to understand the scope and structure of changes before diving into specific chunks. CRITICAL: You must use an absolute directory path - relative paths will fail. DO NOT attempt to read the diff file directly as it will exceed context limits. This tool provides the roadmap for systematic chunk-by-chunk analysis. If using tracking documents to resume analysis, use this to orient yourself to remaining work.", - inputSchema={ - "type": "object", - "properties": { - "absolute_file_path": { - "type": "string", - "description": "Absolute path to the diff file", - }, - }, - "required": ["absolute_file_path"], - }, - ), - Tool( - name="get_chunk", - description="Retrieve the actual content of a specific numbered chunk from a diff file. Auto-loads the diff file if not already loaded. Use this for systematic analysis of changes chunk-by-chunk, or to examine specific chunks identified via list_chunks or find_chunks_for_files. CRITICAL: You must use an absolute directory path - relative paths will fail. DO NOT read diff files directly - they exceed LLM context windows. This tool provides manageable portions of large diffs. Track your progress through chunks when doing comprehensive analysis and clean up tracking documents before final results.", - inputSchema={ - "type": "object", - "properties": { - "absolute_file_path": { - "type": "string", - "description": "Absolute path to the diff file", - }, - "chunk_number": { - "type": "integer", - "description": "The chunk number to retrieve (1-indexed)", - }, - "include_context": { - "type": "boolean", - "description": "Include chunk header with metadata", - "default": True, - }, - }, - "required": ["absolute_file_path", "chunk_number"], - }, - ), - Tool( - name="find_chunks_for_files", - description="Locate chunks containing files that match a specific glob pattern. Auto-loads the diff file if not already loaded. Essential for targeted analysis when you need to focus on specific file types, directories, or naming patterns (e.g., '*.py' for Python files, '*test*' for test files, 'src/*' for source directory). Returns chunk numbers which you then examine using get_chunk. CRITICAL: You must use an absolute directory path - relative paths will fail. DO NOT attempt direct file reading. Use this for efficient navigation to relevant changes instead of processing entire large diffs sequentially.", - inputSchema={ - "type": "object", - "properties": { - "absolute_file_path": { - "type": "string", - "description": "Absolute path to the diff file", - }, - "pattern": { - "type": "string", - "description": "Glob pattern to match file paths (e.g., '*.py', '*test*', 'src/*')", - }, - }, - "required": ["absolute_file_path", "pattern"], - }, - ), - Tool( - name="get_file_diff", - description="Extract the complete diff for a single file from a loaded diff. Returns the diff --git header and all hunks for that file. Use this when you need changes for one specific file without fetching the entire chunk. Auto-loads the diff file if not already loaded. Supports exact file paths or glob patterns that match exactly one file. Use list_chunks with file_details to see per-file line counts and decide whether to use this tool or get_chunk. CRITICAL: You must use an absolute directory path - relative paths will fail.", - inputSchema={ - "type": "object", - "properties": { - "absolute_file_path": { - "type": "string", - "description": "Absolute path to the diff file", - }, - "file_path": { - "type": "string", - "description": "Exact file path or glob pattern matching a single file within the diff (e.g., 'src/main.py', '*.config')", - }, - }, - "required": ["absolute_file_path", "file_path"], - }, - ), - ] - - @self.app.call_tool() - async def handle_call_tool( - name: str, arguments: dict[str, Any] | None - ) -> Sequence[TextContent]: - """Handle tool calls.""" - if arguments is None: - arguments = {} - - try: - if name == "load_diff": - result = self.tools.load_diff(**arguments) - return [TextContent(type="text", text=json.dumps(result, indent=2))] - - elif name == "list_chunks": - result = self.tools.list_chunks(**arguments) - return [TextContent(type="text", text=json.dumps(result, indent=2))] - - elif name == "get_chunk": - result = self.tools.get_chunk(**arguments) - return [TextContent(type="text", text=result)] - - elif name == "find_chunks_for_files": - result = self.tools.find_chunks_for_files(**arguments) - return [TextContent(type="text", text=json.dumps(result))] - - elif name == "get_file_diff": - result = self.tools.get_file_diff(**arguments) - return [TextContent(type="text", text=result)] - - else: - raise ValueError(f"Unknown tool: {name}") - - except ValueError as e: - error_msg = f"Error in {name}: {str(e)}" - return [TextContent(type="text", text=error_msg)] - except Exception as e: - error_msg = f"Unexpected error in {name}: {str(e)}" - return [TextContent(type="text", text=error_msg)] - - async def run(self): - """Run the MCP server.""" - # Import here to avoid issues with event loop - from mcp.server.stdio import stdio_server - - async with stdio_server() as (read_stream, write_stream): - await self.app.run( - read_stream, - write_stream, - InitializationOptions( - server_name="diffchunk", - server_version="0.1.8", - capabilities=self.app.get_capabilities( - notification_options=NotificationOptions(), - experimental_capabilities={}, - ), - ), - ) +logging.basicConfig( + stream=sys.stderr, + level=logging.INFO, + format="%(asctime)s [%(name)s] %(levelname)s: %(message)s", +) +logger = logging.getLogger("diffchunk") + +mcp = FastMCP("diffchunk") +tools = DiffChunkTools() + + +@mcp.resource("diffchunk://current") +def current_overview() -> str: + """Overview of all currently loaded diff files.""" + return json.dumps(tools.get_current_overview(), indent=2) + + +@mcp.tool(annotations=ToolAnnotations(readOnlyHint=True), structured_output=False) +def load_diff( + absolute_file_path: Annotated[ + str, Field(description="Absolute path to the diff file to load") + ], + max_chunk_lines: Annotated[ + int, Field(description="Maximum lines per chunk") + ] = 1000, + skip_trivial: Annotated[ + bool, Field(description="Skip whitespace-only changes") + ] = True, + skip_generated: Annotated[ + bool, Field(description="Skip generated files and build artifacts") + ] = True, + include_patterns: Annotated[ + Optional[str], + Field(description="Comma-separated glob patterns for files to include"), + ] = None, + exclude_patterns: Annotated[ + Optional[str], + Field(description="Comma-separated glob patterns for files to exclude"), + ] = None, +) -> str: + """Parse and load a diff file with custom chunking settings. Use this tool ONLY when you need non-default settings (custom chunk sizes, filtering patterns). Otherwise, use list_chunks, get_chunk, or find_chunks_for_files which auto-load with optimal defaults. CRITICAL: You must use an absolute directory path - relative paths will fail. The diff file will be too large for direct reading, so you MUST use diffchunk tools for navigation. When using tracking documents for analysis, remember to clean up tracking state before presenting final results.""" + logger.info("load_diff: %s", os.path.basename(absolute_file_path)) + logger.debug("load_diff full path: %s", absolute_file_path) + result = tools.load_diff( + absolute_file_path=absolute_file_path, + max_chunk_lines=max_chunk_lines, + skip_trivial=skip_trivial, + skip_generated=skip_generated, + include_patterns=include_patterns, + exclude_patterns=exclude_patterns, + ) + return json.dumps(result, indent=2) + + +@mcp.tool(annotations=ToolAnnotations(readOnlyHint=True), structured_output=False) +def list_chunks( + absolute_file_path: Annotated[ + str, Field(description="Absolute path to the diff file") + ], +) -> str: + """Get an overview of all chunks in a diff file with file mappings and summaries. Auto-loads the diff file with optimal defaults if not already loaded. Use this as your first step to understand the scope and structure of changes before diving into specific chunks. CRITICAL: You must use an absolute directory path - relative paths will fail. DO NOT attempt to read the diff file directly as it will exceed context limits. This tool provides the roadmap for systematic chunk-by-chunk analysis. If using tracking documents to resume analysis, use this to orient yourself to remaining work.""" + logger.info("list_chunks: %s", os.path.basename(absolute_file_path)) + logger.debug("list_chunks full path: %s", absolute_file_path) + result = tools.list_chunks(absolute_file_path=absolute_file_path) + return json.dumps(result, indent=2) + + +@mcp.tool(annotations=ToolAnnotations(readOnlyHint=True), structured_output=False) +def get_chunk( + absolute_file_path: Annotated[ + str, Field(description="Absolute path to the diff file") + ], + chunk_number: Annotated[ + int, Field(description="The chunk number to retrieve (1-indexed)") + ], + include_context: Annotated[ + bool, Field(description="Include chunk header with metadata") + ] = True, +) -> str: + """Retrieve the actual content of a specific numbered chunk from a diff file. Auto-loads the diff file if not already loaded. Use this for systematic analysis of changes chunk-by-chunk, or to examine specific chunks identified via list_chunks or find_chunks_for_files. CRITICAL: You must use an absolute directory path - relative paths will fail. DO NOT read diff files directly - they exceed LLM context windows. This tool provides manageable portions of large diffs. Track your progress through chunks when doing comprehensive analysis and clean up tracking documents before final results.""" + logger.info( + "get_chunk: %s chunk=%d", os.path.basename(absolute_file_path), chunk_number + ) + logger.debug("get_chunk full path: %s", absolute_file_path) + result = tools.get_chunk( + absolute_file_path=absolute_file_path, + chunk_number=chunk_number, + include_context=include_context, + ) + return result + + +@mcp.tool(annotations=ToolAnnotations(readOnlyHint=True), structured_output=False) +def find_chunks_for_files( + absolute_file_path: Annotated[ + str, Field(description="Absolute path to the diff file") + ], + pattern: Annotated[ + str, + Field( + description="Glob pattern to match file paths (e.g., '*.py', '*test*', 'src/*')" + ), + ], +) -> str: + """Locate chunks containing files that match a specific glob pattern. Auto-loads the diff file if not already loaded. Essential for targeted analysis when you need to focus on specific file types, directories, or naming patterns (e.g., '*.py' for Python files, '*test*' for test files, 'src/*' for source directory). Returns chunk numbers which you then examine using get_chunk. CRITICAL: You must use an absolute directory path - relative paths will fail. DO NOT attempt direct file reading. Use this for efficient navigation to relevant changes instead of processing entire large diffs sequentially.""" + logger.info( + "find_chunks_for_files: %s pattern=%s", + os.path.basename(absolute_file_path), + pattern, + ) + logger.debug("find_chunks_for_files full path: %s", absolute_file_path) + result = tools.find_chunks_for_files( + absolute_file_path=absolute_file_path, + pattern=pattern, + ) + return json.dumps(result) + + +@mcp.tool(annotations=ToolAnnotations(readOnlyHint=True), structured_output=False) +def get_file_diff( + absolute_file_path: Annotated[ + str, Field(description="Absolute path to the diff file") + ], + file_path: Annotated[ + str, + Field( + description="Exact file path or glob pattern matching a single file within the diff (e.g., 'src/main.py', '*.config')" + ), + ], +) -> str: + """Extract the complete diff for a single file from a loaded diff. Returns the diff --git header and all hunks for that file. Use this when you need changes for one specific file without fetching the entire chunk. Auto-loads the diff file if not already loaded. Supports exact file paths or glob patterns that match exactly one file. Use list_chunks with file_details to see per-file line counts and decide whether to use this tool or get_chunk. CRITICAL: You must use an absolute directory path - relative paths will fail.""" + logger.info( + "get_file_diff: %s file=%s", os.path.basename(absolute_file_path), file_path + ) + logger.debug("get_file_diff full path: %s", absolute_file_path) + result = tools.get_file_diff( + absolute_file_path=absolute_file_path, + file_path=file_path, + ) + return result diff --git a/src/tools.py b/src/tools.py index c05e742..d94652e 100644 --- a/src/tools.py +++ b/src/tools.py @@ -1,12 +1,16 @@ """MCP tools implementation for diffchunk.""" import fnmatch -import os import hashlib +import logging +import os +import time from typing import Dict, Any, List, Optional from .models import DiffSession from .chunker import DiffChunker +logger = logging.getLogger("diffchunk") + class DiffChunkTools: """MCP tools for diff chunk navigation.""" @@ -71,7 +75,9 @@ def _load_diff_internal( raise ValueError(f"Path is not a file: {resolved_file_path}") if not os.path.exists(resolved_file_path): - raise ValueError(f"Diff file not found: {absolute_file_path}") + raise ValueError( + f"Diff file not found: {absolute_file_path}. Verify the file exists and the path is absolute." + ) if not os.access(resolved_file_path, os.R_OK): raise ValueError(f"Cannot read file: {resolved_file_path}") @@ -89,20 +95,26 @@ def _load_diff_internal( # Create new session session = DiffSession(resolved_file_path) - # Configure chunker - self.chunker.max_chunk_lines = max_chunk_lines - - try: - # Chunk the diff - self.chunker.chunk_diff( - session, - skip_trivial=skip_trivial, - skip_generated=skip_generated, - include_patterns=include_list, - exclude_patterns=exclude_list, - ) - except ValueError as e: - raise e + # Chunk the diff + start = time.monotonic() + self.chunker.chunk_diff( + session, + max_chunk_lines=max_chunk_lines, + skip_trivial=skip_trivial, + skip_generated=skip_generated, + include_patterns=include_list, + exclude_patterns=exclude_list, + ) + elapsed = time.monotonic() - start + logger.info( + "Loading diff: %s (max_chunk_lines=%d) - %d chunks, %d files in %.2fs", + os.path.basename(resolved_file_path), + max_chunk_lines, + session.stats.chunks_count, + session.stats.total_files, + elapsed, + ) + logger.debug("Full diff path: %s", resolved_file_path) # Store session file_key = self._get_file_key(absolute_file_path) @@ -171,7 +183,7 @@ def get_chunk( if not chunk: total_chunks = len(session.chunks) raise ValueError( - f"Chunk {chunk_number} not found. Available chunks: 1-{total_chunks}" + f"Chunk {chunk_number} does not exist. The diff has {total_chunks} chunks (1-{total_chunks}). Use list_chunks to see what each chunk contains." ) if include_context: @@ -189,7 +201,9 @@ def find_chunks_for_files(self, absolute_file_path: str, pattern: str) -> List[i session = self.sessions[file_key] if not isinstance(pattern, str) or not pattern.strip(): - raise ValueError("Pattern must be a non-empty string") + raise ValueError( + "Pattern must be a non-empty string. Use '*' to match all files, '*.py' for Python files, or 'src/*' for a directory." + ) matching_chunks = session.find_chunks_for_files(pattern.strip()) @@ -206,24 +220,28 @@ def get_file_diff(self, absolute_file_path: str, file_path: str) -> str: file_path = file_path.strip() all_files = list(session.file_to_chunks.keys()) - # Try exact match first, then glob matching + # Try exact match first, then case-insensitive, then glob matches = [f for f in all_files if f == file_path] if not matches: - matches = [f for f in all_files if fnmatch.fnmatch(f, file_path)] + matches = [f for f in all_files if f.lower() == file_path.lower()] + if not matches: + matches = [ + f for f in all_files if fnmatch.fnmatch(f.lower(), file_path.lower()) + ] if not matches: available = ", ".join(sorted(all_files)[:20]) suffix = "..." if len(all_files) > 20 else "" raise ValueError( f"No file matching '{file_path}' found in diff. " - f"Available files: {available}{suffix}" + f"Available files: {available}{suffix}. Use find_chunks_for_files to search by pattern." ) if len(matches) > 1: matched_list = ", ".join(sorted(matches)) raise ValueError( - f"Pattern '{file_path}' matches multiple files: {matched_list}. " - f"Use a more specific path." + f"Pattern '{file_path}' matches {len(matches)} files: {matched_list}. " + f"Use a more specific path or exact filename." ) target_file = matches[0] diff --git a/tests/test_case_insensitive.py b/tests/test_case_insensitive.py new file mode 100644 index 0000000..3abec3a --- /dev/null +++ b/tests/test_case_insensitive.py @@ -0,0 +1,95 @@ +"""Tests for case-insensitive file matching.""" + +from pathlib import Path + +import pytest + +from src.tools import DiffChunkTools + + +@pytest.fixture +def tools(): + return DiffChunkTools() + + +@pytest.fixture +def react_diff_file(): + diff_file = Path(__file__).parent / "test_data" / "react_18.0_to_18.3.diff" + if not diff_file.exists(): + pytest.skip("React test diff not found") + return str(diff_file) + + +@pytest.fixture +def go_diff_file(): + diff_file = ( + Path(__file__).parent / "test_data" / "go_version_upgrade_1.22_to_1.23.diff" + ) + if not diff_file.exists(): + pytest.skip("Go test diff not found") + return str(diff_file) + + +class TestCaseInsensitiveMatching: + """Test case-insensitive file pattern matching.""" + + def test_find_chunks_uppercase_extension(self, tools, react_diff_file): + """find_chunks_for_files with '*.JS' matches .js files.""" + tools.load_diff(react_diff_file) + lower_chunks = tools.find_chunks_for_files(react_diff_file, "*.js") + upper_chunks = tools.find_chunks_for_files(react_diff_file, "*.JS") + assert lower_chunks == upper_chunks + assert len(lower_chunks) > 0 + + def test_find_chunks_mixed_case_pattern(self, tools, go_diff_file): + """find_chunks_for_files with mixed case pattern matches files.""" + tools.load_diff(go_diff_file) + lower_chunks = tools.find_chunks_for_files(go_diff_file, "*.go") + mixed_chunks = tools.find_chunks_for_files(go_diff_file, "*.Go") + assert lower_chunks == mixed_chunks + + def test_find_chunks_uppercase_directory(self, tools, react_diff_file): + """find_chunks_for_files with uppercase dir pattern matches.""" + tools.load_diff(react_diff_file) + all_chunks = tools.find_chunks_for_files(react_diff_file, "*") + assert len(all_chunks) > 0 + # Uppercase wildcard should match the same set + upper_all = tools.find_chunks_for_files(react_diff_file, "*") + assert all_chunks == upper_all + + def test_find_chunks_exact_case_still_works(self, tools, react_diff_file): + """Exact case pattern still matches correctly.""" + tools.load_diff(react_diff_file) + chunks = tools.find_chunks_for_files(react_diff_file, "*.js") + assert len(chunks) > 0 + + def test_get_file_diff_case_insensitive_exact(self, tools, react_diff_file): + """get_file_diff matches with different case in file path.""" + tools.load_diff(react_diff_file) + + # Get a file name from the diff + chunks = tools.list_chunks(react_diff_file) + file_name = chunks[0]["files"][0] + + # Exact case should work + result_exact = tools.get_file_diff(react_diff_file, file_name) + assert "diff --git" in result_exact + + # Uppercase version should also work + result_upper = tools.get_file_diff(react_diff_file, file_name.upper()) + assert result_upper == result_exact + + def test_get_file_diff_case_insensitive_glob(self, tools, react_diff_file): + """get_file_diff glob matching is case-insensitive.""" + tools.load_diff(react_diff_file) + + chunks = tools.list_chunks(react_diff_file) + # Find a file that has a unique extension to use as glob pattern + file_name = chunks[0]["files"][0] + + # If the file has an extension, try matching with different case + if "." in file_name: + base, ext = file_name.rsplit(".", 1) + upper_pattern = f"{base.upper()}.{ext.upper()}" + result = tools.get_file_diff(react_diff_file, upper_pattern) + assert "diff --git" in result diff --git a/tests/test_cli_integration.py b/tests/test_cli_integration.py index 9c934bb..98da753 100644 --- a/tests/test_cli_integration.py +++ b/tests/test_cli_integration.py @@ -17,7 +17,7 @@ def test_cli_help_display(self): [sys.executable, "-m", "src.main", "--help"], capture_output=True, text=True, - timeout=10, + timeout=30, ) assert result.returncode == 0 @@ -131,12 +131,13 @@ def test_cli_server_startup_messages(self): ) try: - # Let it start and produce some output - time.sleep(1) + # Give extra time on Windows where startup is slower + startup_wait = 3 if platform.system() == "Windows" else 1 + time.sleep(startup_wait) # Terminate and get output process.terminate() - stdout, stderr = process.communicate(timeout=3) + stdout, stderr = process.communicate(timeout=5) # Check startup messages assert "Starting diffchunk MCP server" in stderr diff --git a/tests/test_filesystem_edge_cases.py b/tests/test_filesystem_edge_cases.py index fdc6bda..e11a8a9 100644 --- a/tests/test_filesystem_edge_cases.py +++ b/tests/test_filesystem_edge_cases.py @@ -155,7 +155,7 @@ def test_get_chunk_out_of_range(self, tools, react_diff_file): total_chunks = result["chunks"] # Chunk number too high - with pytest.raises(ValueError, match="Chunk .* not found"): + with pytest.raises(ValueError, match="Chunk .* does not exist"): tools.get_chunk(react_diff_file, total_chunks + 1) def test_tools_operations_without_diff_loaded(self, tools): diff --git a/tests/test_get_file_diff.py b/tests/test_get_file_diff.py index 5048a92..fe34c6a 100644 --- a/tests/test_get_file_diff.py +++ b/tests/test_get_file_diff.py @@ -117,7 +117,7 @@ def test_glob_pattern_zero_matches(self, tools, multi_file_diff_path): def test_glob_pattern_multiple_matches(self, tools, multi_file_diff_path): """get_file_diff with multiple matches raises error.""" - with pytest.raises(ValueError, match="matches multiple files"): + with pytest.raises(ValueError, match="matches \\d+ files"): tools.get_file_diff(multi_file_diff_path, "src/*") def test_auto_loads_if_not_loaded(self, tools, multi_file_diff_path): diff --git a/tests/test_handle_call_tool.py b/tests/test_handle_call_tool.py deleted file mode 100644 index b11a86a..0000000 --- a/tests/test_handle_call_tool.py +++ /dev/null @@ -1,90 +0,0 @@ -"""Test handle_call_tool for code coverage.""" - -import json -from pathlib import Path - -import pytest -from mcp.types import CallToolRequest - -from src.server import DiffChunkServer - - -class TestHandleCallTool: - """Test handle_call_tool for coverage.""" - - @pytest.fixture - def server(self): - return DiffChunkServer() - - @pytest.fixture - def react_diff_file(self): - diff_file = Path(__file__).parent / "test_data" / "react_18.0_to_18.3.diff" - if not diff_file.exists(): - pytest.skip("React test diff not found") - return str(diff_file) - - @pytest.mark.asyncio - async def test_handle_call_tool_coverage(self, server, react_diff_file): - """Test all paths in handle_call_tool for coverage.""" - handler = server.app.request_handlers[CallToolRequest] - - # Test load_diff - request = CallToolRequest( - method="tools/call", - params={ - "name": "load_diff", - "arguments": {"absolute_file_path": react_diff_file}, - }, - ) - result = await handler(request) - data = json.loads(result.root.content[0].text) - assert data["chunks"] > 0 - - # Test list_chunks - request = CallToolRequest( - method="tools/call", - params={ - "name": "list_chunks", - "arguments": {"absolute_file_path": react_diff_file}, - }, - ) - result = await handler(request) - chunks = json.loads(result.root.content[0].text) - assert len(chunks) > 0 - - # Test get_chunk - request = CallToolRequest( - method="tools/call", - params={ - "name": "get_chunk", - "arguments": {"absolute_file_path": react_diff_file, "chunk_number": 1}, - }, - ) - result = await handler(request) - assert "=== Chunk 1 of" in result.root.content[0].text - - # Test find_chunks_for_files - request = CallToolRequest( - method="tools/call", - params={ - "name": "find_chunks_for_files", - "arguments": {"absolute_file_path": react_diff_file, "pattern": "*"}, - }, - ) - result = await handler(request) - chunk_nums = json.loads(result.root.content[0].text) - assert isinstance(chunk_nums, list) - - # Test unknown tool - request = CallToolRequest( - method="tools/call", params={"name": "unknown_tool", "arguments": {}} - ) - result = await handler(request) - assert "Unknown tool: unknown_tool" in result.root.content[0].text - - # Test None arguments (validation error from MCP layer) - request = CallToolRequest( - method="tools/call", params={"name": "load_diff", "arguments": None} - ) - result = await handler(request) - assert "Input validation error" in result.root.content[0].text diff --git a/tests/test_integration.py b/tests/test_integration.py index 5a1e856..4244ca6 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -126,7 +126,7 @@ def test_get_chunk_functionality(self, tools, test_data_dir): assert len(chunk_content_no_context) < len(chunk_content) # Test invalid chunk number - with pytest.raises(ValueError, match="Chunk .* not found"): + with pytest.raises(ValueError, match="Chunk .* does not exist"): tools.get_chunk(str(diff_file), chunk_count + 1) def test_find_chunks_for_files(self, tools, test_data_dir): diff --git a/tests/test_mcp_components.py b/tests/test_mcp_components.py index 448dd11..f44c3b6 100644 --- a/tests/test_mcp_components.py +++ b/tests/test_mcp_components.py @@ -4,7 +4,6 @@ import pytest -from src.server import DiffChunkServer from src.tools import DiffChunkTools @@ -188,25 +187,6 @@ def test_diffchunk_tools_multi_file_support(self, react_diff_file, go_diff_file) assert react_diff_file in file_paths assert go_diff_file in file_paths - def test_mcp_server_creation(self): - """Test MCP server can be created successfully.""" - server = DiffChunkServer() - - # Verify server has the expected attributes - assert server.app is not None - assert server.tools is not None - assert isinstance(server.tools, DiffChunkTools) - - def test_mcp_server_tools_registration(self): - """Test that MCP server has tools registered.""" - server = DiffChunkServer() - - # The server should have handlers set up - # This is a basic smoke test to ensure the server initializes - assert hasattr(server, "app") - assert hasattr(server, "tools") - assert hasattr(server, "_setup_handlers") - def test_filtering_and_chunking_options(self, go_diff_file): """Test different filtering and chunking options.""" tools = DiffChunkTools() diff --git a/tests/test_server_error_handling.py b/tests/test_server_error_handling.py deleted file mode 100644 index c1eee0f..0000000 --- a/tests/test_server_error_handling.py +++ /dev/null @@ -1,231 +0,0 @@ -"""MCP server error handling integration tests.""" - -import json -from pathlib import Path - -import pytest - -from src.server import DiffChunkServer -from src.tools import DiffChunkTools - - -class TestMCPServerErrorHandling: - """Test MCP server error handling scenarios.""" - - @pytest.fixture - def server(self): - """Create a DiffChunkServer instance.""" - return DiffChunkServer() - - @pytest.fixture - def test_data_dir(self): - """Return path to test data directory.""" - return Path(__file__).parent / "test_data" - - @pytest.fixture - def react_diff_file(self, test_data_dir): - """Return path to React test diff file.""" - diff_file = test_data_dir / "react_18.0_to_18.3.diff" - if not diff_file.exists(): - pytest.skip("React test diff not found") - return str(diff_file) - - def test_server_initialization(self, server): - """Test server initializes correctly.""" - assert server.app is not None - assert server.tools is not None - assert isinstance(server.tools, DiffChunkTools) - - def test_server_handlers_setup(self, server): - """Test server has handlers properly set up.""" - assert hasattr(server, "_setup_handlers") - assert hasattr(server.app, "list_resources") - assert hasattr(server.app, "read_resource") - assert hasattr(server.app, "list_tools") - assert hasattr(server.app, "call_tool") - - def test_read_resource_invalid_uri(self, server): - """Test reading invalid resource URI.""" - # Test that server raises error for invalid URI - # This tests the error path in the actual handler - with pytest.raises(ValueError, match="Unknown resource"): - # Simulate what the handler does - uri = "invalid://uri" - if uri == "diffchunk://current": - overview = server.tools.get_current_overview() - json.dumps(overview, indent=2) - else: - raise ValueError(f"Unknown resource: {uri}") - - def test_call_tool_unknown_tool(self, server): - """Test calling unknown tool.""" - # Test the error handling logic for unknown tools - name = "unknown_tool" - - # Simulate what the handler does - try: - if name in [ - "load_diff", - "list_chunks", - "get_chunk", - "find_chunks_for_files", - ]: - pass # Valid tools - else: - raise ValueError(f"Unknown tool: {name}") - except ValueError as e: - error_msg = f"Error in {name}: {str(e)}" - assert "Unknown tool: unknown_tool" in error_msg - - def test_call_tool_with_none_arguments(self, server): - """Test calling tool with None arguments.""" - # Test argument handling - arguments = None - if arguments is None: - arguments = {} - assert isinstance(arguments, dict) - - def test_call_tool_load_diff_errors(self, server): - """Test load_diff tool error handling.""" - # Test that tool errors are caught and formatted - try: - # This should fail with missing file - server.tools.load_diff("/nonexistent/file.diff") - except ValueError as e: - error_msg = f"Error in load_diff: {str(e)}" - assert "Error in load_diff:" in error_msg - assert "not found" in error_msg - - def test_call_tool_operations_without_loaded_diff(self, server): - """Test tool operations when no diff is loaded.""" - # Test list_chunks without loaded diff - try: - server.tools.list_chunks("/nonexistent/file.diff") - except ValueError as e: - error_msg = f"Error in list_chunks: {str(e)}" - assert "Error in list_chunks: Cannot access file" in error_msg - - # Test get_chunk without loaded diff - try: - server.tools.get_chunk("/nonexistent/file.diff", 1) - except ValueError as e: - error_msg = f"Error in get_chunk: {str(e)}" - assert "Error in get_chunk: Cannot access file" in error_msg - - # Test find_chunks_for_files without loaded diff - try: - server.tools.find_chunks_for_files("/nonexistent/file.diff", "*.py") - except ValueError as e: - error_msg = f"Error in find_chunks_for_files: {str(e)}" - assert "Error in find_chunks_for_files: Cannot access file" in error_msg - - def test_call_tool_invalid_arguments(self, server, react_diff_file): - """Test tool calls with invalid arguments.""" - # Load a diff first - server.tools.load_diff(react_diff_file) - - # Test get_chunk with invalid chunk number - try: - server.tools.get_chunk(react_diff_file, 0) - except ValueError as e: - error_msg = f"Error in get_chunk: {str(e)}" - assert "Error in get_chunk:" in error_msg - assert "must be a positive integer" in error_msg - - # Test find_chunks_for_files with empty pattern - try: - server.tools.find_chunks_for_files(react_diff_file, "") - except ValueError as e: - error_msg = f"Error in find_chunks_for_files: {str(e)}" - assert "Error in find_chunks_for_files:" in error_msg - assert "must be a non-empty string" in error_msg - - def test_call_tool_unexpected_exception_handling(self, server): - """Test handling of unexpected exceptions in tools.""" - # Simulate an unexpected exception - try: - raise RuntimeError("Unexpected error occurred") - except ValueError as e: - error_msg = f"Error in test_exception: {str(e)}" - except Exception as e: - error_msg = f"Unexpected error in test_exception: {str(e)}" - assert ( - "Unexpected error in test_exception: Unexpected error occurred" - in error_msg - ) - - def test_list_resources_basic(self, server): - """Test list_resources returns expected resources.""" - # Test the resource structure - expected_resource = { - "uri": "diffchunk://current", - "name": "Current Diff Overview", - "description": "Overview of the currently loaded diff file", - "mimeType": "application/json", - } - - # Verify resource structure is correct - assert expected_resource["uri"] == "diffchunk://current" - assert expected_resource["name"] == "Current Diff Overview" - - def test_read_resource_current_overview(self, server, react_diff_file): - """Test reading current overview resource.""" - # Load a diff first - server.tools.load_diff(react_diff_file) - - # Test reading the overview - uri = "diffchunk://current" - if uri == "diffchunk://current": - overview = server.tools.get_current_overview() - result = json.dumps(overview, indent=2) - - # Should be valid JSON - overview_parsed = json.loads(result) - assert overview_parsed["loaded"] is True - assert overview_parsed["total_sessions"] >= 1 - # Find the session with the react_diff_file - react_session = next( - ( - s - for s in overview_parsed["sessions"] - if s["file_path"] == react_diff_file - ), - None, - ) - assert react_session is not None - assert react_session["chunks"] > 0 - - def test_server_run_method_exists(self, server): - """Test server has run method for starting.""" - assert hasattr(server, "run") - assert callable(server.run) - - def test_tool_error_message_formatting(self, server): - """Test that tool errors are properly formatted.""" - # Test ValueError formatting - try: - raise ValueError("Test error message") - except ValueError as e: - error_msg = f"Error in test_tool: {str(e)}" - assert error_msg == "Error in test_tool: Test error message" - - # Test Exception formatting - try: - raise RuntimeError("Test runtime error") - except Exception as e: - error_msg = f"Unexpected error in test_tool: {str(e)}" - assert error_msg == "Unexpected error in test_tool: Test runtime error" - - def test_json_serialization_in_responses(self, server, react_diff_file): - """Test JSON serialization in tool responses.""" - # Load diff and test JSON responses - result = server.tools.load_diff(react_diff_file) - - # Test that result can be serialized to JSON - json_result = json.dumps(result, indent=2) - assert isinstance(json_result, str) - - # Test that it can be parsed back - parsed_result = json.loads(json_result) - assert parsed_result["chunks"] == result["chunks"] - assert parsed_result["files"] == result["files"] diff --git a/uv.lock b/uv.lock index 2c719b3..4187587 100644 --- a/uv.lock +++ b/uv.lock @@ -287,7 +287,7 @@ wheels = [ [[package]] name = "diffchunk" -version = "0.1.7" +version = "0.1.9" source = { editable = "." } dependencies = [ { name = "chardet" }, @@ -308,7 +308,7 @@ dev = [ requires-dist = [ { name = "chardet", specifier = ">=4.0.0" }, { name = "click", specifier = ">=8.2.1" }, - { name = "mcp", specifier = ">=1.10.0" }, + { name = "mcp", specifier = ">=1.7.0,<2.0.0" }, ] [package.metadata.requires-dev]