Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Ensure diff test data uses LF line endings on all platforms
*.diff eol=lf
tests/test_data/** eol=lf
14 changes: 11 additions & 3 deletions docs/design.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ MCP server that chunks large diff files for efficient LLM navigation. Uses file-
Diff File → Canonicalize Path → Hash Content → Cache Check → Parse → Filter → Chunk → Index → Tools
```

The server uses FastMCP with module-level synchronous functions decorated with `@mcp.tool()` (not a class). Resources use `@mcp.resource()`. Error handling is automatic: a `ValueError` raised in any tool function is caught by FastMCP and returned as `CallToolResult(isError=True)`. All tools set `annotations={"readOnlyHint": True}` and `structured_output=False` (the latter prevents `outputSchema` generation).

## Tools

### load_diff (Optional)
Expand Down Expand Up @@ -156,14 +158,15 @@ class DiffStats:
- File existence validation
- Diff format verification
- Graceful handling of malformed sections
- Clear error messages for invalid patterns
- Actionable error messages that guide LLMs to self-correct
- `ValueError` automatically surfaces as `isError=True` through FastMCP

## Project Structure

```
src/
├── main.py # CLI entry point
├── server.py # MCP server (DiffChunkServer)
├── server.py # MCP server (FastMCP module-level tools)
├── tools.py # MCP tools (DiffChunkTools)
├── models.py # Data models
├── parser.py # Diff parsing (DiffParser)
Expand All @@ -172,7 +175,12 @@ src/

## Resources

- `diffchunk://current` - Overview of loaded diffs via MCP resource protocol
- `diffchunk://current` - Overview of loaded diffs via `@mcp.resource("diffchunk://current")` decorator

### File Matching

- Pattern matching (glob) is case-insensitive, matching the default behavior of macOS and Windows filesystems
- Both `find_chunks_for_files` and `get_file_diff` use case-insensitive comparison

## Performance

Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "diffchunk"
version = "0.1.8"
version = "0.1.9"
description = "MCP server for navigating large diff files with intelligent chunking"
readme = "README.md"
requires-python = ">=3.10"
Expand All @@ -25,7 +25,7 @@ classifiers = [
]
dependencies = [
"click>=8.2.1",
"mcp>=1.10.0",
"mcp>=1.7.0,<2.0.0",
"chardet>=4.0.0",
]

Expand Down
46 changes: 11 additions & 35 deletions scripts/pre-push
Original file line number Diff line number Diff line change
@@ -1,41 +1,17 @@
#!/bin/bash
#!/bin/sh
# Pre-push hook - runs the same checks as CI to catch issues early

# Comprehensive code quality check script
# Can be used manually or copied to .git/hooks/pre-push
set -e

set -e # Exit on any error
echo "Running pre-push checks..."

echo "🚀 Running pre-push checks..."
echo "================================"
echo "Checking ruff lint..."
uv run ruff check src/

# Function to run a check and report results
run_check() {
local name="$1"
local command="$2"

echo "🔍 Running $name..."
if eval "$command"; then
echo "✅ $name passed!"
else
echo "❌ $name failed!"
echo "Please fix the issues before pushing."
exit 1
fi
echo ""
}
echo "Checking code formatting..."
uv run ruff format --check src/

# 1. Code formatting check
run_check "Ruff format check" "uv run ruff format --check"
echo "Running tests..."
uv run pytest tests/ -x -q

# 2. Linting check
run_check "Ruff linting" "uv run ruff check"

# 3. Type checking
run_check "MyPy type checking" "uv run mypy src/"

# 4. Test suite
run_check "Test suite" "uv run pytest -x -q"

echo "🎉 All pre-push checks passed!"
echo "✅ Code is ready to push"
echo "================================"
echo "All checks passed."
2 changes: 1 addition & 1 deletion src/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""diffchunk - MCP server for navigating large diff files."""

__version__ = "0.1.0"
__version__ = "0.1.9"
24 changes: 14 additions & 10 deletions src/chunker.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,13 @@ def chunk_diff(
skip_generated: bool = True,
include_patterns: List[str] | None = None,
exclude_patterns: List[str] | None = None,
max_chunk_lines: int | None = None,
) -> None:
"""Chunk a diff file into the session."""
if max_chunk_lines is None:
max_chunk_lines = self.max_chunk_lines
elif not isinstance(max_chunk_lines, int) or max_chunk_lines <= 0:
raise ValueError("max_chunk_lines must be a positive integer")
chunk_number = 1
current_chunk_lines = 0
current_chunk_content: List[str] = []
Expand Down Expand Up @@ -55,7 +60,7 @@ def chunk_diff(
file_name = files[-1]

# Check if this file needs to be split
if content_lines > self.max_chunk_lines:
if content_lines > max_chunk_lines:
# Save current chunk if it has content
if current_chunk_content:
self._save_chunk(
Expand All @@ -73,7 +78,9 @@ def chunk_diff(
current_chunk_file_line_counts = {}

# Split the large file
file_chunks = self._split_large_file(files, content, content_lines)
file_chunks = self._split_large_file(
files, content, content_lines, max_chunk_lines
)
parent_file = files[0] if len(files) == 1 else f"{len(files)} files"

for sub_index, (sub_files, sub_content, sub_lines) in enumerate(
Expand All @@ -95,7 +102,7 @@ def chunk_diff(
# Check if we need to start a new chunk
if (
current_chunk_content
and current_chunk_lines + content_lines > self.max_chunk_lines
and current_chunk_lines + content_lines > max_chunk_lines
):
# Save current chunk
self._save_chunk(
Expand Down Expand Up @@ -172,10 +179,10 @@ def _save_chunk(
session.add_chunk(chunk)

def _split_large_file(
self, files: List[str], content: str, file_line_count: int
self, files: List[str], content: str, file_line_count: int, max_chunk_lines: int
) -> List[Tuple[List[str], str, int]]:
"""Split a large file's diff content at hunk boundaries."""
if file_line_count <= self.max_chunk_lines:
if file_line_count <= max_chunk_lines:
return [(files, content, file_line_count)]

# Pattern to match hunk headers like @@ -1,4 +1,6 @@
Expand All @@ -192,7 +199,7 @@ def _split_large_file(

# Be very aggressive about staying under the limit
target_chunk_size = max(
self.max_chunk_lines * 0.8, 200
max_chunk_lines * 0.8, 200
) # 80% of limit or 200 lines minimum

for i, line in enumerate(lines):
Expand Down Expand Up @@ -236,10 +243,7 @@ def _split_large_file(
current_chunk_line_count += 1

# STRICT enforcement: split immediately if we exceed limit
if (
current_chunk_line_count + len(file_header_lines)
>= self.max_chunk_lines
):
if current_chunk_line_count + len(file_header_lines) >= max_chunk_lines:
# Find the last hunk header in current chunk to split there
last_hunk_idx = None
for j in range(len(current_chunk_lines) - 1, -1, -1):
Expand Down
18 changes: 12 additions & 6 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
import asyncio
import sys
from importlib.metadata import version
from .server import DiffChunkServer

from .server import mcp


def main():
Expand Down Expand Up @@ -35,12 +36,17 @@ def main():

try:
print(
f"Starting diffchunk MCP server v{version('diffchunk')}...", file=sys.stderr
f"Starting diffchunk MCP server v{version('diffchunk')}...",
file=sys.stderr,
flush=True,
)
print(
"Server ready - waiting for MCP client connection",
file=sys.stderr,
flush=True,
)
print("Server ready - waiting for MCP client connection", file=sys.stderr)
server = DiffChunkServer()
asyncio.run(server.run())
except KeyboardInterrupt:
mcp.run()
except (KeyboardInterrupt, asyncio.CancelledError):
print("Server shutdown requested", file=sys.stderr)
sys.exit(0)
except Exception as e:
Expand Down
2 changes: 1 addition & 1 deletion src/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def find_chunks_for_files(self, pattern: str) -> List[int]:
matching_chunks = set()

for file_path, chunk_numbers in self.file_to_chunks.items():
if fnmatch.fnmatch(file_path, pattern):
if fnmatch.fnmatch(file_path.lower(), pattern.lower()):
matching_chunks.update(chunk_numbers)

return sorted(matching_chunks)
Expand Down
9 changes: 9 additions & 0 deletions src/parser.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
"""Diff file parsing functionality."""

import fnmatch
import logging
import re
from typing import List, Tuple, Iterator

logger = logging.getLogger("diffchunk")


class DiffParser:
"""Parser for unified diff files."""
Expand Down Expand Up @@ -135,6 +138,12 @@ def _read_diff_file(self, file_path: str) -> List[str]:
encoding = (
result.get("encoding") if result.get("confidence", 0) > 0.7 else "utf-8"
)
logger.debug(
"Detected encoding %s (confidence %.1f) for %s",
encoding,
result.get("confidence", 0),
file_path,
)

try:
with open(file_path, "r", encoding=encoding) as f:
Expand Down
Loading
Loading