Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion src/application/api/file_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,20 @@ def _validate_file_type(filename: str, content_type: str) -> None:
)


def _validate_prefix(prefix: str) -> str:
normalized = posixpath.normpath(prefix.replace("\\", "/"))
if normalized == ".":
normalized = ""
if normalized.startswith("..") or posixpath.isabs(normalized):
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
detail="prefix must be a relative path within the bucket",
)
if prefix.endswith("/") and not normalized.endswith("/"):
normalized += "/"
return normalized


@file_router.get(
"/files/list",
status_code=status.HTTP_200_OK,
Expand All @@ -68,6 +82,7 @@ async def list_files(
recursive: bool = True,
use_case: ListFilesUseCase = Depends(get_list_files_use_case),
) -> list[FileInfoResponse]:
prefix = _validate_prefix(prefix)
files = await use_case.execute(prefix=prefix, recursive=recursive)
return [FileInfoResponse(**asdict(f)) for f in files]

Expand All @@ -77,10 +92,12 @@ async def list_files(
status_code=status.HTTP_200_OK,
)
async def list_folders(
prefix: str = "",
use_case: ListFoldersUseCase = Depends(get_list_folders_use_case),
) -> list[str]:
prefix = _validate_prefix(prefix)
try:
return await use_case.execute()
return await use_case.execute(prefix=prefix)
except FileNotFoundError as e:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
Expand Down
34 changes: 32 additions & 2 deletions src/application/api/mcp_file_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@
"""

import logging
import posixpath
from dataclasses import asdict

from fastmcp import FastMCP
from fastmcp.exceptions import ToolError

from application.responses.file_response import FileContentResponse, FileInfoResponse
from dependencies import (
get_list_files_use_case,
get_list_folders_use_case,
get_read_file_use_case,
)

Expand All @@ -19,6 +22,17 @@
mcp_files = FastMCP("RAGAnythingFiles")


def _validate_prefix(prefix: str) -> str:
normalized = posixpath.normpath(prefix.replace("\\", "/"))
if normalized == ".":
normalized = ""
if normalized.startswith("..") or posixpath.isabs(normalized):
raise ToolError(f"prefix must be a relative path within the bucket, got: {prefix!r}")
if prefix.endswith("/") and not normalized.endswith("/"):
normalized += "/"
return normalized


@mcp_files.tool()
async def list_files(
prefix: str = "", recursive: bool = True
Expand All @@ -32,11 +46,27 @@ async def list_files(
Returns:
List of file objects with object_name, size, and last_modified
"""
prefix = _validate_prefix(prefix)
use_case = get_list_files_use_case()
files = await use_case.execute(prefix=prefix, recursive=recursive)
return [FileInfoResponse(**asdict(f)) for f in files]


@mcp_files.tool()
async def list_folders(prefix: str = "") -> list[str]:
"""List folders in MinIO storage under a given prefix.

Args:
prefix: MinIO prefix/path to list folders under (e.g. 'documents/')

Returns:
List of folder prefix strings (e.g. ['docs/', 'photos/'])
"""
prefix = _validate_prefix(prefix)
use_case = get_list_folders_use_case()
return await use_case.execute(prefix=prefix)


@mcp_files.tool()
async def read_file(file_path: str) -> FileContentResponse:
"""Read and extract text content from a file stored in MinIO.
Expand All @@ -54,10 +84,10 @@ async def read_file(file_path: str) -> FileContentResponse:
try:
result = await use_case.execute(file_path=file_path)
except FileNotFoundError:
raise ValueError(f"File not found: {file_path}") from None
raise ToolError(f"File not found: {file_path}") from None
except Exception:
logger.exception("Unexpected error reading file: %s", file_path)
raise RuntimeError("Failed to read file") from None
raise ToolError(f"Failed to read file: {file_path}") from None
return FileContentResponse(
content=result.content,
metadata=result.metadata,
Expand Down
4 changes: 2 additions & 2 deletions src/application/use_cases/list_folders_use_case.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ def __init__(self, storage: StoragePort, bucket: str) -> None:
self.storage = storage
self.bucket = bucket

async def execute(self) -> list[str]:
return await self.storage.list_folders(self.bucket)
async def execute(self, prefix: str = "") -> list[str]:
return await self.storage.list_folders(self.bucket, prefix)
5 changes: 3 additions & 2 deletions src/domain/ports/storage_port.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,13 @@ async def list_files_metadata(
pass

@abstractmethod
async def list_folders(self, bucket: str) -> list[str]:
async def list_folders(self, bucket: str, prefix: str = "") -> list[str]:
"""
List top-level folder prefixes in the bucket.
List folder prefixes in the bucket under a given prefix.

Args:
bucket: The bucket name to list folders from.
prefix: The prefix to list folders under (default: root).

Returns:
A list of folder prefix strings (e.g., ['docs/', 'photos/']).
Expand Down
4 changes: 2 additions & 2 deletions src/infrastructure/storage/minio_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ async def list_files_metadata(
if not obj.is_dir
]

async def list_folders(self, bucket: str) -> list[str]:
objects = await self._list_minio_objects(bucket, prefix="", recursive=False)
async def list_folders(self, bucket: str, prefix: str = "") -> list[str]:
objects = await self._list_minio_objects(bucket, prefix=prefix, recursive=False)
return [obj.object_name for obj in objects if obj.is_dir]

async def ping(self, bucket: str) -> bool:
Expand Down
59 changes: 59 additions & 0 deletions tests/unit/test_file_routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,26 @@ async def test_list_files_empty_result(
body = response.json()
assert body == []

async def test_list_files_rejects_path_traversal_prefix(self) -> None:
async with httpx.AsyncClient(
transport=ASGITransport(app=app), base_url="http://test"
) as client:
response = await client.get(
"/api/v1/files/list", params={"prefix": "../../etc"}
)

assert response.status_code == 422

async def test_list_files_rejects_absolute_prefix(self) -> None:
async with httpx.AsyncClient(
transport=ASGITransport(app=app), base_url="http://test"
) as client:
response = await client.get(
"/api/v1/files/list", params={"prefix": "/etc/passwd"}
)

assert response.status_code == 422


class TestReadFileRoute:
@pytest.fixture
Expand Down Expand Up @@ -265,6 +285,25 @@ async def test_list_folders_empty_result(
body = response.json()
assert body == []

async def test_list_folders_with_prefix_param(
self, mock_list_folders_use_case: AsyncMock
) -> None:
mock_list_folders_use_case.execute.return_value = ["reports/", "exports/"]
app.dependency_overrides[get_list_folders_use_case] = lambda: (
mock_list_folders_use_case
)

async with httpx.AsyncClient(
transport=ASGITransport(app=app), base_url="http://test"
) as client:
response = await client.get(
"/api/v1/files/folders", params={"prefix": "docs/"}
)

assert response.status_code == 200
assert response.json() == ["reports/", "exports/"]
mock_list_folders_use_case.execute.assert_called_once_with(prefix="docs/")

async def test_list_folders_returns_404_for_missing_bucket(
self, mock_list_folders_use_case: AsyncMock
) -> None:
Expand All @@ -281,3 +320,23 @@ async def test_list_folders_returns_404_for_missing_bucket(
response = await client.get("/api/v1/files/folders")

assert response.status_code == 404

async def test_list_folders_rejects_path_traversal_prefix(self) -> None:
async with httpx.AsyncClient(
transport=ASGITransport(app=app), base_url="http://test"
) as client:
response = await client.get(
"/api/v1/files/folders", params={"prefix": "../../etc"}
)

assert response.status_code == 422

async def test_list_folders_rejects_absolute_prefix(self) -> None:
async with httpx.AsyncClient(
transport=ASGITransport(app=app), base_url="http://test"
) as client:
response = await client.get(
"/api/v1/files/folders", params={"prefix": "/etc/passwd"}
)

assert response.status_code == 422
13 changes: 12 additions & 1 deletion tests/unit/test_list_folders_use_case.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,18 @@ async def test_execute_calls_storage_list_folders(

await use_case.execute()

mock_storage.list_folders.assert_called_once_with("test-bucket")
mock_storage.list_folders.assert_called_once_with("test-bucket", "")

async def test_execute_with_prefix_passes_prefix_to_storage(
self, mock_storage: AsyncMock
) -> None:
mock_storage.list_folders.return_value = ["reports/", "exports/"]
use_case = ListFoldersUseCase(storage=mock_storage, bucket="test-bucket")

result = await use_case.execute(prefix="docs/")

mock_storage.list_folders.assert_called_once_with("test-bucket", "docs/")
assert result == ["reports/", "exports/"]

async def test_execute_returns_folder_prefixes(
self, mock_storage: AsyncMock
Expand Down
89 changes: 83 additions & 6 deletions tests/unit/test_mcp_file_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@
from unittest.mock import AsyncMock, patch

import pytest
from fastmcp.exceptions import ToolError

from application.api.mcp_file_tools import (
_validate_prefix,
list_files,
list_folders,
mcp_files,
read_file,
)
Expand All @@ -21,6 +24,33 @@ def test_mcp_files_has_correct_name(self) -> None:
assert mcp_files.name == "RAGAnythingFiles"


class TestValidatePrefix:
"""Tests for the _validate_prefix helper."""

def test_empty_prefix_returns_empty(self) -> None:
assert _validate_prefix("") == ""

def test_valid_prefix_passes_through(self) -> None:
assert _validate_prefix("docs/reports/") == "docs/reports/"

def test_trailing_slash_preserved(self) -> None:
assert _validate_prefix("docs/") == "docs/"

def test_backslash_normalized(self) -> None:
assert _validate_prefix("docs\\reports/") == "docs/reports/"

def test_dot_normalized_to_empty(self) -> None:
assert _validate_prefix(".") == ""

def test_path_traversal_rejected(self) -> None:
with pytest.raises(ToolError, match="prefix must be a relative path"):
_validate_prefix("../../etc")

def test_absolute_path_rejected(self) -> None:
with pytest.raises(ToolError, match="prefix must be a relative path"):
_validate_prefix("/etc/passwd")


class TestListFiles:
"""Tests for the list_files MCP tool."""

Expand Down Expand Up @@ -100,6 +130,53 @@ async def test_returns_empty_list_when_no_files(self) -> None:

assert result == []

async def test_rejects_path_traversal_prefix(self) -> None:
"""Should raise ToolError for path traversal in prefix."""
with pytest.raises(ToolError, match="prefix must be a relative path"):
await list_files(prefix="../../etc")

async def test_rejects_absolute_prefix(self) -> None:
"""Should raise ToolError for absolute path in prefix."""
with pytest.raises(ToolError, match="prefix must be a relative path"):
await list_files(prefix="/etc/passwd")


class TestListFolders:
"""Tests for the list_folders MCP tool."""

async def test_returns_folder_list(self) -> None:
mock_use_case = AsyncMock()
mock_use_case.execute.return_value = ["docs/", "photos/"]

with patch(
"application.api.mcp_file_tools.get_list_folders_use_case",
return_value=mock_use_case,
):
result = await list_folders()

assert result == ["docs/", "photos/"]

async def test_forwards_prefix_to_use_case(self) -> None:
mock_use_case = AsyncMock()
mock_use_case.execute.return_value = ["reports/"]

with patch(
"application.api.mcp_file_tools.get_list_folders_use_case",
return_value=mock_use_case,
):
result = await list_folders(prefix="docs/")

mock_use_case.execute.assert_called_once_with(prefix="docs/")
assert result == ["reports/"]

async def test_rejects_path_traversal_prefix(self) -> None:
with pytest.raises(ToolError, match="prefix must be a relative path"):
await list_folders(prefix="../../etc")

async def test_rejects_absolute_prefix(self) -> None:
with pytest.raises(ToolError, match="prefix must be a relative path"):
await list_folders(prefix="/etc/passwd")


class TestReadFile:
"""Tests for the read_file MCP tool."""
Expand Down Expand Up @@ -128,8 +205,8 @@ async def test_returns_file_content_response(
assert result.content == "Extracted text from the document."
assert result.metadata.mime_type == "application/pdf"

async def test_raises_value_error_for_file_not_found(self) -> None:
"""Should convert FileNotFoundError to ValueError with helpful message."""
async def test_raises_tool_error_for_file_not_found(self) -> None:
"""Should convert FileNotFoundError to ToolError."""
mock_use_case = AsyncMock()
mock_use_case.execute.side_effect = FileNotFoundError

Expand All @@ -138,12 +215,12 @@ async def test_raises_value_error_for_file_not_found(self) -> None:
"application.api.mcp_file_tools.get_read_file_use_case",
return_value=mock_use_case,
),
pytest.raises(ValueError, match="File not found: missing.pdf"),
pytest.raises(ToolError, match="File not found: missing.pdf"),
):
await read_file(file_path="missing.pdf")

async def test_raises_runtime_error_for_generic_failure(self) -> None:
"""Should convert generic exceptions to RuntimeError."""
async def test_raises_tool_error_for_generic_failure(self) -> None:
"""Should convert generic exceptions to ToolError."""
mock_use_case = AsyncMock()
mock_use_case.execute.side_effect = Exception("Disk full")

Expand All @@ -152,7 +229,7 @@ async def test_raises_runtime_error_for_generic_failure(self) -> None:
"application.api.mcp_file_tools.get_read_file_use_case",
return_value=mock_use_case,
),
pytest.raises(RuntimeError, match="Failed to read file"),
pytest.raises(ToolError, match="Failed to read file"),
):
await read_file(file_path="documents/broken.pdf")

Expand Down
Loading
Loading