From 6a99ed35149db640f9d0f735ccd3fb458a4e1b25 Mon Sep 17 00:00:00 2001 From: Kaiohz Date: Mon, 13 Apr 2026 10:58:57 +0200 Subject: [PATCH 1/2] feat: enhance folder listing functionality with prefix support --- src/application/api/file_routes.py | 3 ++- src/application/api/mcp_file_tools.py | 12 ++++++++++-- .../use_cases/list_folders_use_case.py | 4 ++-- src/domain/ports/storage_port.py | 5 +++-- src/infrastructure/storage/minio_adapter.py | 4 ++-- tests/unit/test_file_routes.py | 19 +++++++++++++++++++ tests/unit/test_list_folders_use_case.py | 13 ++++++++++++- tests/unit/test_minio_adapter.py | 19 +++++++++++++++++++ 8 files changed, 69 insertions(+), 10 deletions(-) diff --git a/src/application/api/file_routes.py b/src/application/api/file_routes.py index 1af66a6..4bdcb54 100644 --- a/src/application/api/file_routes.py +++ b/src/application/api/file_routes.py @@ -77,10 +77,11 @@ async def list_files( status_code=status.HTTP_200_OK, ) async def list_folders( + prefix: str = "", use_case: ListFoldersUseCase = Depends(get_list_folders_use_case), ) -> list[str]: try: - return await use_case.execute() + return await use_case.execute(prefix=prefix) except FileNotFoundError as e: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, diff --git a/src/application/api/mcp_file_tools.py b/src/application/api/mcp_file_tools.py index 2650c5c..c5063c7 100644 --- a/src/application/api/mcp_file_tools.py +++ b/src/application/api/mcp_file_tools.py @@ -54,10 +54,18 @@ async def read_file(file_path: str) -> FileContentResponse: try: result = await use_case.execute(file_path=file_path) except FileNotFoundError: - raise ValueError(f"File not found: {file_path}") from None + return FileContentResponse( + content=f"File not found: {file_path}", + metadata={"error": "file_not_found", "file_path": file_path}, + tables=[], + ) except Exception: logger.exception("Unexpected error reading file: %s", file_path) - raise RuntimeError("Failed to read file") from None + return FileContentResponse( + content=f"Failed to read file: {file_path}", + metadata={"error": "read_error", "file_path": file_path}, + tables=[], + ) return FileContentResponse( content=result.content, metadata=result.metadata, diff --git a/src/application/use_cases/list_folders_use_case.py b/src/application/use_cases/list_folders_use_case.py index 1468590..32852dd 100644 --- a/src/application/use_cases/list_folders_use_case.py +++ b/src/application/use_cases/list_folders_use_case.py @@ -6,5 +6,5 @@ def __init__(self, storage: StoragePort, bucket: str) -> None: self.storage = storage self.bucket = bucket - async def execute(self) -> list[str]: - return await self.storage.list_folders(self.bucket) + async def execute(self, prefix: str = "") -> list[str]: + return await self.storage.list_folders(self.bucket, prefix) diff --git a/src/domain/ports/storage_port.py b/src/domain/ports/storage_port.py index 45ce784..2d4591a 100644 --- a/src/domain/ports/storage_port.py +++ b/src/domain/ports/storage_port.py @@ -82,12 +82,13 @@ async def list_files_metadata( pass @abstractmethod - async def list_folders(self, bucket: str) -> list[str]: + async def list_folders(self, bucket: str, prefix: str = "") -> list[str]: """ - List top-level folder prefixes in the bucket. + List folder prefixes in the bucket under a given prefix. Args: bucket: The bucket name to list folders from. + prefix: The prefix to list folders under (default: root). Returns: A list of folder prefix strings (e.g., ['docs/', 'photos/']). diff --git a/src/infrastructure/storage/minio_adapter.py b/src/infrastructure/storage/minio_adapter.py index ac557c6..bacea03 100644 --- a/src/infrastructure/storage/minio_adapter.py +++ b/src/infrastructure/storage/minio_adapter.py @@ -104,8 +104,8 @@ async def list_files_metadata( if not obj.is_dir ] - async def list_folders(self, bucket: str) -> list[str]: - objects = await self._list_minio_objects(bucket, prefix="", recursive=False) + async def list_folders(self, bucket: str, prefix: str = "") -> list[str]: + objects = await self._list_minio_objects(bucket, prefix=prefix, recursive=False) return [obj.object_name for obj in objects if obj.is_dir] async def ping(self, bucket: str) -> bool: diff --git a/tests/unit/test_file_routes.py b/tests/unit/test_file_routes.py index e93cd23..3859a3d 100644 --- a/tests/unit/test_file_routes.py +++ b/tests/unit/test_file_routes.py @@ -265,6 +265,25 @@ async def test_list_folders_empty_result( body = response.json() assert body == [] + async def test_list_folders_with_prefix_param( + self, mock_list_folders_use_case: AsyncMock + ) -> None: + mock_list_folders_use_case.execute.return_value = ["reports/", "exports/"] + app.dependency_overrides[get_list_folders_use_case] = lambda: ( + mock_list_folders_use_case + ) + + async with httpx.AsyncClient( + transport=ASGITransport(app=app), base_url="http://test" + ) as client: + response = await client.get( + "/api/v1/files/folders", params={"prefix": "docs/"} + ) + + assert response.status_code == 200 + assert response.json() == ["reports/", "exports/"] + mock_list_folders_use_case.execute.assert_called_once_with(prefix="docs/") + async def test_list_folders_returns_404_for_missing_bucket( self, mock_list_folders_use_case: AsyncMock ) -> None: diff --git a/tests/unit/test_list_folders_use_case.py b/tests/unit/test_list_folders_use_case.py index 1109927..71b2092 100644 --- a/tests/unit/test_list_folders_use_case.py +++ b/tests/unit/test_list_folders_use_case.py @@ -12,7 +12,18 @@ async def test_execute_calls_storage_list_folders( await use_case.execute() - mock_storage.list_folders.assert_called_once_with("test-bucket") + mock_storage.list_folders.assert_called_once_with("test-bucket", "") + + async def test_execute_with_prefix_passes_prefix_to_storage( + self, mock_storage: AsyncMock + ) -> None: + mock_storage.list_folders.return_value = ["reports/", "exports/"] + use_case = ListFoldersUseCase(storage=mock_storage, bucket="test-bucket") + + result = await use_case.execute(prefix="docs/") + + mock_storage.list_folders.assert_called_once_with("test-bucket", "docs/") + assert result == ["reports/", "exports/"] async def test_execute_returns_folder_prefixes( self, mock_storage: AsyncMock diff --git a/tests/unit/test_minio_adapter.py b/tests/unit/test_minio_adapter.py index f842a12..844411c 100644 --- a/tests/unit/test_minio_adapter.py +++ b/tests/unit/test_minio_adapter.py @@ -270,6 +270,9 @@ async def test_returns_only_dir_objects( result = await adapter.list_folders("my-bucket") assert result == ["docs/", "photos/"] + mock_minio_client.list_objects.assert_called_once_with( + "my-bucket", prefix="", recursive=False + ) async def test_returns_empty_when_no_dirs( self, adapter: MinioAdapter, mock_minio_client: MagicMock @@ -319,3 +322,19 @@ async def test_propagates_file_not_found_for_missing_bucket( with pytest.raises(FileNotFoundError, match="Bucket not found"): await adapter.list_folders("bad-bucket") + + async def test_passes_prefix_to_minio_client( + self, adapter: MinioAdapter, mock_minio_client: MagicMock + ) -> None: + mock_dir = MagicMock() + mock_dir.object_name = "docs/reports/" + mock_dir.is_dir = True + + mock_minio_client.list_objects.return_value = [mock_dir] + + result = await adapter.list_folders("my-bucket", prefix="docs/") + + assert result == ["docs/reports/"] + mock_minio_client.list_objects.assert_called_once_with( + "my-bucket", prefix="docs/", recursive=False + ) From 77ac8b13351a8d711adb3ebab13791d449349320 Mon Sep 17 00:00:00 2001 From: Kaiohz Date: Mon, 13 Apr 2026 16:04:11 +0200 Subject: [PATCH 2/2] feat: add prefix validation for file and folder listing endpoints --- src/application/api/file_routes.py | 16 +++++ src/application/api/mcp_file_tools.py | 42 ++++++++++--- tests/unit/test_file_routes.py | 40 ++++++++++++ tests/unit/test_mcp_file_tools.py | 89 +++++++++++++++++++++++++-- 4 files changed, 171 insertions(+), 16 deletions(-) diff --git a/src/application/api/file_routes.py b/src/application/api/file_routes.py index 4bdcb54..b87db88 100644 --- a/src/application/api/file_routes.py +++ b/src/application/api/file_routes.py @@ -59,6 +59,20 @@ def _validate_file_type(filename: str, content_type: str) -> None: ) +def _validate_prefix(prefix: str) -> str: + normalized = posixpath.normpath(prefix.replace("\\", "/")) + if normalized == ".": + normalized = "" + if normalized.startswith("..") or posixpath.isabs(normalized): + raise HTTPException( + status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, + detail="prefix must be a relative path within the bucket", + ) + if prefix.endswith("/") and not normalized.endswith("/"): + normalized += "/" + return normalized + + @file_router.get( "/files/list", status_code=status.HTTP_200_OK, @@ -68,6 +82,7 @@ async def list_files( recursive: bool = True, use_case: ListFilesUseCase = Depends(get_list_files_use_case), ) -> list[FileInfoResponse]: + prefix = _validate_prefix(prefix) files = await use_case.execute(prefix=prefix, recursive=recursive) return [FileInfoResponse(**asdict(f)) for f in files] @@ -80,6 +95,7 @@ async def list_folders( prefix: str = "", use_case: ListFoldersUseCase = Depends(get_list_folders_use_case), ) -> list[str]: + prefix = _validate_prefix(prefix) try: return await use_case.execute(prefix=prefix) except FileNotFoundError as e: diff --git a/src/application/api/mcp_file_tools.py b/src/application/api/mcp_file_tools.py index c5063c7..862fcb7 100644 --- a/src/application/api/mcp_file_tools.py +++ b/src/application/api/mcp_file_tools.py @@ -4,13 +4,16 @@ """ import logging +import posixpath from dataclasses import asdict from fastmcp import FastMCP +from fastmcp.exceptions import ToolError from application.responses.file_response import FileContentResponse, FileInfoResponse from dependencies import ( get_list_files_use_case, + get_list_folders_use_case, get_read_file_use_case, ) @@ -19,6 +22,17 @@ mcp_files = FastMCP("RAGAnythingFiles") +def _validate_prefix(prefix: str) -> str: + normalized = posixpath.normpath(prefix.replace("\\", "/")) + if normalized == ".": + normalized = "" + if normalized.startswith("..") or posixpath.isabs(normalized): + raise ToolError(f"prefix must be a relative path within the bucket, got: {prefix!r}") + if prefix.endswith("/") and not normalized.endswith("/"): + normalized += "/" + return normalized + + @mcp_files.tool() async def list_files( prefix: str = "", recursive: bool = True @@ -32,11 +46,27 @@ async def list_files( Returns: List of file objects with object_name, size, and last_modified """ + prefix = _validate_prefix(prefix) use_case = get_list_files_use_case() files = await use_case.execute(prefix=prefix, recursive=recursive) return [FileInfoResponse(**asdict(f)) for f in files] +@mcp_files.tool() +async def list_folders(prefix: str = "") -> list[str]: + """List folders in MinIO storage under a given prefix. + + Args: + prefix: MinIO prefix/path to list folders under (e.g. 'documents/') + + Returns: + List of folder prefix strings (e.g. ['docs/', 'photos/']) + """ + prefix = _validate_prefix(prefix) + use_case = get_list_folders_use_case() + return await use_case.execute(prefix=prefix) + + @mcp_files.tool() async def read_file(file_path: str) -> FileContentResponse: """Read and extract text content from a file stored in MinIO. @@ -54,18 +84,10 @@ async def read_file(file_path: str) -> FileContentResponse: try: result = await use_case.execute(file_path=file_path) except FileNotFoundError: - return FileContentResponse( - content=f"File not found: {file_path}", - metadata={"error": "file_not_found", "file_path": file_path}, - tables=[], - ) + raise ToolError(f"File not found: {file_path}") from None except Exception: logger.exception("Unexpected error reading file: %s", file_path) - return FileContentResponse( - content=f"Failed to read file: {file_path}", - metadata={"error": "read_error", "file_path": file_path}, - tables=[], - ) + raise ToolError(f"Failed to read file: {file_path}") from None return FileContentResponse( content=result.content, metadata=result.metadata, diff --git a/tests/unit/test_file_routes.py b/tests/unit/test_file_routes.py index 3859a3d..57258b4 100644 --- a/tests/unit/test_file_routes.py +++ b/tests/unit/test_file_routes.py @@ -103,6 +103,26 @@ async def test_list_files_empty_result( body = response.json() assert body == [] + async def test_list_files_rejects_path_traversal_prefix(self) -> None: + async with httpx.AsyncClient( + transport=ASGITransport(app=app), base_url="http://test" + ) as client: + response = await client.get( + "/api/v1/files/list", params={"prefix": "../../etc"} + ) + + assert response.status_code == 422 + + async def test_list_files_rejects_absolute_prefix(self) -> None: + async with httpx.AsyncClient( + transport=ASGITransport(app=app), base_url="http://test" + ) as client: + response = await client.get( + "/api/v1/files/list", params={"prefix": "/etc/passwd"} + ) + + assert response.status_code == 422 + class TestReadFileRoute: @pytest.fixture @@ -300,3 +320,23 @@ async def test_list_folders_returns_404_for_missing_bucket( response = await client.get("/api/v1/files/folders") assert response.status_code == 404 + + async def test_list_folders_rejects_path_traversal_prefix(self) -> None: + async with httpx.AsyncClient( + transport=ASGITransport(app=app), base_url="http://test" + ) as client: + response = await client.get( + "/api/v1/files/folders", params={"prefix": "../../etc"} + ) + + assert response.status_code == 422 + + async def test_list_folders_rejects_absolute_prefix(self) -> None: + async with httpx.AsyncClient( + transport=ASGITransport(app=app), base_url="http://test" + ) as client: + response = await client.get( + "/api/v1/files/folders", params={"prefix": "/etc/passwd"} + ) + + assert response.status_code == 422 diff --git a/tests/unit/test_mcp_file_tools.py b/tests/unit/test_mcp_file_tools.py index b50f1e3..7b9bf77 100644 --- a/tests/unit/test_mcp_file_tools.py +++ b/tests/unit/test_mcp_file_tools.py @@ -3,9 +3,12 @@ from unittest.mock import AsyncMock, patch import pytest +from fastmcp.exceptions import ToolError from application.api.mcp_file_tools import ( + _validate_prefix, list_files, + list_folders, mcp_files, read_file, ) @@ -21,6 +24,33 @@ def test_mcp_files_has_correct_name(self) -> None: assert mcp_files.name == "RAGAnythingFiles" +class TestValidatePrefix: + """Tests for the _validate_prefix helper.""" + + def test_empty_prefix_returns_empty(self) -> None: + assert _validate_prefix("") == "" + + def test_valid_prefix_passes_through(self) -> None: + assert _validate_prefix("docs/reports/") == "docs/reports/" + + def test_trailing_slash_preserved(self) -> None: + assert _validate_prefix("docs/") == "docs/" + + def test_backslash_normalized(self) -> None: + assert _validate_prefix("docs\\reports/") == "docs/reports/" + + def test_dot_normalized_to_empty(self) -> None: + assert _validate_prefix(".") == "" + + def test_path_traversal_rejected(self) -> None: + with pytest.raises(ToolError, match="prefix must be a relative path"): + _validate_prefix("../../etc") + + def test_absolute_path_rejected(self) -> None: + with pytest.raises(ToolError, match="prefix must be a relative path"): + _validate_prefix("/etc/passwd") + + class TestListFiles: """Tests for the list_files MCP tool.""" @@ -100,6 +130,53 @@ async def test_returns_empty_list_when_no_files(self) -> None: assert result == [] + async def test_rejects_path_traversal_prefix(self) -> None: + """Should raise ToolError for path traversal in prefix.""" + with pytest.raises(ToolError, match="prefix must be a relative path"): + await list_files(prefix="../../etc") + + async def test_rejects_absolute_prefix(self) -> None: + """Should raise ToolError for absolute path in prefix.""" + with pytest.raises(ToolError, match="prefix must be a relative path"): + await list_files(prefix="/etc/passwd") + + +class TestListFolders: + """Tests for the list_folders MCP tool.""" + + async def test_returns_folder_list(self) -> None: + mock_use_case = AsyncMock() + mock_use_case.execute.return_value = ["docs/", "photos/"] + + with patch( + "application.api.mcp_file_tools.get_list_folders_use_case", + return_value=mock_use_case, + ): + result = await list_folders() + + assert result == ["docs/", "photos/"] + + async def test_forwards_prefix_to_use_case(self) -> None: + mock_use_case = AsyncMock() + mock_use_case.execute.return_value = ["reports/"] + + with patch( + "application.api.mcp_file_tools.get_list_folders_use_case", + return_value=mock_use_case, + ): + result = await list_folders(prefix="docs/") + + mock_use_case.execute.assert_called_once_with(prefix="docs/") + assert result == ["reports/"] + + async def test_rejects_path_traversal_prefix(self) -> None: + with pytest.raises(ToolError, match="prefix must be a relative path"): + await list_folders(prefix="../../etc") + + async def test_rejects_absolute_prefix(self) -> None: + with pytest.raises(ToolError, match="prefix must be a relative path"): + await list_folders(prefix="/etc/passwd") + class TestReadFile: """Tests for the read_file MCP tool.""" @@ -128,8 +205,8 @@ async def test_returns_file_content_response( assert result.content == "Extracted text from the document." assert result.metadata.mime_type == "application/pdf" - async def test_raises_value_error_for_file_not_found(self) -> None: - """Should convert FileNotFoundError to ValueError with helpful message.""" + async def test_raises_tool_error_for_file_not_found(self) -> None: + """Should convert FileNotFoundError to ToolError.""" mock_use_case = AsyncMock() mock_use_case.execute.side_effect = FileNotFoundError @@ -138,12 +215,12 @@ async def test_raises_value_error_for_file_not_found(self) -> None: "application.api.mcp_file_tools.get_read_file_use_case", return_value=mock_use_case, ), - pytest.raises(ValueError, match="File not found: missing.pdf"), + pytest.raises(ToolError, match="File not found: missing.pdf"), ): await read_file(file_path="missing.pdf") - async def test_raises_runtime_error_for_generic_failure(self) -> None: - """Should convert generic exceptions to RuntimeError.""" + async def test_raises_tool_error_for_generic_failure(self) -> None: + """Should convert generic exceptions to ToolError.""" mock_use_case = AsyncMock() mock_use_case.execute.side_effect = Exception("Disk full") @@ -152,7 +229,7 @@ async def test_raises_runtime_error_for_generic_failure(self) -> None: "application.api.mcp_file_tools.get_read_file_use_case", return_value=mock_use_case, ), - pytest.raises(RuntimeError, match="Failed to read file"), + pytest.raises(ToolError, match="Failed to read file"), ): await read_file(file_path="documents/broken.pdf")