diff --git a/.DS_Store b/.DS_Store index 9187e12..d5b3df0 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/.gitignore b/.gitignore index bff2863..f361d0c 100644 --- a/.gitignore +++ b/.gitignore @@ -15,4 +15,5 @@ trivy-report-fixed.json coverage.xml .ruff_cache .pytest_cache -trivy-report-current.json \ No newline at end of file +trivy-report-current.json +.vscode \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index ded5beb..765a67c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -22,6 +22,7 @@ RUN apt-get update && apt-get install -y \ libgl1 \ git \ tesseract-ocr \ + tesseract-ocr-fra \ && rm -rf /var/lib/apt/lists/* # Set working directory diff --git a/README.md b/README.md index fec5ab1..0d04255 100644 --- a/README.md +++ b/README.md @@ -14,23 +14,24 @@ Multi-modal RAG service exposing a REST API and MCP server for document indexing | +---------------+---------------+ | | - Application Layer MCP Tools - +------------------------------+ (FastMCP) - | api/ | | - | indexing_routes.py | | - | query_routes.py | | - | file_routes.py | | - | health_routes.py | | - | use_cases/ | | - | IndexFileUseCase | | - | IndexFolderUseCase | | - | QueryUseCase | | - | ListFilesUseCase | | - | ReadFileUseCase | | - | requests/ responses/ | | - +------------------------------+ | - | | | | - v v v v + Application Layer MCP Servers (FastMCP) + +------------------------------+ | + | api/ | +---+--------+ +--+-----------+ + | indexing_routes.py | | RAGAnything | | RAGAnything | + | query_routes.py | | Query | | Files | + | file_routes.py | | /rag/mcp | | /files/mcp | + | health_routes.py | +---+--------+ +--+-----------+ + | use_cases/ | | | + | IndexFileUseCase | query_knowledge list_files + | IndexFolderUseCase | _base read_file + | QueryUseCase | query_knowledge + | ListFilesUseCase | _base_multimodal + | ListFoldersUseCase | + | ReadFileUseCase | + | requests/ responses/ | + +------------------------------+ + | | | + v v v Domain Layer (ports) 
+------------------------------------------+ | RAGEnginePort StoragePort BM25EnginePort DocumentReaderPort | @@ -173,7 +174,7 @@ The service automatically detects and processes the following document formats t | Format | Extensions | Notes | |--------|------------|-------| -| PDF | `.pdf` | Includes OCR support | +| PDF | `.pdf` | Includes OCR support (English + French via Tesseract) | | Microsoft Word | `.docx` | | | Microsoft PowerPoint | `.pptx` | | | Microsoft Excel | `.xlsx` | | @@ -213,6 +214,26 @@ Response (`200 OK`): | `prefix` | string | `""` | MinIO prefix to filter files by | | `recursive` | boolean | `true` | List files in subdirectories | +### List folders + +Returns top-level folder prefixes in the bucket. REST-only endpoint (not exposed as an MCP tool). + +```bash +curl http://localhost:8000/api/v1/files/folders +``` + +Response (`200 OK`): + +```json +["documents/", "photos/", "reports/"] +``` + +Error responses: + +| Status | Condition | +|--------|-----------| +| `404` | Bucket not found | + ### Read a file Downloads the file from MinIO, extracts its text content using Kreuzberg, and returns the result. Supports 91 file formats including PDF, Office documents, images, and HTML. @@ -382,20 +403,24 @@ Response (`200 OK`): The `combined_score` is the sum of `bm25_score` and `vector_score`, each computed as `1 / (k + rank)`. Results are sorted by `combined_score` descending. A chunk that appears in both result sets will have a higher combined score than one that appears in only one. -## MCP Server +## MCP Servers -The MCP server is mounted at `/mcp` and exposes the following tools: +The service exposes **two separate MCP servers**, both using streamable HTTP transport: -### Tool: `query_knowledge_base` +### RAGAnythingQuery — `/rag/mcp` + +Query-focused tools for searching the indexed knowledge base. 
+ +#### Tool: `query_knowledge_base` | Parameter | Type | Default | Description | |-----------|------|---------|-------------| | `working_dir` | string | required | RAG workspace directory for this project | | `query` | string | required | The search query | -| `mode` | string | `"naive"` | Search mode: `naive`, `local`, `global`, `hybrid`, `hybrid+`, `mix`, `bm25`, `bypass` | -| `top_k` | integer | `10` | Number of chunks to retrieve | +| `mode` | string | `"hybrid"` | Search mode: `naive`, `local`, `global`, `hybrid`, `hybrid+`, `mix`, `bm25`, `bypass` | +| `top_k` | integer | `5` | Number of chunks to retrieve | -### Tool: `query_knowledge_base_multimodal` +#### Tool: `query_knowledge_base_multimodal` | Parameter | Type | Default | Description | |-----------|------|---------|-------------| @@ -405,14 +430,18 @@ The MCP server is mounted at `/mcp` and exposes the following tools: | `mode` | string | `"hybrid"` | Search mode | | `top_k` | integer | `5` | Number of chunks to retrieve | -### Tool: `list_files` +### RAGAnythingFiles — `/files/mcp` + +File browsing tools for listing and reading files from MinIO storage. + +#### Tool: `list_files` | Parameter | Type | Default | Description | |-----------|------|---------|-------------| | `prefix` | string | `""` | MinIO prefix to filter files by | | `recursive` | boolean | `true` | List files in subdirectories | -### Tool: `read_file` +#### Tool: `read_file` | Parameter | Type | Default | Description | |-----------|------|---------|-------------| @@ -420,39 +449,13 @@ The MCP server is mounted at `/mcp` and exposes the following tools: Downloads the file from MinIO, extracts its text content using Kreuzberg, and returns the extracted text along with metadata and any detected tables. 
-### Transport modes - -The `MCP_TRANSPORT` environment variable controls how the MCP server is exposed: - -| Value | Behavior | -|-------|----------| -| `stdio` | MCP runs over stdin/stdout; FastAPI runs in a background thread | -| `sse` | MCP mounted at `/mcp` as SSE endpoint | -| `streamable` | MCP mounted at `/mcp` as streamable HTTP endpoint | +### Transport -### Claude Desktop configuration +Both MCP servers use **streamable HTTP** transport exclusively. Connect MCP clients to the mount paths: -Add to `~/Library/Application Support/Claude/claude_desktop_config.json`: - -```json -{ - "mcpServers": { - "raganything": { - "command": "uv", - "args": [ - "run", - "--directory", - "/absolute/path/to/mcp-raganything", - "python", - "-m", - "src.main" - ], - "env": { - "MCP_TRANSPORT": "stdio" - } - } - } -} +``` +http://localhost:8000/rag/mcp # RAGAnythingQuery +http://localhost:8000/files/mcp # RAGAnythingFiles ``` ## Configuration @@ -465,7 +468,6 @@ All configuration is via environment variables, loaded through Pydantic Settings |----------|---------|-------------| | `HOST` | `0.0.0.0` | Server bind address | | `PORT` | `8000` | Server port | -| `MCP_TRANSPORT` | `stdio` | MCP transport: `stdio`, `sse`, `streamable` | | `ALLOWED_ORIGINS` | `["*"]` | CORS allowed origins | | `OUTPUT_DIR` | system temp | Temporary directory for downloaded files | | `UVICORN_LOG_LEVEL` | `critical` | Uvicorn log level | @@ -577,7 +579,7 @@ The PostgreSQL server must have the `pg_textsearch` extension installed and load ``` src/ - main.py -- FastAPI app, MCP mount, entry point + main.py -- FastAPI app, dual MCP mounts, entry point config.py -- Pydantic Settings config classes dependencies.py -- Dependency injection wiring domain/ @@ -593,8 +595,9 @@ src/ health_routes.py -- GET /health indexing_routes.py -- POST /file/index, /folder/index query_routes.py -- POST /query - file_routes.py -- GET /files/list, POST /files/read - mcp_tools.py -- MCP tools: query_knowledge_base, list_files, 
read_file + file_routes.py -- GET /files/list, GET /files/folders, POST /files/read + mcp_query_tools.py -- MCP tools: query_knowledge_base, query_knowledge_base_multimodal + mcp_file_tools.py -- MCP tools: list_files, read_file requests/ indexing_request.py -- IndexFileRequest, IndexFolderRequest query_request.py -- QueryRequest, MultimodalQueryRequest @@ -607,6 +610,7 @@ src/ index_folder_use_case.py -- Downloads from MinIO, indexes folder query_use_case.py -- Query with bm25/hybrid+ support list_files_use_case.py -- Lists files with metadata from MinIO + list_folders_use_case.py -- Lists folder prefixes from MinIO read_file_use_case.py -- Reads file from MinIO, extracts content via Kreuzberg infrastructure/ rag/ diff --git a/pyproject.toml b/pyproject.toml index ae181e3..781bbef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ dependencies = [ "fastmcp>=3.2.0", "cryptography>=46.0.5", "httpx>=0.27.0", - "kreuzberg>=4.0.0", + "kreuzberg[all]>=4.8.2", "lightrag-hku>=1.4.13", "lightrag-hku[api]>=1.4.13", "mcp>=1.24.0", diff --git a/src/application/api/file_routes.py b/src/application/api/file_routes.py index 7031004..91e7ea2 100644 --- a/src/application/api/file_routes.py +++ b/src/application/api/file_routes.py @@ -5,15 +5,19 @@ from application.requests.file_request import ReadFileRequest from application.responses.file_response import FileContentResponse, FileInfoResponse from application.use_cases.list_files_use_case import ListFilesUseCase +from application.use_cases.list_folders_use_case import ListFoldersUseCase from application.use_cases.read_file_use_case import ReadFileUseCase -from dependencies import get_list_files_use_case, get_read_file_use_case +from dependencies import ( + get_list_files_use_case, + get_list_folders_use_case, + get_read_file_use_case, +) file_router = APIRouter(tags=["Files"]) @file_router.get( "/files/list", - response_model=list[FileInfoResponse], status_code=status.HTTP_200_OK, ) async def list_files( @@ -25,9 
+29,24 @@ async def list_files( return [FileInfoResponse(**asdict(f)) for f in files] +@file_router.get( + "/files/folders", + status_code=status.HTTP_200_OK, +) +async def list_folders( + use_case: ListFoldersUseCase = Depends(get_list_folders_use_case), +) -> list[str]: + try: + return await use_case.execute() + except FileNotFoundError as e: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=str(e), + ) from None + + @file_router.post( "/files/read", - response_model=FileContentResponse, status_code=status.HTTP_200_OK, ) async def read_file( diff --git a/src/application/api/health_routes.py b/src/application/api/health_routes.py index 7d09e10..01680d6 100644 --- a/src/application/api/health_routes.py +++ b/src/application/api/health_routes.py @@ -1,4 +1,8 @@ -from fastapi import APIRouter +from fastapi import APIRouter, Depends +from fastapi.responses import JSONResponse + +from application.use_cases.liveness_check_use_case import LivenessCheckUseCase +from dependencies import get_liveness_check_use_case health_router = APIRouter(tags=["Health"]) @@ -12,3 +16,17 @@ def health_check(): dict: Status message indicating the API is running. """ return {"message": "RAG Anything API is running"} + + +@health_router.get("/health/live") +async def liveness_check( + use_case: LivenessCheckUseCase = Depends(get_liveness_check_use_case), +): + """Liveness probe that checks PostgreSQL and MinIO connectivity. + + Returns: + 200 if both connections are healthy, 503 if any is unreachable. + """ + result = await use_case.execute() + status_code = 200 if result["status"] == "healthy" else 503 + return JSONResponse(content=result, status_code=status_code) diff --git a/src/application/api/mcp_file_tools.py b/src/application/api/mcp_file_tools.py new file mode 100644 index 0000000..2650c5c --- /dev/null +++ b/src/application/api/mcp_file_tools.py @@ -0,0 +1,65 @@ +"""MCP file tools for RAGAnything. 
+ +These tools are registered with FastMCP for Claude Desktop integration. +""" + +import logging +from dataclasses import asdict + +from fastmcp import FastMCP + +from application.responses.file_response import FileContentResponse, FileInfoResponse +from dependencies import ( + get_list_files_use_case, + get_read_file_use_case, +) + +logger = logging.getLogger(__name__) + +mcp_files = FastMCP("RAGAnythingFiles") + + +@mcp_files.tool() +async def list_files( + prefix: str = "", recursive: bool = True +) -> list[FileInfoResponse]: + """List files in MinIO storage under a given prefix. + + Args: + prefix: MinIO prefix/path to filter files by (e.g. 'documents/') + recursive: Whether to list files in subdirectories (default True) + + Returns: + List of file objects with object_name, size, and last_modified + """ + use_case = get_list_files_use_case() + files = await use_case.execute(prefix=prefix, recursive=recursive) + return [FileInfoResponse(**asdict(f)) for f in files] + + +@mcp_files.tool() +async def read_file(file_path: str) -> FileContentResponse: + """Read and extract text content from a file stored in MinIO. + + Supports 91 file formats including PDF, Office documents, images, HTML, etc. + Uses Kreuzberg for document intelligence extraction. + + Args: + file_path: Path to the file in MinIO bucket (e.g. 
'documents/report.pdf') + + Returns: + Extracted text content with metadata and any detected tables + """ + use_case = get_read_file_use_case() + try: + result = await use_case.execute(file_path=file_path) + except FileNotFoundError: + raise ValueError(f"File not found: {file_path}") from None + except Exception: + logger.exception("Unexpected error reading file: %s", file_path) + raise RuntimeError("Failed to read file") from None + return FileContentResponse( + content=result.content, + metadata=result.metadata, + tables=result.tables, + ) diff --git a/src/application/api/mcp_tools.py b/src/application/api/mcp_query_tools.py similarity index 58% rename from src/application/api/mcp_tools.py rename to src/application/api/mcp_query_tools.py index 11f98c8..bc364b8 100644 --- a/src/application/api/mcp_tools.py +++ b/src/application/api/mcp_query_tools.py @@ -1,29 +1,21 @@ -"""MCP tools for RAGAnything. +"""MCP query tools for RAGAnything. These tools are registered with FastMCP for Claude Desktop integration. 
""" -import logging -from dataclasses import asdict - from fastmcp import FastMCP from application.requests.query_request import MultimodalContentItem -from application.responses.file_response import FileContentResponse, FileInfoResponse from application.responses.query_response import ChunkResponse, QueryResponse from dependencies import ( - get_list_files_use_case, get_multimodal_query_use_case, get_query_use_case, - get_read_file_use_case, ) -logger = logging.getLogger(__name__) - -mcp = FastMCP("RAGAnything") +mcp_query = FastMCP("RAGAnythingQuery") -@mcp.tool() +@mcp_query.tool() async def query_knowledge_base( working_dir: str, query: str, mode: str = "hybrid", top_k: int = 5 ) -> list[ChunkResponse]: @@ -53,7 +45,7 @@ async def query_knowledge_base( return response.data.chunks -@mcp.tool() +@mcp_query.tool() async def query_knowledge_base_multimodal( working_dir: str, query: str, @@ -88,49 +80,3 @@ async def query_knowledge_base_multimodal( mode=mode, top_k=top_k, ) - - -@mcp.tool() -async def list_files( - prefix: str = "", recursive: bool = True -) -> list[FileInfoResponse]: - """List files in MinIO storage under a given prefix. - - Args: - prefix: MinIO prefix/path to filter files by (e.g. 'documents/') - recursive: Whether to list files in subdirectories (default True) - - Returns: - List of file objects with object_name, size, and last_modified - """ - use_case = get_list_files_use_case() - files = await use_case.execute(prefix=prefix, recursive=recursive) - return [FileInfoResponse(**asdict(f)) for f in files] - - -@mcp.tool() -async def read_file(file_path: str) -> FileContentResponse: - """Read and extract text content from a file stored in MinIO. - - Supports 91 file formats including PDF, Office documents, images, HTML, etc. - Uses Kreuzberg for document intelligence extraction. - - Args: - file_path: Path to the file in MinIO bucket (e.g. 
'documents/report.pdf') - - Returns: - Extracted text content with metadata and any detected tables - """ - use_case = get_read_file_use_case() - try: - result = await use_case.execute(file_path=file_path) - except FileNotFoundError: - raise ValueError(f"File not found: {file_path}") from None - except Exception: - logger.exception("Unexpected error reading file: %s", file_path) - raise RuntimeError("Failed to read file") from None - return FileContentResponse( - content=result.content, - metadata=result.metadata, - tables=result.tables, - ) diff --git a/src/application/api/query_routes.py b/src/application/api/query_routes.py index 4dffebe..36a0a1e 100644 --- a/src/application/api/query_routes.py +++ b/src/application/api/query_routes.py @@ -13,9 +13,7 @@ query_router = APIRouter(tags=["RAG Query"]) -@query_router.post( - "/query", response_model=list[ChunkResponse], status_code=status.HTTP_200_OK -) +@query_router.post("/query", status_code=status.HTTP_200_OK) async def query_knowledge_base( request: QueryRequest, use_case: QueryUseCase = Depends(get_query_use_case), @@ -32,7 +30,6 @@ async def query_knowledge_base( @query_router.post( "/query/multimodal", - response_model=MultimodalQueryResponse, status_code=status.HTTP_200_OK, ) async def query_knowledge_base_multimodal( diff --git a/src/application/use_cases/list_folders_use_case.py b/src/application/use_cases/list_folders_use_case.py new file mode 100644 index 0000000..1468590 --- /dev/null +++ b/src/application/use_cases/list_folders_use_case.py @@ -0,0 +1,10 @@ +from domain.ports.storage_port import StoragePort + + +class ListFoldersUseCase: + def __init__(self, storage: StoragePort, bucket: str) -> None: + self.storage = storage + self.bucket = bucket + + async def execute(self) -> list[str]: + return await self.storage.list_folders(self.bucket) diff --git a/src/application/use_cases/liveness_check_use_case.py b/src/application/use_cases/liveness_check_use_case.py new file mode 100644 index 
0000000..17211f4 --- /dev/null +++ b/src/application/use_cases/liveness_check_use_case.py @@ -0,0 +1,26 @@ +from domain.ports.postgres_health_port import PostgresHealthPort +from domain.ports.storage_port import StoragePort + + +class LivenessCheckUseCase: + def __init__( + self, + storage: StoragePort, + postgres_health: PostgresHealthPort, + bucket: str, + ) -> None: + self._storage = storage + self._postgres_health = postgres_health + self._bucket = bucket + + async def execute(self) -> dict: + checks: dict[str, str] = {} + + pg_ok = await self._postgres_health.ping() + checks["postgres"] = "ok" if pg_ok else "unreachable" + + minio_ok = await self._storage.ping(self._bucket) + checks["minio"] = "ok" if minio_ok else "unreachable" + + healthy = all(v == "ok" for v in checks.values()) + return {"status": "healthy" if healthy else "degraded", "checks": checks} diff --git a/src/config.py b/src/config.py index 6b1f89a..8fb94de 100644 --- a/src/config.py +++ b/src/config.py @@ -12,9 +12,6 @@ class AppConfig(BaseSettings): ALLOWED_ORIGINS: list[str] = Field( default=["*"], description="CORS allowed origins" ) - MCP_TRANSPORT: str = Field( - default="stdio", description="MCP transport mode: stdio, sse, streamable" - ) HOST: str = Field(default="0.0.0.0", description="Server host") PORT: int = Field(default=8000, description="Server port") UVICORN_LOG_LEVEL: str = Field(default="critical", description="Uvicorn log level") diff --git a/src/dependencies.py b/src/dependencies.py index 781a8fb..87a263a 100644 --- a/src/dependencies.py +++ b/src/dependencies.py @@ -5,6 +5,8 @@ from application.use_cases.index_file_use_case import IndexFileUseCase from application.use_cases.index_folder_use_case import IndexFolderUseCase from application.use_cases.list_files_use_case import ListFilesUseCase +from application.use_cases.list_folders_use_case import ListFoldersUseCase +from application.use_cases.liveness_check_use_case import LivenessCheckUseCase from 
application.use_cases.multimodal_query_use_case import MultimodalQueryUseCase from application.use_cases.query_use_case import QueryUseCase from application.use_cases.read_file_use_case import ReadFileUseCase @@ -17,6 +19,7 @@ RAGConfig, ) from domain.ports.bm25_engine import BM25EnginePort +from infrastructure.database.asyncpg_health_adapter import AsyncpgHealthAdapter from infrastructure.document_reader.kreuzberg_adapter import KreuzbergAdapter from infrastructure.rag.lightrag_adapter import LightRAGAdapter from infrastructure.rag.pg_textsearch_adapter import PostgresBM25Adapter @@ -51,6 +54,7 @@ bm25_adapter = None kreuzberg_adapter = KreuzbergAdapter() +postgres_health_adapter = AsyncpgHealthAdapter(db_config) def get_index_file_use_case() -> IndexFileUseCase: @@ -81,6 +85,10 @@ def get_list_files_use_case() -> ListFilesUseCase: return ListFilesUseCase(storage=minio_adapter, bucket=minio_config.MINIO_BUCKET) +def get_list_folders_use_case() -> ListFoldersUseCase: + return ListFoldersUseCase(storage=minio_adapter, bucket=minio_config.MINIO_BUCKET) + + def get_read_file_use_case() -> ReadFileUseCase: return ReadFileUseCase( storage=minio_adapter, @@ -88,3 +96,11 @@ def get_read_file_use_case() -> ReadFileUseCase: bucket=minio_config.MINIO_BUCKET, output_dir=app_config.OUTPUT_DIR, ) + + +def get_liveness_check_use_case() -> LivenessCheckUseCase: + return LivenessCheckUseCase( + storage=minio_adapter, + postgres_health=postgres_health_adapter, + bucket=minio_config.MINIO_BUCKET, + ) diff --git a/src/domain/ports/postgres_health_port.py b/src/domain/ports/postgres_health_port.py new file mode 100644 index 0000000..ac41eae --- /dev/null +++ b/src/domain/ports/postgres_health_port.py @@ -0,0 +1,14 @@ +from abc import ABC, abstractmethod + + +class PostgresHealthPort(ABC): + """Abstract port for PostgreSQL connectivity checks.""" + + @abstractmethod + async def ping(self) -> bool: + """Check if PostgreSQL is reachable. 
+ + Returns: + True if the database connection succeeds, False otherwise. + """ + pass diff --git a/src/domain/ports/storage_port.py b/src/domain/ports/storage_port.py index ba50e62..4f5f733 100644 --- a/src/domain/ports/storage_port.py +++ b/src/domain/ports/storage_port.py @@ -62,3 +62,29 @@ async def list_files_metadata( A list of FileInfo objects with object_name, size, and last_modified. """ pass + + @abstractmethod + async def list_folders(self, bucket: str) -> list[str]: + """ + List top-level folder prefixes in the bucket. + + Args: + bucket: The bucket name to list folders from. + + Returns: + A list of folder prefix strings (e.g., ['docs/', 'photos/']). + """ + pass + + @abstractmethod + async def ping(self, bucket: str) -> bool: + """ + Check connectivity to the storage backend. + + Args: + bucket: The bucket name to check. + + Returns: + True if the backend is reachable, False otherwise. + """ + pass diff --git a/src/infrastructure/database/asyncpg_health_adapter.py b/src/infrastructure/database/asyncpg_health_adapter.py new file mode 100644 index 0000000..ba9ff2d --- /dev/null +++ b/src/infrastructure/database/asyncpg_health_adapter.py @@ -0,0 +1,26 @@ +import logging + +import asyncpg + +from config import DatabaseConfig + +logger = logging.getLogger(__name__) + + +class AsyncpgHealthAdapter: + """PostgreSQL health check using asyncpg direct connection.""" + + def __init__(self, db_config: DatabaseConfig) -> None: + self._db_url = db_config.DATABASE_URL.replace("+asyncpg", "") + + async def ping(self) -> bool: + try: + conn = await asyncpg.connect(self._db_url) + try: + await conn.fetchval("SELECT 1") + return True + finally: + await conn.close() + except Exception: + logger.warning("PostgreSQL health check failed", exc_info=True) + return False diff --git a/src/infrastructure/document_reader/kreuzberg_adapter.py b/src/infrastructure/document_reader/kreuzberg_adapter.py index 79e42da..65b93e9 100644 --- 
a/src/infrastructure/document_reader/kreuzberg_adapter.py +++ b/src/infrastructure/document_reader/kreuzberg_adapter.py @@ -1,4 +1,16 @@ -from kreuzberg import ParsingError, ValidationError, extract_file +"""Kreuzberg adapter for document extraction.""" + +import logging + +from kreuzberg import ( + ExtractionConfig, + OcrConfig, + OutputFormat, + ParsingError, + PdfConfig, + ValidationError, + extract_file, +) from domain.ports.document_reader_port import ( DocumentContent, @@ -7,11 +19,22 @@ TableData, ) +logger = logging.getLogger(__name__) + +_KREUZBERG_CONFIG = ExtractionConfig( + use_cache=True, + output_format=OutputFormat.MARKDOWN, + enable_quality_processing=True, + pdf_options=PdfConfig(extract_images=True, extract_metadata=True), + ocr=OcrConfig(backend="tesseract", language="eng+fra"), +) + class KreuzbergAdapter(DocumentReaderPort): async def extract_content(self, file_path: str) -> DocumentContent: try: - result = await extract_file(file_path) + result = await extract_file(file_path, config=_KREUZBERG_CONFIG) + logger.debug("Full extraction result for %s: %s", file_path, result) except ParsingError as e: raise ValueError(f"Unsupported file format: {e}") from e except ValidationError as e: diff --git a/src/infrastructure/storage/minio_adapter.py b/src/infrastructure/storage/minio_adapter.py index db9dafe..3eb0b9a 100644 --- a/src/infrastructure/storage/minio_adapter.py +++ b/src/infrastructure/storage/minio_adapter.py @@ -80,3 +80,17 @@ async def list_files_metadata( for obj in objects if not obj.is_dir ] + + async def list_folders(self, bucket: str) -> list[str]: + objects = await self._list_minio_objects(bucket, prefix="", recursive=False) + return [obj.object_name for obj in objects if obj.is_dir] + + async def ping(self, bucket: str) -> bool: + try: + loop = asyncio.get_running_loop() + return await loop.run_in_executor( + None, lambda: self.client.bucket_exists(bucket) + ) + except Exception: + logger.warning("MinIO health check failed",
exc_info=True) + return False diff --git a/src/main.py b/src/main.py index 992372a..a4efa80 100644 --- a/src/main.py +++ b/src/main.py @@ -2,7 +2,6 @@ import logging import logging.config -import threading from contextlib import asynccontextmanager from pathlib import Path @@ -15,7 +14,8 @@ from application.api.file_routes import file_router from application.api.health_routes import health_router from application.api.indexing_routes import indexing_router -from application.api.mcp_tools import mcp +from application.api.mcp_file_tools import mcp_files +from application.api.mcp_query_tools import mcp_query from application.api.query_routes import query_router from dependencies import app_config, bm25_adapter @@ -53,8 +53,6 @@ logger = logging.getLogger(__name__) -MCP_PATH = "/mcp" - def _run_alembic_upgrade() -> None: """Run Alembic migrations to head.""" @@ -78,26 +76,25 @@ async def db_lifespan(_app: FastAPI): logger.info("Application shutdown complete") -# Create FastAPI app with appropriate lifespan -if app_config.MCP_TRANSPORT == "streamable": - mcp_app = mcp.http_app(path="/") +mcp_query_app = mcp_query.http_app(path="/") +mcp_files_app = mcp_files.http_app(path="/") - @asynccontextmanager - async def combined_lifespan(app: FastAPI): - """Combine database lifecycle with MCP lifecycle for streamable transport.""" - async with db_lifespan(app), mcp_app.lifespan(app): - yield - app = FastAPI( - title="RAG Anything API", - lifespan=combined_lifespan, - ) - app.mount(MCP_PATH, mcp_app) -else: - app = FastAPI( - title="RAG Anything API", - lifespan=db_lifespan, - ) +@asynccontextmanager +async def combined_lifespan(app: FastAPI): + """Combine database lifecycle with both MCP lifespans.""" + async with ( + db_lifespan(app), + mcp_query_app.lifespan(app), + mcp_files_app.lifespan(app), + ): + yield + + +app = FastAPI( + title="RAG Anything API", + lifespan=combined_lifespan, +) app.add_middleware( CORSMiddleware, @@ -113,6 +110,9 @@ async def combined_lifespan(app: 
FastAPI): app.include_router(query_router, prefix=REST_PATH) app.include_router(file_router, prefix=REST_PATH) +app.mount("/rag/mcp", mcp_query_app) +app.mount("/files/mcp", mcp_files_app) + def run_fastapi(): """Run FastAPI server with uvicorn.""" @@ -132,9 +132,4 @@ def run_fastapi(): if __name__ == "__main__": - if app_config.MCP_TRANSPORT == "stdio": - api_thread = threading.Thread(target=run_fastapi, daemon=True) - api_thread.start() - mcp.run(transport="stdio") - else: - run_fastapi() + run_fastapi() diff --git a/tests/fixtures/external.py b/tests/fixtures/external.py index 9539656..8a3dbb5 100644 --- a/tests/fixtures/external.py +++ b/tests/fixtures/external.py @@ -67,6 +67,7 @@ def mock_storage() -> AsyncMock: last_modified="2026-01-02 00:00:00+00:00", ), ] + mock.list_folders.return_value = ["project/", "documents/"] return mock diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py new file mode 100644 index 0000000..631056b --- /dev/null +++ b/tests/unit/test_config.py @@ -0,0 +1,106 @@ +"""Tests for config.py — property getters with fallback logic. + +These properties have conditional logic (fallbacks, warnings) that +is not exercised by simply instantiating the config. + +NOTE: The project loads .env via dotenv, so tests that need to verify +fallback behavior must explicitly override or remove env vars. 
+""" + +from unittest.mock import patch + +from config import DatabaseConfig, LLMConfig + + +class TestLLMConfigApiKey: + """Tests for LLMConfig.api_key property with fallback chain.""" + + def test_returns_open_router_api_key_when_set(self) -> None: + """Should return OPEN_ROUTER_API_KEY when it is explicitly set.""" + config = LLMConfig(OPEN_ROUTER_API_KEY="sk-or-key-123") + assert config.api_key == "sk-or-key-123" + + def test_falls_back_to_openrouter_api_key(self) -> None: + """Should fall back to OPENROUTER_API_KEY when OPEN_ROUTER_API_KEY is None.""" + config = LLMConfig( + OPEN_ROUTER_API_KEY=None, OPENROUTER_API_KEY="sk-fallback-key" + ) + assert config.api_key == "sk-fallback-key" + + def test_prefers_open_router_api_key_over_fallback(self) -> None: + """Should prefer OPEN_ROUTER_API_KEY over OPENROUTER_API_KEY.""" + config = LLMConfig( + OPEN_ROUTER_API_KEY="sk-primary", + OPENROUTER_API_KEY="sk-secondary", + ) + assert config.api_key == "sk-primary" + + def test_returns_empty_string_when_both_none(self) -> None: + """Should return empty string and print warning when both keys are None.""" + config = LLMConfig(OPEN_ROUTER_API_KEY=None, OPENROUTER_API_KEY=None) + with patch("builtins.print") as mock_print: + result = config.api_key + mock_print.assert_called_once_with( + "WARNING: OPENROUTER_API_KEY not set. API calls will fail." 
+ ) + assert result == "" + + +class TestLLMConfigApiBaseUrl: + """Tests for LLMConfig.api_base_url property with fallback.""" + + def test_returns_base_url_when_set(self) -> None: + """Should return BASE_URL when it is set.""" + config = LLMConfig(BASE_URL="https://custom.api.com/v1") + assert config.api_base_url == "https://custom.api.com/v1" + + def test_falls_back_to_open_router_api_url(self) -> None: + """Should fall back to OPEN_ROUTER_API_URL when BASE_URL is None.""" + config = LLMConfig(BASE_URL=None) + assert config.api_base_url == "https://openrouter.ai/api/v1" + + def test_prefers_base_url_over_default(self) -> None: + """Should prefer BASE_URL over OPEN_ROUTER_API_URL.""" + config = LLMConfig( + BASE_URL="https://custom.api.com/v1", + OPEN_ROUTER_API_URL="https://other.api.com/v1", + ) + assert config.api_base_url == "https://custom.api.com/v1" + + +class TestDatabaseConfigURL: + """Tests for DatabaseConfig.DATABASE_URL property.""" + + def test_constructs_valid_postgres_url(self) -> None: + """Should construct a proper asyncpg PostgreSQL URL.""" + config = DatabaseConfig( + POSTGRES_USER="myuser", + POSTGRES_PASSWORD="secret", + POSTGRES_HOST="db.example.com", + POSTGRES_PORT="5433", + POSTGRES_DATABASE="mydb", + ) + expected = "postgresql+asyncpg://myuser:secret@db.example.com:5433/mydb" + assert expected == config.DATABASE_URL + + def test_uses_explicit_default_values(self) -> None: + """Should construct URL with the default field values from the model.""" + config = DatabaseConfig( + POSTGRES_USER="raganything", + POSTGRES_PASSWORD="raganything", + POSTGRES_HOST="localhost", + POSTGRES_PORT="5432", + POSTGRES_DATABASE="raganything", + ) + expected = ( + "postgresql+asyncpg://raganything:raganything@localhost:5432/raganything" + ) + assert expected == config.DATABASE_URL + + def test_handles_special_characters_in_password(self) -> None: + """Should include special characters verbatim in the URL.""" + config = DatabaseConfig( + POSTGRES_USER="admin", 
class TestListFoldersRoute:
    """GET /api/v1/files/folders — listing top-level folder prefixes.

    Fix: the overrides installed into ``app.dependency_overrides`` were never
    removed, so the stubbed use case leaked into any test collected afterwards.
    An autouse fixture now clears them after every test (matching the pattern
    used by the liveness tests).
    """

    @pytest.fixture(autouse=True)
    def _clear_dependency_overrides(self):
        # Run the test first, then always reset the app's DI overrides.
        yield
        app.dependency_overrides.clear()

    @pytest.fixture
    def mock_list_folders_use_case(self) -> AsyncMock:
        mock = AsyncMock(spec=ListFoldersUseCase)
        mock.execute.return_value = ["docs/", "photos/"]
        return mock

    async def _get_folders(self, use_case: AsyncMock) -> httpx.Response:
        """Install *use_case* as the DI override and issue the GET request."""
        app.dependency_overrides[get_list_folders_use_case] = lambda: use_case
        async with httpx.AsyncClient(
            transport=ASGITransport(app=app), base_url="http://test"
        ) as client:
            return await client.get("/api/v1/files/folders")

    async def test_list_folders_returns_200(
        self, mock_list_folders_use_case: AsyncMock
    ) -> None:
        response = await self._get_folders(mock_list_folders_use_case)
        assert response.status_code == 200

    async def test_list_folders_returns_folder_list(
        self, mock_list_folders_use_case: AsyncMock
    ) -> None:
        response = await self._get_folders(mock_list_folders_use_case)
        assert response.json() == ["docs/", "photos/"]

    async def test_list_folders_empty_result(
        self, mock_list_folders_use_case: AsyncMock
    ) -> None:
        mock_list_folders_use_case.execute.return_value = []
        response = await self._get_folders(mock_list_folders_use_case)
        assert response.json() == []

    async def test_list_folders_returns_404_for_missing_bucket(
        self, mock_list_folders_use_case: AsyncMock
    ) -> None:
        # The route translates FileNotFoundError into an HTTP 404.
        mock_list_folders_use_case.execute.side_effect = FileNotFoundError(
            "Bucket not found"
        )
        response = await self._get_folders(mock_list_folders_use_case)
        assert response.status_code == 404
class TestLivenessRoute:
    """GET /api/v1/health/live — 200 when all checks pass, 503 otherwise."""

    @staticmethod
    async def _probe(status: str, checks: dict) -> httpx.Response:
        """Stub the liveness use case with a fixed payload and hit the route."""
        use_case = AsyncMock(spec=LivenessCheckUseCase)
        use_case.execute.return_value = {"status": status, "checks": checks}
        app.dependency_overrides[get_liveness_check_use_case] = lambda: use_case
        async with httpx.AsyncClient(
            transport=ASGITransport(app=app), base_url="http://test"
        ) as client:
            return await client.get("/api/v1/health/live")

    async def test_returns_200_when_all_healthy(self) -> None:
        response = await self._probe("healthy", {"postgres": "ok", "minio": "ok"})
        assert response.status_code == 200
        body = response.json()
        assert body["status"] == "healthy"
        assert body["checks"]["postgres"] == "ok"
        assert body["checks"]["minio"] == "ok"

    async def test_returns_503_when_postgres_down(self) -> None:
        response = await self._probe(
            "degraded", {"postgres": "unreachable", "minio": "ok"}
        )
        assert response.status_code == 503
        body = response.json()
        assert body["status"] == "degraded"
        assert body["checks"]["postgres"] == "unreachable"

    async def test_returns_503_when_minio_down(self) -> None:
        response = await self._probe(
            "degraded", {"postgres": "ok", "minio": "unreachable"}
        )
        assert response.status_code == 503
        assert response.json()["checks"]["minio"] == "unreachable"

    async def test_returns_503_when_both_down(self) -> None:
        response = await self._probe(
            "degraded", {"postgres": "unreachable", "minio": "unreachable"}
        )
        assert response.status_code == 503
        body = response.json()
        assert body["checks"]["postgres"] == "unreachable"
        assert body["checks"]["minio"] == "unreachable"
class TestLivenessCheckUseCase:
    """LivenessCheckUseCase.execute — aggregation of postgres/minio ping results."""

    @staticmethod
    def _build(*, minio_ok: bool, pg_ok: bool):
        """Return (use_case, storage_mock, pg_mock) with the given ping outcomes."""
        storage = AsyncMock()
        storage.ping.return_value = minio_ok
        pg = AsyncMock()
        pg.ping.return_value = pg_ok
        use_case = LivenessCheckUseCase(
            storage=storage,
            postgres_health=pg,
            bucket="test-bucket",
        )
        return use_case, storage, pg

    async def test_returns_healthy_when_both_ok(self) -> None:
        use_case, storage, pg = self._build(minio_ok=True, pg_ok=True)

        result = await use_case.execute()

        assert result == {
            "status": "healthy",
            "checks": {"postgres": "ok", "minio": "ok"},
        }
        storage.ping.assert_awaited_once_with("test-bucket")
        pg.ping.assert_awaited_once()

    async def test_returns_degraded_when_postgres_down(self) -> None:
        use_case, _, _ = self._build(minio_ok=True, pg_ok=False)

        result = await use_case.execute()

        assert result["status"] == "degraded"
        assert result["checks"]["postgres"] == "unreachable"

    async def test_returns_degraded_when_minio_down(self) -> None:
        use_case, _, _ = self._build(minio_ok=False, pg_ok=True)

        result = await use_case.execute()

        assert result["status"] == "degraded"
        assert result["checks"]["minio"] == "unreachable"
class TestMinioAdapterPing:
    """MinioAdapter.ping — True when bucket_exists succeeds, False on any error."""

    @patch("infrastructure.storage.minio_adapter.Minio")
    async def test_returns_true_when_bucket_exists(
        self, mock_minio_cls: MagicMock
    ) -> None:
        from infrastructure.storage.minio_adapter import MinioAdapter

        stub_client = MagicMock()
        stub_client.bucket_exists.return_value = True
        mock_minio_cls.return_value = stub_client

        target = MinioAdapter("localhost:9000", "access", "secret")

        assert await target.ping("test-bucket") is True
        stub_client.bucket_exists.assert_called_once_with("test-bucket")

    @patch("infrastructure.storage.minio_adapter.Minio")
    async def test_returns_false_on_exception(self, mock_minio_cls: MagicMock) -> None:
        from infrastructure.storage.minio_adapter import MinioAdapter

        stub_client = MagicMock()
        stub_client.bucket_exists.side_effect = Exception("connection refused")
        mock_minio_cls.return_value = stub_client

        target = MinioAdapter("localhost:9000", "access", "secret")

        assert await target.ping("test-bucket") is False
@pytest.fixture
def mock_llm_config() -> MagicMock:
    """Minimal LLM configuration double exposing the attributes the adapter reads."""
    cfg = MagicMock()
    cfg.CHAT_MODEL = "test-model"
    cfg.VISION_MODEL = "test-vision"
    cfg.EMBEDDING_MODEL = "test-embed"
    cfg.EMBEDDING_DIM = 1536
    cfg.MAX_TOKEN_SIZE = 8192
    cfg.api_key = "test-key"
    cfg.api_base_url = "http://test"
    return cfg


@pytest.fixture
def mock_rag_config() -> MagicMock:
    """RAG configuration double with all modality processing enabled."""
    cfg = MagicMock()
    cfg.ENABLE_IMAGE_PROCESSING = True
    cfg.ENABLE_TABLE_PROCESSING = True
    cfg.ENABLE_EQUATION_PROCESSING = True
    cfg.MAX_CONCURRENT_FILES = 1
    cfg.COSINE_THRESHOLD = 0.2
    cfg.RAG_STORAGE_TYPE = "postgres"
    return cfg


@pytest.fixture
def adapter(mock_llm_config: MagicMock, mock_rag_config: MagicMock) -> LightRAGAdapter:
    """LightRAGAdapter wired to the configuration doubles above."""
    return LightRAGAdapter(llm_config=mock_llm_config, rag_config=mock_rag_config)
class TestQueryMultimodal:
    """LightRAGAdapter.query_multimodal — delegation to aquery_with_multimodal."""

    @staticmethod
    def _prime_rag(
        target: LightRAGAdapter, working_dir: str, answer: str
    ) -> AsyncMock:
        """Seed the per-directory rag registry so initialization checks pass."""
        engine = AsyncMock()
        engine._ensure_lightrag_initialized = AsyncMock()
        engine.aquery_with_multimodal.return_value = answer
        target.rag[working_dir] = engine
        return engine

    async def test_returns_multimodal_result(self, adapter: LightRAGAdapter) -> None:
        """Forwards the call and returns the engine's string result."""
        engine = self._prime_rag(
            adapter, "/tmp/project", "Image shows a bar chart with sales data."
        )
        items = [MultimodalContentItem(type="image", img_path="/tmp/chart.png")]

        answer = await adapter.query_multimodal(
            query="What does this image show?",
            multimodal_content=items,
            mode="hybrid",
            top_k=5,
            working_dir="/tmp/project",
        )

        assert answer == "Image shows a bar chart with sales data."
        engine.aquery_with_multimodal.assert_called_once_with(
            query="What does this image show?",
            multimodal_content=[it.model_dump(exclude_none=True) for it in items],
            mode="hybrid",
            top_k=5,
        )

    async def test_raises_runtime_error_when_not_initialized(
        self, adapter: LightRAGAdapter
    ) -> None:
        """An unknown working_dir raises a descriptive RuntimeError."""
        items = [MultimodalContentItem(type="image", img_path="/tmp/img.png")]

        with pytest.raises(
            RuntimeError,
            match="RAG engine not initialized.*Call init_project",
        ):
            await adapter.query_multimodal(
                query="test",
                multimodal_content=items,
                working_dir="/tmp/uninitialized",
            )

    async def test_passes_mode_and_top_k(self, adapter: LightRAGAdapter) -> None:
        """mode and top_k reach aquery_with_multimodal unchanged."""
        engine = self._prime_rag(adapter, "/tmp/project", "result")
        items = [MultimodalContentItem(type="table", table_data="A,B\n1,2")]

        await adapter.query_multimodal(
            query="Analyze",
            multimodal_content=items,
            mode="local",
            top_k=20,
            working_dir="/tmp/project",
        )

        kwargs = engine.aquery_with_multimodal.call_args[1]
        assert kwargs["mode"] == "local"
        assert kwargs["top_k"] == 20

    async def test_serializes_multimodal_items_excluding_none(
        self, adapter: LightRAGAdapter
    ) -> None:
        """Items are serialized with exclude_none=True — no null fields leak through."""
        engine = self._prime_rag(adapter, "/tmp/project", "result")
        items = [
            MultimodalContentItem(
                type="equation",
                latex="E = mc^2",
                equation_caption="Mass-energy equivalence",
            ),
        ]

        await adapter.query_multimodal(
            query="Explain",
            multimodal_content=items,
            working_dir="/tmp/project",
        )

        sent = engine.aquery_with_multimodal.call_args[1]["multimodal_content"]
        # Unset optional fields must be absent from the serialized payload.
        assert "img_path" not in sent[0]
        assert "image_data" not in sent[0]
        assert sent[0]["type"] == "equation"
        assert sent[0]["latex"] == "E = mc^2"
class TestBuildVisionMessages:
    """_build_vision_messages — OpenAI-style chat message assembly."""

    def test_builds_messages_with_image_data_string(self) -> None:
        """Raw base64 image_data is wrapped in a data: URI."""
        out = _build_vision_messages(
            system_prompt="You are a vision assistant.",
            history_messages=[],
            prompt="What is this?",
            image_data="iVBORw0KGgo=",
        )

        assert len(out) == 2  # system message + user message
        assert out[0]["role"] == "system"
        assert out[0]["content"] == "You are a vision assistant."
        user = out[1]
        assert user["role"] == "user"
        parts = user["content"]
        assert len(parts) == 2  # one text part, one image part
        assert parts[0]["type"] == "text"
        assert parts[1]["type"] == "image_url"
        assert parts[1]["image_url"]["url"].startswith("data:image/jpeg;base64,")

    def test_builds_messages_with_image_url(self) -> None:
        """http(s) URLs are passed through untouched — no base64 prefix."""
        out = _build_vision_messages(
            system_prompt=None,
            history_messages=[],
            prompt="Describe",
            image_data="https://example.com/image.png",
        )

        user = out[0]
        assert user["content"][1]["image_url"]["url"] == "https://example.com/image.png"

    def test_builds_messages_with_multiple_images(self) -> None:
        """A list of images yields one image part per entry."""
        out = _build_vision_messages(
            system_prompt=None,
            history_messages=[],
            prompt="Compare these",
            image_data=["https://img1.png", "https://img2.png"],
        )

        user = out[0]
        assert len(user["content"]) == 3  # 1 text + 2 images
        assert user["content"][1]["image_url"]["url"] == "https://img1.png"
        assert user["content"][2]["image_url"]["url"] == "https://img2.png"

    def test_builds_messages_without_system_prompt(self) -> None:
        """No system message is emitted when system_prompt is None."""
        out = _build_vision_messages(
            system_prompt=None,
            history_messages=[],
            prompt="Hello",
            image_data=None,
        )

        assert len(out) == 1
        assert out[0]["role"] == "user"

    def test_includes_history_messages(self) -> None:
        """History messages are inserted between system and the new user turn."""
        history = [
            {"role": "assistant", "content": "Previous answer."},
            {"role": "user", "content": "Follow-up question."},
        ]

        out = _build_vision_messages(
            system_prompt="Be helpful.",
            history_messages=history,
            prompt="Next question",
            image_data=None,
        )

        assert len(out) == 4  # system, history x2, new user turn
        assert out[1]["role"] == "assistant"
        assert out[2]["role"] == "user"
        assert out[3]["role"] == "user"

    def test_builds_text_only_when_no_image_data(self) -> None:
        """With image_data=None the user message holds a single text part."""
        out = _build_vision_messages(
            system_prompt=None,
            history_messages=[],
            prompt="Just text",
            image_data=None,
        )

        user = out[0]
        assert len(user["content"]) == 1
        assert user["content"][0] == {"type": "text", "text": "Just text"}
class TestListFoldersUseCase:
    """ListFoldersUseCase — thin delegation to StoragePort.list_folders."""

    @staticmethod
    def _make(storage: AsyncMock) -> ListFoldersUseCase:
        # All tests target the same bucket; centralize construction.
        return ListFoldersUseCase(storage=storage, bucket="test-bucket")

    async def test_execute_calls_storage_list_folders(
        self, mock_storage: AsyncMock
    ) -> None:
        mock_storage.list_folders.return_value = ["docs/", "photos/"]

        await self._make(mock_storage).execute()

        mock_storage.list_folders.assert_called_once_with("test-bucket")

    async def test_execute_returns_folder_prefixes(
        self, mock_storage: AsyncMock
    ) -> None:
        folders = ["docs/", "photos/", "reports/"]
        mock_storage.list_folders.return_value = folders

        assert await self._make(mock_storage).execute() == folders

    async def test_execute_empty_result(self, mock_storage: AsyncMock) -> None:
        mock_storage.list_folders.return_value = []

        assert await self._make(mock_storage).execute() == []
class TestListFiles:
    """list_files MCP tool — delegation to ListFilesUseCase."""

    @pytest.fixture
    def mock_files(self) -> list[FileInfo]:
        # Two representative objects under a shared prefix.
        return [
            FileInfo(
                object_name="project/doc1.pdf",
                size=1024,
                last_modified="2026-01-01 00:00:00+00:00",
            ),
            FileInfo(
                object_name="project/doc2.pdf",
                size=2048,
                last_modified="2026-01-02 00:00:00+00:00",
            ),
        ]

    @staticmethod
    def _patched(use_case: AsyncMock):
        """Patch the dependency provider so the tool resolves our stub."""
        return patch(
            "application.api.mcp_file_tools.get_list_files_use_case",
            return_value=use_case,
        )

    async def test_returns_file_info_list(self, mock_files: list[FileInfo]) -> None:
        use_case = AsyncMock()
        use_case.execute.return_value = mock_files

        with self._patched(use_case):
            listed = await list_files(prefix="project/")

        assert isinstance(listed, list)
        assert len(listed) == 2
        assert listed[0].object_name == "project/doc1.pdf"
        assert listed[0].size == 1024

    async def test_uses_default_prefix_and_recursive(
        self, mock_files: list[FileInfo]
    ) -> None:
        use_case = AsyncMock()
        use_case.execute.return_value = mock_files

        with self._patched(use_case):
            await list_files()

        use_case.execute.assert_called_once_with(prefix="", recursive=True)

    async def test_calls_use_case_with_custom_prefix(
        self, mock_files: list[FileInfo]
    ) -> None:
        use_case = AsyncMock()
        use_case.execute.return_value = mock_files

        with self._patched(use_case):
            await list_files(prefix="reports/", recursive=False)

        use_case.execute.assert_called_once_with(prefix="reports/", recursive=False)

    async def test_returns_empty_list_when_no_files(self) -> None:
        use_case = AsyncMock()
        use_case.execute.return_value = []

        with self._patched(use_case):
            assert await list_files(prefix="nonexistent/") == []
class TestReadFile:
    """read_file MCP tool — content extraction and error translation."""

    @pytest.fixture
    def mock_document_content(self) -> DocumentContent:
        return DocumentContent(
            content="Extracted text from the document.",
            metadata=DocumentMetadata(format_type="pdf", mime_type="application/pdf"),
            tables=[],
        )

    @staticmethod
    def _patched(use_case: AsyncMock):
        """Patch the dependency provider so the tool resolves our stub."""
        return patch(
            "application.api.mcp_file_tools.get_read_file_use_case",
            return_value=use_case,
        )

    async def test_returns_file_content_response(
        self, mock_document_content: DocumentContent
    ) -> None:
        use_case = AsyncMock()
        use_case.execute.return_value = mock_document_content

        with self._patched(use_case):
            result = await read_file(file_path="documents/report.pdf")

        assert result.content == "Extracted text from the document."
        assert result.metadata.mime_type == "application/pdf"

    async def test_raises_value_error_for_file_not_found(self) -> None:
        # FileNotFoundError from the use case surfaces as ValueError for MCP clients.
        use_case = AsyncMock()
        use_case.execute.side_effect = FileNotFoundError

        with (
            self._patched(use_case),
            pytest.raises(ValueError, match="File not found: missing.pdf"),
        ):
            await read_file(file_path="missing.pdf")

    async def test_raises_runtime_error_for_generic_failure(self) -> None:
        # Any other exception is wrapped in RuntimeError.
        use_case = AsyncMock()
        use_case.execute.side_effect = Exception("Disk full")

        with (
            self._patched(use_case),
            pytest.raises(RuntimeError, match="Failed to read file"),
        ):
            await read_file(file_path="documents/broken.pdf")

    async def test_includes_tables_in_response(self) -> None:
        from domain.ports.document_reader_port import TableData

        use_case = AsyncMock()
        use_case.execute.return_value = DocumentContent(
            content="Report with table",
            metadata=DocumentMetadata(format_type="pdf", mime_type="application/pdf"),
            tables=[TableData(markdown="| A | B |\n|---|---|")],
        )

        with self._patched(use_case):
            result = await read_file(file_path="docs/table.pdf")

        assert len(result.tables) == 1
        assert result.tables[0].markdown == "| A | B |\n|---|---|"
class TestQueryKnowledgeBase:
    """query_knowledge_base MCP tool — result mapping and parameter forwarding."""

    @pytest.fixture
    def mock_query_result(self) -> dict:
        return {
            "status": "success",
            "message": "",
            "data": {
                "entities": [],
                "relationships": [],
                "chunks": [
                    {
                        "reference_id": "1",
                        "content": "Relevant chunk content",
                        "file_path": "/docs/report.pdf",
                    },
                ],
                "references": [],
            },
        }

    @staticmethod
    def _patched(use_case: AsyncMock):
        """Patch the dependency provider so the tool resolves our stub."""
        return patch(
            "application.api.mcp_query_tools.get_query_use_case",
            return_value=use_case,
        )

    @classmethod
    async def _run(cls, result: dict, **tool_kwargs):
        """Execute the tool against a stubbed use case; return (chunks, stub)."""
        use_case = AsyncMock()
        use_case.execute.return_value = result
        with cls._patched(use_case):
            chunks = await query_knowledge_base(**tool_kwargs)
        return chunks, use_case

    async def test_returns_chunks_from_use_case(self, mock_query_result: dict) -> None:
        chunks, _ = await self._run(
            mock_query_result,
            working_dir="/tmp/rag/project_1",
            query="What is the summary?",
        )

        assert isinstance(chunks, list)
        assert len(chunks) == 1
        assert isinstance(chunks[0], ChunkResponse)
        assert chunks[0].content == "Relevant chunk content"

    async def test_calls_use_case_with_defaults(self, mock_query_result: dict) -> None:
        _, use_case = await self._run(
            mock_query_result,
            working_dir="/tmp/rag/test",
            query="test query",
        )

        use_case.execute.assert_called_once_with(
            working_dir="/tmp/rag/test",
            query="test query",
            mode="hybrid",
            top_k=5,
        )

    async def test_calls_use_case_with_custom_mode_and_top_k(
        self, mock_query_result: dict
    ) -> None:
        _, use_case = await self._run(
            mock_query_result,
            working_dir="/tmp/rag/project_42",
            query="What are the findings?",
            mode="local",
            top_k=20,
        )

        use_case.execute.assert_called_once_with(
            working_dir="/tmp/rag/project_42",
            query="What are the findings?",
            mode="local",
            top_k=20,
        )

    async def test_handles_naive_mode(self, mock_query_result: dict) -> None:
        chunks, use_case = await self._run(
            mock_query_result,
            working_dir="/tmp/rag/test",
            query="search",
            mode="naive",
        )

        assert isinstance(chunks, list)
        use_case.execute.assert_called_once_with(
            working_dir="/tmp/rag/test",
            query="search",
            mode="naive",
            top_k=5,
        )

    async def test_handles_global_mode(self, mock_query_result: dict) -> None:
        _, use_case = await self._run(
            mock_query_result,
            working_dir="/tmp/rag/test",
            query="search",
            mode="global",
            top_k=10,
        )

        use_case.execute.assert_called_once_with(
            working_dir="/tmp/rag/test",
            query="search",
            mode="global",
            top_k=10,
        )

    async def test_handles_mix_mode(self, mock_query_result: dict) -> None:
        _, use_case = await self._run(
            mock_query_result,
            working_dir="/tmp/rag/test",
            query="search",
            mode="mix",
            top_k=15,
        )

        use_case.execute.assert_called_once_with(
            working_dir="/tmp/rag/test",
            query="search",
            mode="mix",
            top_k=15,
        )

    async def test_handles_hybrid_plus_mode(self, mock_query_result: dict) -> None:
        _, use_case = await self._run(
            mock_query_result,
            working_dir="/tmp/rag/test",
            query="search",
            mode="hybrid+",
        )

        use_case.execute.assert_called_once_with(
            working_dir="/tmp/rag/test",
            query="search",
            mode="hybrid+",
            top_k=5,
        )

    async def test_returns_empty_chunks_when_no_results(self) -> None:
        empty_result = {
            "status": "success",
            "message": "",
            "data": {
                "entities": [],
                "relationships": [],
                "chunks": [],
                "references": [],
            },
        }

        chunks, _ = await self._run(
            empty_result,
            working_dir="/tmp/rag/empty",
            query="nothing matches",
        )

        assert chunks == []

    async def test_propagates_use_case_error(self) -> None:
        # Exceptions raised by the use case must not be swallowed by the tool.
        use_case = AsyncMock()
        use_case.execute.side_effect = RuntimeError("RAG engine failure")

        with (
            self._patched(use_case),
            pytest.raises(RuntimeError, match="RAG engine failure"),
        ):
            await query_knowledge_base(
                working_dir="/tmp/rag/test",
                query="will fail",
            )
class TestQueryKnowledgeBaseMultimodal:
    """query_knowledge_base_multimodal MCP tool — delegation and error paths."""

    @pytest.fixture
    def multimodal_content(self) -> list[MultimodalContentItem]:
        return [
            MultimodalContentItem(type="image", img_path="/tmp/images/chart.png"),
        ]

    @staticmethod
    def _patched(use_case: AsyncMock):
        """Patch the dependency provider so the tool resolves our stub."""
        return patch(
            "application.api.mcp_query_tools.get_multimodal_query_use_case",
            return_value=use_case,
        )

    async def test_returns_result_from_use_case(
        self, multimodal_content: list[MultimodalContentItem]
    ) -> None:
        use_case = AsyncMock()
        use_case.execute.return_value = {
            "status": "success",
            "data": "The chart shows increasing revenue.",
        }

        with self._patched(use_case):
            result = await query_knowledge_base_multimodal(
                working_dir="/tmp/rag/project_1",
                query="What does this image show?",
                multimodal_content=multimodal_content,
            )

        assert result["status"] == "success"
        assert result["data"] == "The chart shows increasing revenue."

    async def test_calls_use_case_with_defaults(
        self, multimodal_content: list[MultimodalContentItem]
    ) -> None:
        use_case = AsyncMock()
        use_case.execute.return_value = {"status": "success", "data": ""}

        with self._patched(use_case):
            await query_knowledge_base_multimodal(
                working_dir="/tmp/rag/test",
                query="Describe",
                multimodal_content=multimodal_content,
            )

        use_case.execute.assert_called_once_with(
            working_dir="/tmp/rag/test",
            query="Describe",
            multimodal_content=multimodal_content,
            mode="hybrid",
            top_k=5,
        )

    async def test_calls_use_case_with_custom_params(
        self,
        multimodal_content: list[MultimodalContentItem],  # noqa: ARG002
    ) -> None:
        use_case = AsyncMock()
        use_case.execute.return_value = {"status": "success", "data": ""}

        with self._patched(use_case):
            await query_knowledge_base_multimodal(
                working_dir="/tmp/rag/project_42",
                query="Analyze this table",
                multimodal_content=[
                    MultimodalContentItem(
                        type="table",
                        table_data="A,B\n1,2",
                        table_caption="Test table",
                    ),
                ],
                mode="global",
                top_k=20,
            )

        kwargs = use_case.execute.call_args[1]
        assert kwargs["working_dir"] == "/tmp/rag/project_42"
        assert kwargs["query"] == "Analyze this table"
        assert kwargs["mode"] == "global"
        assert kwargs["top_k"] == 20
        assert len(kwargs["multimodal_content"]) == 1

    async def test_handles_naive_mode(
        self, multimodal_content: list[MultimodalContentItem]
    ) -> None:
        use_case = AsyncMock()
        use_case.execute.return_value = {"status": "success", "data": ""}

        with self._patched(use_case):
            await query_knowledge_base_multimodal(
                working_dir="/tmp/rag/test",
                query="search",
                multimodal_content=multimodal_content,
                mode="naive",
            )

        use_case.execute.assert_called_once_with(
            working_dir="/tmp/rag/test",
            query="search",
            multimodal_content=multimodal_content,
            mode="naive",
            top_k=5,
        )

    async def test_propagates_use_case_error(
        self, multimodal_content: list[MultimodalContentItem]
    ) -> None:
        # Exceptions raised by the use case must not be swallowed by the tool.
        use_case = AsyncMock()
        use_case.execute.side_effect = RuntimeError("Vision model failed")

        with (
            self._patched(use_case),
            pytest.raises(RuntimeError, match="Vision model failed"),
        ):
            await query_knowledge_base_multimodal(
                working_dir="/tmp/rag/test",
                query="will fail",
                multimodal_content=multimodal_content,
            )
+""" + +from datetime import UTC, datetime +from unittest.mock import MagicMock, patch + +import pytest +from minio.error import S3Error + +from domain.ports.storage_port import FileInfo +from infrastructure.storage.minio_adapter import MinioAdapter + + +@pytest.fixture +def mock_minio_client() -> MagicMock: + """Provide a mocked Minio client.""" + return MagicMock() + + +@pytest.fixture +def adapter(mock_minio_client: MagicMock) -> MinioAdapter: + """Provide a MinioAdapter with mocked client.""" + with patch( + "infrastructure.storage.minio_adapter.Minio", + return_value=mock_minio_client, + ): + adapter = MinioAdapter( + host="localhost:9000", + access="minioadmin", + secret="minioadmin", + secure=False, + ) + # Replace client directly to ensure mock is used + adapter.client = mock_minio_client + return adapter + + +class TestGetObject: + """Tests for MinioAdapter.get_object.""" + + async def test_returns_object_bytes_on_success( + self, adapter: MinioAdapter, mock_minio_client: MagicMock + ) -> None: + """Should return the raw bytes of the requested object.""" + mock_response = MagicMock() + mock_response.read.return_value = b"file content here" + mock_response.close = MagicMock() + mock_response.release_conn = MagicMock() + mock_minio_client.get_object.return_value = mock_response + + result = await adapter.get_object("my-bucket", "docs/report.pdf") + + assert result == b"file content here" + mock_response.close.assert_called_once() + mock_response.release_conn.assert_called_once() + + async def test_raises_file_not_found_for_no_such_key( + self, adapter: MinioAdapter, mock_minio_client: MagicMock + ) -> None: + """Should convert S3Error NoSuchKey to FileNotFoundError.""" + mock_minio_client.get_object.side_effect = S3Error( + response=None, + code="NoSuchKey", + message="The specified key does not exist.", + resource="resource", + request_id="request_id", + host_id="host_id", + ) + + with pytest.raises(FileNotFoundError, match="Object not found"): + await 
adapter.get_object("my-bucket", "missing.pdf") + + async def test_raises_file_not_found_for_no_such_bucket( + self, adapter: MinioAdapter, mock_minio_client: MagicMock + ) -> None: + """Should convert S3Error NoSuchBucket to FileNotFoundError.""" + mock_minio_client.get_object.side_effect = S3Error( + response=None, + code="NoSuchBucket", + message="The specified bucket does not exist.", + resource="resource", + request_id="request_id", + host_id="host_id", + ) + + with pytest.raises(FileNotFoundError, match="Object not found"): + await adapter.get_object("bad-bucket", "any/path") + + async def test_re_raises_other_s3_errors( + self, adapter: MinioAdapter, mock_minio_client: MagicMock + ) -> None: + """Should re-raise S3Error for non-404 error codes like AccessDenied.""" + mock_minio_client.get_object.side_effect = S3Error( + response=None, + code="AccessDenied", + message="Access Denied.", + resource="resource", + request_id="request_id", + host_id="host_id", + ) + + with pytest.raises(S3Error) as exc_info: + await adapter.get_object("my-bucket", "private/doc.pdf") + + assert exc_info.value.code == "AccessDenied" + + +class TestListObjects: + """Tests for MinioAdapter.list_objects.""" + + async def test_returns_object_names_filtering_dirs( + self, adapter: MinioAdapter, mock_minio_client: MagicMock + ) -> None: + """Should return only non-directory object names.""" + mock_obj1 = MagicMock() + mock_obj1.object_name = "docs/report.pdf" + mock_obj1.is_dir = False + + mock_obj2 = MagicMock() + mock_obj2.object_name = "docs/" + mock_obj2.is_dir = True + + mock_obj3 = MagicMock() + mock_obj3.object_name = "docs/notes.txt" + mock_obj3.is_dir = False + + mock_minio_client.list_objects.return_value = [mock_obj1, mock_obj2, mock_obj3] + + result = await adapter.list_objects("my-bucket", "docs/", recursive=True) + + assert result == ["docs/report.pdf", "docs/notes.txt"] + mock_minio_client.list_objects.assert_called_once_with( + "my-bucket", prefix="docs/", recursive=True + 
) + + async def test_returns_empty_list_when_no_objects( + self, adapter: MinioAdapter, mock_minio_client: MagicMock + ) -> None: + """Should return empty list when bucket is empty.""" + mock_minio_client.list_objects.return_value = [] + + result = await adapter.list_objects("my-bucket", "", recursive=True) + + assert result == [] + + +class TestListFilesMetadata: + """Tests for MinioAdapter.list_files_metadata.""" + + async def test_returns_file_info_list( + self, adapter: MinioAdapter, mock_minio_client: MagicMock + ) -> None: + """Should return FileInfo list with size and last_modified.""" + dt = datetime(2026, 3, 15, 10, 30, 0, tzinfo=UTC) + + mock_obj = MagicMock() + mock_obj.object_name = "data/file.csv" + mock_obj.is_dir = False + mock_obj.size = 4096 + mock_obj.last_modified = dt + + mock_dir = MagicMock() + mock_dir.object_name = "data/" + mock_dir.is_dir = True + + mock_minio_client.list_objects.return_value = [mock_obj, mock_dir] + + result = await adapter.list_files_metadata("my-bucket", "data/", recursive=True) + + assert len(result) == 1 + assert isinstance(result[0], FileInfo) + assert result[0].object_name == "data/file.csv" + assert result[0].size == 4096 + assert result[0].last_modified == str(dt) + + async def test_handles_none_size_as_zero( + self, adapter: MinioAdapter, mock_minio_client: MagicMock + ) -> None: + """Should treat None size as 0 in FileInfo.""" + mock_obj = MagicMock() + mock_obj.object_name = "unknown-size-file" + mock_obj.is_dir = False + mock_obj.size = None + mock_obj.last_modified = datetime(2026, 1, 1, tzinfo=UTC) + + mock_minio_client.list_objects.return_value = [mock_obj] + + result = await adapter.list_files_metadata("my-bucket", "") + + assert result[0].size == 0 + + async def test_handles_none_last_modified_as_none( + self, adapter: MinioAdapter, mock_minio_client: MagicMock + ) -> None: + """Should set last_modified to None when MinIO returns None.""" + mock_obj = MagicMock() + mock_obj.object_name = "no-date-file" + 
mock_obj.is_dir = False + mock_obj.size = 100 + mock_obj.last_modified = None + + mock_minio_client.list_objects.return_value = [mock_obj] + + result = await adapter.list_files_metadata("my-bucket", "") + + assert result[0].last_modified is None + + +class TestListMinioObjects: + """Tests for MinioAdapter._list_minio_objects (internal helper).""" + + async def test_raises_file_not_found_for_missing_bucket( + self, adapter: MinioAdapter, mock_minio_client: MagicMock + ) -> None: + """Should convert NoSuchBucket S3Error to FileNotFoundError.""" + mock_minio_client.list_objects.side_effect = S3Error( + response=None, + code="NoSuchBucket", + message="The specified bucket does not exist.", + resource="resource", + request_id="request_id", + host_id="host_id", + ) + + with pytest.raises(FileNotFoundError, match="Bucket not found"): + await adapter._list_minio_objects("bad-bucket", "", recursive=True) + + async def test_re_raises_non_bucket_s3_errors( + self, adapter: MinioAdapter, mock_minio_client: MagicMock + ) -> None: + """Should re-raise S3Errors that are not NoSuchBucket.""" + mock_minio_client.list_objects.side_effect = S3Error( + response=None, + code="InternalError", + message="We encountered an internal error.", + resource="resource", + request_id="request_id", + host_id="host_id", + ) + + with pytest.raises(S3Error) as exc_info: + await adapter._list_minio_objects("my-bucket", "", recursive=True) + + assert exc_info.value.code == "InternalError" + + +class TestListFolders: + """Tests for MinioAdapter.list_folders.""" + + async def test_returns_only_dir_objects( + self, adapter: MinioAdapter, mock_minio_client: MagicMock + ) -> None: + mock_dir1 = MagicMock() + mock_dir1.object_name = "docs/" + mock_dir1.is_dir = True + + mock_file = MagicMock() + mock_file.object_name = "docs/report.pdf" + mock_file.is_dir = False + + mock_dir2 = MagicMock() + mock_dir2.object_name = "photos/" + mock_dir2.is_dir = True + + mock_minio_client.list_objects.return_value = 
[mock_dir1, mock_file, mock_dir2] + + result = await adapter.list_folders("my-bucket") + + assert result == ["docs/", "photos/"] + + async def test_returns_empty_when_no_dirs( + self, adapter: MinioAdapter, mock_minio_client: MagicMock + ) -> None: + mock_file = MagicMock() + mock_file.object_name = "report.pdf" + mock_file.is_dir = False + + mock_minio_client.list_objects.return_value = [mock_file] + + result = await adapter.list_folders("my-bucket") + + assert result == [] + + async def test_excludes_files_from_result( + self, adapter: MinioAdapter, mock_minio_client: MagicMock + ) -> None: + mock_dir = MagicMock() + mock_dir.object_name = "archive/" + mock_dir.is_dir = True + + mock_file1 = MagicMock() + mock_file1.object_name = "readme.txt" + mock_file1.is_dir = False + + mock_file2 = MagicMock() + mock_file2.object_name = "data.csv" + mock_file2.is_dir = False + + mock_minio_client.list_objects.return_value = [mock_dir, mock_file1, mock_file2] + + result = await adapter.list_folders("my-bucket") + + assert result == ["archive/"] + + async def test_propagates_file_not_found_for_missing_bucket( + self, adapter: MinioAdapter, mock_minio_client: MagicMock + ) -> None: + mock_minio_client.list_objects.side_effect = S3Error( + response=None, + code="NoSuchBucket", + message="The specified bucket does not exist.", + resource="resource", + request_id="request_id", + host_id="host_id", + ) + + with pytest.raises(FileNotFoundError, match="Bucket not found"): + await adapter.list_folders("bad-bucket") diff --git a/tests/unit/test_requests_indexing.py b/tests/unit/test_requests_indexing.py new file mode 100644 index 0000000..b5eb8cc --- /dev/null +++ b/tests/unit/test_requests_indexing.py @@ -0,0 +1,65 @@ +"""Tests for _coerce_file_extensions validator in indexing_request.py. + +The validator converts various input types into a consistent list[str] | None +format for the IndexFolderRequest.file_extensions field. 
+""" + +from application.requests.indexing_request import ( + IndexFolderRequest, + _coerce_file_extensions, +) + + +class TestCoerceFileExtensions: + """Tests for the _coerce_file_extensions validator function.""" + + def test_string_input_wrapped_in_list(self) -> None: + """A single string should be wrapped in a list.""" + result = _coerce_file_extensions(".pdf") + assert result == [".pdf"] + + def test_list_passthrough(self) -> None: + """A list should be returned unchanged.""" + extensions = [".pdf", ".docx", ".txt"] + result = _coerce_file_extensions(extensions) + assert result == extensions + + def test_none_returns_none(self) -> None: + """None input should return None.""" + result = _coerce_file_extensions(None) + assert result is None + + def test_empty_string_returns_none(self) -> None: + """Empty string should be treated as None.""" + result = _coerce_file_extensions("") + assert result is None + + def test_single_extension_string(self) -> None: + """A single extension string should produce a single-element list.""" + result = _coerce_file_extensions(".xlsx") + assert result == [".xlsx"] + + +class TestIndexFolderRequestFileExtensions: + """Integration tests for file_extensions coercion via Pydantic model.""" + + def test_accepts_list_of_extensions(self) -> None: + """Should accept a list of file extensions directly.""" + request = IndexFolderRequest( + working_dir="/tmp/test", + file_extensions=[".pdf", ".docx"], + ) + assert request.file_extensions == [".pdf", ".docx"] + + def test_coerces_single_string_to_list(self) -> None: + """Should coerce a single string into a one-element list.""" + request = IndexFolderRequest( + working_dir="/tmp/test", + file_extensions=".pdf", + ) + assert request.file_extensions == [".pdf"] + + def test_defaults_to_none(self) -> None: + """Should default to None when file_extensions is not provided.""" + request = IndexFolderRequest(working_dir="/tmp/test") + assert request.file_extensions is None diff --git a/uv.lock 
b/uv.lock index 0cb893f..d49d462 100644 --- a/uv.lock +++ b/uv.lock @@ -1176,6 +1176,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/02/10/5da547df7a391dcde17f59520a231527b8571e6f46fc8efb02ccb370ab12/docutils-0.22.4-py3-none-any.whl", hash = "sha256:d0013f540772d1420576855455d050a2180186c91c15779301ac2ccb3eeb68de", size = 633196, upload-time = "2025-12-18T19:00:18.077Z" }, ] +[[package]] +name = "easyocr" +version = "1.7.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "ninja", marker = "python_full_version < '3.14'" }, + { name = "numpy", marker = "python_full_version < '3.14'" }, + { name = "opencv-python-headless", marker = "python_full_version < '3.14'" }, + { name = "pillow", marker = "python_full_version < '3.14'" }, + { name = "pyclipper", marker = "python_full_version < '3.14'" }, + { name = "python-bidi", marker = "python_full_version < '3.14'" }, + { name = "pyyaml", marker = "python_full_version < '3.14'" }, + { name = "scikit-image", marker = "python_full_version < '3.14'" }, + { name = "scipy", marker = "python_full_version < '3.14'" }, + { name = "shapely", marker = "python_full_version < '3.14'" }, + { name = "torch", marker = "python_full_version < '3.14'" }, + { name = "torchvision", marker = "python_full_version < '3.14'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/bb/84/4a2cab0e6adde6a85e7ba543862e5fc0250c51f3ac721a078a55cdcff250/easyocr-1.7.2-py3-none-any.whl", hash = "sha256:5be12f9b0e595d443c9c3d10b0542074b50f0ec2d98b141a109cd961fd1c177c", size = 2870178, upload-time = "2024-09-24T11:34:43.554Z" }, +] + [[package]] name = "ecdsa" version = "0.19.2" @@ -2061,6 +2083,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/33/56/258d1d227620075d6f1a21cd7a5f0b80488d816598c3c9edcdf1d067a6eb/kreuzberg-4.8.2-cp310-abi3-win_amd64.whl", hash = "sha256:232b3bdbd30ca850e129f55ef8735be7696f91cc372ad29c77ef0539a13974de", size = 66916658, upload-time = 
"2026-04-10T08:07:12.619Z" }, ] +[package.optional-dependencies] +all = [ + { name = "easyocr", marker = "python_full_version < '3.14'" }, + { name = "torch", marker = "python_full_version < '3.14'" }, +] + [[package]] name = "latex2mathml" version = "3.79.0" @@ -2456,7 +2484,7 @@ dependencies = [ { name = "fastapi" }, { name = "fastmcp" }, { name = "httpx" }, - { name = "kreuzberg" }, + { name = "kreuzberg", extra = ["all"] }, { name = "lightrag-hku", extra = ["api"] }, { name = "mcp" }, { name = "minio" }, @@ -2490,7 +2518,7 @@ requires-dist = [ { name = "fastapi", specifier = ">=0.124.0" }, { name = "fastmcp", specifier = ">=3.2.0" }, { name = "httpx", specifier = ">=0.27.0" }, - { name = "kreuzberg", specifier = ">=4.0.0" }, + { name = "kreuzberg", extras = ["all"], specifier = ">=4.8.2" }, { name = "lightrag-hku", specifier = ">=1.4.13" }, { name = "lightrag-hku", extras = ["api"], specifier = ">=1.4.13" }, { name = "mcp", specifier = ">=1.24.0" }, @@ -2835,6 +2863,32 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9e/c9/b2622292ea83fbb4ec318f5b9ab867d0a28ab43c5717bb85b0a5f6b3b0a4/networkx-3.6.1-py3-none-any.whl", hash = "sha256:d47fbf302e7d9cbbb9e2555a0d267983d2aa476bac30e90dfbe5669bd57f3762", size = 2068504, upload-time = "2025-12-08T17:02:38.159Z" }, ] +[[package]] +name = "ninja" +version = "1.13.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/43/73/79a0b22fc731989c708068427579e840a6cf4e937fe7ae5c5d0b7356ac22/ninja-1.13.0.tar.gz", hash = "sha256:4a40ce995ded54d9dc24f8ea37ff3bf62ad192b547f6c7126e7e25045e76f978", size = 242558, upload-time = "2025-08-11T15:10:19.421Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3c/74/d02409ed2aa865e051b7edda22ad416a39d81a84980f544f8de717cab133/ninja-1.13.0-py3-none-macosx_10_9_universal2.whl", hash = "sha256:fa2a8bfc62e31b08f83127d1613d10821775a0eb334197154c4d6067b7068ff1", size = 310125, upload-time = 
"2025-08-11T15:09:50.971Z" }, + { url = "https://files.pythonhosted.org/packages/8e/de/6e1cd6b84b412ac1ef327b76f0641aeb5dcc01e9d3f9eee0286d0c34fd93/ninja-1.13.0-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3d00c692fb717fd511abeb44b8c5d00340c36938c12d6538ba989fe764e79630", size = 177467, upload-time = "2025-08-11T15:09:52.767Z" }, + { url = "https://files.pythonhosted.org/packages/c8/83/49320fb6e58ae3c079381e333575fdbcf1cca3506ee160a2dcce775046fa/ninja-1.13.0-py3-none-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:be7f478ff9f96a128b599a964fc60a6a87b9fa332ee1bd44fa243ac88d50291c", size = 187834, upload-time = "2025-08-11T15:09:54.115Z" }, + { url = "https://files.pythonhosted.org/packages/56/c7/ba22748fb59f7f896b609cd3e568d28a0a367a6d953c24c461fe04fc4433/ninja-1.13.0-py3-none-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:60056592cf495e9a6a4bea3cd178903056ecb0943e4de45a2ea825edb6dc8d3e", size = 202736, upload-time = "2025-08-11T15:09:55.745Z" }, + { url = "https://files.pythonhosted.org/packages/79/22/d1de07632b78ac8e6b785f41fa9aad7a978ec8c0a1bf15772def36d77aac/ninja-1.13.0-py3-none-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:1c97223cdda0417f414bf864cfb73b72d8777e57ebb279c5f6de368de0062988", size = 179034, upload-time = "2025-08-11T15:09:57.394Z" }, + { url = "https://files.pythonhosted.org/packages/ed/de/0e6edf44d6a04dabd0318a519125ed0415ce437ad5a1ec9b9be03d9048cf/ninja-1.13.0-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fb46acf6b93b8dd0322adc3a4945452a4e774b75b91293bafcc7b7f8e6517dfa", size = 180716, upload-time = "2025-08-11T15:09:58.696Z" }, + { url = "https://files.pythonhosted.org/packages/54/28/938b562f9057aaa4d6bfbeaa05e81899a47aebb3ba6751e36c027a7f5ff7/ninja-1.13.0-py3-none-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:4be9c1b082d244b1ad7ef41eb8ab088aae8c109a9f3f0b3e56a252d3e00f42c1", size = 146843, upload-time = 
"2025-08-11T15:10:00.046Z" }, + { url = "https://files.pythonhosted.org/packages/2a/fb/d06a3838de4f8ab866e44ee52a797b5491df823901c54943b2adb0389fbb/ninja-1.13.0-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:6739d3352073341ad284246f81339a384eec091d9851a886dfa5b00a6d48b3e2", size = 154402, upload-time = "2025-08-11T15:10:01.657Z" }, + { url = "https://files.pythonhosted.org/packages/31/bf/0d7808af695ceddc763cf251b84a9892cd7f51622dc8b4c89d5012779f06/ninja-1.13.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:11be2d22027bde06f14c343f01d31446747dbb51e72d00decca2eb99be911e2f", size = 552388, upload-time = "2025-08-11T15:10:03.349Z" }, + { url = "https://files.pythonhosted.org/packages/9d/70/c99d0c2c809f992752453cce312848abb3b1607e56d4cd1b6cded317351a/ninja-1.13.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:aa45b4037b313c2f698bc13306239b8b93b4680eb47e287773156ac9e9304714", size = 472501, upload-time = "2025-08-11T15:10:04.735Z" }, + { url = "https://files.pythonhosted.org/packages/9f/43/c217b1153f0e499652f5e0766da8523ce3480f0a951039c7af115e224d55/ninja-1.13.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:5f8e1e8a1a30835eeb51db05cf5a67151ad37542f5a4af2a438e9490915e5b72", size = 638280, upload-time = "2025-08-11T15:10:06.512Z" }, + { url = "https://files.pythonhosted.org/packages/8c/45/9151bba2c8d0ae2b6260f71696330590de5850e5574b7b5694dce6023e20/ninja-1.13.0-py3-none-musllinux_1_2_ppc64le.whl", hash = "sha256:3d7d7779d12cb20c6d054c61b702139fd23a7a964ec8f2c823f1ab1b084150db", size = 642420, upload-time = "2025-08-11T15:10:08.35Z" }, + { url = "https://files.pythonhosted.org/packages/3c/fb/95752eb635bb8ad27d101d71bef15bc63049de23f299e312878fc21cb2da/ninja-1.13.0-py3-none-musllinux_1_2_riscv64.whl", hash = "sha256:d741a5e6754e0bda767e3274a0f0deeef4807f1fec6c0d7921a0244018926ae5", size = 585106, upload-time = "2025-08-11T15:10:09.818Z" }, + { url = 
"https://files.pythonhosted.org/packages/c1/31/aa56a1a286703800c0cbe39fb4e82811c277772dc8cd084f442dd8e2938a/ninja-1.13.0-py3-none-musllinux_1_2_s390x.whl", hash = "sha256:e8bad11f8a00b64137e9b315b137d8bb6cbf3086fbdc43bf1f90fd33324d2e96", size = 707138, upload-time = "2025-08-11T15:10:11.366Z" }, + { url = "https://files.pythonhosted.org/packages/34/6f/5f5a54a1041af945130abdb2b8529cbef0cdcbbf9bcf3f4195378319d29a/ninja-1.13.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b4f2a072db3c0f944c32793e91532d8948d20d9ab83da9c0c7c15b5768072200", size = 581758, upload-time = "2025-08-11T15:10:13.295Z" }, + { url = "https://files.pythonhosted.org/packages/95/97/51359c77527d45943fe7a94d00a3843b81162e6c4244b3579fe8fc54cb9c/ninja-1.13.0-py3-none-win32.whl", hash = "sha256:8cfbb80b4a53456ae8a39f90ae3d7a2129f45ea164f43fadfa15dc38c4aef1c9", size = 267201, upload-time = "2025-08-11T15:10:15.158Z" }, + { url = "https://files.pythonhosted.org/packages/29/45/c0adfbfb0b5895aa18cec400c535b4f7ff3e52536e0403602fc1a23f7de9/ninja-1.13.0-py3-none-win_amd64.whl", hash = "sha256:fb8ee8719f8af47fed145cced4a85f0755dd55d45b2bddaf7431fa89803c5f3e", size = 309975, upload-time = "2025-08-11T15:10:16.697Z" }, + { url = "https://files.pythonhosted.org/packages/df/93/a7b983643d1253bb223234b5b226e69de6cda02b76cdca7770f684b795f5/ninja-1.13.0-py3-none-win_arm64.whl", hash = "sha256:3c0b40b1f0bba764644385319028650087b4c1b18cdfa6f45cb39a3669b81aa9", size = 290806, upload-time = "2025-08-11T15:10:18.018Z" }, +] + [[package]] name = "numpy" version = "2.4.4" @@ -4033,6 +4087,36 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9d/7a/d968e294073affff457b041c2be9868a40c1c71f4a35fcc1e45e5493067b/pytest_cov-7.1.0-py3-none-any.whl", hash = "sha256:a0461110b7865f9a271aa1b51e516c9a95de9d696734a2f71e3e78f46e1d4678", size = 22876, upload-time = "2026-03-21T20:11:14.438Z" }, ] +[[package]] +name = "python-bidi" +version = "0.6.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/ed/e3/c0c8bf6fca79ac946a28d57f116e3b9e5b10a4469b6f70bf73f3744c49bf/python_bidi-0.6.7.tar.gz", hash = "sha256:c10065081c0e137975de5d9ba2ff2306286dbf5e0c586d4d5aec87c856239b41", size = 45503, upload-time = "2025-10-22T09:52:49.624Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/25/a5/8ad0a448d42fd5d01dd127c1dc5ab974a8ea6e20305ac89a3356dacd3bdf/python_bidi-0.6.7-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1c061207212cd1db27bf6140b96dcd0536246f1e13e99bb5d03f4632f8e2ad7f", size = 272129, upload-time = "2025-10-22T09:52:00.761Z" }, + { url = "https://files.pythonhosted.org/packages/e6/c0/a13981fc0427a0d35e96fc4e31fbb0f981b28d0ce08416f98f42d51ea3bc/python_bidi-0.6.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a2eb8fca918c7381531035c3aae31c29a1c1300ab8a63cad1ec3a71331096c78", size = 263174, upload-time = "2025-10-22T09:51:51.401Z" }, + { url = "https://files.pythonhosted.org/packages/9c/32/74034239d0bca32c315cac5c3ec07ef8eb44fa0e8cea1585cad85f5b8651/python_bidi-0.6.7-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:414004fe9cba33d288ff4a04e1c9afe6a737f440595d01b5bbed00d750296bbd", size = 292496, upload-time = "2025-10-22T09:51:00.708Z" }, + { url = "https://files.pythonhosted.org/packages/83/fa/d6c853ed2668b1c12d66e71d4f843d0710d1ccaecc17ce09b35d2b1382a7/python_bidi-0.6.7-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5013ba963e9da606c4c03958cc737ebd5f8b9b8404bd71ab0d580048c746f875", size = 300727, upload-time = "2025-10-22T09:51:09.152Z" }, + { url = "https://files.pythonhosted.org/packages/9c/8d/55685bddfc1fbfa6e28e1c0be7df4023e504de7d2ac1355a3fa610836bc1/python_bidi-0.6.7-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad5f0847da00687f52d2b81828e8d887bdea9eb8686a9841024ea7a0e153028e", size = 438823, upload-time = "2025-10-22T09:51:17.844Z" }, + { url = 
"https://files.pythonhosted.org/packages/9f/54/db9e70443f89e3ec6fa70dcd16809c3656d1efe7946076dcd59832f722df/python_bidi-0.6.7-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:26a8fe0d532b966708fc5f8aea0602107fde4745a8a5ae961edd3cf02e807d07", size = 325721, upload-time = "2025-10-22T09:51:26.132Z" }, + { url = "https://files.pythonhosted.org/packages/55/c5/98ac9c00f17240f9114c756791f0cd9ba59a5d4b5d84fd1a6d0d50604e82/python_bidi-0.6.7-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6323e943c7672b271ad9575a2232508f17e87e81a78d7d10d6e93040e210eddf", size = 300493, upload-time = "2025-10-22T09:51:43.783Z" }, + { url = "https://files.pythonhosted.org/packages/0b/cb/382538dd7c656eb50408802b9a9466dbd3432bea059410e65a6c14bc79f9/python_bidi-0.6.7-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:349b89c3110bd25aa56d79418239ca4785d4bcc7a596e63bb996a9696fc6a907", size = 312889, upload-time = "2025-10-22T09:51:36.011Z" }, + { url = "https://files.pythonhosted.org/packages/50/8d/dbc784cecd9b2950ba99c8fef0387ae588837e4e2bfd543be191d18bf9f6/python_bidi-0.6.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e7cad66317f12f0fd755fe41ee7c6b06531d2189a9048a8f37addb5109f7e3e3", size = 472798, upload-time = "2025-10-22T09:52:10.446Z" }, + { url = "https://files.pythonhosted.org/packages/83/e6/398d59075265717d2950622ede1d366aff88ffcaa67a30b85709dea72206/python_bidi-0.6.7-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:49639743f1230648fd4fb47547f8a48ada9c5ca1426b17ac08e3be607c65394c", size = 564974, upload-time = "2025-10-22T09:52:22.416Z" }, + { url = "https://files.pythonhosted.org/packages/7c/8e/2b939be0651bc2b69c234dc700723a26b93611d5bdd06b253d67d9da3557/python_bidi-0.6.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4636d572b357ab9f313c5340915c1cf51e3e54dd069351e02b6b76577fd1a854", size = 491711, upload-time = "2025-10-22T09:52:32.322Z" }, + { url = 
"https://files.pythonhosted.org/packages/8f/05/f53739ab2ce2eee0c855479a31b64933f6ff6164f3ddc611d04e4b79d922/python_bidi-0.6.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d7310312a68fdb1a8249cf114acb5435aa6b6a958b15810f053c1df5f98476e4", size = 463536, upload-time = "2025-10-22T09:52:43.142Z" }, + { url = "https://files.pythonhosted.org/packages/77/c6/800899e2764f723c2ea9172eabcc1a31ffb8b4bb71ea5869158fd83bd437/python_bidi-0.6.7-cp313-cp313-win32.whl", hash = "sha256:ec985386bc3cd54155f2ef0434fccbfd743617ed6fc1a84dae2ab1de6062e0c6", size = 155786, upload-time = "2025-10-22T09:53:01.357Z" }, + { url = "https://files.pythonhosted.org/packages/30/ba/a811c12c1a4b8fa7c0c0963d92c042284c2049b1586615af6b1774b786d9/python_bidi-0.6.7-cp313-cp313-win_amd64.whl", hash = "sha256:f57726b5a90d818625e6996f5116971b7a4ceb888832337d0e2cf43d1c362a90", size = 159863, upload-time = "2025-10-22T09:52:53.537Z" }, + { url = "https://files.pythonhosted.org/packages/6f/a5/cda302126e878be162bf183eb0bd6dc47ca3e680fb52111e49c62a8ea1eb/python_bidi-0.6.7-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:b0bee27fb596a0f518369c275a965d0448c39a0730e53a030b311bb10562d4d5", size = 271899, upload-time = "2025-10-22T09:52:01.758Z" }, + { url = "https://files.pythonhosted.org/packages/4d/4b/9c15ca0fe795a5c55a39daa391524ac74e26d9187493632d455257771023/python_bidi-0.6.7-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:6c19ab378fefb1f09623f583fcfa12ed42369a998ddfbd39c40908397243c56b", size = 262235, upload-time = "2025-10-22T09:51:52.379Z" }, + { url = "https://files.pythonhosted.org/packages/0f/5e/25b25be64bff05272aa28d8bef2fbbad8415db3159a41703eb2e63dc9824/python_bidi-0.6.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:630cee960ba9e3016f95a8e6f725a621ddeff6fd287839f5693ccfab3f3a9b5c", size = 471983, upload-time = "2025-10-22T09:52:12.182Z" }, + { url = 
"https://files.pythonhosted.org/packages/4d/78/a9363f5da1b10d9211514b96ea47ecc95c797ed5ac566684bfece0666082/python_bidi-0.6.7-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:0dbb4bbae212cca5bcf6e522fe8f572aff7d62544557734c2f810ded844d9eea", size = 565016, upload-time = "2025-10-22T09:52:23.515Z" }, + { url = "https://files.pythonhosted.org/packages/0d/ed/37dcb7d3dc250ecdff8120b026c37fcdbeada4111e4d7148c053180bcf54/python_bidi-0.6.7-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:1dd0a5ec0d8710905cebb4c9e5018aa8464395a33cb32a3a6c2a951bf1984fe5", size = 491180, upload-time = "2025-10-22T09:52:33.505Z" }, + { url = "https://files.pythonhosted.org/packages/40/a3/50d1f6060a7a500768768f5f8735cb68deba36391248dbf13d5d2c9c0885/python_bidi-0.6.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:4ea928c31c7364098f853f122868f6f2155d6840661f7ea8b2ccfdf6084eb9f4", size = 463126, upload-time = "2025-10-22T09:52:44.28Z" }, + { url = "https://files.pythonhosted.org/packages/d2/47/712cd7d1068795c57fdf6c4acca00716688aa8b4e353b30de2ed8f599fd6/python_bidi-0.6.7-cp314-cp314-win32.whl", hash = "sha256:f7c055a50d068b3a924bd33a327646346839f55bcb762a26ec3fde8ea5d40564", size = 155793, upload-time = "2025-10-22T09:53:02.7Z" }, + { url = "https://files.pythonhosted.org/packages/c3/e8/1f86bf699b20220578351f9b7b635ed8b6e84dd51ad3cca08b89513ae971/python_bidi-0.6.7-cp314-cp314-win_amd64.whl", hash = "sha256:8a17631e3e691eec4ae6a370f7b035cf0a5767f4457bd615d11728c23df72e43", size = 159821, upload-time = "2025-10-22T09:52:54.95Z" }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0"