From fec389a03240f530346239bee74246da9aa9740c Mon Sep 17 00:00:00 2001 From: Kaiohz Date: Tue, 7 Apr 2026 19:44:35 +0200 Subject: [PATCH 01/17] feat: Add BM25 full-text search with pg_textsearch (BRIC-7) - Add BM25 search using PostgreSQL pg_textsearch extension - Implement Reciprocal Rank Fusion (RRF) for hybrid search - Add hybrid+ query mode combining BM25 + vector search in parallel - Add bm25-only query mode for full-text search - Implement PostgresBM25Adapter with connection pool management - Auto-indexing via database triggers - Add comprehensive unit tests for BM25 and RRF - Add database migration for pg_textsearch support - Update Dockerfile to install pg_textsearch extension - Fix critical issues: connection pool leak, error handling, edge cases Closes BRIC-7 --- Dockerfile.db | 19 +- src/application/requests/query_request.py | 10 +- src/application/use_cases/query_use_case.py | 156 +++++++++++- src/config.py | 10 + src/dependencies.py | 30 ++- src/domain/ports/bm25_engine.py | 77 ++++++ .../bm25/pg_textsearch_adapter.py | 221 +++++++++++++++++ src/infrastructure/hybrid/rrf_combiner.py | 134 +++++++++++ tests/domain/ports/test_bm25_engine.py | 37 +++ .../bm25/test_pg_textsearch_adapter.py | 144 +++++++++++ .../hybrid/test_rrf_combiner.py | 224 ++++++++++++++++++ tests/migrations/001_add_bm25_support.sql | 176 ++++++++++++++ 12 files changed, 1227 insertions(+), 11 deletions(-) create mode 100644 src/domain/ports/bm25_engine.py create mode 100644 src/infrastructure/bm25/pg_textsearch_adapter.py create mode 100644 src/infrastructure/hybrid/rrf_combiner.py create mode 100644 tests/domain/ports/test_bm25_engine.py create mode 100644 tests/infrastructure/bm25/test_pg_textsearch_adapter.py create mode 100644 tests/infrastructure/hybrid/test_rrf_combiner.py create mode 100644 tests/migrations/001_add_bm25_support.sql diff --git a/Dockerfile.db b/Dockerfile.db index 9ac4947..716d7bf 100644 --- a/Dockerfile.db +++ b/Dockerfile.db @@ -10,13 +10,24 @@ RUN apt-get 
update && apt-get install -y \ bison \ && rm -rf /var/lib/apt/lists/* -# Install Apache AGE (v1.6.0 for PG17) and cleanup +# Install Apache AGE (v1.6.0 for PG17) RUN cd /tmp && \ git clone --branch PG17/v1.6.0-rc0 https://github.com/apache/age.git && \ cd age && \ make PG_CONFIG=/usr/lib/postgresql/17/bin/pg_config install || \ - (echo "Failed to build AGE" && exit 1) && \ - rm -rf /tmp/age + (echo "Failed to build AGE" && exit 1) + +# Install pg_textsearch extension for BM25 full-text search +RUN cd /tmp && \ + git clone https://github.com/timescale/pg_textsearch.git && \ + cd pg_textsearch && \ + make PG_CONFIG=/usr/lib/postgresql/17/bin/pg_config || \ + (echo "Failed to build pg_textsearch" && exit 1) && \ + make PG_CONFIG=/usr/lib/postgresql/17/bin/pg_config install || \ + (echo "Failed to install pg_textsearch" && exit 1) + +# Cleanup build artifacts +RUN rm -rf /tmp/age /tmp/pg_textsearch # Switch back to non-root user for security -USER postgres +USER postgres \ No newline at end of file diff --git a/src/application/requests/query_request.py b/src/application/requests/query_request.py index 97d5a6b..7a7185d 100644 --- a/src/application/requests/query_request.py +++ b/src/application/requests/query_request.py @@ -2,7 +2,9 @@ from pydantic import BaseModel, Field -QueryMode = Literal["local", "global", "hybrid", "naive", "mix", "bypass"] +QueryMode = Literal[ + "local", "global", "hybrid", "hybrid+", "naive", "mix", "bypass", "bm25" +] class QueryRequest(BaseModel): @@ -16,8 +18,10 @@ class QueryRequest(BaseModel): mode: QueryMode = Field( default="naive", description=( - "Search mode - 'naive' (default, recommended), 'local' (context-aware), " - "'global' (document-level), or 'hybrid' (comprehensive) or 'mix' (automatic strategy). 
" + "Search mode - 'naive' (default, vector only), 'local' (context-aware), " + "'global' (document-level), 'hybrid' (local+global KG), " + "'hybrid+' (BM25+vector parallel), 'mix' (automatic strategy), " + "'bm25' (full-text only)." ), ) top_k: int = Field( diff --git a/src/application/use_cases/query_use_case.py b/src/application/use_cases/query_use_case.py index 8a910eb..cd76407 100644 --- a/src/application/use_cases/query_use_case.py +++ b/src/application/use_cases/query_use_case.py @@ -1,16 +1,168 @@ +"""Query use case with hybrid+ mode support.""" + +import asyncio +from typing import Literal + +from domain.ports.bm25_engine import BM25EnginePort from domain.ports.rag_engine import RAGEnginePort +from infrastructure.hybrid.rrf_combiner import RRFCombiner class QueryUseCase: """Use case for querying the RAG knowledge base.""" - def __init__(self, rag_engine: RAGEnginePort) -> None: + def __init__( + self, + rag_engine: RAGEnginePort, + bm25_engine: BM25EnginePort | None = None, + rrf_k: int = 60, + ): + """Initialize use case. + + Args: + rag_engine: RAG engine for vector search + bm25_engine: BM25 engine for full-text search (optional) + rrf_k: RRF constant for combining results + """ self.rag_engine = rag_engine + self.bm25_engine = bm25_engine + self.rrf_combiner = RRFCombiner(k=rrf_k) async def execute( - self, working_dir: str, query: str, mode: str = "naive", top_k: int = 10 + self, + working_dir: str, + query: str, + mode: Literal[ + "naive", "local", "global", "hybrid", "hybrid+", "mix", "bypass", "bm25" + ] = "naive", + top_k: int = 10, ) -> dict: + """Execute search query. 
+ + Args: + working_dir: Project/workspace directory + query: Search query string + mode: Search mode + - "naive": Vector search only + - "local": Local knowledge graph search + - "global": Global knowledge graph search + - "hybrid": Local + global knowledge graph + - "hybrid+": BM25 + vector search (parallel) + - "mix": Knowledge graph + vector chunks + - "bypass": Direct LLM query + - "bm25": BM25 search only + top_k: Number of results to return + + Returns: + Search results + """ + # Initialize RAG engine self.rag_engine.init_project(working_dir) + + # Handle BM25-only mode + if mode == "bm25": + if self.bm25_engine is None: + return { + "status": "error", + "message": "BM25 engine not available. Please configure pg_textsearch extension.", + "data": {}, + } + + results = await self.bm25_engine.search(query, working_dir, top_k) + return self._format_bm25_results(results) + + # Handle hybrid+ mode (parallel BM25 + vector) + if mode == "hybrid+": + if self.bm25_engine is None: + # Fall back to regular vector search + return await self.rag_engine.query( + query=query, mode="naive", top_k=top_k, working_dir=working_dir + ) + + # Execute BM25 and vector search in parallel + bm25_task = asyncio.create_task( + self.bm25_engine.search(query, working_dir, top_k=top_k * 2) + ) + vector_task = asyncio.create_task( + self.rag_engine.query( + query=query, mode="naive", top_k=top_k * 2, working_dir=working_dir + ) + ) + + # Wait for both to complete + bm25_results, vector_results = await asyncio.gather( + bm25_task, vector_task, return_exceptions=False + ) + + # Combine using RRF + combined_results = self.rrf_combiner.combine( + bm25_results=bm25_results, + vector_results=vector_results, + top_k=top_k, + ) + + return self._format_hybrid_results(combined_results) + + # Default: use RAG engine return await self.rag_engine.query( query=query, mode=mode, top_k=top_k, working_dir=working_dir ) + + def _format_bm25_results(self, results: list) -> dict: + """Format BM25 results to 
match API response format.""" + return { + "status": "success", + "message": "", + "data": { + "entities": [], + "relationships": [], + "chunks": [ + { + "chunk_id": r.chunk_id, + "content": r.content, + "file_path": r.file_path, + "score": r.score, + "metadata": r.metadata, + } + for r in results + ], + "references": [], + }, + "metadata": { + "query_mode": "bm25", + "total_results": len(results), + }, + } + + def _format_hybrid_results(self, results: list) -> dict: + """Format hybrid results to match API response format.""" + + return { + "status": "success", + "message": "", + "data": { + "entities": [], + "relationships": [], + "chunks": [ + { + "chunk_id": r.chunk_id, + "content": r.content, + "file_path": r.file_path, + "score": r.combined_score, + "bm25_rank": r.bm25_rank if hasattr(r, "bm25_rank") else None, + "vector_rank": r.vector_rank + if hasattr(r, "vector_rank") + else None, + "combined_score": r.combined_score, + "metadata": r.metadata, + } + for r in results + ], + "references": [], + }, + "metadata": { + "query_mode": "hybrid+", + "total_results": len(results), + "rrf_k": self.rrf_combiner.k, + }, + } diff --git a/src/config.py b/src/config.py index e96dc4c..6e52b07 100644 --- a/src/config.py +++ b/src/config.py @@ -109,6 +109,16 @@ class RAGConfig(BaseSettings): ) +class BM25Config(BaseSettings): + """BM25 search configuration.""" + + BM25_ENABLED: bool = Field(default=True, description="Enable BM25 full-text search") + BM25_TEXT_CONFIG: str = Field( + default="english", description="PostgreSQL text search configuration" + ) + BM25_RRF_K: int = Field(default=60, description="RRF constant K for hybrid search") + + class MinioConfig(BaseSettings): """MinIO object storage configuration.""" diff --git a/src/dependencies.py b/src/dependencies.py index baad639..7e1c7f7 100644 --- a/src/dependencies.py +++ b/src/dependencies.py @@ -6,7 +6,16 @@ from application.use_cases.index_folder_use_case import IndexFolderUseCase from 
application.use_cases.multimodal_query_use_case import MultimodalQueryUseCase from application.use_cases.query_use_case import QueryUseCase -from config import AppConfig, LLMConfig, MinioConfig, RAGConfig +from config import ( + AppConfig, + BM25Config, + DatabaseConfig, + LLMConfig, + MinioConfig, + RAGConfig, +) +from domain.ports.bm25_engine import BM25EnginePort +from infrastructure.bm25.pg_textsearch_adapter import PostgresBM25Adapter from infrastructure.rag.lightrag_adapter import LightRAGAdapter from infrastructure.storage.minio_adapter import MinioAdapter @@ -16,6 +25,8 @@ llm_config = LLMConfig() # type: ignore rag_config = RAGConfig() # type: ignore minio_config = MinioConfig() # type: ignore +bm25_config = BM25Config() # type: ignore +db_config = DatabaseConfig() # type: ignore os.makedirs(app_config.OUTPUT_DIR, exist_ok=True) @@ -29,6 +40,17 @@ secure=minio_config.MINIO_SECURE, ) +# BM25 adapter (optional) +bm25_adapter: BM25EnginePort | None = None +if bm25_config.BM25_ENABLED: + try: + bm25_adapter = PostgresBM25Adapter( + db_url=db_config.DATABASE_URL.replace("+asyncpg", "") + ) + except Exception as e: + print(f"WARNING: BM25 adapter initialization failed: {e}") + bm25_adapter = None + # ============= USE CASE PROVIDERS ============= @@ -45,7 +67,11 @@ def get_index_folder_use_case() -> IndexFolderUseCase: def get_query_use_case() -> QueryUseCase: - return QueryUseCase(rag_adapter) + return QueryUseCase( + rag_engine=rag_adapter, + bm25_engine=bm25_adapter, + rrf_k=bm25_config.BM25_RRF_K, + ) def get_multimodal_query_use_case() -> MultimodalQueryUseCase: diff --git a/src/domain/ports/bm25_engine.py b/src/domain/ports/bm25_engine.py new file mode 100644 index 0000000..215092b --- /dev/null +++ b/src/domain/ports/bm25_engine.py @@ -0,0 +1,77 @@ +"""BM25 search engine port interface.""" + +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Any + + +@dataclass +class BM25SearchResult: + """Result from BM25 
search.""" + + chunk_id: str + content: str + file_path: str + score: float + metadata: dict[str, Any] + + +class BM25EnginePort(ABC): + """Port interface for BM25 full-text search operations.""" + + @abstractmethod + async def search( + self, + query: str, + working_dir: str, + top_k: int = 10, + ) -> list[BM25SearchResult]: + """Search documents using BM25 ranking. + + Args: + query: Search query string + working_dir: Project/workspace directory + top_k: Number of results to return + + Returns: + List of BM25SearchResult ordered by relevance + """ + pass + + @abstractmethod + async def index_document( + self, + chunk_id: str, + content: str, + file_path: str, + working_dir: str, + metadata: dict[str, Any] | None = None, + ) -> None: + """Index a document chunk for BM25 search. + + Args: + chunk_id: Unique chunk identifier + content: Text content to index + file_path: Path to source file + working_dir: Project/workspace directory + metadata: Optional metadata dictionary + """ + pass + + @abstractmethod + async def create_index(self, working_dir: str) -> None: + """Create BM25 index for workspace. + + Args: + working_dir: Project/workspace directory + """ + pass + + @abstractmethod + async def drop_index(self, working_dir: str) -> None: + """Drop BM25 index for workspace. + + Args: + working_dir: Project/workspace directory + """ + pass diff --git a/src/infrastructure/bm25/pg_textsearch_adapter.py b/src/infrastructure/bm25/pg_textsearch_adapter.py new file mode 100644 index 0000000..a5bc406 --- /dev/null +++ b/src/infrastructure/bm25/pg_textsearch_adapter.py @@ -0,0 +1,221 @@ +"""PostgreSQL BM25 adapter using pg_textsearch extension.""" + +import logging +from typing import Any + +import asyncpg + +from domain.ports.bm25_engine import BM25EnginePort, BM25SearchResult + +logger = logging.getLogger(__name__) + + +class PostgresBM25Adapter(BM25EnginePort): + """PostgreSQL BM25 implementation using pg_textsearch. 
+ + Uses PostgreSQL native full-text search with tsvector/tsquery + and pg_textsearch extension for BM25-style ranking. + + The <@> operator returns negative scores (lower is better), + so we convert to positive for consistency. + """ + + def __init__(self, db_url: str): + """Initialize adapter with database URL. + + Args: + db_url: PostgreSQL connection string + """ + self.db_url = db_url + self._pool: asyncpg.Pool | None = None + + async def _get_pool(self) -> asyncpg.Pool: + """Get or create database connection pool.""" + if self._pool is None: + self._pool = await asyncpg.create_pool(self.db_url) + + # Validate pg_textsearch extension + async with self._pool.acquire() as conn: + try: + result = await conn.fetchval( + "SELECT EXISTS(SELECT 1 FROM pg_extension WHERE extname='pg_textsearch')" + ) + if not result: + logger.warning( + "pg_textsearch extension not installed. " + "BM25 ranking <@> operator will not work. " + "Run: CREATE EXTENSION pg_textsearch;" + ) + except Exception as e: + logger.warning(f"Could not check pg_textsearch extension: {e}") + + return self._pool + + async def close(self) -> None: + """Close connection pool on shutdown.""" + if self._pool: + await self._pool.close() + self._pool = None + + async def search( + self, + query: str, + working_dir: str, + top_k: int = 10, + ) -> list[BM25SearchResult]: + """Search using BM25 ranking. + + Uses pg_textsearch <@> operator for BM25 scoring. + Scores are negative (lower is better), converted to positive. 
+ + Args: + query: Search query string + working_dir: Project/workspace directory + top_k: Number of results to return + + Returns: + List of BM25SearchResult ordered by relevance + """ + pool = await self._get_pool() + + try: + async with pool.acquire() as conn: + # Use websearch_to_tsquery for user-friendly query syntax + # and <@> operator for BM25 ranking + # Note: <@> returns negative scores (lower is better) + # We convert to positive and sort ASC + sql = """ + SELECT + chunk_id, + content, + file_path, + content <@> websearch_to_tsquery('english', $1) as score, + metadata + FROM chunks + WHERE working_dir = $2 + AND content_tsv @@ websearch_to_tsquery('english', $1) + ORDER BY score + LIMIT $3 + """ + + results = await conn.fetch(sql, query, working_dir, top_k) + + # Convert negative scores to positive (lower negative -> higher relevance) + return [ + BM25SearchResult( + chunk_id=row["chunk_id"], + content=row["content"], + file_path=row["file_path"], + score=abs(row["score"]), # Convert to positive + metadata=row["metadata"] or {}, + ) + for row in results + ] + except asyncpg.PostgresError as e: + logger.error( + f"BM25 search failed: {e}", + extra={"query": query, "working_dir": working_dir}, + ) + raise + except Exception as e: + logger.error(f"Unexpected error in BM25 search: {e}") + raise + + async def index_document( + self, + chunk_id: str, + content: str, + file_path: str, + working_dir: str, + metadata: dict[str, Any] | None = None, + ) -> None: + """Index document chunk. + + The tsvector column is auto-updated via trigger, + so we only need to INSERT/UPDATE the row. 
+ + Args: + chunk_id: Unique chunk identifier + content: Text content to index + file_path: Path to source file + working_dir: Project/workspace directory + metadata: Optional metadata dictionary + """ + pool = await self._get_pool() + + try: + async with pool.acquire() as conn: + await conn.execute( + """ + INSERT INTO chunks (chunk_id, content, file_path, working_dir, metadata) + VALUES ($1, $2, $3, $4, $5) + ON CONFLICT (chunk_id) DO UPDATE SET + content = EXCLUDED.content, + file_path = EXCLUDED.file_path, + metadata = EXCLUDED.metadata + """, + chunk_id, + content, + file_path, + working_dir, + metadata or {}, + ) + except asyncpg.PostgresError as e: + logger.error(f"BM25 document indexing failed: {e}", extra={"chunk_id": chunk_id}) + raise + except Exception as e: + logger.error(f"Unexpected error in BM25 indexing: {e}") + raise + + async def create_index(self, working_dir: str) -> None: + """Create BM25 index for workspace. + + Note: The index is created automatically via the trigger + defined in the migration. This method is for explicit + re-indexing if needed. + + Args: + working_dir: Project/workspace directory + """ + pool = await self._get_pool() + + try: + async with pool.acquire() as conn: + # Index is created automatically via trigger + # This is just for explicit re-indexing + await conn.execute( + """ + UPDATE chunks + SET content_tsv = to_tsvector('english', content) + WHERE working_dir = $1 AND content_tsv IS NULL + """, + working_dir, + ) + except asyncpg.PostgresError as e: + logger.error(f"BM25 index creation failed: {e}", extra={"working_dir": working_dir}) + raise + except Exception as e: + logger.error(f"Unexpected error in BM25 index creation: {e}") + raise + + async def drop_index(self, working_dir: str) -> None: + """Drop BM25 index for workspace. 
+ + Args: + working_dir: Project/workspace directory + """ + pool = await self._get_pool() + + try: + async with pool.acquire() as conn: + # Clear tsvector for this workspace + await conn.execute( + "UPDATE chunks SET content_tsv = NULL WHERE working_dir = $1", + working_dir, + ) + except asyncpg.PostgresError as e: + logger.error(f"BM25 index drop failed: {e}", extra={"working_dir": working_dir}) + raise + except Exception as e: + logger.error(f"Unexpected error in BM25 index drop: {e}") + raise diff --git a/src/infrastructure/hybrid/rrf_combiner.py b/src/infrastructure/hybrid/rrf_combiner.py new file mode 100644 index 0000000..916c648 --- /dev/null +++ b/src/infrastructure/hybrid/rrf_combiner.py @@ -0,0 +1,134 @@ +"""Reciprocal Rank Fusion (RRF) combiner for hybrid search.""" + +from dataclasses import dataclass +from typing import Any + +from domain.ports.bm25_engine import BM25SearchResult + + +@dataclass +class HybridSearchResult: + """Combined result from BM25 and vector search.""" + + chunk_id: str + content: str + file_path: str + vector_score: float + bm25_score: float + combined_score: float + metadata: dict[str, Any] + bm25_rank: int | None = None + vector_rank: int | None = None + + +class RRFCombiner: + """Reciprocal Rank Fusion algorithm for combining search results. + + RRF formula: score = Σ (1 / (k + rank_i)) + where k is a constant (default 60) and rank_i is the rank in list i. + + This is a simple and effective method for combining ranked lists + that doesn't require score normalization. + """ + + def __init__(self, k: int = 60): + """Initialize RRF combiner. + + Args: + k: RRF constant (default 60, industry standard) + """ + self.k = k + + def combine( + self, + bm25_results: list[BM25SearchResult], + vector_results: dict, + top_k: int = 10, + ) -> list[HybridSearchResult]: + """Combine BM25 and vector search results using RRF. 
+ + Args: + bm25_results: Results from BM25 search (already ranked) + vector_results: Results from vector search (already ranked) + top_k: Number of results to return + + Returns: + Combined results sorted by combined_score descending + """ + scores: dict[str, dict[str, Any]] = {} + + # Process BM25 results + for rank, result in enumerate(bm25_results, start=1): + chunk_id = result.chunk_id + if chunk_id not in scores: + scores[chunk_id] = { + "content": result.content, + "file_path": result.file_path, + "metadata": result.metadata, + "bm25_score": 0.0, + "vector_score": 0.0, + "bm25_rank": rank, + "vector_rank": None, + } + else: + # If chunk already exists, keep the best rank (smallest number) + scores[chunk_id]["bm25_rank"] = min(scores[chunk_id]["bm25_rank"], rank) + + # Calculate BM25 RRF score + actual_rank = scores[chunk_id]["bm25_rank"] + scores[chunk_id]["bm25_score"] = 1.0 / (self.k + actual_rank) + + # Process vector results + chunks = vector_results.get("data", {}).get("chunks", []) + for rank, chunk in enumerate(chunks, start=1): + chunk_id = chunk.get("reference_id") or chunk.get("chunk_id") + if chunk_id is None: + continue + + if chunk_id not in scores: + scores[chunk_id] = { + "content": chunk.get("content", ""), + "file_path": chunk.get("file_path", ""), + "metadata": chunk.get("metadata", {}), + "bm25_score": 0.0, + "vector_score": 0.0, + "bm25_rank": None, + "vector_rank": rank, + } + else: + # If chunk already exists, keep the best rank (smallest number) + existing_rank = scores[chunk_id]["vector_rank"] + if existing_rank is not None: + scores[chunk_id]["vector_rank"] = min(existing_rank, rank) + else: + scores[chunk_id]["vector_rank"] = rank + + # Calculate vector RRF score + actual_rank = scores[chunk_id]["vector_rank"] + if actual_rank is not None: + scores[chunk_id]["vector_score"] = 1.0 / (self.k + actual_rank) + + # Calculate combined scores and create results + results = [ + HybridSearchResult( + chunk_id=chunk_id, + 
content=data["content"], + file_path=data["file_path"], + vector_score=data["vector_score"], + bm25_score=data["bm25_score"], + combined_score=data["bm25_score"] + data["vector_score"], + metadata=data["metadata"], + bm25_rank=data["bm25_rank"], + vector_rank=data["vector_rank"], + ) + for chunk_id, data in scores.items() + ] + + # Sort by combined score (descending) + results.sort(key=lambda x: x.combined_score, reverse=True) + + # Return top_k results (handle edge case) + if top_k < 1: + return [] + + return results[:top_k] diff --git a/tests/domain/ports/test_bm25_engine.py b/tests/domain/ports/test_bm25_engine.py new file mode 100644 index 0000000..a1e95bf --- /dev/null +++ b/tests/domain/ports/test_bm25_engine.py @@ -0,0 +1,37 @@ +"""Tests for BM25EnginePort interface.""" + + +import pytest + +from domain.ports.bm25_engine import BM25EnginePort, BM25SearchResult + + +def test_bm25_engine_port_is_abstract(): + """BM25EnginePort should be abstract and not instantiable.""" + with pytest.raises(TypeError, match="Can't instantiate abstract class"): + BM25EnginePort() + + +def test_bm25_engine_port_has_required_methods(): + """BM25EnginePort should define required abstract methods.""" + assert hasattr(BM25EnginePort, "search") + assert hasattr(BM25EnginePort, "index_document") + assert hasattr(BM25EnginePort, "create_index") + assert hasattr(BM25EnginePort, "drop_index") + + +def test_bm25_search_result_dataclass(): + """BM25SearchResult should be a dataclass with required fields.""" + result = BM25SearchResult( + chunk_id="123", + content="test content", + file_path="/test/doc.pdf", + score=0.95, + metadata={"page": 1}, + ) + + assert result.chunk_id == "123" + assert result.content == "test content" + assert result.file_path == "/test/doc.pdf" + assert result.score == 0.95 + assert result.metadata == {"page": 1} diff --git a/tests/infrastructure/bm25/test_pg_textsearch_adapter.py b/tests/infrastructure/bm25/test_pg_textsearch_adapter.py new file mode 100644 index 
0000000..f68cc82 --- /dev/null +++ b/tests/infrastructure/bm25/test_pg_textsearch_adapter.py @@ -0,0 +1,144 @@ +"""Tests for PostgresBM25Adapter implementation.""" + +from unittest.mock import AsyncMock + +import asyncpg +import pytest + +from infrastructure.bm25.pg_textsearch_adapter import PostgresBM25Adapter + + +@pytest.fixture +def mock_pool(): +    """Create mock asyncpg pool.""" +    pool = AsyncMock(spec=asyncpg.Pool) +    return pool + + +@pytest.fixture +def mock_connection(): +    """Create mock asyncpg connection.""" +    conn = AsyncMock(spec=asyncpg.Connection) +    return conn + + +@pytest.mark.asyncio +async def test_search_returns_results(mock_pool, mock_connection): +    """Search should return BM25SearchResult list.""" +    adapter = PostgresBM25Adapter(db_url="postgresql://test") +    adapter._pool = mock_pool + +    # Mock the pool.acquire context manager (async with -> __aenter__/__aexit__) +    mock_pool.acquire.return_value.__aenter__ = AsyncMock(return_value=mock_connection) +    mock_pool.acquire.return_value.__aexit__ = AsyncMock(return_value=None) + +    # Mock database response with negative scores (pg_textsearch returns negative) +    mock_connection.fetch.return_value = [ +        { +            "chunk_id": "123", +            "content": "PostgreSQL database system", +            "file_path": "/doc.pdf", +            "score": -2.345, +            "metadata": {"page": 1}, +        } +    ] + +    results = await adapter.search("PostgreSQL", "workspace1", top_k=5) + +    assert len(results) == 1 +    assert results[0].chunk_id == "123" +    assert results[0].content == "PostgreSQL database system" +    assert results[0].file_path == "/doc.pdf" +    assert results[0].score == 2.345  # Negative converted to positive +    assert results[0].metadata == {"page": 1} + + +@pytest.mark.asyncio +async def test_search_converts_negative_scores(mock_pool, mock_connection): +    """Search should convert negative BM25 scores to positive.""" +    adapter = PostgresBM25Adapter(db_url="postgresql://test") +    adapter._pool = mock_pool +    mock_pool.acquire.return_value.__aenter__ = AsyncMock(return_value=mock_connection) + 
mock_pool.acquire.return_value.__aexit__ = AsyncMock(return_value=None) + +    mock_connection.fetch.return_value = [ +        { +            "chunk_id": "1", +            "content": "test", +            "file_path": "/t.pdf", +            "score": -5.0, +            "metadata": {}, +        } +    ] + +    results = await adapter.search("test", "ws", top_k=10) + +    assert results[0].score == 5.0  # Negative converted to positive + + +@pytest.mark.asyncio +async def test_search_with_no_results(mock_pool, mock_connection): +    """Search should return empty list when no matches.""" +    adapter = PostgresBM25Adapter(db_url="postgresql://test") +    adapter._pool = mock_pool +    mock_pool.acquire.return_value.__aenter__ = AsyncMock(return_value=mock_connection) +    mock_pool.acquire.return_value.__aexit__ = AsyncMock(return_value=None) + +    mock_connection.fetch.return_value = [] + +    results = await adapter.search("nonexistent", "workspace1", top_k=10) + +    assert results == [] + + +@pytest.mark.asyncio +async def test_index_document_executes_correct_sql(mock_pool, mock_connection): +    """Index document should execute correct INSERT/UPDATE SQL.""" +    adapter = PostgresBM25Adapter(db_url="postgresql://test") +    adapter._pool = mock_pool +    mock_pool.acquire.return_value.__aenter__ = AsyncMock(return_value=mock_connection) +    mock_pool.acquire.return_value.__aexit__ = AsyncMock(return_value=None) + +    await adapter.index_document( +        chunk_id="123", +        content="test content", +        file_path="/doc.pdf", +        working_dir="workspace1", +        metadata={"page": 1}, +    ) + +    # Verify SQL was executed +    mock_connection.execute.assert_called_once() +    call_args = mock_connection.execute.call_args[0] +    sql = call_args[0] +    assert "INSERT INTO chunks" in sql or "UPDATE chunks" in sql + + +@pytest.mark.asyncio +async def test_create_index_executes_correct_sql(mock_pool, mock_connection): +    """Create index should execute correct SQL.""" +    adapter = PostgresBM25Adapter(db_url="postgresql://test") +    adapter._pool = mock_pool +    mock_pool.acquire.return_value.__aenter__ = 
AsyncMock(return_value=mock_connection) +    mock_pool.acquire.return_value.__aexit__ = AsyncMock(return_value=None) + +    await adapter.create_index("workspace1") + +    mock_connection.execute.assert_called() + + +@pytest.mark.asyncio +async def test_drop_index_clears_tsvector(mock_pool, mock_connection): +    """Drop index should clear tsvector for workspace.""" +    adapter = PostgresBM25Adapter(db_url="postgresql://test") +    adapter._pool = mock_pool +    mock_pool.acquire.return_value.__aenter__ = AsyncMock(return_value=mock_connection) +    mock_pool.acquire.return_value.__aexit__ = AsyncMock(return_value=None) + +    await adapter.drop_index("workspace1") + +    # Verify SQL was executed +    mock_connection.execute.assert_called_once() +    call_args = mock_connection.execute.call_args[0] +    assert "UPDATE chunks" in call_args[0] +    assert "content_tsv = NULL" in call_args[0] diff --git a/tests/infrastructure/hybrid/test_rrf_combiner.py b/tests/infrastructure/hybrid/test_rrf_combiner.py new file mode 100644 index 0000000..d450120 --- /dev/null +++ b/tests/infrastructure/hybrid/test_rrf_combiner.py @@ -0,0 +1,224 @@ +"""Tests for Reciprocal Rank Fusion combiner.""" + + +from domain.ports.bm25_engine import BM25SearchResult +from infrastructure.hybrid.rrf_combiner import RRFCombiner + + +def test_rrf_combiner_initialization(): +    """RRFCombiner should initialize with default k=60.""" +    combiner = RRFCombiner() +    assert combiner.k == 60 + + +def test_rrf_combiner_custom_k(): +    """RRFCombiner should accept custom k parameter.""" +    combiner = RRFCombiner(k=100) +    assert combiner.k == 100 + + +def test_combine_results_basic(): +    """RRF should combine ranks correctly.""" +    combiner = RRFCombiner(k=60) + +    # Mock BM25 results (already sorted by score) +    bm25_results = [ +        BM25SearchResult( +            chunk_id="1", +            content="BM25 result 1", +            file_path="/a.pdf", +            score=5.0, +            metadata={}, +        ), +        BM25SearchResult( +            chunk_id="2", +            content="BM25 result 2", +            file_path="/b.pdf", +            score=4.0, +            metadata={}, +        ), 
+ ] + + # Mock vector results + vector_results = { + "data": { + "chunks": [ + { + "chunk_id": "2", + "content": "Vector result 1", + "file_path": "/b.pdf", + }, + { + "chunk_id": "3", + "content": "Vector result 2", + "file_path": "/c.pdf", + }, + ] + } + } + + combined = combiner.combine(bm25_results, vector_results, top_k=10) + + # Check that results are combined + assert len(combined) == 3 # chunk_ids: 1, 2, 3 + + # Check that all results have combined scores + for result in combined: + assert result.combined_score > 0 + assert result.vector_score >= 0 + assert result.bm25_score >= 0 + + +def test_combine_results_respects_top_k(): + """RRF should respect top_k parameter.""" + combiner = RRFCombiner() + + bm25_results = [ + BM25SearchResult( + chunk_id=str(i), + content=f"BM25 {i}", + file_path="/a.pdf", + score=1.0, + metadata={}, + ) + for i in range(20) + ] + + vector_results = { + "data": { + "chunks": [ + {"chunk_id": str(i), "content": f"Vector {i}", "file_path": "/b.pdf"} + for i in range(20) + ] + } + } + + combined = combiner.combine(bm25_results, vector_results, top_k=5) + + assert len(combined) == 5 + + +def test_combine_results_sorted_by_score(): + """RRF results should be sorted by combined_score descending.""" + combiner = RRFCombiner() + + bm25_results = [ + BM25SearchResult( + chunk_id="1", content="BM25", file_path="/a.pdf", score=5.0, metadata={} + ), + BM25SearchResult( + chunk_id="2", content="BM25", file_path="/b.pdf", score=4.0, metadata={} + ), + ] + + vector_results = { + "data": { + "chunks": [ + {"chunk_id": "2", "content": "Vector", "file_path": "/b.pdf"}, + {"chunk_id": "1", "content": "Vector", "file_path": "/a.pdf"}, + ] + } + } + + combined = combiner.combine(bm25_results, vector_results, top_k=10) + + # Check sorted order + scores = [r.combined_score for r in combined] + assert scores == sorted(scores, reverse=True) + + +def test_rrf_formula(): + """RRF formula should be: 1/(k + rank).""" + combiner = RRFCombiner(k=60) + + # Item 
appears at rank 1 in BM25, rank 3 in vector + # Expected: 1/(60+1) + 1/(60+3) = 0.01639 + 0.01587 = 0.03226 + bm25_results = [ + BM25SearchResult( + chunk_id="1", + content="BM25 rank 1", + file_path="/a.pdf", + score=5.0, + metadata={}, + ), + ] + + vector_results = { + "data": { + "chunks": [ + { + "chunk_id": "other", + "content": "Vector rank 1", + "file_path": "/x.pdf", + }, + { + "chunk_id": "other2", + "content": "Vector rank 2", + "file_path": "/y.pdf", + }, + {"chunk_id": "1", "content": "Vector rank 3", "file_path": "/a.pdf"}, + ] + } + } + + combined = combiner.combine(bm25_results, vector_results, top_k=10) + + # Find our item + item = next(r for r in combined if r.chunk_id == "1") + + # Check RRF calculation + expected_bm25_score = 1 / (60 + 1) # rank 1 in BM25 + expected_vector_score = 1 / (60 + 3) # rank 3 in vector + + assert abs(item.bm25_score - expected_bm25_score) < 0.0001 + assert abs(item.vector_score - expected_vector_score) < 0.0001 + assert ( + abs(item.combined_score - (expected_bm25_score + expected_vector_score)) + < 0.0001 + ) + + +def test_combine_only_bm25_results(): + """RRF should handle case where only BM25 has results.""" + combiner = RRFCombiner() + + bm25_results = [ + BM25SearchResult( + chunk_id="1", + content="BM25 only", + file_path="/a.pdf", + score=5.0, + metadata={}, + ) + ] + + vector_results = {"data": {"chunks": []}} + + combined = combiner.combine(bm25_results, vector_results, top_k=10) + + assert len(combined) == 1 + assert combined[0].chunk_id == "1" + assert combined[0].bm25_score > 0 + assert combined[0].vector_score == 0 + + +def test_combine_only_vector_results(): + """RRF should handle case where only vector has results.""" + combiner = RRFCombiner() + + bm25_results = [] + + vector_results = { + "data": { + "chunks": [ + {"chunk_id": "1", "content": "Vector only", "file_path": "/a.pdf"} + ] + } + } + + combined = combiner.combine(bm25_results, vector_results, top_k=10) + + assert len(combined) == 1 + assert 
combined[0].chunk_id == "1" + assert combined[0].bm25_score == 0 + assert combined[0].vector_score > 0 diff --git a/tests/migrations/001_add_bm25_support.sql b/tests/migrations/001_add_bm25_support.sql new file mode 100644 index 0000000..63fb589 --- /dev/null +++ b/tests/migrations/001_add_bm25_support.sql @@ -0,0 +1,176 @@ +-- Migration: Add BM25 support via pg_textsearch +-- Version: 001 +-- Date: 2026-04-07 +-- Jira: BRIC-7 + +-- This migration adds BM25 full-text search capability using the pg_textsearch extension +-- It creates tsvector columns and BM25 indexes for chunk content + +-- ======================================== +-- Prerequisites: Install pg_textsearch extension +-- ======================================== + +-- Install pg_textsearch extension (requires superuser privileges) +-- Run this in PostgreSQL: +-- CREATE EXTENSION IF NOT EXISTS pg_textsearch; + +-- If pg_textsearch is not available, you can install it from: +-- https://github.com/timescale/pg_textsearch + +-- ======================================== +-- Step 1: Add tsvector column to chunks table +-- ======================================== + +-- Add tsvector column for BM25 indexing +ALTER TABLE chunks ADD COLUMN IF NOT EXISTS content_tsv tsvector; + +-- ======================================== +-- Step 2: Create GIN index for tsvector operations +-- ======================================== + +-- Create GIN index for tsvector column (used for @@ and @@@ operators) +CREATE INDEX IF NOT EXISTS idx_chunks_content_tsv +ON chunks USING GIN(content_tsv); + +-- ======================================== +-- Step 3: Create BM25 index using pg_textsearch +-- ======================================== + +-- Create BM25 index using pg_textsearch <@> operator +-- Note: This requires pg_textsearch extension to be installed +-- If extension is not available, this will be skipped +DO $$ +BEGIN + -- Check if pg_textsearch extension is available + IF EXISTS (SELECT 1 FROM pg_extension WHERE extname = 
'pg_textsearch') THEN + -- Create BM25 index for content column + -- Uses 'english' text search configuration + -- The <@> operator will be available for BM25 ranking + CREATE INDEX IF NOT EXISTS idx_chunks_bm25 + ON chunks USING bm25(content) + WITH (text_config='english'); + + RAISE NOTICE 'BM25 index created successfully'; + ELSE + RAISE NOTICE 'pg_textsearch extension not found. BM25 index creation skipped.'; + RAISE NOTICE 'Install pg_textsearch and run: CREATE EXTENSION pg_textsearch;'; + END IF; +END $$; + +-- ======================================== +-- Step 4: Create auto-update trigger for tsvector +-- ======================================== + +-- Create function to auto-update tsvector on INSERT/UPDATE +CREATE OR REPLACE FUNCTION update_chunks_tsv() +RETURNS TRIGGER AS $$ +BEGIN + -- Update tsvector column with English text configuration + -- This uses PostgreSQL's built-in to_tsvector function + NEW.content_tsv := to_tsvector('english', COALESCE(NEW.content, '')); + RETURN NEW; +END; +$$ LANGUAGE plpgsql; + +-- Create trigger for auto-indexing +DROP TRIGGER IF EXISTS trg_chunks_content_tsv ON chunks; +CREATE TRIGGER trg_chunks_content_tsv + BEFORE INSERT OR UPDATE ON chunks + FOR EACH ROW EXECUTE FUNCTION update_chunks_tsv(); + +-- ======================================== +-- Step 5: Backfill existing documents +-- ======================================== + +-- Backfill tsvector for existing documents (if any) +UPDATE chunks +SET content_tsv = to_tsvector('english', COALESCE(content, '')) +WHERE content_tsv IS NULL; + +-- ======================================== +-- Step 6: Verify migration +-- ======================================== + +-- Verify indexes were created +SELECT + schemaname, + tablename, + indexname, + indexdef +FROM pg_indexes +WHERE tablename = 'chunks' + AND (indexname LIKE '%tsv%' OR indexname LIKE '%bm25%') +ORDER BY indexname; + +-- Verify trigger exists +SELECT + trigger_name, + event_manipulation, + action_timing, + 
action_statement +FROM information_schema.triggers +WHERE event_object_table = 'chunks' + AND trigger_name = 'trg_chunks_content_tsv'; + +-- ======================================== +-- Step 7 (Optional): Performance stats +-- ======================================== + +-- Check BM25 index usage (run after some queries) +-- SELECT * FROM pg_stat_user_indexes WHERE indexrelid::regclass::text LIKE '%bm25%'; + +-- ======================================== +-- Rollback instructions (if needed) +-- ======================================== + +/* +-- To rollback this migration: +DROP TRIGGER IF EXISTS trg_chunks_content_tsv ON chunks; +DROP FUNCTION IF EXISTS update_chunks_tsv(); +DROP INDEX IF EXISTS idx_chunks_bm25; +DROP INDEX IF EXISTS idx_chunks_content_tsv; +ALTER TABLE chunks DROP COLUMN IF EXISTS content_tsv; +*/ + +-- ======================================== +-- Usage examples +-- ======================================== + +/* +-- Example 1: Basic BM25 search using pg_textsearch +SELECT + chunk_id, + content, + file_path, + content <@> websearch_to_tsquery('english', 'PostgreSQL database') AS score +FROM chunks +WHERE content_tsv @@ websearch_to_tsquery('english', 'PostgreSQL database') +ORDER BY score +LIMIT 10; + +-- Example 2: BM25 search with working_dir filter +SELECT + chunk_id, + content, + file_path, + content <@> websearch_to_tsquery('english', 'search terms') AS score +FROM chunks +WHERE working_dir = 'your-project' + AND content_tsv @@ websearch_to_tsquery('english', 'search terms') +ORDER BY score +LIMIT 10; + +-- Example 3: Traditional full-text search (without BM25 ranking) +SELECT + chunk_id, + content, + file_path, + ts_rank_cd(content_tsv, websearch_to_tsquery('english', 'search terms')) AS score +FROM chunks +WHERE content_tsv @@ websearch_to_tsquery('english', 'search terms') +ORDER BY score DESC +LIMIT 10; + +-- Note: The <@> operator returns negative scores (lower is better) +-- Convert to positive: ABS(content <@> query) +*/ \ No newline at 
end of file From d60e23396b4d35f7a49187009072a4b6cb7ee02e Mon Sep 17 00:00:00 2001 From: Kaiohz Date: Tue, 7 Apr 2026 20:22:27 +0200 Subject: [PATCH 02/17] refactor: simplify BM25 implementation and remove docling patch - Remove redundant hasattr checks in query_use_case - Simplify error handling in pg_textsearch_adapter (merge duplicate except blocks) - Remove docling TXT patch from Dockerfile (no longer needed) - Move BM25 migration SQL to migrations/ directory --- Dockerfile | 4 - TXT_FILE_TESTS_SUMMARY.md | 201 ------------------ .../001_add_bm25_support.sql | 0 patch_docling_txt.py | 77 ------- patch_raganything.py | 94 -------- src/application/use_cases/query_use_case.py | 6 +- .../bm25/pg_textsearch_adapter.py | 20 +- src/infrastructure/hybrid/rrf_combiner.py | 4 - 8 files changed, 6 insertions(+), 400 deletions(-) delete mode 100644 TXT_FILE_TESTS_SUMMARY.md rename {tests/migrations => migrations}/001_add_bm25_support.sql (100%) delete mode 100644 patch_docling_txt.py delete mode 100644 patch_raganything.py diff --git a/Dockerfile b/Dockerfile index d39e0fe..ded5beb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -34,10 +34,6 @@ COPY --from=builder /app/.venv /app/.venv COPY src/ /app/src/ COPY .env.example /app/.env -# Patch docling to fix TXT file format detection (PR #3161 incomplete) -COPY patch_docling_txt.py /tmp/patch_docling_txt.py -RUN /app/.venv/bin/python /tmp/patch_docling_txt.py && rm /tmp/patch_docling_txt.py - # Set Python path to include src directory ENV PYTHONPATH=/app/src:$PYTHONPATH ENV PATH="/app/.venv/bin:$PATH" diff --git a/TXT_FILE_TESTS_SUMMARY.md b/TXT_FILE_TESTS_SUMMARY.md deleted file mode 100644 index 024ccfe..0000000 --- a/TXT_FILE_TESTS_SUMMARY.md +++ /dev/null @@ -1,201 +0,0 @@ -# TXT File Support Tests - Summary - -## Overview -Comprehensive unit and integration tests for TXT file support in the mcp-raganything project. 
These tests verify that the system correctly handles various TXT file scenarios using the existing docling parser (version 2.83.0). - -**Key Point:** No code changes were needed - docling handles TXT files automatically via `parse_method="txt"`. - -## Tests Added - -### 1. Unit Tests in `test_lightrag_adapter.py` (5 new tests) - -#### `test_index_txt_file_success` -- **Purpose:** Verify successful .txt file indexing -- **What it tests:** - - Creates a temporary .txt file - - Mocks RAGAnything.process_document_complete - - Verifies FileIndexingResult has SUCCESS status - - Confirms parse_method="txt" is passed correctly - -#### `test_index_text_extension_success` -- **Purpose:** Test .text extension (alternative TXT format) -- **What it tests:** - - Creates a file with .text extension - - Verifies successful processing - - Confirms file_name is preserved correctly - -#### `test_index_empty_txt_file` -- **Purpose:** Edge case - empty text file -- **What it tests:** - - Creates an empty .txt file - - Verifies processing succeeds (edge case) - - Confirms process_document_complete is still called - -#### `test_index_large_txt_file` -- **Purpose:** Large file handling -- **What it tests:** - - Creates a ~500KB text file - - Verifies efficient processing - - Checks file path is passed correctly to docling - -#### `test_index_txt_with_various_encodings` -- **Purpose:** Encoding support -- **What it tests:** - - UTF-8 with Unicode characters (café, ñ, 北京) - - UTF-16 encoded files (你好) - - ASCII-only content - - Verifies all three are processed successfully - ---- - -### 2. 
Integration Tests in `test_index_file_use_case.py` (5 new tests) - -#### `test_index_txt_file_from_minio` -- **Purpose:** End-to-end test with mocked MinIO -- **What it tests:** - - Mocks storage.get_object for TXT content - - Verifies file download from MinIO - - Confirms file written to correct location - - Checks FileIndexingResult returned correctly - -#### `test_index_folder_with_txt_files` -- **Purpose:** Folder indexing including .txt files -- **What it tests:** - - Mocks folder with mixed file types (.txt, .pdf) - - Verifies all files are downloaded - - Checks FolderIndexingResult statistics - -#### `test_index_txt_file_with_nested_path` -- **Purpose:** Nested directory handling -- **What it tests:** - - .txt file in deep nested path - - Confirms directories are created - - Verifies correct file path handling - -#### `test_index_multiple_txt_files_sequentially` -- **Purpose:** Multiple file processing -- **What it tests:** - - Sequential indexing of multiple .txt files - - Chapter1.txt, Chapter2.txt, Chapter3.txt - - Verifies each file is processed independently - -#### `test_index_txt_with_special_characters_in_content` -- **Purpose:** Special character handling -- **What it tests:** - - Emojis 🎉 - - Quotes and newlines - - Tab characters - - Verifies content preservation through download and processing - ---- - -### 3. 
Integration Tests in `test_index_folder_use_case.py` (4 new tests) - -#### `test_index_folder_with_txt_files` -- **Purpose:** Folder with .txt files from MinIO -- **What it tests:** - - Downloads all .txt files from storage - - Verifies correct MinIO bucket/key usage - - Checks folder statistics - -#### `test_index_folder_with_file_extensions_filter_txt` -- **Purpose:** Filter by .txt extension -- **What it tests:** - - Uses file_extensions=[".txt"] filter - - Mocks storage with mixed files (.txt, .pdf, .xlsx) - - Verifies only .txt files are downloaded - - Confirms non-TXT files are skipped - -#### `test_index_folder_with_txt_and_other_extensions` -- **Purpose:** Mixed file extensions including .txt -- **What it tests:** - - file_extensions=[".txt", ".text"] - - Verifies both extensions are recognized - - Confirms .pdf, .xlsx are excluded - -#### `test_index_folder_recursive_with_txt_files` -- **Purpose:** Recursive folder indexing with .txt files -- **What it tests:** - - Non-recursive vs recursive mode - - Nested .txt files in subdirectories - - Verifies recursive flag is passed correctly - - Checks all nested files are processed - ---- - -## Test Patterns Followed - -### Real Implementation Pattern -```python -# ✅ Real adapters/services - for internal components -from infrastructure.rag.lightrag_adapter import LightRAGAdapter - -# ✅ Mocks - only for external boundaries -@patch("infrastructure.rag.lightrag_adapter.RAGAnything") -def test_index_txt_file_success(self, mock_rag_cls, ...): - adapter = LightRAGAdapter(llm_config, rag_config) - # Test with real adapter, mocked external RAGAnything -``` - -### Idempotent Tests -- Each test creates its own temporary files using `tmp_path` fixture -- Tests don't depend on existing data -- Tests are independent and isolated - -### AAA Pattern -```python -async def test_example(self, use_case, tmp_path): - # Arrange - txt_content = b"sample text" - use_case.storage.get_object.return_value = txt_content - - # Act - result = 
await use_case.execute(file_name="test.txt", ...) - - # Assert - assert result.status == IndexingStatus.SUCCESS -``` - ---- - -## Test Execution - -```bash -# Run all TXT-related tests -uv run python -m pytest tests/unit/ -v --no-cov -k "txt" - -# Run specific test file -uv run python -m pytest tests/unit/test_lightrag_adapter.py::TestLightRAGAdapter::test_index_txt_file_success -v - -# Run all tests -uv run python -m pytest tests/unit/ -v --no-cov -``` - ---- - -## Results -**Total Tests:** 73 (all passing) -- **New tests added:** 14 -- **Existing tests:** 59 (all still passing) - ---- - -## Key Insights - -1. **No Code Changes Needed:** Docling 2.83.0 handles TXT files automatically via `parse_method="txt"` - -2. **Proper Mocking:** Tests mock RAGAnything (external boundary) but use real LightRAGAdapter implementation - -3. **Encoding Support:** Tests verify UTF-8, UTF-16, and ASCII encoding handling - -4. **File System Integration:** Tests use `tmp_path` fixture for safe temporary file operations - -5. **Extension Handling:** Tests cover both `.txt` and `.text` extensions - -6. **Error Cases:** Tests include edge cases like empty files and large files - ---- - -## Conclusion - -All 14 new tests pass successfully alongside the existing 59 tests, providing comprehensive coverage for TXT file support without requiring any code changes to the production codebase. \ No newline at end of file diff --git a/tests/migrations/001_add_bm25_support.sql b/migrations/001_add_bm25_support.sql similarity index 100% rename from tests/migrations/001_add_bm25_support.sql rename to migrations/001_add_bm25_support.sql diff --git a/patch_docling_txt.py b/patch_docling_txt.py deleted file mode 100644 index 4874a99..0000000 --- a/patch_docling_txt.py +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env python3 -""" -Fix docling's _guess_format function to properly detect .txt files as MD format. 
- -Issue: docling's format detection returns None for .txt files instead of InputFormat.MD -Workaround: Patch the _guess_format function to map .txt/.text extensions to MD format. - -Based on PR #3161: https://github.com/docling-project/docling/pull/3161 -""" - -import sys - -def patch_docling(): - """Apply monkey-patch to docling's format detection.""" - print("📄 Patching docling format detection for TXT support...") - - # Import inside function to ensure packages are available - from docling.datamodel.document import InputFormat, FormatToExtensions - - # Add TXT extensions to MD format - txt_extensions = ['txt', 'text', 'qmd', 'rmd', 'Rmd'] - - # Get current MD extensions - current_md_extensions = FormatToExtensions.get(InputFormat.MD, []) - - # Add new extensions if not already present - for ext in txt_extensions: - if ext not in current_md_extensions: - current_md_extensions.append(ext) - - FormatToExtensions[InputFormat.MD] = current_md_extensions - - print(f"✅ Added TXT extensions to MD format: {txt_extensions}") - - # Now patch _guess_format - import docling.datamodel.document as doc_module - - # Get original function - if hasattr(doc_module, '_guess_format'): - original_guess = doc_module._guess_format - else: - print("⚠️ _guess_format not found, skipping monkey-patch") - return True - - def patched_guess_format(file_path, allowed_formats=None): - """Version of _guess_format that detects .txt files as MD.""" - from pathlib import Path - from docling.datamodel.document import InputFormat - - path = Path(file_path) - ext = path.suffix.lower().lstrip('.') - - # Map TXT extensions to MD format - if ext in ['txt', 'text', 'qmd', 'rmd', 'Rmd']: - result = InputFormat.MD - if allowed_formats is None or result in allowed_formats: - return result - - # Call original for other formats - return original_guess(file_path, allowed_formats) - - # Apply patch - doc_module._guess_format = patched_guess_format - - print("✅ Monkey-patched _guess_format to handle TXT files") - 
return True - -if __name__ == "__main__": - try: - patch_docling() - print("✅ Docling TXT support patch applied successfully!") - sys.exit(0) - except Exception as e: - print(f"❌ Failed to apply patch: {e}") - import traceback - traceback.print_exc() - sys.exit(1) \ No newline at end of file diff --git a/patch_raganything.py b/patch_raganything.py deleted file mode 100644 index 9960690..0000000 --- a/patch_raganything.py +++ /dev/null @@ -1,94 +0,0 @@ -#!/usr/bin/env python3 -""" -Patch RAGAnything parser.py to support TXT files. -This script patches the installed RAGAnything library to accept .txt, .text, and .md files. - -RAGAnything's DoclingParser rejects TXT files even though docling 2.84.0 supports them. -This patch routes TXT files to the existing office document parser which calls docling. -""" - -import sys -from pathlib import Path - -def patch_raganything(): - """Patch RAGAnything's DoclingParser to support TXT files.""" - - # Find raganything installation - try: - import raganything - parser_file = Path(raganything.__file__).parent / "parser.py" - except ImportError: - print("❌ RAGAnything not found") - return False - - if not parser_file.exists(): - print(f"❌ Parser file not found: {parser_file}") - return False - - print(f"📄 Patching: {parser_file}") - - with open(parser_file, 'r') as f: - content = f.read() - - # Check if already patched - if 'TXT files are supported by docling' in content: - print("✅ Already patched!") - return True - - # Find and patch the format check in DoclingParser.parse_document - old_code = """ elif ext in self.HTML_FORMATS: - return self.parse_html(file_path, output_dir, lang, **kwargs) - else: - raise ValueError( - f"Unsupported file format: {ext}. 
" - f"Docling only supports PDF files, Office formats ({', '.join(self.OFFICE_FORMATS)}) " - f"and HTML formats ({', '.join(self.HTML_FORMATS)})" - )""" - - new_code = """ elif ext in self.HTML_FORMATS: - return self.parse_html(file_path, output_dir, lang, **kwargs) - elif ext in {".txt", ".text", ".md"}: - # TXT files are supported by docling via MarkdownDocumentBackend (PR #3161) - # Docling 2.84.0+ handles these natively - treat as MD and route to docling - # Use parse_office_doc which calls DocumentConverter.convert() - return self.parse_office_doc(file_path, output_dir, lang, **kwargs) - else: - raise ValueError( - f"Unsupported file format: {ext}. " - f"Docling only supports PDF files, Office formats ({', '.join(self.OFFICE_FORMATS)}) " - f"and HTML formats ({', '.join(self.HTML_FORMATS)})" - )""" - - if old_code not in content: - print("❌ Patch pattern not found - RAGAnything may have changed") - print(" Searching for alternative pattern...") - - # Try alternative pattern - alt_old = "elif ext in self.HTML_FORMATS:" - alt_new = """elif ext in self.HTML_FORMATS: - return self.parse_html(file_path, output_dir, lang, **kwargs) - elif ext in {".txt", ".text", ".md"}: - # TXT files supported by docling via MarkdownDocumentBackend - return self.parse_office_doc(file_path, output_dir, lang, **kwargs) - elif ext in self.HTML_FORMATS:""" - - if alt_old in content: - print(" Found alternative pattern, applying patch...") - content = content.replace(alt_old, alt_new, 1) - else: - print("❌ Could not find any pattern to patch") - return False - else: - content = content.replace(old_code, new_code) - - # Write patched content - with open(parser_file, 'w') as f: - f.write(content) - - print("✅ RAGAnything patched successfully!") - print(" TXT files (.txt, .text, .md) are now supported") - return True - -if __name__ == "__main__": - success = patch_raganything() - sys.exit(0 if success else 1) \ No newline at end of file diff --git 
a/src/application/use_cases/query_use_case.py b/src/application/use_cases/query_use_case.py index cd76407..3467adf 100644 --- a/src/application/use_cases/query_use_case.py +++ b/src/application/use_cases/query_use_case.py @@ -149,10 +149,8 @@ def _format_hybrid_results(self, results: list) -> dict: "content": r.content, "file_path": r.file_path, "score": r.combined_score, - "bm25_rank": r.bm25_rank if hasattr(r, "bm25_rank") else None, - "vector_rank": r.vector_rank - if hasattr(r, "vector_rank") - else None, + "bm25_rank": r.bm25_rank, + "vector_rank": r.vector_rank, "combined_score": r.combined_score, "metadata": r.metadata, } diff --git a/src/infrastructure/bm25/pg_textsearch_adapter.py b/src/infrastructure/bm25/pg_textsearch_adapter.py index a5bc406..9685b0c 100644 --- a/src/infrastructure/bm25/pg_textsearch_adapter.py +++ b/src/infrastructure/bm25/pg_textsearch_adapter.py @@ -111,15 +111,12 @@ async def search( ) for row in results ] - except asyncpg.PostgresError as e: + except Exception as e: logger.error( f"BM25 search failed: {e}", extra={"query": query, "working_dir": working_dir}, ) raise - except Exception as e: - logger.error(f"Unexpected error in BM25 search: {e}") - raise async def index_document( self, @@ -160,11 +157,8 @@ async def index_document( working_dir, metadata or {}, ) - except asyncpg.PostgresError as e: - logger.error(f"BM25 document indexing failed: {e}", extra={"chunk_id": chunk_id}) - raise except Exception as e: - logger.error(f"Unexpected error in BM25 indexing: {e}") + logger.error(f"BM25 document indexing failed: {e}", extra={"chunk_id": chunk_id}) raise async def create_index(self, working_dir: str) -> None: @@ -191,11 +185,8 @@ async def create_index(self, working_dir: str) -> None: """, working_dir, ) - except asyncpg.PostgresError as e: - logger.error(f"BM25 index creation failed: {e}", extra={"working_dir": working_dir}) - raise except Exception as e: - logger.error(f"Unexpected error in BM25 index creation: {e}") + 
logger.error(f"BM25 index creation failed: {e}", extra={"working_dir": working_dir}) raise async def drop_index(self, working_dir: str) -> None: @@ -213,9 +204,6 @@ async def drop_index(self, working_dir: str) -> None: "UPDATE chunks SET content_tsv = NULL WHERE working_dir = $1", working_dir, ) - except asyncpg.PostgresError as e: - logger.error(f"BM25 index drop failed: {e}", extra={"working_dir": working_dir}) - raise except Exception as e: - logger.error(f"Unexpected error in BM25 index drop: {e}") + logger.error(f"BM25 index drop failed: {e}", extra={"working_dir": working_dir}) raise diff --git a/src/infrastructure/hybrid/rrf_combiner.py b/src/infrastructure/hybrid/rrf_combiner.py index 916c648..631ebe9 100644 --- a/src/infrastructure/hybrid/rrf_combiner.py +++ b/src/infrastructure/hybrid/rrf_combiner.py @@ -127,8 +127,4 @@ def combine( # Sort by combined score (descending) results.sort(key=lambda x: x.combined_score, reverse=True) - # Return top_k results (handle edge case) - if top_k < 1: - return [] - return results[:top_k] From 40c557410183e59a00c41c154f91f5c2728e7d67 Mon Sep 17 00:00:00 2001 From: Kaiohz Date: Tue, 7 Apr 2026 20:28:17 +0200 Subject: [PATCH 03/17] feat: Add Alembic migrations and lifespan for BM25 support (BRIC-7) - Add alembic>=1.13.0 dependency - Create src/alembic.ini configuration - Create src/alembic/env.py for async migrations (no models, raw SQL) - Create src/alembic/versions/001_add_bm25_support.py migration - Add lifespan to main.py for all transport modes (stdio/sse/streamable) - Run migrations at startup via asyncio.to_thread() - Close BM25 adapter pool on shutdown - Add close() method to BM25EnginePort interface - Remove migrations/001_add_bm25_support.sql (converted to Alembic) --- .gitignore | 3 +- migrations/001_add_bm25_support.sql | 176 - pyproject.toml | 1 + src/alembic.ini | 37 + src/alembic/env.py | 87 + src/alembic/versions/001_add_bm25_support.py | 77 + src/domain/ports/bm25_engine.py | 8 + src/main.py | 69 +- 
trivy-report-current.json | 4276 +----------------- 9 files changed, 412 insertions(+), 4322 deletions(-) delete mode 100644 migrations/001_add_bm25_support.sql create mode 100644 src/alembic.ini create mode 100644 src/alembic/env.py create mode 100644 src/alembic/versions/001_add_bm25_support.py diff --git a/.gitignore b/.gitignore index 4948b94..bff2863 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,5 @@ trivy-report.json trivy-report-fixed.json coverage.xml .ruff_cache -.pytest_cache \ No newline at end of file +.pytest_cache +trivy-report-current.json \ No newline at end of file diff --git a/migrations/001_add_bm25_support.sql b/migrations/001_add_bm25_support.sql deleted file mode 100644 index 63fb589..0000000 --- a/migrations/001_add_bm25_support.sql +++ /dev/null @@ -1,176 +0,0 @@ --- Migration: Add BM25 support via pg_textsearch --- Version: 001 --- Date: 2026-04-07 --- Jira: BRIC-7 - --- This migration adds BM25 full-text search capability using the pg_textsearch extension --- It creates tsvector columns and BM25 indexes for chunk content - --- ======================================== --- Prerequisites: Install pg_textsearch extension --- ======================================== - --- Install pg_textsearch extension (requires superuser privileges) --- Run this in PostgreSQL: --- CREATE EXTENSION IF NOT EXISTS pg_textsearch; - --- If pg_textsearch is not available, you can install it from: --- https://github.com/timescale/pg_textsearch - --- ======================================== --- Step 1: Add tsvector column to chunks table --- ======================================== - --- Add tsvector column for BM25 indexing -ALTER TABLE chunks ADD COLUMN IF NOT EXISTS content_tsv tsvector; - --- ======================================== --- Step 2: Create GIN index for tsvector operations --- ======================================== - --- Create GIN index for tsvector column (used for @@ and @@@ operators) -CREATE INDEX IF NOT EXISTS idx_chunks_content_tsv 
-ON chunks USING GIN(content_tsv); - --- ======================================== --- Step 3: Create BM25 index using pg_textsearch --- ======================================== - --- Create BM25 index using pg_textsearch <@> operator --- Note: This requires pg_textsearch extension to be installed --- If extension is not available, this will be skipped -DO $$ -BEGIN - -- Check if pg_textsearch extension is available - IF EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'pg_textsearch') THEN - -- Create BM25 index for content column - -- Uses 'english' text search configuration - -- The <@> operator will be available for BM25 ranking - CREATE INDEX IF NOT EXISTS idx_chunks_bm25 - ON chunks USING bm25(content) - WITH (text_config='english'); - - RAISE NOTICE 'BM25 index created successfully'; - ELSE - RAISE NOTICE 'pg_textsearch extension not found. BM25 index creation skipped.'; - RAISE NOTICE 'Install pg_textsearch and run: CREATE EXTENSION pg_textsearch;'; - END IF; -END $$; - --- ======================================== --- Step 4: Create auto-update trigger for tsvector --- ======================================== - --- Create function to auto-update tsvector on INSERT/UPDATE -CREATE OR REPLACE FUNCTION update_chunks_tsv() -RETURNS TRIGGER AS $$ -BEGIN - -- Update tsvector column with English text configuration - -- This uses PostgreSQL's built-in to_tsvector function - NEW.content_tsv := to_tsvector('english', COALESCE(NEW.content, '')); - RETURN NEW; -END; -$$ LANGUAGE plpgsql; - --- Create trigger for auto-indexing -DROP TRIGGER IF EXISTS trg_chunks_content_tsv ON chunks; -CREATE TRIGGER trg_chunks_content_tsv - BEFORE INSERT OR UPDATE ON chunks - FOR EACH ROW EXECUTE FUNCTION update_chunks_tsv(); - --- ======================================== --- Step 5: Backfill existing documents --- ======================================== - --- Backfill tsvector for existing documents (if any) -UPDATE chunks -SET content_tsv = to_tsvector('english', COALESCE(content, 
'')) -WHERE content_tsv IS NULL; - --- ======================================== --- Step 6: Verify migration --- ======================================== - --- Verify indexes were created -SELECT - schemaname, - tablename, - indexname, - indexdef -FROM pg_indexes -WHERE tablename = 'chunks' - AND (indexname LIKE '%tsv%' OR indexname LIKE '%bm25%') -ORDER BY indexname; - --- Verify trigger exists -SELECT - trigger_name, - event_manipulation, - action_timing, - action_statement -FROM information_schema.triggers -WHERE event_object_table = 'chunks' - AND trigger_name = 'trg_chunks_content_tsv'; - --- ======================================== --- Step 7 (Optional): Performance stats --- ======================================== - --- Check BM25 index usage (run after some queries) --- SELECT * FROM pg_stat_user_indexes WHERE indexrelid::regclass::text LIKE '%bm25%'; - --- ======================================== --- Rollback instructions (if needed) --- ======================================== - -/* --- To rollback this migration: -DROP TRIGGER IF EXISTS trg_chunks_content_tsv ON chunks; -DROP FUNCTION IF EXISTS update_chunks_tsv(); -DROP INDEX IF EXISTS idx_chunks_bm25; -DROP INDEX IF EXISTS idx_chunks_content_tsv; -ALTER TABLE chunks DROP COLUMN IF EXISTS content_tsv; -*/ - --- ======================================== --- Usage examples --- ======================================== - -/* --- Example 1: Basic BM25 search using pg_textsearch -SELECT - chunk_id, - content, - file_path, - content <@> websearch_to_tsquery('english', 'PostgreSQL database') AS score -FROM chunks -WHERE content_tsv @@ websearch_to_tsquery('english', 'PostgreSQL database') -ORDER BY score -LIMIT 10; - --- Example 2: BM25 search with working_dir filter -SELECT - chunk_id, - content, - file_path, - content <@> websearch_to_tsquery('english', 'search terms') AS score -FROM chunks -WHERE working_dir = 'your-project' - AND content_tsv @@ websearch_to_tsquery('english', 'search terms') -ORDER BY score 
-LIMIT 10; - --- Example 3: Traditional full-text search (without BM25 ranking) -SELECT - chunk_id, - content, - file_path, - ts_rank_cd(content_tsv, websearch_to_tsquery('english', 'search terms')) AS score -FROM chunks -WHERE content_tsv @@ websearch_to_tsquery('english', 'search terms') -ORDER BY score DESC -LIMIT 10; - --- Note: The <@> operator returns negative scores (lower is better) --- Convert to positive: ABS(content <@> query) -*/ \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 14e21c0..829ded9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,6 +6,7 @@ readme = "README.md" requires-python = ">=3.13" dependencies = [ "aiofiles>=24.1.0", + "alembic>=1.13.0", "asyncpg>=0.31.0", "docling>=2.84.0", "fastapi>=0.124.0", diff --git a/src/alembic.ini b/src/alembic.ini new file mode 100644 index 0000000..86d775d --- /dev/null +++ b/src/alembic.ini @@ -0,0 +1,37 @@ +[alembic] +script_location = %(here)s/alembic +prepend_sys_path = . + +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S \ No newline at end of file diff --git a/src/alembic/env.py b/src/alembic/env.py new file mode 100644 index 0000000..d93846b --- /dev/null +++ b/src/alembic/env.py @@ -0,0 +1,87 @@ +"""Alembic migration environment for asyncpg (no SQLAlchemy models).""" + +import asyncio +from logging.config import fileConfig + +from sqlalchemy import pool +from sqlalchemy.ext.asyncio import async_engine_from_config + +from alembic import context + +from config import DatabaseConfig + +config = context.config + +if 
config.config_file_name is not None: + fileConfig(config.config_file_name) + +# No models - using raw SQL migrations with op.execute() +target_metadata = None + + +def get_url() -> str: + """Build the database URL from application settings. + + Returns synchronous URL for Alembic (remove +asyncpg driver). + """ + db_config = DatabaseConfig() + # Convert async URL to sync URL for Alembic + # postgresql+asyncpg:// -> postgresql:// + url = db_config.DATABASE_URL + if "+asyncpg" in url: + url = url.replace("+asyncpg", "") + return url + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode. + + Configures the context with just a URL and not an Engine. + Calls to context.execute() emit the given string to the script output. + """ + url = get_url() + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) + + with context.begin_transaction(): + context.run_migrations() + + +def do_run_migrations(connection) -> None: + """Run migrations within a synchronous connection callback.""" + context.configure(connection=connection, target_metadata=target_metadata) + + with context.begin_transaction(): + context.run_migrations() + + +async def run_async_migrations() -> None: + """Run migrations in 'online' mode with an async engine.""" + configuration = config.get_section(config.config_ini_section, {}) + configuration["sqlalchemy.url"] = get_url() + + connectable = async_engine_from_config( + configuration, + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + async with connectable.connect() as connection: + await connection.run_sync(do_run_migrations) + + await connectable.dispose() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode.""" + asyncio.run(run_async_migrations()) + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() \ No newline at end of file diff --git 
a/src/alembic/versions/001_add_bm25_support.py b/src/alembic/versions/001_add_bm25_support.py new file mode 100644 index 0000000..934f3a3 --- /dev/null +++ b/src/alembic/versions/001_add_bm25_support.py @@ -0,0 +1,77 @@ +"""Add BM25 support via pg_textsearch + +Revision ID: 001 +Revises: +Create Date: 2026-04-07 + +""" +from collections.abc import Sequence + +from alembic import op + +revision: str = "001" +down_revision: str | None = None +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + """Add tsvector column, indexes, and trigger for BM25 search.""" + # Add tsvector column + op.execute("ALTER TABLE chunks ADD COLUMN IF NOT EXISTS content_tsv tsvector") + + # Create GIN index for tsvector + op.execute( + "CREATE INDEX IF NOT EXISTS idx_chunks_content_tsv ON chunks USING GIN(content_tsv)" + ) + + # Create BM25 index (conditional on pg_textsearch extension) + op.execute( + """ + DO $$ + BEGIN + IF EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'pg_textsearch') THEN + CREATE INDEX IF NOT EXISTS idx_chunks_bm25 + ON chunks USING bm25(content) + WITH (text_config='english'); + END IF; + END $$; + """ + ) + + # Create auto-update trigger function + op.execute( + """ + CREATE OR REPLACE FUNCTION update_chunks_tsv() + RETURNS TRIGGER AS $$ + BEGIN + NEW.content_tsv := to_tsvector('english', COALESCE(NEW.content, '')); + RETURN NEW; + END; + $$ LANGUAGE plpgsql; + """ + ) + + # Create trigger + op.execute("DROP TRIGGER IF EXISTS trg_chunks_content_tsv ON chunks") + op.execute( + """ + CREATE TRIGGER trg_chunks_content_tsv + BEFORE INSERT OR UPDATE ON chunks + FOR EACH ROW EXECUTE FUNCTION update_chunks_tsv(); + """ + ) + + # Backfill existing documents + op.execute( + "UPDATE chunks SET content_tsv = to_tsvector('english', COALESCE(content, '')) WHERE content_tsv IS NULL" + ) + + +def downgrade() -> None: + """Remove BM25 support.""" + op.execute("DROP TRIGGER IF EXISTS trg_chunks_content_tsv 
ON chunks") + op.execute("DROP FUNCTION IF EXISTS update_chunks_tsv()") + op.execute("DROP INDEX IF EXISTS idx_chunks_bm25") + op.execute("DROP INDEX IF EXISTS idx_chunks_content_tsv") + op.execute("ALTER TABLE chunks DROP COLUMN IF EXISTS content_tsv") \ No newline at end of file diff --git a/src/domain/ports/bm25_engine.py b/src/domain/ports/bm25_engine.py index 215092b..9e6bbb8 100644 --- a/src/domain/ports/bm25_engine.py +++ b/src/domain/ports/bm25_engine.py @@ -75,3 +75,11 @@ async def drop_index(self, working_dir: str) -> None: working_dir: Project/workspace directory """ pass + + @abstractmethod + async def close(self) -> None: + """Close connection pool and cleanup resources. + + Called during application shutdown. + """ + pass diff --git a/src/main.py b/src/main.py index 9813bd5..534f607 100644 --- a/src/main.py +++ b/src/main.py @@ -2,10 +2,15 @@ Simplified following hexagonal architecture pattern from pickpro_indexing_api. """ +import asyncio import logging import threading +from contextlib import asynccontextmanager +from pathlib import Path import uvicorn +from alembic import command +from alembic.config import Config from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware @@ -13,22 +18,75 @@ from application.api.indexing_routes import indexing_router from application.api.mcp_tools import mcp from application.api.query_routes import query_router -from dependencies import app_config +from dependencies import app_config, bm25_adapter logger = logging.getLogger(__name__) - MCP_PATH = "/mcp" + +def _run_alembic_upgrade() -> None: + """Run Alembic migrations to head synchronously. + + Designed to be called via asyncio.to_thread() during startup. 
+ """ + alembic_dir = Path(__file__).parent + cfg = Config(str(alembic_dir / "alembic.ini")) + cfg.set_main_option("script_location", str(alembic_dir / "alembic")) + command.upgrade(cfg, "head") + + +@asynccontextmanager +async def db_lifespan(_app: FastAPI): + """Database migrations and cleanup lifecycle. + + - Runs Alembic migrations on startup + - Closes BM25 connection pool on shutdown + """ + logger.info("Application startup initiated") + + # Run database migrations + try: + logger.info("Running database migrations...") + await asyncio.to_thread(_run_alembic_upgrade) + logger.info("Database migrations completed") + except Exception: + logger.exception("Failed to run migrations") + + yield + + # Cleanup on shutdown + logger.info("Application shutdown initiated") + if bm25_adapter is not None: + try: + await bm25_adapter.close() + logger.info("BM25 connection pool closed") + except Exception: + logger.exception("Failed to close BM25 adapter") + logger.info("Application shutdown complete") + + +# Create FastAPI app with appropriate lifespan if app_config.MCP_TRANSPORT == "streamable": mcp_app = mcp.http_app(path="/") + + @asynccontextmanager + async def combined_lifespan(app: FastAPI): + """Combine database lifecycle with MCP lifecycle for streamable transport.""" + async with db_lifespan(app): + async with mcp_app.lifespan(app): + yield + app = FastAPI( title="RAG Anything API", - lifespan=mcp_app.lifespan, + lifespan=combined_lifespan, ) app.mount(MCP_PATH, mcp_app) else: - app = FastAPI(title="RAG Anything API") + app = FastAPI( + title="RAG Anything API", + lifespan=db_lifespan, + ) app.add_middleware( CORSMiddleware, @@ -50,6 +108,7 @@ def run_fastapi(): + """Run FastAPI server with uvicorn.""" uvicorn.run( app, host=app_config.HOST, @@ -66,4 +125,4 @@ def run_fastapi(): api_thread.start() mcp.run(transport="stdio") else: - run_fastapi() + run_fastapi() \ No newline at end of file diff --git a/trivy-report-current.json b/trivy-report-current.json index 
64313ea..94ab5a9 100644 --- a/trivy-report-current.json +++ b/trivy-report-current.json @@ -1,4140 +1,136 @@ -{ - "SchemaVersion": 2, - "Trivy": { - "Version": "0.69.3" - }, - "ReportID": "019d67a8-03be-7bc3-96ad-233658ba2bdf", - "CreatedAt": "2026-04-07T13:15:57.502773+02:00", - "ArtifactID": "sha256:44de1297411e46aa253b4587f1a66eb89f732fd7bd66822db54093a6f7fc28ca", - "ArtifactName": ".", - "ArtifactType": "repository", - "Metadata": { - "RepoURL": "https://github.com/Kaiohz/mcp-raganything.git", - "Branch": "main", - "Commit": "ef601f6d3a8415d3a8329292e645d2e3d8c0e8a9", - "CommitMsg": "fix: remove conditional on SonarQube CI step, secrets are now configured (#5)", - "Author": "Yohan Gonçalves \u003cyohan.goncalves.pro@gmail.com\u003e", - "Committer": "GitHub \u003cnoreply@github.com\u003e" - }, - "Results": [ - { - "Target": "Python", - "Class": "lang-pkgs", - "Type": "python-pkg", - "Packages": [ - { - "Name": "my-test-package", - "Identifier": { - "PURL": "pkg:pypi/my-test-package@1.0", - "UID": "cf354c804175f1b1" - }, - "Version": "1.0", - "Licenses": [ - "UNKNOWN" - ], - "FilePath": ".venv/lib/python3.13/site-packages/pkg_resources/tests/data/my-test-package_zipped-egg/my_test_package-1.0-py3.7.egg", - "AnalyzedBy": "python-egg" - } - ] - }, - { - "Target": "uv.lock", - "Class": "lang-pkgs", - "Type": "uv", - "Packages": [ - { - "ID": "mcp-raganything@0.1.0", - "Name": "mcp-raganything", - "Identifier": { - "PURL": "pkg:pypi/mcp-raganything@0.1.0", - "UID": "27f0726934767eb7" - }, - "Version": "0.1.0", - "Relationship": "root", - "DependsOn": [ - "aiofiles@24.1.0", - "asyncpg@0.31.0", - "authlib@1.6.9", - "cryptography@46.0.6", - "docling@2.83.0", - "fastapi@0.135.3", - "fastmcp@3.2.0", - "httpx@0.28.1", - "lightrag-hku@1.4.12", - "mcp@1.26.0", - "minio@7.2.20", - "openai@2.30.0", - "pgvector@0.4.2", - "pydantic-settings@2.13.1", - "python-dotenv@1.2.2", - "python-multipart@0.0.22", - "raganything@1.2.10", - "sqlalchemy@2.0.48", - "uvicorn@0.42.0" - ], - 
"AnalyzedBy": "uv" - }, - { - "ID": "aiofiles@24.1.0", - "Name": "aiofiles", - "Identifier": { - "PURL": "pkg:pypi/aiofiles@24.1.0", - "UID": "9061ccdb2ece9fc" - }, - "Version": "24.1.0", - "Relationship": "direct", - "AnalyzedBy": "uv" - }, - { - "ID": "asyncpg@0.31.0", - "Name": "asyncpg", - "Identifier": { - "PURL": "pkg:pypi/asyncpg@0.31.0", - "UID": "df2d0b70f811bf4" - }, - "Version": "0.31.0", - "Relationship": "direct", - "AnalyzedBy": "uv" - }, - { - "ID": "authlib@1.6.9", - "Name": "authlib", - "Identifier": { - "PURL": "pkg:pypi/authlib@1.6.9", - "UID": "8174ba847ea6fde3" - }, - "Version": "1.6.9", - "Relationship": "direct", - "DependsOn": [ - "cryptography@46.0.6" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "cryptography@46.0.6", - "Name": "cryptography", - "Identifier": { - "PURL": "pkg:pypi/cryptography@46.0.6", - "UID": "f0b0a1ef450abe6f" - }, - "Version": "46.0.6", - "Relationship": "direct", - "DependsOn": [ - "cffi@2.0.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "docling@2.83.0", - "Name": "docling", - "Identifier": { - "PURL": "pkg:pypi/docling@2.83.0", - "UID": "8c0d76ec334dff22" - }, - "Version": "2.83.0", - "Relationship": "direct", - "DependsOn": [ - "accelerate@1.13.0", - "beautifulsoup4@4.14.3", - "certifi@2026.2.25", - "defusedxml@0.7.1", - "docling-core@2.71.0", - "docling-ibm-models@3.13.0", - "docling-parse@5.7.0", - "filetype@1.2.0", - "huggingface-hub@0.36.2", - "lxml@6.0.2", - "marko@2.2.2", - "ocrmac@1.0.1", - "openpyxl@3.1.5", - "pandas@2.3.3", - "pillow@12.2.0", - "pluggy@1.6.0", - "polyfactory@3.3.0", - "pydantic-settings@2.13.1", - "pydantic@2.12.5", - "pylatexenc@2.10", - "pypdfium2@4.30.0", - "python-docx@1.2.0", - "python-pptx@1.0.2", - "rapidocr@3.7.0", - "requests@2.33.1", - "rtree@1.4.1", - "scipy@1.17.1", - "torch@2.11.0", - "torchvision@0.26.0", - "tqdm@4.67.3", - "typer@0.21.2" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "fastapi@0.135.3", - "Name": "fastapi", - "Identifier": { - "PURL": "pkg:pypi/fastapi@0.135.3", - 
"UID": "b8f9db11f45aab2c" - }, - "Version": "0.135.3", - "Relationship": "direct", - "DependsOn": [ - "annotated-doc@0.0.4", - "pydantic@2.12.5", - "starlette@0.52.1", - "typing-extensions@4.15.0", - "typing-inspection@0.4.2" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "fastmcp@3.2.0", - "Name": "fastmcp", - "Identifier": { - "PURL": "pkg:pypi/fastmcp@3.2.0", - "UID": "135907ad364ec8d" - }, - "Version": "3.2.0", - "Relationship": "direct", - "DependsOn": [ - "authlib@1.6.9", - "cyclopts@4.10.1", - "exceptiongroup@1.3.1", - "httpx@0.28.1", - "jsonref@1.1.0", - "jsonschema-path@0.4.5", - "mcp@1.26.0", - "openapi-pydantic@0.5.1", - "opentelemetry-api@1.40.0", - "packaging@26.0", - "platformdirs@4.9.4", - "py-key-value-aio@0.4.4", - "pydantic@2.12.5", - "pyperclip@1.11.0", - "python-dotenv@1.2.2", - "pyyaml@6.0.3", - "rich@14.3.3", - "uncalled-for@0.2.0", - "uvicorn@0.42.0", - "watchfiles@1.1.1", - "websockets@16.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "httpx@0.28.1", - "Name": "httpx", - "Identifier": { - "PURL": "pkg:pypi/httpx@0.28.1", - "UID": "4ed3fa1d663e107" - }, - "Version": "0.28.1", - "Relationship": "direct", - "DependsOn": [ - "anyio@4.13.0", - "certifi@2026.2.25", - "httpcore@1.0.9", - "idna@3.11" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "lightrag-hku@1.4.12", - "Name": "lightrag-hku", - "Identifier": { - "PURL": "pkg:pypi/lightrag-hku@1.4.12", - "UID": "70958c7c95dc56d7" - }, - "Version": "1.4.12", - "Relationship": "direct", - "DependsOn": [ - "aiofiles@24.1.0", - "aiohttp@3.13.5", - "ascii-colors@0.11.21", - "bcrypt@5.0.0", - "configparser@7.2.0", - "distro@1.9.0", - "fastapi@0.135.3", - "google-api-core@2.30.1", - "google-genai@1.70.0", - "gunicorn@25.3.0", - "httpcore@1.0.9", - "httpx@0.28.1", - "jiter@0.13.0", - "json-repair@0.58.7", - "nano-vectordb@0.0.4.3", - "networkx@3.6.1", - "numpy@2.4.4", - "openai@2.30.0", - "openpyxl@3.1.5", - "packaging@26.0", - "pandas@2.3.3", - "pipmaster@1.1.2", - "psutil@7.2.2", - "pycryptodome@3.23.0", - 
"pydantic@2.12.5", - "pyjwt@2.12.1", - "pypdf@6.9.2", - "pypinyin@0.55.0", - "python-docx@1.2.0", - "python-dotenv@1.2.2", - "python-jose@3.5.0", - "python-multipart@0.0.22", - "python-pptx@1.0.2", - "pytz@2026.1.post1", - "setuptools@81.0.0", - "tenacity@9.1.4", - "tiktoken@0.12.0", - "uvicorn@0.42.0", - "xlsxwriter@3.2.9" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "mcp@1.26.0", - "Name": "mcp", - "Identifier": { - "PURL": "pkg:pypi/mcp@1.26.0", - "UID": "b7e186261ef5e5a2" - }, - "Version": "1.26.0", - "Relationship": "direct", - "DependsOn": [ - "anyio@4.13.0", - "httpx-sse@0.4.3", - "httpx@0.28.1", - "jsonschema@4.26.0", - "pydantic-settings@2.13.1", - "pydantic@2.12.5", - "pyjwt@2.12.1", - "python-multipart@0.0.22", - "pywin32@311", - "sse-starlette@3.3.4", - "starlette@0.52.1", - "typing-extensions@4.15.0", - "typing-inspection@0.4.2", - "uvicorn@0.42.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "minio@7.2.20", - "Name": "minio", - "Identifier": { - "PURL": "pkg:pypi/minio@7.2.20", - "UID": "3122577c5b65260c" - }, - "Version": "7.2.20", - "Relationship": "direct", - "DependsOn": [ - "argon2-cffi@25.1.0", - "certifi@2026.2.25", - "pycryptodome@3.23.0", - "typing-extensions@4.15.0", - "urllib3@2.6.3" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "openai@2.30.0", - "Name": "openai", - "Identifier": { - "PURL": "pkg:pypi/openai@2.30.0", - "UID": "933f799c5d31b203" - }, - "Version": "2.30.0", - "Relationship": "direct", - "DependsOn": [ - "anyio@4.13.0", - "distro@1.9.0", - "httpx@0.28.1", - "jiter@0.13.0", - "pydantic@2.12.5", - "sniffio@1.3.1", - "tqdm@4.67.3", - "typing-extensions@4.15.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "pgvector@0.4.2", - "Name": "pgvector", - "Identifier": { - "PURL": "pkg:pypi/pgvector@0.4.2", - "UID": "d5952725effd5422" - }, - "Version": "0.4.2", - "Relationship": "direct", - "DependsOn": [ - "numpy@2.4.4" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "pydantic-settings@2.13.1", - "Name": "pydantic-settings", - "Identifier": { - 
"PURL": "pkg:pypi/pydantic-settings@2.13.1", - "UID": "c68fc34a54e862eb" - }, - "Version": "2.13.1", - "Relationship": "direct", - "DependsOn": [ - "pydantic@2.12.5", - "python-dotenv@1.2.2", - "typing-inspection@0.4.2" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "python-dotenv@1.2.2", - "Name": "python-dotenv", - "Identifier": { - "PURL": "pkg:pypi/python-dotenv@1.2.2", - "UID": "a6b7b624c50e71e1" - }, - "Version": "1.2.2", - "Relationship": "direct", - "AnalyzedBy": "uv" - }, - { - "ID": "python-multipart@0.0.22", - "Name": "python-multipart", - "Identifier": { - "PURL": "pkg:pypi/python-multipart@0.0.22", - "UID": "bab34132ffe25a1c" - }, - "Version": "0.0.22", - "Relationship": "direct", - "AnalyzedBy": "uv" - }, - { - "ID": "raganything@1.2.10", - "Name": "raganything", - "Identifier": { - "PURL": "pkg:pypi/raganything@1.2.10", - "UID": "1ab5e428ac85ffcf" - }, - "Version": "1.2.10", - "Relationship": "direct", - "DependsOn": [ - "huggingface-hub@0.36.2", - "lightrag-hku@1.4.12", - "mineru@3.0.7", - "tqdm@4.67.3" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "sqlalchemy@2.0.48", - "Name": "sqlalchemy", - "Identifier": { - "PURL": "pkg:pypi/sqlalchemy@2.0.48", - "UID": "b8cda2dddee14f15" - }, - "Version": "2.0.48", - "Relationship": "direct", - "DependsOn": [ - "greenlet@3.3.2", - "typing-extensions@4.15.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "uvicorn@0.42.0", - "Name": "uvicorn", - "Identifier": { - "PURL": "pkg:pypi/uvicorn@0.42.0", - "UID": "583a24def9762550" - }, - "Version": "0.42.0", - "Relationship": "direct", - "DependsOn": [ - "click@8.3.1", - "h11@0.16.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "accelerate@1.13.0", - "Name": "accelerate", - "Identifier": { - "PURL": "pkg:pypi/accelerate@1.13.0", - "UID": "30504da4809e4977" - }, - "Version": "1.13.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "huggingface-hub@0.36.2", - "numpy@2.4.4", - "packaging@26.0", - "psutil@7.2.2", - "pyyaml@6.0.3", - "safetensors@0.7.0", - 
"torch@2.11.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "aiofile@3.9.0", - "Name": "aiofile", - "Identifier": { - "PURL": "pkg:pypi/aiofile@3.9.0", - "UID": "bad121722fce55ed" - }, - "Version": "3.9.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "caio@0.9.25" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "aiohappyeyeballs@2.6.1", - "Name": "aiohappyeyeballs", - "Identifier": { - "PURL": "pkg:pypi/aiohappyeyeballs@2.6.1", - "UID": "3c1d60f4dcdea8c1" - }, - "Version": "2.6.1", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "aiohttp@3.13.5", - "Name": "aiohttp", - "Identifier": { - "PURL": "pkg:pypi/aiohttp@3.13.5", - "UID": "c48520515f2c120" - }, - "Version": "3.13.5", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "aiohappyeyeballs@2.6.1", - "aiosignal@1.4.0", - "attrs@26.1.0", - "frozenlist@1.8.0", - "multidict@6.7.1", - "propcache@0.4.1", - "yarl@1.23.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "aiosignal@1.4.0", - "Name": "aiosignal", - "Identifier": { - "PURL": "pkg:pypi/aiosignal@1.4.0", - "UID": "3951a8e4c265af22" - }, - "Version": "1.4.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "frozenlist@1.8.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "albucore@0.0.24", - "Name": "albucore", - "Identifier": { - "PURL": "pkg:pypi/albucore@0.0.24", - "UID": "24507b4c0f5dbecf" - }, - "Version": "0.0.24", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "numpy@2.4.4", - "opencv-python-headless@4.13.0.92", - "simsimd@6.5.16", - "stringzilla@4.6.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "albumentations@2.0.8", - "Name": "albumentations", - "Identifier": { - "PURL": "pkg:pypi/albumentations@2.0.8", - "UID": "d08ec9cf236dc4f0" - }, - "Version": "2.0.8", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "albucore@0.0.24", - "numpy@2.4.4", - "opencv-python-headless@4.13.0.92", - "pydantic@2.12.5", - "pyyaml@6.0.3", - 
"scipy@1.17.1" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "annotated-doc@0.0.4", - "Name": "annotated-doc", - "Identifier": { - "PURL": "pkg:pypi/annotated-doc@0.0.4", - "UID": "20932edb8023b337" - }, - "Version": "0.0.4", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "annotated-types@0.7.0", - "Name": "annotated-types", - "Identifier": { - "PURL": "pkg:pypi/annotated-types@0.7.0", - "UID": "a4a7cc319376fb9e" - }, - "Version": "0.7.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "antlr4-python3-runtime@4.9.3", - "Name": "antlr4-python3-runtime", - "Identifier": { - "PURL": "pkg:pypi/antlr4-python3-runtime@4.9.3", - "UID": "e7dcfed38b17f332" - }, - "Version": "4.9.3", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "anyio@4.13.0", - "Name": "anyio", - "Identifier": { - "PURL": "pkg:pypi/anyio@4.13.0", - "UID": "261d4f4bef650b14" - }, - "Version": "4.13.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "idna@3.11" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "argon2-cffi@25.1.0", - "Name": "argon2-cffi", - "Identifier": { - "PURL": "pkg:pypi/argon2-cffi@25.1.0", - "UID": "364ffdd713d7e70a" - }, - "Version": "25.1.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "argon2-cffi-bindings@25.1.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "argon2-cffi-bindings@25.1.0", - "Name": "argon2-cffi-bindings", - "Identifier": { - "PURL": "pkg:pypi/argon2-cffi-bindings@25.1.0", - "UID": "bad715beb0c48d2a" - }, - "Version": "25.1.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "cffi@2.0.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "ascii-colors@0.11.21", - "Name": "ascii-colors", - "Identifier": { - "PURL": "pkg:pypi/ascii-colors@0.11.21", - "UID": "c89a5bfe0aadca9c" - }, - "Version": "0.11.21", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "wcwidth@0.6.0" - ], - 
"AnalyzedBy": "uv" - }, - { - "ID": "attrs@26.1.0", - "Name": "attrs", - "Identifier": { - "PURL": "pkg:pypi/attrs@26.1.0", - "UID": "2910de77ff6d92f1" - }, - "Version": "26.1.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "audioop-lts@0.2.2", - "Name": "audioop-lts", - "Identifier": { - "PURL": "pkg:pypi/audioop-lts@0.2.2", - "UID": "3859fef66155a04c" - }, - "Version": "0.2.2", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "av@17.0.0", - "Name": "av", - "Identifier": { - "PURL": "pkg:pypi/av@17.0.0", - "UID": "f772d68ee78da076" - }, - "Version": "17.0.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "bcrypt@5.0.0", - "Name": "bcrypt", - "Identifier": { - "PURL": "pkg:pypi/bcrypt@5.0.0", - "UID": "69f65a02a525856e" - }, - "Version": "5.0.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "beartype@0.22.9", - "Name": "beartype", - "Identifier": { - "PURL": "pkg:pypi/beartype@0.22.9", - "UID": "a52c0bee228ec41a" - }, - "Version": "0.22.9", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "beautifulsoup4@4.14.3", - "Name": "beautifulsoup4", - "Identifier": { - "PURL": "pkg:pypi/beautifulsoup4@4.14.3", - "UID": "2e0d2b86c7409e27" - }, - "Version": "4.14.3", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "soupsieve@2.8.3", - "typing-extensions@4.15.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "boto3@1.42.80", - "Name": "boto3", - "Identifier": { - "PURL": "pkg:pypi/boto3@1.42.80", - "UID": "a90569b95c350e3f" - }, - "Version": "1.42.80", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "botocore@1.42.80", - "jmespath@1.1.0", - "s3transfer@0.16.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "botocore@1.42.80", - "Name": "botocore", - "Identifier": { - "PURL": "pkg:pypi/botocore@1.42.80", - "UID": "2856efebacb82222" - }, - 
"Version": "1.42.80", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "jmespath@1.1.0", - "python-dateutil@2.9.0.post0", - "urllib3@2.6.3" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "brotli@1.2.0", - "Name": "brotli", - "Identifier": { - "PURL": "pkg:pypi/brotli@1.2.0", - "UID": "acd9f05e8c7c4b74" - }, - "Version": "1.2.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "cachetools@7.0.5", - "Name": "cachetools", - "Identifier": { - "PURL": "pkg:pypi/cachetools@7.0.5", - "UID": "7b1250316c70f311" - }, - "Version": "7.0.5", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "caio@0.9.25", - "Name": "caio", - "Identifier": { - "PURL": "pkg:pypi/caio@0.9.25", - "UID": "babded11c91019bc" - }, - "Version": "0.9.25", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "certifi@2026.2.25", - "Name": "certifi", - "Identifier": { - "PURL": "pkg:pypi/certifi@2026.2.25", - "UID": "3bc2442a8d895e49" - }, - "Version": "2026.2.25", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "cffi@2.0.0", - "Name": "cffi", - "Identifier": { - "PURL": "pkg:pypi/cffi@2.0.0", - "UID": "a78e151c4e3c8b65" - }, - "Version": "2.0.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "pycparser@3.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "charset-normalizer@3.4.6", - "Name": "charset-normalizer", - "Identifier": { - "PURL": "pkg:pypi/charset-normalizer@3.4.6", - "UID": "a0a57b3126b16243" - }, - "Version": "3.4.6", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "click@8.3.1", - "Name": "click", - "Identifier": { - "PURL": "pkg:pypi/click@8.3.1", - "UID": "5647378580693589" - }, - "Version": "8.3.1", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "colorama@0.4.6" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "cobble@0.1.4", - "Name": "cobble", - 
"Identifier": { - "PURL": "pkg:pypi/cobble@0.1.4", - "UID": "a0819410201f4cf2" - }, - "Version": "0.1.4", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "colorama@0.4.6", - "Name": "colorama", - "Identifier": { - "PURL": "pkg:pypi/colorama@0.4.6", - "UID": "a9b0f41cf3bb79cc" - }, - "Version": "0.4.6", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "colorlog@6.10.1", - "Name": "colorlog", - "Identifier": { - "PURL": "pkg:pypi/colorlog@6.10.1", - "UID": "325b80311f99d1e4" - }, - "Version": "6.10.1", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "colorama@0.4.6" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "configparser@7.2.0", - "Name": "configparser", - "Identifier": { - "PURL": "pkg:pypi/configparser@7.2.0", - "UID": "9f7029d3ea261428" - }, - "Version": "7.2.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "cuda-bindings@13.2.0", - "Name": "cuda-bindings", - "Identifier": { - "PURL": "pkg:pypi/cuda-bindings@13.2.0", - "UID": "3adb4171d89542b6" - }, - "Version": "13.2.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "cuda-pathfinder@1.5.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "cuda-pathfinder@1.5.0", - "Name": "cuda-pathfinder", - "Identifier": { - "PURL": "pkg:pypi/cuda-pathfinder@1.5.0", - "UID": "2db3516b479ef704" - }, - "Version": "1.5.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "cuda-toolkit@13.0.2", - "Name": "cuda-toolkit", - "Identifier": { - "PURL": "pkg:pypi/cuda-toolkit@13.0.2", - "UID": "82542097a9427c83" - }, - "Version": "13.0.2", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "nvidia-cublas@13.1.0.3", - "nvidia-cuda-cupti@13.0.85", - "nvidia-cuda-nvrtc@13.0.88", - "nvidia-cuda-runtime@13.0.96", - "nvidia-cufft@12.0.0.61", - "nvidia-cufile@1.15.1.6", - "nvidia-curand@10.4.0.35", - "nvidia-cusolver@12.0.4.66", - 
"nvidia-cusparse@12.6.3.3", - "nvidia-nvjitlink@13.0.88", - "nvidia-nvtx@13.0.85" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "cyclopts@4.10.1", - "Name": "cyclopts", - "Identifier": { - "PURL": "pkg:pypi/cyclopts@4.10.1", - "UID": "517b2dcd3eeab46c" - }, - "Version": "4.10.1", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "attrs@26.1.0", - "docstring-parser@0.17.0", - "rich-rst@1.3.2", - "rich@14.3.3" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "defusedxml@0.7.1", - "Name": "defusedxml", - "Identifier": { - "PURL": "pkg:pypi/defusedxml@0.7.1", - "UID": "73fca51180147f95" - }, - "Version": "0.7.1", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "dill@0.4.1", - "Name": "dill", - "Identifier": { - "PURL": "pkg:pypi/dill@0.4.1", - "UID": "e6a0b33f7091a832" - }, - "Version": "0.4.1", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "distro@1.9.0", - "Name": "distro", - "Identifier": { - "PURL": "pkg:pypi/distro@1.9.0", - "UID": "22f853bfd9b90f72" - }, - "Version": "1.9.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "dnspython@2.8.0", - "Name": "dnspython", - "Identifier": { - "PURL": "pkg:pypi/dnspython@2.8.0", - "UID": "23e6fd2183b6e241" - }, - "Version": "2.8.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "docling-core@2.71.0", - "Name": "docling-core", - "Identifier": { - "PURL": "pkg:pypi/docling-core@2.71.0", - "UID": "39b174a8c21eeee2" - }, - "Version": "2.71.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "defusedxml@0.7.1", - "jsonref@1.1.0", - "jsonschema@4.26.0", - "latex2mathml@3.79.0", - "pandas@2.3.3", - "pillow@12.2.0", - "pydantic@2.12.5", - "pyyaml@6.0.3", - "semchunk@3.2.5", - "tabulate@0.10.0", - "transformers@4.57.6", - "tree-sitter-c@0.24.1", - "tree-sitter-javascript@0.25.0", - "tree-sitter-python@0.25.0", - 
"tree-sitter-typescript@0.23.2", - "tree-sitter@0.25.2", - "typer@0.21.2", - "typing-extensions@4.15.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "docling-ibm-models@3.13.0", - "Name": "docling-ibm-models", - "Identifier": { - "PURL": "pkg:pypi/docling-ibm-models@3.13.0", - "UID": "3c596ae441987fc8" - }, - "Version": "3.13.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "accelerate@1.13.0", - "docling-core@2.71.0", - "huggingface-hub@0.36.2", - "jsonlines@4.0.0", - "numpy@2.4.4", - "pillow@12.2.0", - "pydantic@2.12.5", - "rtree@1.4.1", - "safetensors@0.7.0", - "torch@2.11.0", - "torchvision@0.26.0", - "tqdm@4.67.3", - "transformers@4.57.6" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "docling-parse@5.7.0", - "Name": "docling-parse", - "Identifier": { - "PURL": "pkg:pypi/docling-parse@5.7.0", - "UID": "7daef55485f7124c" - }, - "Version": "5.7.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "docling-core@2.71.0", - "pillow@12.2.0", - "pydantic@2.12.5", - "pywin32@311", - "tabulate@0.10.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "docstring-parser@0.17.0", - "Name": "docstring-parser", - "Identifier": { - "PURL": "pkg:pypi/docstring-parser@0.17.0", - "UID": "a8beca8d13b72ae1" - }, - "Version": "0.17.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "docutils@0.22.4", - "Name": "docutils", - "Identifier": { - "PURL": "pkg:pypi/docutils@0.22.4", - "UID": "e578a9b8184e1079" - }, - "Version": "0.22.4", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "ecdsa@0.19.2", - "Name": "ecdsa", - "Identifier": { - "PURL": "pkg:pypi/ecdsa@0.19.2", - "UID": "8468e73fc68cde5b" - }, - "Version": "0.19.2", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "six@1.17.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "email-validator@2.3.0", - "Name": "email-validator", - "Identifier": { - "PURL": "pkg:pypi/email-validator@2.3.0", - "UID": 
"3c29a86beb27858c" - }, - "Version": "2.3.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "dnspython@2.8.0", - "idna@3.11" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "et-xmlfile@2.0.0", - "Name": "et-xmlfile", - "Identifier": { - "PURL": "pkg:pypi/et-xmlfile@2.0.0", - "UID": "a12d6975f70faa69" - }, - "Version": "2.0.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "exceptiongroup@1.3.1", - "Name": "exceptiongroup", - "Identifier": { - "PURL": "pkg:pypi/exceptiongroup@1.3.1", - "UID": "f2f3b1049fe359fe" - }, - "Version": "1.3.1", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "faker@40.12.0", - "Name": "faker", - "Identifier": { - "PURL": "pkg:pypi/faker@40.12.0", - "UID": "ddcf7eb253fee2f6" - }, - "Version": "40.12.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "tzdata@2025.3" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "fast-langdetect@0.2.5", - "Name": "fast-langdetect", - "Identifier": { - "PURL": "pkg:pypi/fast-langdetect@0.2.5", - "UID": "e3ba98a44c0427af" - }, - "Version": "0.2.5", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "fasttext-predict@0.9.2.4", - "requests@2.33.1", - "robust-downloader@0.0.2" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "fasttext-predict@0.9.2.4", - "Name": "fasttext-predict", - "Identifier": { - "PURL": "pkg:pypi/fasttext-predict@0.9.2.4", - "UID": "335689f0e4abc9cf" - }, - "Version": "0.9.2.4", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "ffmpy@1.0.0", - "Name": "ffmpy", - "Identifier": { - "PURL": "pkg:pypi/ffmpy@1.0.0", - "UID": "413c11ff2c71de54" - }, - "Version": "1.0.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "filelock@3.25.2", - "Name": "filelock", - "Identifier": { - "PURL": "pkg:pypi/filelock@3.25.2", - "UID": "9904740d331d2f0a" - }, - "Version": "3.25.2", - "Indirect": true, - 
"Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "filetype@1.2.0", - "Name": "filetype", - "Identifier": { - "PURL": "pkg:pypi/filetype@1.2.0", - "UID": "d55d56528a8372df" - }, - "Version": "1.2.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "flatbuffers@25.12.19", - "Name": "flatbuffers", - "Identifier": { - "PURL": "pkg:pypi/flatbuffers@25.12.19", - "UID": "d81e352c767f10c1" - }, - "Version": "25.12.19", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "frozenlist@1.8.0", - "Name": "frozenlist", - "Identifier": { - "PURL": "pkg:pypi/frozenlist@1.8.0", - "UID": "6e474ed64970b17e" - }, - "Version": "1.8.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "fsspec@2026.3.0", - "Name": "fsspec", - "Identifier": { - "PURL": "pkg:pypi/fsspec@2026.3.0", - "UID": "573e2b52e1962ab0" - }, - "Version": "2026.3.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "ftfy@6.3.1", - "Name": "ftfy", - "Identifier": { - "PURL": "pkg:pypi/ftfy@6.3.1", - "UID": "dd0c2357002fc433" - }, - "Version": "6.3.1", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "wcwidth@0.6.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "google-api-core@2.30.1", - "Name": "google-api-core", - "Identifier": { - "PURL": "pkg:pypi/google-api-core@2.30.1", - "UID": "8c010239289bdc2b" - }, - "Version": "2.30.1", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "google-auth@2.49.1", - "googleapis-common-protos@1.73.1", - "proto-plus@1.27.2", - "protobuf@6.33.6", - "requests@2.33.1" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "google-auth@2.49.1", - "Name": "google-auth", - "Identifier": { - "PURL": "pkg:pypi/google-auth@2.49.1", - "UID": "47ebaf9e33991f5d" - }, - "Version": "2.49.1", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "cryptography@46.0.6", - "pyasn1-modules@0.4.2", 
- "requests@2.33.1" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "google-genai@1.70.0", - "Name": "google-genai", - "Identifier": { - "PURL": "pkg:pypi/google-genai@1.70.0", - "UID": "d03e0399d6a29c89" - }, - "Version": "1.70.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "anyio@4.13.0", - "distro@1.9.0", - "google-auth@2.49.1", - "httpx@0.28.1", - "pydantic@2.12.5", - "requests@2.33.1", - "sniffio@1.3.1", - "tenacity@9.1.4", - "typing-extensions@4.15.0", - "websockets@16.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "googleapis-common-protos@1.73.1", - "Name": "googleapis-common-protos", - "Identifier": { - "PURL": "pkg:pypi/googleapis-common-protos@1.73.1", - "UID": "868849668f5b324b" - }, - "Version": "1.73.1", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "protobuf@6.33.6" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "gradio@6.8.0", - "Name": "gradio", - "Identifier": { - "PURL": "pkg:pypi/gradio@6.8.0", - "UID": "7d8e32fe89f5dfc4" - }, - "Version": "6.8.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "aiofiles@24.1.0", - "anyio@4.13.0", - "audioop-lts@0.2.2", - "brotli@1.2.0", - "fastapi@0.135.3", - "ffmpy@1.0.0", - "gradio-client@2.2.0", - "groovy@0.1.2", - "httpx@0.28.1", - "huggingface-hub@0.36.2", - "jinja2@3.1.6", - "markupsafe@3.0.3", - "numpy@2.4.4", - "orjson@3.11.8", - "packaging@26.0", - "pandas@2.3.3", - "pillow@12.2.0", - "pydantic@2.12.5", - "pydub@0.25.1", - "python-multipart@0.0.22", - "pytz@2026.1.post1", - "pyyaml@6.0.3", - "safehttpx@0.1.7", - "semantic-version@2.10.0", - "starlette@0.52.1", - "tomlkit@0.13.3", - "typer@0.21.2", - "typing-extensions@4.15.0", - "uvicorn@0.42.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "gradio-client@2.2.0", - "Name": "gradio-client", - "Identifier": { - "PURL": "pkg:pypi/gradio-client@2.2.0", - "UID": "b264b21e979d426f" - }, - "Version": "2.2.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "fsspec@2026.3.0", - 
"httpx@0.28.1", - "huggingface-hub@0.36.2", - "packaging@26.0", - "typing-extensions@4.15.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "gradio-pdf@0.0.24", - "Name": "gradio-pdf", - "Identifier": { - "PURL": "pkg:pypi/gradio-pdf@0.0.24", - "UID": "313982e04edcf97d" - }, - "Version": "0.0.24", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "gradio@6.8.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "greenlet@3.3.2", - "Name": "greenlet", - "Identifier": { - "PURL": "pkg:pypi/greenlet@3.3.2", - "UID": "53fcad42e0243689" - }, - "Version": "3.3.2", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "groovy@0.1.2", - "Name": "groovy", - "Identifier": { - "PURL": "pkg:pypi/groovy@0.1.2", - "UID": "996129d6cb36776d" - }, - "Version": "0.1.2", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "gunicorn@25.3.0", - "Name": "gunicorn", - "Identifier": { - "PURL": "pkg:pypi/gunicorn@25.3.0", - "UID": "fa8a2dee66d72aa1" - }, - "Version": "25.3.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "packaging@26.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "h11@0.16.0", - "Name": "h11", - "Identifier": { - "PURL": "pkg:pypi/h11@0.16.0", - "UID": "d50a8db9ed31c7b5" - }, - "Version": "0.16.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "hf-xet@1.4.3", - "Name": "hf-xet", - "Identifier": { - "PURL": "pkg:pypi/hf-xet@1.4.3", - "UID": "419cf99547913f4d" - }, - "Version": "1.4.3", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "httpcore@1.0.9", - "Name": "httpcore", - "Identifier": { - "PURL": "pkg:pypi/httpcore@1.0.9", - "UID": "befda0419fd3d5b2" - }, - "Version": "1.0.9", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "certifi@2026.2.25", - "h11@0.16.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "httpx-retries@0.4.6", - "Name": "httpx-retries", - "Identifier": { - 
"PURL": "pkg:pypi/httpx-retries@0.4.6", - "UID": "14f96554dccdd211" - }, - "Version": "0.4.6", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "httpx@0.28.1" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "httpx-sse@0.4.3", - "Name": "httpx-sse", - "Identifier": { - "PURL": "pkg:pypi/httpx-sse@0.4.3", - "UID": "218b966602c8da98" - }, - "Version": "0.4.3", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "huggingface-hub@0.36.2", - "Name": "huggingface-hub", - "Identifier": { - "PURL": "pkg:pypi/huggingface-hub@0.36.2", - "UID": "37b9d11f6b855af" - }, - "Version": "0.36.2", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "filelock@3.25.2", - "fsspec@2026.3.0", - "hf-xet@1.4.3", - "packaging@26.0", - "pyyaml@6.0.3", - "requests@2.33.1", - "tqdm@4.67.3", - "typing-extensions@4.15.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "idna@3.11", - "Name": "idna", - "Identifier": { - "PURL": "pkg:pypi/idna@3.11", - "UID": "fc2dae0aa8a11930" - }, - "Version": "3.11", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "imageio@2.37.3", - "Name": "imageio", - "Identifier": { - "PURL": "pkg:pypi/imageio@2.37.3", - "UID": "278810376b9c3b43" - }, - "Version": "2.37.3", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "numpy@2.4.4", - "pillow@12.2.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "importlib-metadata@8.7.1", - "Name": "importlib-metadata", - "Identifier": { - "PURL": "pkg:pypi/importlib-metadata@8.7.1", - "UID": "20ee4faf7ed54391" - }, - "Version": "8.7.1", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "zipp@3.23.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "jaraco-classes@3.4.0", - "Name": "jaraco-classes", - "Identifier": { - "PURL": "pkg:pypi/jaraco-classes@3.4.0", - "UID": "133ea18a85c6e7b1" - }, - "Version": "3.4.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "more-itertools@10.8.0" - 
], - "AnalyzedBy": "uv" - }, - { - "ID": "jaraco-context@6.1.2", - "Name": "jaraco-context", - "Identifier": { - "PURL": "pkg:pypi/jaraco-context@6.1.2", - "UID": "8ea12dd67c8b0599" - }, - "Version": "6.1.2", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "jaraco-functools@4.4.0", - "Name": "jaraco-functools", - "Identifier": { - "PURL": "pkg:pypi/jaraco-functools@4.4.0", - "UID": "98dee16196d13cca" - }, - "Version": "4.4.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "more-itertools@10.8.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "jeepney@0.9.0", - "Name": "jeepney", - "Identifier": { - "PURL": "pkg:pypi/jeepney@0.9.0", - "UID": "78f04c8e6818160d" - }, - "Version": "0.9.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "jinja2@3.1.6", - "Name": "jinja2", - "Identifier": { - "PURL": "pkg:pypi/jinja2@3.1.6", - "UID": "9c3befdce47c8a32" - }, - "Version": "3.1.6", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "markupsafe@3.0.3" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "jiter@0.13.0", - "Name": "jiter", - "Identifier": { - "PURL": "pkg:pypi/jiter@0.13.0", - "UID": "a5d762e5f9fbc2a2" - }, - "Version": "0.13.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "jmespath@1.1.0", - "Name": "jmespath", - "Identifier": { - "PURL": "pkg:pypi/jmespath@1.1.0", - "UID": "2185bf5b23a3c1d4" - }, - "Version": "1.1.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "json-repair@0.58.7", - "Name": "json-repair", - "Identifier": { - "PURL": "pkg:pypi/json-repair@0.58.7", - "UID": "f605ee3a9cc3586f" - }, - "Version": "0.58.7", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "jsonlines@4.0.0", - "Name": "jsonlines", - "Identifier": { - "PURL": "pkg:pypi/jsonlines@4.0.0", - "UID": "6ef2615614a3f1ae" - }, - "Version": "4.0.0", - 
"Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "attrs@26.1.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "jsonref@1.1.0", - "Name": "jsonref", - "Identifier": { - "PURL": "pkg:pypi/jsonref@1.1.0", - "UID": "2359e80f498d98f5" - }, - "Version": "1.1.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "jsonschema@4.26.0", - "Name": "jsonschema", - "Identifier": { - "PURL": "pkg:pypi/jsonschema@4.26.0", - "UID": "f01e1a1e364e6f4f" - }, - "Version": "4.26.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "attrs@26.1.0", - "jsonschema-specifications@2025.9.1", - "referencing@0.37.0", - "rpds-py@0.30.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "jsonschema-path@0.4.5", - "Name": "jsonschema-path", - "Identifier": { - "PURL": "pkg:pypi/jsonschema-path@0.4.5", - "UID": "3a0b6c51923cd2ac" - }, - "Version": "0.4.5", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "pathable@0.5.0", - "pyyaml@6.0.3", - "referencing@0.37.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "jsonschema-specifications@2025.9.1", - "Name": "jsonschema-specifications", - "Identifier": { - "PURL": "pkg:pypi/jsonschema-specifications@2025.9.1", - "UID": "e7b469c5a674ceb2" - }, - "Version": "2025.9.1", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "referencing@0.37.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "keyring@25.7.0", - "Name": "keyring", - "Identifier": { - "PURL": "pkg:pypi/keyring@25.7.0", - "UID": "8918a1d2db52f405" - }, - "Version": "25.7.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "jaraco-classes@3.4.0", - "jaraco-context@6.1.2", - "jaraco-functools@4.4.0", - "jeepney@0.9.0", - "pywin32-ctypes@0.2.3", - "secretstorage@3.5.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "latex2mathml@3.79.0", - "Name": "latex2mathml", - "Identifier": { - "PURL": "pkg:pypi/latex2mathml@3.79.0", - "UID": "f835d2667d030e70" - }, - "Version": "3.79.0", - 
"Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "lazy-loader@0.5", - "Name": "lazy-loader", - "Identifier": { - "PURL": "pkg:pypi/lazy-loader@0.5", - "UID": "1d6a1b9988e92f85" - }, - "Version": "0.5", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "packaging@26.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "loguru@0.7.3", - "Name": "loguru", - "Identifier": { - "PURL": "pkg:pypi/loguru@0.7.3", - "UID": "a6aa0113578e6288" - }, - "Version": "0.7.3", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "colorama@0.4.6", - "win32-setctime@1.2.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "lxml@6.0.2", - "Name": "lxml", - "Identifier": { - "PURL": "pkg:pypi/lxml@6.0.2", - "UID": "b13c649c28bee02" - }, - "Version": "6.0.2", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "magika@1.0.2", - "Name": "magika", - "Identifier": { - "PURL": "pkg:pypi/magika@1.0.2", - "UID": "248411c1d3546107" - }, - "Version": "1.0.2", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "click@8.3.1", - "onnxruntime@1.24.4" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "mammoth@1.12.0", - "Name": "mammoth", - "Identifier": { - "PURL": "pkg:pypi/mammoth@1.12.0", - "UID": "aea025c60016d316" - }, - "Version": "1.12.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "cobble@0.1.4" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "markdown-it-py@4.0.0", - "Name": "markdown-it-py", - "Identifier": { - "PURL": "pkg:pypi/markdown-it-py@4.0.0", - "UID": "9ca6a9492cd2fedc" - }, - "Version": "4.0.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "mdurl@0.1.2" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "marko@2.2.2", - "Name": "marko", - "Identifier": { - "PURL": "pkg:pypi/marko@2.2.2", - "UID": "3b0ce9d1975df473" - }, - "Version": "2.2.2", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": 
"markupsafe@3.0.3", - "Name": "markupsafe", - "Identifier": { - "PURL": "pkg:pypi/markupsafe@3.0.3", - "UID": "c97703c32c4879f7" - }, - "Version": "3.0.3", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "mdurl@0.1.2", - "Name": "mdurl", - "Identifier": { - "PURL": "pkg:pypi/mdurl@0.1.2", - "UID": "9dc632b33acb56d1" - }, - "Version": "0.1.2", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "mineru@3.0.7", - "Name": "mineru", - "Identifier": { - "PURL": "pkg:pypi/mineru@3.0.7", - "UID": "17c4686aa1c550a1" - }, - "Version": "3.0.7", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "accelerate@1.13.0", - "albumentations@2.0.8", - "beautifulsoup4@4.14.3", - "boto3@1.42.80", - "click@8.3.1", - "dill@0.4.1", - "fast-langdetect@0.2.5", - "fastapi@0.135.3", - "ftfy@6.3.1", - "gradio-pdf@0.0.24", - "gradio@6.8.0", - "httpx@0.28.1", - "huggingface-hub@0.36.2", - "json-repair@0.58.7", - "loguru@0.7.3", - "lxml@6.0.2", - "magika@1.0.2", - "mammoth@1.12.0", - "mineru-vl-utils@0.1.22", - "modelscope@1.35.3", - "numpy@2.4.4", - "omegaconf@2.3.0", - "onnxruntime@1.24.4", - "openai@2.30.0", - "opencv-python@4.13.0.92", - "openpyxl@3.1.5", - "pandas@2.3.3", - "pdfminer-six@20260107", - "pdftext@0.6.3", - "pillow@12.2.0", - "pyclipper@1.4.0", - "pylatexenc@2.10", - "pypdf@6.9.2", - "pypdfium2@4.30.0", - "pypptx-with-oxml@1.0.3", - "python-docx@1.2.0", - "python-multipart@0.0.22", - "pyyaml@6.0.3", - "qwen-vl-utils@0.0.14", - "reportlab@4.4.10", - "requests@2.33.1", - "scikit-image@0.26.0", - "shapely@2.1.2", - "torch@2.11.0", - "torchvision@0.26.0", - "tqdm@4.67.3", - "transformers@4.57.6", - "uvicorn@0.42.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "mineru-vl-utils@0.1.22", - "Name": "mineru-vl-utils", - "Identifier": { - "PURL": "pkg:pypi/mineru-vl-utils@0.1.22", - "UID": "506820348eef090d" - }, - "Version": "0.1.22", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": 
[ - "aiofiles@24.1.0", - "httpx-retries@0.4.6", - "httpx@0.28.1", - "loguru@0.7.3", - "pillow@12.2.0", - "pydantic@2.12.5" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "modelscope@1.35.3", - "Name": "modelscope", - "Identifier": { - "PURL": "pkg:pypi/modelscope@1.35.3", - "UID": "9354e8e07645ca60" - }, - "Version": "1.35.3", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "filelock@3.25.2", - "packaging@26.0", - "requests@2.33.1", - "setuptools@81.0.0", - "tqdm@4.67.3", - "urllib3@2.6.3" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "more-itertools@10.8.0", - "Name": "more-itertools", - "Identifier": { - "PURL": "pkg:pypi/more-itertools@10.8.0", - "UID": "8d31b6dc1ff95fdf" - }, - "Version": "10.8.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "mpire@2.10.2", - "Name": "mpire", - "Identifier": { - "PURL": "pkg:pypi/mpire@2.10.2", - "UID": "913ededb3610209e" - }, - "Version": "2.10.2", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "multiprocess@0.70.19", - "pygments@2.20.0", - "pywin32@311", - "tqdm@4.67.3" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "mpmath@1.3.0", - "Name": "mpmath", - "Identifier": { - "PURL": "pkg:pypi/mpmath@1.3.0", - "UID": "ae58f55affb7eaa7" - }, - "Version": "1.3.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "multidict@6.7.1", - "Name": "multidict", - "Identifier": { - "PURL": "pkg:pypi/multidict@6.7.1", - "UID": "b85b6627b2607e18" - }, - "Version": "6.7.1", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "multiprocess@0.70.19", - "Name": "multiprocess", - "Identifier": { - "PURL": "pkg:pypi/multiprocess@0.70.19", - "UID": "f7158bf6590fbc17" - }, - "Version": "0.70.19", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "dill@0.4.1" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "nano-vectordb@0.0.4.3", - "Name": "nano-vectordb", - "Identifier": { - "PURL": 
"pkg:pypi/nano-vectordb@0.0.4.3", - "UID": "4f5c94c278b0cf6" - }, - "Version": "0.0.4.3", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "numpy@2.4.4" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "networkx@3.6.1", - "Name": "networkx", - "Identifier": { - "PURL": "pkg:pypi/networkx@3.6.1", - "UID": "8d909da1598b683e" - }, - "Version": "3.6.1", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "numpy@2.4.4", - "Name": "numpy", - "Identifier": { - "PURL": "pkg:pypi/numpy@2.4.4", - "UID": "5f1f2658b471127c" - }, - "Version": "2.4.4", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "nvidia-cublas@13.1.0.3", - "Name": "nvidia-cublas", - "Identifier": { - "PURL": "pkg:pypi/nvidia-cublas@13.1.0.3", - "UID": "2e4398ec1ffd3723" - }, - "Version": "13.1.0.3", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "nvidia-cuda-cupti@13.0.85", - "Name": "nvidia-cuda-cupti", - "Identifier": { - "PURL": "pkg:pypi/nvidia-cuda-cupti@13.0.85", - "UID": "86c9eacadae7fd25" - }, - "Version": "13.0.85", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "nvidia-cuda-nvrtc@13.0.88", - "Name": "nvidia-cuda-nvrtc", - "Identifier": { - "PURL": "pkg:pypi/nvidia-cuda-nvrtc@13.0.88", - "UID": "4de6d77c84e9ad85" - }, - "Version": "13.0.88", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "nvidia-cuda-runtime@13.0.96", - "Name": "nvidia-cuda-runtime", - "Identifier": { - "PURL": "pkg:pypi/nvidia-cuda-runtime@13.0.96", - "UID": "d8b8a76e12c590a7" - }, - "Version": "13.0.96", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "nvidia-cudnn-cu13@9.19.0.56", - "Name": "nvidia-cudnn-cu13", - "Identifier": { - "PURL": "pkg:pypi/nvidia-cudnn-cu13@9.19.0.56", - "UID": "e0a086b650a3902a" - }, - "Version": "9.19.0.56", - "Indirect": true, - "Relationship": 
"indirect", - "DependsOn": [ - "nvidia-cublas@13.1.0.3" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "nvidia-cufft@12.0.0.61", - "Name": "nvidia-cufft", - "Identifier": { - "PURL": "pkg:pypi/nvidia-cufft@12.0.0.61", - "UID": "520987fff2db58f5" - }, - "Version": "12.0.0.61", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "nvidia-nvjitlink@13.0.88" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "nvidia-cufile@1.15.1.6", - "Name": "nvidia-cufile", - "Identifier": { - "PURL": "pkg:pypi/nvidia-cufile@1.15.1.6", - "UID": "e55e97544844bcb" - }, - "Version": "1.15.1.6", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "nvidia-curand@10.4.0.35", - "Name": "nvidia-curand", - "Identifier": { - "PURL": "pkg:pypi/nvidia-curand@10.4.0.35", - "UID": "474f87c79e6edba8" - }, - "Version": "10.4.0.35", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "nvidia-cusolver@12.0.4.66", - "Name": "nvidia-cusolver", - "Identifier": { - "PURL": "pkg:pypi/nvidia-cusolver@12.0.4.66", - "UID": "4b99e005a8f3db8a" - }, - "Version": "12.0.4.66", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "nvidia-cublas@13.1.0.3", - "nvidia-cusparse@12.6.3.3", - "nvidia-nvjitlink@13.0.88" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "nvidia-cusparse@12.6.3.3", - "Name": "nvidia-cusparse", - "Identifier": { - "PURL": "pkg:pypi/nvidia-cusparse@12.6.3.3", - "UID": "322febf02f8a4597" - }, - "Version": "12.6.3.3", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "nvidia-nvjitlink@13.0.88" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "nvidia-cusparselt-cu13@0.8.0", - "Name": "nvidia-cusparselt-cu13", - "Identifier": { - "PURL": "pkg:pypi/nvidia-cusparselt-cu13@0.8.0", - "UID": "9f3cbbf02743570d" - }, - "Version": "0.8.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "nvidia-nccl-cu13@2.28.9", - "Name": "nvidia-nccl-cu13", - "Identifier": { - 
"PURL": "pkg:pypi/nvidia-nccl-cu13@2.28.9", - "UID": "d58c295882bcad68" - }, - "Version": "2.28.9", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "nvidia-nvjitlink@13.0.88", - "Name": "nvidia-nvjitlink", - "Identifier": { - "PURL": "pkg:pypi/nvidia-nvjitlink@13.0.88", - "UID": "2d728c5cc075d731" - }, - "Version": "13.0.88", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "nvidia-nvshmem-cu13@3.4.5", - "Name": "nvidia-nvshmem-cu13", - "Identifier": { - "PURL": "pkg:pypi/nvidia-nvshmem-cu13@3.4.5", - "UID": "e3aafd5a6dc04592" - }, - "Version": "3.4.5", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "nvidia-nvtx@13.0.85", - "Name": "nvidia-nvtx", - "Identifier": { - "PURL": "pkg:pypi/nvidia-nvtx@13.0.85", - "UID": "3a7feb80566be913" - }, - "Version": "13.0.85", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "ocrmac@1.0.1", - "Name": "ocrmac", - "Identifier": { - "PURL": "pkg:pypi/ocrmac@1.0.1", - "UID": "76ce014f89ad0ffa" - }, - "Version": "1.0.1", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "click@8.3.1", - "pillow@12.2.0", - "pyobjc-framework-vision@12.1" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "omegaconf@2.3.0", - "Name": "omegaconf", - "Identifier": { - "PURL": "pkg:pypi/omegaconf@2.3.0", - "UID": "a7e5dc7417fe3f52" - }, - "Version": "2.3.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "antlr4-python3-runtime@4.9.3", - "pyyaml@6.0.3" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "onnxruntime@1.24.4", - "Name": "onnxruntime", - "Identifier": { - "PURL": "pkg:pypi/onnxruntime@1.24.4", - "UID": "8d53a10f3b9c3b35" - }, - "Version": "1.24.4", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "flatbuffers@25.12.19", - "numpy@2.4.4", - "packaging@26.0", - "protobuf@6.33.6", - "sympy@1.14.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": 
"openapi-pydantic@0.5.1", - "Name": "openapi-pydantic", - "Identifier": { - "PURL": "pkg:pypi/openapi-pydantic@0.5.1", - "UID": "bfb23529b97bedc0" - }, - "Version": "0.5.1", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "pydantic@2.12.5" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "opencv-python@4.13.0.92", - "Name": "opencv-python", - "Identifier": { - "PURL": "pkg:pypi/opencv-python@4.13.0.92", - "UID": "d65a4619fab2f607" - }, - "Version": "4.13.0.92", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "numpy@2.4.4" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "opencv-python-headless@4.13.0.92", - "Name": "opencv-python-headless", - "Identifier": { - "PURL": "pkg:pypi/opencv-python-headless@4.13.0.92", - "UID": "62cf65aed9267e5e" - }, - "Version": "4.13.0.92", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "numpy@2.4.4" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "openpyxl@3.1.5", - "Name": "openpyxl", - "Identifier": { - "PURL": "pkg:pypi/openpyxl@3.1.5", - "UID": "b978db12d1edc9fd" - }, - "Version": "3.1.5", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "et-xmlfile@2.0.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "opentelemetry-api@1.40.0", - "Name": "opentelemetry-api", - "Identifier": { - "PURL": "pkg:pypi/opentelemetry-api@1.40.0", - "UID": "f886c17be25ecee7" - }, - "Version": "1.40.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "importlib-metadata@8.7.1", - "typing-extensions@4.15.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "orjson@3.11.8", - "Name": "orjson", - "Identifier": { - "PURL": "pkg:pypi/orjson@3.11.8", - "UID": "cd1e5bfad7298883" - }, - "Version": "3.11.8", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "packaging@26.0", - "Name": "packaging", - "Identifier": { - "PURL": "pkg:pypi/packaging@26.0", - "UID": "18ff1c8b62b5ce98" - }, - "Version": "26.0", - "Indirect": true, - "Relationship": 
"indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "pandas@2.3.3", - "Name": "pandas", - "Identifier": { - "PURL": "pkg:pypi/pandas@2.3.3", - "UID": "bd19e8d69029bbf5" - }, - "Version": "2.3.3", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "numpy@2.4.4", - "python-dateutil@2.9.0.post0", - "pytz@2026.1.post1", - "tzdata@2025.3" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "pathable@0.5.0", - "Name": "pathable", - "Identifier": { - "PURL": "pkg:pypi/pathable@0.5.0", - "UID": "414385b81ef1840b" - }, - "Version": "0.5.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "pdfminer-six@20260107", - "Name": "pdfminer-six", - "Identifier": { - "PURL": "pkg:pypi/pdfminer-six@20260107", - "UID": "14a5434f940ee872" - }, - "Version": "20260107", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "charset-normalizer@3.4.6", - "cryptography@46.0.6" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "pdftext@0.6.3", - "Name": "pdftext", - "Identifier": { - "PURL": "pkg:pypi/pdftext@0.6.3", - "UID": "32a4a53749eb45f5" - }, - "Version": "0.6.3", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "click@8.3.1", - "pydantic-settings@2.13.1", - "pydantic@2.12.5", - "pypdfium2@4.30.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "pillow@12.2.0", - "Name": "pillow", - "Identifier": { - "PURL": "pkg:pypi/pillow@12.2.0", - "UID": "3a3b5de04786361" - }, - "Version": "12.2.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "pipmaster@1.1.2", - "Name": "pipmaster", - "Identifier": { - "PURL": "pkg:pypi/pipmaster@1.1.2", - "UID": "1410359b5f7483d4" - }, - "Version": "1.1.2", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "ascii-colors@0.11.21", - "packaging@26.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "platformdirs@4.9.4", - "Name": "platformdirs", - "Identifier": { - "PURL": "pkg:pypi/platformdirs@4.9.4", - "UID": "2ce9c1e99f46fc4d" - }, - 
"Version": "4.9.4", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "pluggy@1.6.0", - "Name": "pluggy", - "Identifier": { - "PURL": "pkg:pypi/pluggy@1.6.0", - "UID": "fb8af1ba97572ef7" - }, - "Version": "1.6.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "polyfactory@3.3.0", - "Name": "polyfactory", - "Identifier": { - "PURL": "pkg:pypi/polyfactory@3.3.0", - "UID": "599e19f58e5f6b8" - }, - "Version": "3.3.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "faker@40.12.0", - "typing-extensions@4.15.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "propcache@0.4.1", - "Name": "propcache", - "Identifier": { - "PURL": "pkg:pypi/propcache@0.4.1", - "UID": "44918452e0d9ae67" - }, - "Version": "0.4.1", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "proto-plus@1.27.2", - "Name": "proto-plus", - "Identifier": { - "PURL": "pkg:pypi/proto-plus@1.27.2", - "UID": "94f80a4bf6197cc5" - }, - "Version": "1.27.2", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "protobuf@6.33.6" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "protobuf@6.33.6", - "Name": "protobuf", - "Identifier": { - "PURL": "pkg:pypi/protobuf@6.33.6", - "UID": "436c05cac17d37e7" - }, - "Version": "6.33.6", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "psutil@7.2.2", - "Name": "psutil", - "Identifier": { - "PURL": "pkg:pypi/psutil@7.2.2", - "UID": "b49c700e973259fe" - }, - "Version": "7.2.2", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "py-key-value-aio@0.4.4", - "Name": "py-key-value-aio", - "Identifier": { - "PURL": "pkg:pypi/py-key-value-aio@0.4.4", - "UID": "cad4efca15f4f7e" - }, - "Version": "0.4.4", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "aiofile@3.9.0", - "anyio@4.13.0", - "beartype@0.22.9", - "cachetools@7.0.5", - "keyring@25.7.0", 
- "typing-extensions@4.15.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "pyasn1@0.6.3", - "Name": "pyasn1", - "Identifier": { - "PURL": "pkg:pypi/pyasn1@0.6.3", - "UID": "c1d679e7c1ad2e5f" - }, - "Version": "0.6.3", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "pyasn1-modules@0.4.2", - "Name": "pyasn1-modules", - "Identifier": { - "PURL": "pkg:pypi/pyasn1-modules@0.4.2", - "UID": "349adcaa364bce72" - }, - "Version": "0.4.2", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "pyasn1@0.6.3" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "pyclipper@1.4.0", - "Name": "pyclipper", - "Identifier": { - "PURL": "pkg:pypi/pyclipper@1.4.0", - "UID": "cc112a5b21e48200" - }, - "Version": "1.4.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "pycparser@3.0", - "Name": "pycparser", - "Identifier": { - "PURL": "pkg:pypi/pycparser@3.0", - "UID": "c92284e7051d4ada" - }, - "Version": "3.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "pycryptodome@3.23.0", - "Name": "pycryptodome", - "Identifier": { - "PURL": "pkg:pypi/pycryptodome@3.23.0", - "UID": "bf2a0db37e5dfc4" - }, - "Version": "3.23.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "pydantic@2.12.5", - "Name": "pydantic", - "Identifier": { - "PURL": "pkg:pypi/pydantic@2.12.5", - "UID": "c53c43db4d52a3cd" - }, - "Version": "2.12.5", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "annotated-types@0.7.0", - "email-validator@2.3.0", - "pydantic-core@2.41.5", - "typing-extensions@4.15.0", - "typing-inspection@0.4.2" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "pydantic-core@2.41.5", - "Name": "pydantic-core", - "Identifier": { - "PURL": "pkg:pypi/pydantic-core@2.41.5", - "UID": "ba407f9fef614bf8" - }, - "Version": "2.41.5", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "typing-extensions@4.15.0" - ], - 
"AnalyzedBy": "uv" - }, - { - "ID": "pydub@0.25.1", - "Name": "pydub", - "Identifier": { - "PURL": "pkg:pypi/pydub@0.25.1", - "UID": "c8db42cca9f08256" - }, - "Version": "0.25.1", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "pygments@2.20.0", - "Name": "pygments", - "Identifier": { - "PURL": "pkg:pypi/pygments@2.20.0", - "UID": "a97e8b3301b61131" - }, - "Version": "2.20.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "pyjwt@2.12.1", - "Name": "pyjwt", - "Identifier": { - "PURL": "pkg:pypi/pyjwt@2.12.1", - "UID": "96e16fe1c6072e55" - }, - "Version": "2.12.1", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "cryptography@46.0.6" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "pylatexenc@2.10", - "Name": "pylatexenc", - "Identifier": { - "PURL": "pkg:pypi/pylatexenc@2.10", - "UID": "bcb33ac6aa8acdf2" - }, - "Version": "2.10", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "pyobjc-core@12.1", - "Name": "pyobjc-core", - "Identifier": { - "PURL": "pkg:pypi/pyobjc-core@12.1", - "UID": "ba1de964ffdc3591" - }, - "Version": "12.1", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "pyobjc-framework-cocoa@12.1", - "Name": "pyobjc-framework-cocoa", - "Identifier": { - "PURL": "pkg:pypi/pyobjc-framework-cocoa@12.1", - "UID": "da8e00f678def44c" - }, - "Version": "12.1", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "pyobjc-core@12.1" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "pyobjc-framework-coreml@12.1", - "Name": "pyobjc-framework-coreml", - "Identifier": { - "PURL": "pkg:pypi/pyobjc-framework-coreml@12.1", - "UID": "36641e47680c6a9b" - }, - "Version": "12.1", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "pyobjc-core@12.1", - "pyobjc-framework-cocoa@12.1" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "pyobjc-framework-quartz@12.1", - "Name": 
"pyobjc-framework-quartz", - "Identifier": { - "PURL": "pkg:pypi/pyobjc-framework-quartz@12.1", - "UID": "2af5f832c1158bf2" - }, - "Version": "12.1", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "pyobjc-core@12.1", - "pyobjc-framework-cocoa@12.1" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "pyobjc-framework-vision@12.1", - "Name": "pyobjc-framework-vision", - "Identifier": { - "PURL": "pkg:pypi/pyobjc-framework-vision@12.1", - "UID": "2644d2dbe681c024" - }, - "Version": "12.1", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "pyobjc-core@12.1", - "pyobjc-framework-cocoa@12.1", - "pyobjc-framework-coreml@12.1", - "pyobjc-framework-quartz@12.1" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "pypdf@6.9.2", - "Name": "pypdf", - "Identifier": { - "PURL": "pkg:pypi/pypdf@6.9.2", - "UID": "3010bc68b29c7779" - }, - "Version": "6.9.2", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "pypdfium2@4.30.0", - "Name": "pypdfium2", - "Identifier": { - "PURL": "pkg:pypi/pypdfium2@4.30.0", - "UID": "d75dc8bc83009b9e" - }, - "Version": "4.30.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "pyperclip@1.11.0", - "Name": "pyperclip", - "Identifier": { - "PURL": "pkg:pypi/pyperclip@1.11.0", - "UID": "61a7004745cdfd62" - }, - "Version": "1.11.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "pypinyin@0.55.0", - "Name": "pypinyin", - "Identifier": { - "PURL": "pkg:pypi/pypinyin@0.55.0", - "UID": "6cb589173ed34fe1" - }, - "Version": "0.55.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "pypptx-with-oxml@1.0.3", - "Name": "pypptx-with-oxml", - "Identifier": { - "PURL": "pkg:pypi/pypptx-with-oxml@1.0.3", - "UID": "ee9c2cf4474b21d" - }, - "Version": "1.0.3", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "lxml@6.0.2", - "pillow@12.2.0", - 
"typing-extensions@4.15.0", - "xlsxwriter@3.2.9" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "python-dateutil@2.9.0.post0", - "Name": "python-dateutil", - "Identifier": { - "PURL": "pkg:pypi/python-dateutil@2.9.0.post0", - "UID": "7ff8c30198898771" - }, - "Version": "2.9.0.post0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "six@1.17.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "python-docx@1.2.0", - "Name": "python-docx", - "Identifier": { - "PURL": "pkg:pypi/python-docx@1.2.0", - "UID": "a8e160cb804d4790" - }, - "Version": "1.2.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "lxml@6.0.2", - "typing-extensions@4.15.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "python-jose@3.5.0", - "Name": "python-jose", - "Identifier": { - "PURL": "pkg:pypi/python-jose@3.5.0", - "UID": "a9d4364a3b6c03a8" - }, - "Version": "3.5.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "cryptography@46.0.6", - "ecdsa@0.19.2", - "pyasn1@0.6.3", - "rsa@4.9.1" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "python-pptx@1.0.2", - "Name": "python-pptx", - "Identifier": { - "PURL": "pkg:pypi/python-pptx@1.0.2", - "UID": "1435a994d3665ac0" - }, - "Version": "1.0.2", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "lxml@6.0.2", - "pillow@12.2.0", - "typing-extensions@4.15.0", - "xlsxwriter@3.2.9" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "pytz@2026.1.post1", - "Name": "pytz", - "Identifier": { - "PURL": "pkg:pypi/pytz@2026.1.post1", - "UID": "a8026db830e8cf48" - }, - "Version": "2026.1.post1", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "pywin32@311", - "Name": "pywin32", - "Identifier": { - "PURL": "pkg:pypi/pywin32@311", - "UID": "64b74c2fc0b1955f" - }, - "Version": "311", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "pywin32-ctypes@0.2.3", - "Name": "pywin32-ctypes", - "Identifier": { - "PURL": 
"pkg:pypi/pywin32-ctypes@0.2.3", - "UID": "cba3d635a983757d" - }, - "Version": "0.2.3", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "pyyaml@6.0.3", - "Name": "pyyaml", - "Identifier": { - "PURL": "pkg:pypi/pyyaml@6.0.3", - "UID": "691cc315a4054d72" - }, - "Version": "6.0.3", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "qwen-vl-utils@0.0.14", - "Name": "qwen-vl-utils", - "Identifier": { - "PURL": "pkg:pypi/qwen-vl-utils@0.0.14", - "UID": "37b7acec89974552" - }, - "Version": "0.0.14", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "av@17.0.0", - "packaging@26.0", - "pillow@12.2.0", - "requests@2.33.1" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "rapidocr@3.7.0", - "Name": "rapidocr", - "Identifier": { - "PURL": "pkg:pypi/rapidocr@3.7.0", - "UID": "a4731f048f745cd5" - }, - "Version": "3.7.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "colorlog@6.10.1", - "numpy@2.4.4", - "omegaconf@2.3.0", - "opencv-python@4.13.0.92", - "pillow@12.2.0", - "pyclipper@1.4.0", - "pyyaml@6.0.3", - "requests@2.33.1", - "shapely@2.1.2", - "six@1.17.0", - "tqdm@4.67.3" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "referencing@0.37.0", - "Name": "referencing", - "Identifier": { - "PURL": "pkg:pypi/referencing@0.37.0", - "UID": "95b475217a47d1ac" - }, - "Version": "0.37.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "attrs@26.1.0", - "rpds-py@0.30.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "regex@2026.3.32", - "Name": "regex", - "Identifier": { - "PURL": "pkg:pypi/regex@2026.3.32", - "UID": "69c6b38682842272" - }, - "Version": "2026.3.32", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "reportlab@4.4.10", - "Name": "reportlab", - "Identifier": { - "PURL": "pkg:pypi/reportlab@4.4.10", - "UID": "86c543d5bce3978e" - }, - "Version": "4.4.10", - "Indirect": true, - "Relationship": "indirect", - 
"DependsOn": [ - "charset-normalizer@3.4.6", - "pillow@12.2.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "requests@2.33.1", - "Name": "requests", - "Identifier": { - "PURL": "pkg:pypi/requests@2.33.1", - "UID": "be2ea39cc1f29190" - }, - "Version": "2.33.1", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "certifi@2026.2.25", - "charset-normalizer@3.4.6", - "idna@3.11", - "urllib3@2.6.3" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "rich@14.3.3", - "Name": "rich", - "Identifier": { - "PURL": "pkg:pypi/rich@14.3.3", - "UID": "9f0b9bd3c379a8bc" - }, - "Version": "14.3.3", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "markdown-it-py@4.0.0", - "pygments@2.20.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "rich-rst@1.3.2", - "Name": "rich-rst", - "Identifier": { - "PURL": "pkg:pypi/rich-rst@1.3.2", - "UID": "bbe420a244fbd59a" - }, - "Version": "1.3.2", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "docutils@0.22.4", - "rich@14.3.3" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "robust-downloader@0.0.2", - "Name": "robust-downloader", - "Identifier": { - "PURL": "pkg:pypi/robust-downloader@0.0.2", - "UID": "547d4ffc3392e752" - }, - "Version": "0.0.2", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "colorlog@6.10.1", - "requests@2.33.1", - "tqdm@4.67.3" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "rpds-py@0.30.0", - "Name": "rpds-py", - "Identifier": { - "PURL": "pkg:pypi/rpds-py@0.30.0", - "UID": "e858ddf621f143f2" - }, - "Version": "0.30.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "rsa@4.9.1", - "Name": "rsa", - "Identifier": { - "PURL": "pkg:pypi/rsa@4.9.1", - "UID": "bf1e6ca1105cd9c7" - }, - "Version": "4.9.1", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "pyasn1@0.6.3" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "rtree@1.4.1", - "Name": "rtree", - "Identifier": { - "PURL": "pkg:pypi/rtree@1.4.1", - "UID": 
"136b6b7a8d362e75" - }, - "Version": "1.4.1", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "s3transfer@0.16.0", - "Name": "s3transfer", - "Identifier": { - "PURL": "pkg:pypi/s3transfer@0.16.0", - "UID": "edfc5fe8a44a507d" - }, - "Version": "0.16.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "botocore@1.42.80" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "safehttpx@0.1.7", - "Name": "safehttpx", - "Identifier": { - "PURL": "pkg:pypi/safehttpx@0.1.7", - "UID": "de3ee59a525585c0" - }, - "Version": "0.1.7", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "httpx@0.28.1" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "safetensors@0.7.0", - "Name": "safetensors", - "Identifier": { - "PURL": "pkg:pypi/safetensors@0.7.0", - "UID": "fca419eb5be720dd" - }, - "Version": "0.7.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "numpy@2.4.4", - "packaging@26.0", - "torch@2.11.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "scikit-image@0.26.0", - "Name": "scikit-image", - "Identifier": { - "PURL": "pkg:pypi/scikit-image@0.26.0", - "UID": "dba1c55dd1ece5fd" - }, - "Version": "0.26.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "imageio@2.37.3", - "lazy-loader@0.5", - "networkx@3.6.1", - "numpy@2.4.4", - "packaging@26.0", - "pillow@12.2.0", - "scipy@1.17.1", - "tifffile@2026.3.3" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "scipy@1.17.1", - "Name": "scipy", - "Identifier": { - "PURL": "pkg:pypi/scipy@1.17.1", - "UID": "6da23db394b4d466" - }, - "Version": "1.17.1", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "numpy@2.4.4" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "secretstorage@3.5.0", - "Name": "secretstorage", - "Identifier": { - "PURL": "pkg:pypi/secretstorage@3.5.0", - "UID": "2e188ea519c6e323" - }, - "Version": "3.5.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "cryptography@46.0.6", - 
"jeepney@0.9.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "semantic-version@2.10.0", - "Name": "semantic-version", - "Identifier": { - "PURL": "pkg:pypi/semantic-version@2.10.0", - "UID": "a0e70289e8614640" - }, - "Version": "2.10.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "semchunk@3.2.5", - "Name": "semchunk", - "Identifier": { - "PURL": "pkg:pypi/semchunk@3.2.5", - "UID": "8532719ede8a6888" - }, - "Version": "3.2.5", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "mpire@2.10.2", - "tqdm@4.67.3" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "setuptools@81.0.0", - "Name": "setuptools", - "Identifier": { - "PURL": "pkg:pypi/setuptools@81.0.0", - "UID": "62efd152cc0bcb22" - }, - "Version": "81.0.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "shapely@2.1.2", - "Name": "shapely", - "Identifier": { - "PURL": "pkg:pypi/shapely@2.1.2", - "UID": "50b519f8a488cd1f" - }, - "Version": "2.1.2", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "numpy@2.4.4" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "shellingham@1.5.4", - "Name": "shellingham", - "Identifier": { - "PURL": "pkg:pypi/shellingham@1.5.4", - "UID": "7e72312c22e72a3" - }, - "Version": "1.5.4", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "simsimd@6.5.16", - "Name": "simsimd", - "Identifier": { - "PURL": "pkg:pypi/simsimd@6.5.16", - "UID": "52826999b71a6a6" - }, - "Version": "6.5.16", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "six@1.17.0", - "Name": "six", - "Identifier": { - "PURL": "pkg:pypi/six@1.17.0", - "UID": "ac79dc21f2d40ee4" - }, - "Version": "1.17.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "sniffio@1.3.1", - "Name": "sniffio", - "Identifier": { - "PURL": "pkg:pypi/sniffio@1.3.1", - "UID": "2b2bc555d7ea120" - }, - "Version": "1.3.1", - 
"Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "soupsieve@2.8.3", - "Name": "soupsieve", - "Identifier": { - "PURL": "pkg:pypi/soupsieve@2.8.3", - "UID": "824234a21cd9210e" - }, - "Version": "2.8.3", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "sse-starlette@3.3.4", - "Name": "sse-starlette", - "Identifier": { - "PURL": "pkg:pypi/sse-starlette@3.3.4", - "UID": "6d5d1d8fcde1709f" - }, - "Version": "3.3.4", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "anyio@4.13.0", - "starlette@0.52.1" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "starlette@0.52.1", - "Name": "starlette", - "Identifier": { - "PURL": "pkg:pypi/starlette@0.52.1", - "UID": "bb0c8678769c3ad1" - }, - "Version": "0.52.1", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "anyio@4.13.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "stringzilla@4.6.0", - "Name": "stringzilla", - "Identifier": { - "PURL": "pkg:pypi/stringzilla@4.6.0", - "UID": "fcec02b7460a7904" - }, - "Version": "4.6.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "sympy@1.14.0", - "Name": "sympy", - "Identifier": { - "PURL": "pkg:pypi/sympy@1.14.0", - "UID": "9ff03251ae05c292" - }, - "Version": "1.14.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "mpmath@1.3.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "tabulate@0.10.0", - "Name": "tabulate", - "Identifier": { - "PURL": "pkg:pypi/tabulate@0.10.0", - "UID": "c95cd1797c5936f6" - }, - "Version": "0.10.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "tenacity@9.1.4", - "Name": "tenacity", - "Identifier": { - "PURL": "pkg:pypi/tenacity@9.1.4", - "UID": "f43afc0974e70bf8" - }, - "Version": "9.1.4", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "tifffile@2026.3.3", - "Name": "tifffile", - "Identifier": { - "PURL": 
"pkg:pypi/tifffile@2026.3.3", - "UID": "19e4e2fc557e7447" - }, - "Version": "2026.3.3", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "numpy@2.4.4" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "tiktoken@0.12.0", - "Name": "tiktoken", - "Identifier": { - "PURL": "pkg:pypi/tiktoken@0.12.0", - "UID": "4d5f964d574a6210" - }, - "Version": "0.12.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "regex@2026.3.32", - "requests@2.33.1" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "tokenizers@0.22.2", - "Name": "tokenizers", - "Identifier": { - "PURL": "pkg:pypi/tokenizers@0.22.2", - "UID": "68484714ca924d69" - }, - "Version": "0.22.2", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "huggingface-hub@0.36.2" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "tomlkit@0.13.3", - "Name": "tomlkit", - "Identifier": { - "PURL": "pkg:pypi/tomlkit@0.13.3", - "UID": "ed1e60932d874d10" - }, - "Version": "0.13.3", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "torch@2.11.0", - "Name": "torch", - "Identifier": { - "PURL": "pkg:pypi/torch@2.11.0", - "UID": "55bda6c93dac8f8c" - }, - "Version": "2.11.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "cuda-bindings@13.2.0", - "cuda-toolkit@13.0.2", - "filelock@3.25.2", - "fsspec@2026.3.0", - "jinja2@3.1.6", - "networkx@3.6.1", - "nvidia-cudnn-cu13@9.19.0.56", - "nvidia-cusparselt-cu13@0.8.0", - "nvidia-nccl-cu13@2.28.9", - "nvidia-nvshmem-cu13@3.4.5", - "setuptools@81.0.0", - "sympy@1.14.0", - "triton@3.6.0", - "typing-extensions@4.15.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "torchvision@0.26.0", - "Name": "torchvision", - "Identifier": { - "PURL": "pkg:pypi/torchvision@0.26.0", - "UID": "2c96da25e41cbdcc" - }, - "Version": "0.26.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "numpy@2.4.4", - "pillow@12.2.0", - "torch@2.11.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "tqdm@4.67.3", - 
"Name": "tqdm", - "Identifier": { - "PURL": "pkg:pypi/tqdm@4.67.3", - "UID": "c66695e708b9f512" - }, - "Version": "4.67.3", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "colorama@0.4.6" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "transformers@4.57.6", - "Name": "transformers", - "Identifier": { - "PURL": "pkg:pypi/transformers@4.57.6", - "UID": "a15e4c4f47c5b56f" - }, - "Version": "4.57.6", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "filelock@3.25.2", - "huggingface-hub@0.36.2", - "numpy@2.4.4", - "packaging@26.0", - "pyyaml@6.0.3", - "regex@2026.3.32", - "requests@2.33.1", - "safetensors@0.7.0", - "tokenizers@0.22.2", - "tqdm@4.67.3" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "tree-sitter@0.25.2", - "Name": "tree-sitter", - "Identifier": { - "PURL": "pkg:pypi/tree-sitter@0.25.2", - "UID": "c6f116d013ae171f" - }, - "Version": "0.25.2", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "tree-sitter-c@0.24.1", - "Name": "tree-sitter-c", - "Identifier": { - "PURL": "pkg:pypi/tree-sitter-c@0.24.1", - "UID": "f2e7ff812dfef3dd" - }, - "Version": "0.24.1", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "tree-sitter-javascript@0.25.0", - "Name": "tree-sitter-javascript", - "Identifier": { - "PURL": "pkg:pypi/tree-sitter-javascript@0.25.0", - "UID": "2cb94636d27c8b1d" - }, - "Version": "0.25.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "tree-sitter-python@0.25.0", - "Name": "tree-sitter-python", - "Identifier": { - "PURL": "pkg:pypi/tree-sitter-python@0.25.0", - "UID": "a5563e4980cd406e" - }, - "Version": "0.25.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "tree-sitter-typescript@0.23.2", - "Name": "tree-sitter-typescript", - "Identifier": { - "PURL": "pkg:pypi/tree-sitter-typescript@0.23.2", - "UID": "595454645d4b0be8" - }, - "Version": "0.23.2", - 
"Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "triton@3.6.0", - "Name": "triton", - "Identifier": { - "PURL": "pkg:pypi/triton@3.6.0", - "UID": "4011f59186a45e3b" - }, - "Version": "3.6.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "typer@0.21.2", - "Name": "typer", - "Identifier": { - "PURL": "pkg:pypi/typer@0.21.2", - "UID": "eb519b8473fa4e50" - }, - "Version": "0.21.2", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "annotated-doc@0.0.4", - "click@8.3.1", - "rich@14.3.3", - "shellingham@1.5.4" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "typing-extensions@4.15.0", - "Name": "typing-extensions", - "Identifier": { - "PURL": "pkg:pypi/typing-extensions@4.15.0", - "UID": "67cbda23a41e6bb9" - }, - "Version": "4.15.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "typing-inspection@0.4.2", - "Name": "typing-inspection", - "Identifier": { - "PURL": "pkg:pypi/typing-inspection@0.4.2", - "UID": "e141c01a2a6a5097" - }, - "Version": "0.4.2", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "typing-extensions@4.15.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "tzdata@2025.3", - "Name": "tzdata", - "Identifier": { - "PURL": "pkg:pypi/tzdata@2025.3", - "UID": "1a9c1c1e17973a68" - }, - "Version": "2025.3", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "uncalled-for@0.2.0", - "Name": "uncalled-for", - "Identifier": { - "PURL": "pkg:pypi/uncalled-for@0.2.0", - "UID": "a678bf9724184b38" - }, - "Version": "0.2.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "urllib3@2.6.3", - "Name": "urllib3", - "Identifier": { - "PURL": "pkg:pypi/urllib3@2.6.3", - "UID": "a517307e92b05a4" - }, - "Version": "2.6.3", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "watchfiles@1.1.1", - "Name": "watchfiles", 
- "Identifier": { - "PURL": "pkg:pypi/watchfiles@1.1.1", - "UID": "8d6958da1a73155b" - }, - "Version": "1.1.1", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "anyio@4.13.0" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "wcwidth@0.6.0", - "Name": "wcwidth", - "Identifier": { - "PURL": "pkg:pypi/wcwidth@0.6.0", - "UID": "79a0993370dc6abd" - }, - "Version": "0.6.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "websockets@16.0", - "Name": "websockets", - "Identifier": { - "PURL": "pkg:pypi/websockets@16.0", - "UID": "2a16316553f5ea6c" - }, - "Version": "16.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "win32-setctime@1.2.0", - "Name": "win32-setctime", - "Identifier": { - "PURL": "pkg:pypi/win32-setctime@1.2.0", - "UID": "8cd7957ae8f8938d" - }, - "Version": "1.2.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "xlsxwriter@3.2.9", - "Name": "xlsxwriter", - "Identifier": { - "PURL": "pkg:pypi/xlsxwriter@3.2.9", - "UID": "93db8085a9c67f76" - }, - "Version": "3.2.9", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - }, - { - "ID": "yarl@1.23.0", - "Name": "yarl", - "Identifier": { - "PURL": "pkg:pypi/yarl@1.23.0", - "UID": "b299f1be18ad3fc9" - }, - "Version": "1.23.0", - "Indirect": true, - "Relationship": "indirect", - "DependsOn": [ - "idna@3.11", - "multidict@6.7.1", - "propcache@0.4.1" - ], - "AnalyzedBy": "uv" - }, - { - "ID": "zipp@3.23.0", - "Name": "zipp", - "Identifier": { - "PURL": "pkg:pypi/zipp@3.23.0", - "UID": "3aaf2c8c9213899c" - }, - "Version": "3.23.0", - "Indirect": true, - "Relationship": "indirect", - "AnalyzedBy": "uv" - } - ], - "Vulnerabilities": [ - { - "VulnerabilityID": "CVE-2026-30762", - "VendorIDs": [ - "GHSA-mcww-4hxq-hfr3" - ], - "PkgID": "lightrag-hku@1.4.12", - "PkgName": "lightrag-hku", - "PkgIdentifier": { - "PURL": "pkg:pypi/lightrag-hku@1.4.12", - "UID": 
"70958c7c95dc56d7" - }, - "InstalledVersion": "1.4.12", - "FixedVersion": "1.4.13", - "Status": "fixed", - "SeveritySource": "ghsa", - "PrimaryURL": "https://avd.aquasec.com/nvd/cve-2026-30762", - "DataSource": { - "ID": "ghsa", - "Name": "GitHub Security Advisory pip", - "URL": "https://github.com/advisories?query=type%3Areviewed+ecosystem%3Apip" - }, - "Fingerprint": "sha256:2e3d703f6045ff5badd8fef003b641bfd5be6083555af203173539f322c7247f", - "Title": "LightRAG: Hardcoded JWT Signing Secret Allows Authentication Bypass", - "Description": "Summary:\nThe file lightrag/api/config.py (line 397) uses a default JWT secret \"lightrag-jwt-default-secret\" when the TOKEN_SECRET environment variable is not set. The AuthHandler in lightrag/api/auth.py (lines 24-25) uses this secret to sign and verify tokens. An unauthenticated attacker can forge valid JWT tokens using the publicly known default secret and gain access to any protected endpoint.\n\nReproduction:\n1. Install LightRAG v1.4.10 with AUTH_ACCOUNTS configured but no TOKEN_SECRET set\n2. Use PyJWT to sign a token: jwt.encode({\"sub\": \"admin\", \"role\": \"user\"}, \"lightrag-jwt-default-secret\", algorithm=\"HS256\")\n3. Send a request to any protected endpoint with the header: Authorization: Bearer \u003cforged_token\u003e\n4. Access is granted without valid credentials\n\nSuggested Fix:\nRequire TOKEN_SECRET to be explicitly set when AUTH_ACCOUNTS is configured. 
Refuse to start the API server if authentication is enabled but no custom secret is provided.\n\n---\nVenkata Avinash Taduturi\ntaduturivenkata@gmail.com", - "Severity": "HIGH", - "VendorSeverity": { - "ghsa": 3 - }, - "CVSS": { - "ghsa": { - "V3Vector": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N", - "V3Score": 7.5 - } - }, - "References": [ - "https://github.com/HKUDS/LightRAG", - "https://github.com/HKUDS/LightRAG/security/advisories/GHSA-mcww-4hxq-hfr3" - ] - } - ] - }, - { - "Target": ".venv/lib/python3.13/site-packages/skimage/data/_fetchers.py", - "Class": "secret", - "Secrets": [ - { - "RuleID": "jwt-token", - "Category": "JWT", - "Severity": "MEDIUM", - "Title": "JWT token", - "StartLine": 528, - "EndLine": 528, - "Code": { - "Lines": [ - { - "Number": 526, - "Content": " \u003e\u003e\u003e import requests", - "IsCause": false, - "Annotation": "", - "Truncated": false, - "Highlighted": " \u003e\u003e\u003e import requests", - "FirstCause": false, - "LastCause": false - }, - { - "Number": 527, - "Content": " \u003e\u003e\u003e import zipfile", - "IsCause": false, - "Annotation": "", - "Truncated": false, - "Highlighted": " \u003e\u003e\u003e import zipfile", - "FirstCause": false, - "LastCause": false - }, - { - "Number": 528, - "Content": "9-be36-26ec9bc0df3b.jpg?token=****************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************'", - "IsCause": true, - "Annotation": "", - "Truncated": false, - "Highlighted": 
"9-be36-26ec9bc0df3b.jpg?token=****************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************'", - "FirstCause": true, - "LastCause": true - }, - { - "Number": 529, - "Content": " \u003e\u003e\u003e r = requests.get(url)", - "IsCause": false, - "Annotation": "", - "Truncated": false, - "Highlighted": " \u003e\u003e\u003e r = requests.get(url)", - "FirstCause": false, - "LastCause": false - } - ] - }, - "Match": "9-be36-26ec9bc0df3b.jpg?token=****************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************'", - "Offset": 17236 - } - ] - } - ] -} + +Report Summary + +┌──────────────────────────────────────────────────────────────────────────────────┬────────────────┬─────────────────┬───────────────────┐ +│ Target │ Type │ Vulnerabilities │ Misconfigurations │ +├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ +│ .venv/lib/python3.14/site-packages/paddleocr/ppstructure/kie/requirements.txt │ pip │ 0 │ - │ +├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ +│ .venv/lib/python3.14/site-packages/pkg_resources/tests/data/my-test-package_zip- │ python-pkg │ 0 │ - │ +│ ped-egg/my_test_package-1.0-py3.7.egg │ │ │ 
│ +├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ +│ uv.lock │ uv │ 0 │ - │ +├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ +│ .venv/lib/python3.14/site-packages/boto3/data/cloudformation/2010-05-15/resourc- │ cloudformation │ - │ 0 │ +│ es-1.json │ │ │ │ +├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ +│ .venv/lib/python3.14/site-packages/boto3/data/cloudwatch/2010-08-01/resources-1- │ cloudformation │ - │ 0 │ +│ .json │ │ │ │ +├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ +│ .venv/lib/python3.14/site-packages/boto3/data/dynamodb/2012-08-10/resources-1.j- │ cloudformation │ - │ 0 │ +│ son │ │ │ │ +├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ +│ .venv/lib/python3.14/site-packages/boto3/data/ec2/2014-10-01/resources-1.json │ cloudformation │ - │ 0 │ +├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ +│ .venv/lib/python3.14/site-packages/boto3/data/ec2/2015-03-01/resources-1.json │ cloudformation │ - │ 0 │ +├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ +│ .venv/lib/python3.14/site-packages/boto3/data/ec2/2015-04-15/resources-1.json │ cloudformation │ - │ 0 │ +├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ +│ .venv/lib/python3.14/site-packages/boto3/data/ec2/2015-10-01/resources-1.json │ cloudformation │ - │ 0 │ 
+├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ +│ .venv/lib/python3.14/site-packages/boto3/data/ec2/2016-04-01/resources-1.json │ cloudformation │ - │ 0 │ +├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ +│ .venv/lib/python3.14/site-packages/boto3/data/ec2/2016-09-15/resources-1.json │ cloudformation │ - │ 0 │ +├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ +│ .venv/lib/python3.14/site-packages/boto3/data/ec2/2016-11-15/resources-1.json │ cloudformation │ - │ 0 │ +├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ +│ .venv/lib/python3.14/site-packages/boto3/data/glacier/2012-06-01/resources-1.js- │ cloudformation │ - │ 0 │ +│ on │ │ │ │ +├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ +│ .venv/lib/python3.14/site-packages/boto3/data/iam/2010-05-08/resources-1.json │ cloudformation │ - │ 0 │ +├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ +│ .venv/lib/python3.14/site-packages/boto3/data/s3/2006-03-01/resources-1.json │ cloudformation │ - │ 0 │ +├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ +│ .venv/lib/python3.14/site-packages/boto3/data/sns/2010-03-31/resources-1.json │ cloudformation │ - │ 0 │ +├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ +│ .venv/lib/python3.14/site-packages/boto3/data/sqs/2012-11-05/resources-1.json │ cloudformation │ 
- │ 0 │ +├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ +│ Dockerfile │ dockerfile │ - │ 1 │ +├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ +│ Dockerfile.db │ dockerfile │ - │ 3 │ +└──────────────────────────────────────────────────────────────────────────────────┴────────────────┴─────────────────┴───────────────────┘ +Legend: +- '-': Not scanned +- '0': Clean (no security findings detected) + + +Dockerfile (dockerfile) +======================= +Tests: 24 (SUCCESSES: 23, FAILURES: 1) +Failures: 1 (MEDIUM: 0, HIGH: 1, CRITICAL: 0) + +DS-0029 (HIGH): '--no-install-recommends' flag is missed: 'apt-get update && apt-get install -y libgomp1 libgl1 git tesseract-ocr && rm -rf /var/lib/apt/lists/*' +════════════════════════════════════════ +'apt-get' install should use '--no-install-recommends' to minimize image size. + +See https://avd.aquasec.com/misconfig/ds-0029 +──────────────────────────────────────── + Dockerfile:20-25 +──────────────────────────────────────── + 20 ┌ RUN apt-get update && apt-get install -y \ + 21 │ libgomp1 \ + 22 │ libgl1 \ + 23 │ git \ + 24 │ tesseract-ocr \ + 25 └ && rm -rf /var/lib/apt/lists/* +──────────────────────────────────────── + + + +Dockerfile.db (dockerfile) +========================== +Tests: 25 (SUCCESSES: 22, FAILURES: 3) +Failures: 3 (MEDIUM: 2, HIGH: 1, CRITICAL: 0) + +DS-0013 (MEDIUM): RUN should not be used to change directory: 'cd /tmp && git clone --branch PG17/v1.6.0-rc0 https://github.com/apache/age.git && cd age && make PG_CONFIG=/usr/lib/postgresql/17/bin/pg_config install || (echo "Failed to build AGE" && exit 1)'. Use 'WORKDIR' statement instead. +════════════════════════════════════════ +Use WORKDIR instead of proliferating instructions like 'RUN cd … && do-something', which are hard to read, troubleshoot, and maintain. 
+ +See https://avd.aquasec.com/misconfig/ds-0013 +──────────────────────────────────────── + Dockerfile.db:14-18 +──────────────────────────────────────── + 14 ┌ RUN cd /tmp && \ + 15 │ git clone --branch PG17/v1.6.0-rc0 https://github.com/apache/age.git && \ + 16 │ cd age && \ + 17 │ make PG_CONFIG=/usr/lib/postgresql/17/bin/pg_config install || \ + 18 └ (echo "Failed to build AGE" && exit 1) +──────────────────────────────────────── + + +DS-0013 (MEDIUM): RUN should not be used to change directory: 'cd /tmp && git clone https://github.com/timescale/pg_textsearch.git && cd pg_textsearch && make PG_CONFIG=/usr/lib/postgresql/17/bin/pg_config || (echo "Failed to build pg_textsearch" && exit 1) && make PG_CONFIG=/usr/lib/postgresql/17/bin/pg_config install || (echo "Failed to install pg_textsearch" && exit 1)'. Use 'WORKDIR' statement instead. +════════════════════════════════════════ +Use WORKDIR instead of proliferating instructions like 'RUN cd … && do-something', which are hard to read, troubleshoot, and maintain. + +See https://avd.aquasec.com/misconfig/ds-0013 +──────────────────────────────────────── + Dockerfile.db:21-27 +──────────────────────────────────────── + 21 ┌ RUN cd /tmp && \ + 22 │ git clone https://github.com/timescale/pg_textsearch.git && \ + 23 │ cd pg_textsearch && \ + 24 │ make PG_CONFIG=/usr/lib/postgresql/17/bin/pg_config || \ + 25 │ (echo "Failed to build pg_textsearch" && exit 1) && \ + 26 │ make PG_CONFIG=/usr/lib/postgresql/17/bin/pg_config install || \ + 27 └ (echo "Failed to install pg_textsearch" && exit 1) +──────────────────────────────────────── + + +DS-0029 (HIGH): '--no-install-recommends' flag is missed: 'apt-get update && apt-get install -y build-essential git postgresql-server-dev-17 flex bison && rm -rf /var/lib/apt/lists/*' +════════════════════════════════════════ +'apt-get' install should use '--no-install-recommends' to minimize image size. 
+ +See https://avd.aquasec.com/misconfig/ds-0029 +──────────────────────────────────────── + Dockerfile.db:5-11 +──────────────────────────────────────── + 5 ┌ RUN apt-get update && apt-get install -y \ + 6 │ build-essential \ + 7 │ git \ + 8 │ postgresql-server-dev-17 \ + 9 │ flex \ + 10 │ bison \ + 11 └ && rm -rf /var/lib/apt/lists/* +──────────────────────────────────────── + + From bdbcda9129bec3a1aed78d60d088d23a6e2ad16a Mon Sep 17 00:00:00 2001 From: Kaiohz Date: Tue, 7 Apr 2026 20:40:24 +0200 Subject: [PATCH 04/17] test: Add tests for Alembic lifespan, BM25 modes, and close() method (BRIC-7) - Add test_lifespan.py: lifespan startup/shutdown, migration errors, BM25 pool close - Add test_alembic_config.py: config files, URL conversion, migration validation - Add hybrid+, bm25-only, and bm25-unavailable tests to test_query_use_case.py - Add close() tests to test_pg_textsearch_adapter.py --- .../bm25/test_pg_textsearch_adapter.py | 25 ++++- tests/unit/test_alembic_config.py | 55 +++++++++++ tests/unit/test_lifespan.py | 91 +++++++++++++++++++ tests/unit/test_query_use_case.py | 66 ++++++++++++++ 4 files changed, 236 insertions(+), 1 deletion(-) create mode 100644 tests/unit/test_alembic_config.py create mode 100644 tests/unit/test_lifespan.py diff --git a/tests/infrastructure/bm25/test_pg_textsearch_adapter.py b/tests/infrastructure/bm25/test_pg_textsearch_adapter.py index f68cc82..a363332 100644 --- a/tests/infrastructure/bm25/test_pg_textsearch_adapter.py +++ b/tests/infrastructure/bm25/test_pg_textsearch_adapter.py @@ -133,7 +133,7 @@ async def test_drop_index_clears_tsvector(mock_pool, mock_connection): adapter = PostgresBM25Adapter(db_url="postgresql://test") adapter._pool = mock_pool mock_pool.acquire.return_value.__aenter__ = AsyncMock(return_value=mock_connection) - mock_pool.acquire.return_value.__exit__ = AsyncMock(return_value=None) + mock_pool.acquire.return_value.__aexit__ = AsyncMock(return_value=None) await adapter.drop_index("workspace1") @@ 
-142,3 +142,26 @@ async def test_drop_index_clears_tsvector(mock_pool, mock_connection): call_args = mock_connection.execute.call_args[0] assert "UPDATE chunks" in call_args[0] assert "content_tsv = NULL" in call_args[0] + + +@pytest.mark.asyncio +async def test_close_closes_pool(mock_pool): + """Close should close connection pool.""" + adapter = PostgresBM25Adapter(db_url="postgresql://test") + adapter._pool = mock_pool + + await adapter.close() + + mock_pool.close.assert_called_once() + assert adapter._pool is None + + +@pytest.mark.asyncio +async def test_close_with_no_pool(): + """Close should handle None pool gracefully.""" + adapter = PostgresBM25Adapter(db_url="postgresql://test") + adapter._pool = None + + await adapter.close() + + assert adapter._pool is None diff --git a/tests/unit/test_alembic_config.py b/tests/unit/test_alembic_config.py new file mode 100644 index 0000000..c44d707 --- /dev/null +++ b/tests/unit/test_alembic_config.py @@ -0,0 +1,55 @@ +"""Tests for Alembic configuration and environment.""" + +import importlib.util +from pathlib import Path + +import pytest + +ALEMBIC_DIR = Path(__file__).parent.parent.parent / "src" / "alembic" +SRC_DIR = Path(__file__).parent.parent.parent / "src" + + +class TestAlembicConfig: + """Tests for Alembic configuration files.""" + + def test_alembic_ini_exists(self): + alembic_ini = SRC_DIR / "alembic.ini" + assert alembic_ini.exists(), f"alembic.ini should exist at {alembic_ini}" + + def test_alembic_versions_dir_exists(self): + versions_dir = ALEMBIC_DIR / "versions" + assert versions_dir.is_dir(), "alembic/versions directory should exist" + + def test_get_url_converts_asyncpg_to_sync(self): + """Should convert postgresql+asyncpg:// to postgresql://.""" + test_cases = [ + ("postgresql+asyncpg://user:pass@host/db", "postgresql://user:pass@host/db"), + ("postgresql://user:pass@host/db", "postgresql://user:pass@host/db"), + ] + for input_url, expected in test_cases: + result = input_url.replace("+asyncpg", "") 
+ assert result == expected + + def test_target_metadata_is_none(self): + env_path = ALEMBIC_DIR / "env.py" + content = env_path.read_text() + assert "target_metadata = None" in content + + def test_migration_001_exists(self): + migration = ALEMBIC_DIR / "versions" / "001_add_bm25_support.py" + assert migration.exists(), f"Migration 001 should exist at {migration}" + + def test_migration_001_has_upgrade_and_downgrade(self): + migration_path = ALEMBIC_DIR / "versions" / "001_add_bm25_support.py" + spec = importlib.util.spec_from_file_location("migration_001", migration_path) + assert spec is not None + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) # type: ignore[union-attr] + assert hasattr(module, "upgrade"), "Migration should have upgrade()" + assert hasattr(module, "downgrade"), "Migration should have downgrade()" + + def test_env_py_has_async_support(self): + env_path = ALEMBIC_DIR / "env.py" + content = env_path.read_text() + assert "async_engine_from_config" in content + assert "run_async_migrations" in content \ No newline at end of file diff --git a/tests/unit/test_lifespan.py b/tests/unit/test_lifespan.py new file mode 100644 index 0000000..f6f8e42 --- /dev/null +++ b/tests/unit/test_lifespan.py @@ -0,0 +1,91 @@ +"""Tests for FastAPI lifespan management.""" + +from contextlib import asynccontextmanager +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + + +class TestLifespan: + """Tests for lifespan context managers in main.py.""" + + @pytest.mark.asyncio + async def test_db_lifespan_runs_migrations_on_startup(self): + """Should run Alembic migrations on startup.""" + from main import db_lifespan + + mock_app = MagicMock() + + with patch("main.bm25_adapter", None): + with patch("main.asyncio.to_thread") as mock_to_thread: + mock_to_thread.return_value = None + async with db_lifespan(mock_app): + pass + mock_to_thread.assert_called_once() + + @pytest.mark.asyncio + async def 
test_db_lifespan_closes_bm25_pool_on_shutdown(self): + """Should close BM25 adapter connection pool on shutdown.""" + from main import db_lifespan + + mock_app = MagicMock() + mock_bm25 = AsyncMock() + + with patch("main.bm25_adapter", mock_bm25): + with patch("main.asyncio.to_thread"): + async with db_lifespan(mock_app): + pass + mock_bm25.close.assert_called_once() + + @pytest.mark.asyncio + async def test_db_lifespan_handles_no_bm25_adapter(self): + """Should handle gracefully when bm25_adapter is None.""" + from main import db_lifespan + + mock_app = MagicMock() + + with patch("main.bm25_adapter", None): + with patch("main.asyncio.to_thread"): + async with db_lifespan(mock_app): + pass + + @pytest.mark.asyncio + async def test_db_lifespan_handles_migration_failure(self): + """Should not crash if migrations fail.""" + from main import db_lifespan + + mock_app = MagicMock() + + with patch("main.bm25_adapter", None): + with patch("main.asyncio.to_thread") as mock_to_thread: + mock_to_thread.side_effect = Exception("Migration failed") + async with db_lifespan(mock_app): + pass + mock_to_thread.assert_called_once() + + @pytest.mark.asyncio + async def test_db_lifespan_handles_close_failure(self): + """Should not crash if BM25 close fails.""" + from main import db_lifespan + + mock_app = MagicMock() + mock_bm25 = AsyncMock() + mock_bm25.close = AsyncMock(side_effect=Exception("Close failed")) + + with patch("main.bm25_adapter", mock_bm25): + with patch("main.asyncio.to_thread"): + async with db_lifespan(mock_app): + pass + mock_bm25.close.assert_called_once() + + @pytest.mark.asyncio + async def test_run_alembic_upgrade_calls_command(self): + """Should call alembic command.upgrade with head.""" + with patch("main.command.upgrade") as mock_upgrade: + with patch("main.Config") as mock_config_cls: + mock_cfg = MagicMock() + mock_config_cls.return_value = mock_cfg + from main import _run_alembic_upgrade + + _run_alembic_upgrade() + 
mock_upgrade.assert_called_once_with(mock_cfg, "head") \ No newline at end of file diff --git a/tests/unit/test_query_use_case.py b/tests/unit/test_query_use_case.py index 8302e1c..3d443dd 100644 --- a/tests/unit/test_query_use_case.py +++ b/tests/unit/test_query_use_case.py @@ -1,6 +1,7 @@ from unittest.mock import AsyncMock from application.use_cases.query_use_case import QueryUseCase +from domain.ports.bm25_engine import BM25SearchResult class TestQueryUseCase: @@ -99,3 +100,68 @@ async def test_execute_with_mix_mode( top_k=5, working_dir="/tmp/rag/test", ) + + async def test_execute_hybrid_plus_with_bm25(self, mock_rag_engine: AsyncMock) -> None: + """hybrid+ mode should execute parallel BM25 + vector search.""" + mock_bm25 = AsyncMock() + mock_bm25.search.return_value = [ + BM25SearchResult( + chunk_id="1", content="bm25 result", file_path="/a.pdf", score=5.0, metadata={} + ) + ] + mock_rag_engine.query.return_value = { + "data": {"chunks": [{"reference_id": "2", "content": "vector result", "file_path": "/b.pdf"}]} + } + use_case = QueryUseCase(rag_engine=mock_rag_engine, bm25_engine=mock_bm25) + + result = await use_case.execute( + working_dir="/tmp/rag/test", query="search", mode="hybrid+", top_k=10 + ) + + mock_bm25.search.assert_called_once() + mock_rag_engine.query.assert_called() + assert result["status"] == "success" + assert result["metadata"]["query_mode"] == "hybrid+" + + async def test_execute_hybrid_plus_without_bm25_falls_back(self, mock_rag_engine: AsyncMock) -> None: + """hybrid+ mode without BM25 should fall back to naive vector search.""" + mock_rag_engine.query.return_value = {"status": "success", "data": {}} + use_case = QueryUseCase(rag_engine=mock_rag_engine, bm25_engine=None) + + await use_case.execute( + working_dir="/tmp/rag/test", query="search", mode="hybrid+", top_k=10 + ) + + mock_rag_engine.query.assert_called_once_with( + query="search", mode="naive", top_k=10, working_dir="/tmp/rag/test" + ) + + async def 
test_execute_bm25_only_mode(self, mock_rag_engine: AsyncMock) -> None: + """bm25 mode should only use BM25 search without vector.""" + mock_bm25 = AsyncMock() + mock_bm25.search.return_value = [ + BM25SearchResult( + chunk_id="1", content="test", file_path="/a.pdf", score=5.0, metadata={} + ) + ] + use_case = QueryUseCase(rag_engine=mock_rag_engine, bm25_engine=mock_bm25) + + result = await use_case.execute( + working_dir="/tmp/rag/test", query="search", mode="bm25", top_k=10 + ) + + mock_bm25.search.assert_called_once_with("search", "/tmp/rag/test", 10) + mock_rag_engine.query.assert_not_called() + assert result["status"] == "success" + assert result["metadata"]["query_mode"] == "bm25" + + async def test_execute_bm25_mode_without_bm25_returns_error(self, mock_rag_engine: AsyncMock) -> None: + """bm25 mode without BM25 engine should return error.""" + use_case = QueryUseCase(rag_engine=mock_rag_engine, bm25_engine=None) + + result = await use_case.execute( + working_dir="/tmp/rag/test", query="search", mode="bm25", top_k=10 + ) + + assert result["status"] == "error" + assert "BM25 engine not available" in result["message"] From 804912d25d48c99756e9e48155532254a125851a Mon Sep 17 00:00:00 2001 From: Kaiohz Date: Tue, 7 Apr 2026 20:42:25 +0200 Subject: [PATCH 05/17] style: Fix lint issues - combine with statements, trailing whitespace (BRIC-7) --- src/alembic/env.py | 3 +- src/alembic/versions/001_add_bm25_support.py | 5 +- .../bm25/pg_textsearch_adapter.py | 12 +++- src/main.py | 9 ++- tests/domain/ports/test_bm25_engine.py | 1 - .../hybrid/test_rrf_combiner.py | 1 - tests/unit/test_alembic_config.py | 9 +-- tests/unit/test_lifespan.py | 63 +++++++++---------- tests/unit/test_query_use_case.py | 28 +++++++-- uv.lock | 28 +++++++++ 10 files changed, 101 insertions(+), 58 deletions(-) diff --git a/src/alembic/env.py b/src/alembic/env.py index d93846b..927f90f 100644 --- a/src/alembic/env.py +++ b/src/alembic/env.py @@ -7,7 +7,6 @@ from sqlalchemy.ext.asyncio import 
async_engine_from_config from alembic import context - from config import DatabaseConfig config = context.config @@ -84,4 +83,4 @@ def run_migrations_online() -> None: if context.is_offline_mode(): run_migrations_offline() else: - run_migrations_online() \ No newline at end of file + run_migrations_online() diff --git a/src/alembic/versions/001_add_bm25_support.py b/src/alembic/versions/001_add_bm25_support.py index 934f3a3..fdcfc85 100644 --- a/src/alembic/versions/001_add_bm25_support.py +++ b/src/alembic/versions/001_add_bm25_support.py @@ -1,10 +1,11 @@ """Add BM25 support via pg_textsearch Revision ID: 001 -Revises: +Revises: Create Date: 2026-04-07 """ + from collections.abc import Sequence from alembic import op @@ -74,4 +75,4 @@ def downgrade() -> None: op.execute("DROP FUNCTION IF EXISTS update_chunks_tsv()") op.execute("DROP INDEX IF EXISTS idx_chunks_bm25") op.execute("DROP INDEX IF EXISTS idx_chunks_content_tsv") - op.execute("ALTER TABLE chunks DROP COLUMN IF EXISTS content_tsv") \ No newline at end of file + op.execute("ALTER TABLE chunks DROP COLUMN IF EXISTS content_tsv") diff --git a/src/infrastructure/bm25/pg_textsearch_adapter.py b/src/infrastructure/bm25/pg_textsearch_adapter.py index 9685b0c..db74c4f 100644 --- a/src/infrastructure/bm25/pg_textsearch_adapter.py +++ b/src/infrastructure/bm25/pg_textsearch_adapter.py @@ -158,7 +158,9 @@ async def index_document( metadata or {}, ) except Exception as e: - logger.error(f"BM25 document indexing failed: {e}", extra={"chunk_id": chunk_id}) + logger.error( + f"BM25 document indexing failed: {e}", extra={"chunk_id": chunk_id} + ) raise async def create_index(self, working_dir: str) -> None: @@ -186,7 +188,9 @@ async def create_index(self, working_dir: str) -> None: working_dir, ) except Exception as e: - logger.error(f"BM25 index creation failed: {e}", extra={"working_dir": working_dir}) + logger.error( + f"BM25 index creation failed: {e}", extra={"working_dir": working_dir} + ) raise async def 
drop_index(self, working_dir: str) -> None: @@ -205,5 +209,7 @@ async def drop_index(self, working_dir: str) -> None: working_dir, ) except Exception as e: - logger.error(f"BM25 index drop failed: {e}", extra={"working_dir": working_dir}) + logger.error( + f"BM25 index drop failed: {e}", extra={"working_dir": working_dir} + ) raise diff --git a/src/main.py b/src/main.py index 534f607..534fcd3 100644 --- a/src/main.py +++ b/src/main.py @@ -9,11 +9,11 @@ from pathlib import Path import uvicorn -from alembic import command from alembic.config import Config from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware +from alembic import command from application.api.health_routes import health_router from application.api.indexing_routes import indexing_router from application.api.mcp_tools import mcp @@ -73,9 +73,8 @@ async def db_lifespan(_app: FastAPI): @asynccontextmanager async def combined_lifespan(app: FastAPI): """Combine database lifecycle with MCP lifecycle for streamable transport.""" - async with db_lifespan(app): - async with mcp_app.lifespan(app): - yield + async with db_lifespan(app), mcp_app.lifespan(app): + yield app = FastAPI( title="RAG Anything API", @@ -125,4 +124,4 @@ def run_fastapi(): api_thread.start() mcp.run(transport="stdio") else: - run_fastapi() \ No newline at end of file + run_fastapi() diff --git a/tests/domain/ports/test_bm25_engine.py b/tests/domain/ports/test_bm25_engine.py index a1e95bf..ee9cdc4 100644 --- a/tests/domain/ports/test_bm25_engine.py +++ b/tests/domain/ports/test_bm25_engine.py @@ -1,6 +1,5 @@ """Tests for BM25EnginePort interface.""" - import pytest from domain.ports.bm25_engine import BM25EnginePort, BM25SearchResult diff --git a/tests/infrastructure/hybrid/test_rrf_combiner.py b/tests/infrastructure/hybrid/test_rrf_combiner.py index d450120..a430bab 100644 --- a/tests/infrastructure/hybrid/test_rrf_combiner.py +++ b/tests/infrastructure/hybrid/test_rrf_combiner.py @@ -1,6 +1,5 @@ """Tests for 
Reciprocal Rank Fusion combiner.""" - from domain.ports.bm25_engine import BM25SearchResult from infrastructure.hybrid.rrf_combiner import RRFCombiner diff --git a/tests/unit/test_alembic_config.py b/tests/unit/test_alembic_config.py index c44d707..d7878cd 100644 --- a/tests/unit/test_alembic_config.py +++ b/tests/unit/test_alembic_config.py @@ -3,8 +3,6 @@ import importlib.util from pathlib import Path -import pytest - ALEMBIC_DIR = Path(__file__).parent.parent.parent / "src" / "alembic" SRC_DIR = Path(__file__).parent.parent.parent / "src" @@ -23,7 +21,10 @@ def test_alembic_versions_dir_exists(self): def test_get_url_converts_asyncpg_to_sync(self): """Should convert postgresql+asyncpg:// to postgresql://.""" test_cases = [ - ("postgresql+asyncpg://user:pass@host/db", "postgresql://user:pass@host/db"), + ( + "postgresql+asyncpg://user:pass@host/db", + "postgresql://user:pass@host/db", + ), ("postgresql://user:pass@host/db", "postgresql://user:pass@host/db"), ] for input_url, expected in test_cases: @@ -52,4 +53,4 @@ def test_env_py_has_async_support(self): env_path = ALEMBIC_DIR / "env.py" content = env_path.read_text() assert "async_engine_from_config" in content - assert "run_async_migrations" in content \ No newline at end of file + assert "run_async_migrations" in content diff --git a/tests/unit/test_lifespan.py b/tests/unit/test_lifespan.py index f6f8e42..9e09580 100644 --- a/tests/unit/test_lifespan.py +++ b/tests/unit/test_lifespan.py @@ -1,6 +1,5 @@ """Tests for FastAPI lifespan management.""" -from contextlib import asynccontextmanager from unittest.mock import AsyncMock, MagicMock, patch import pytest @@ -16,12 +15,11 @@ async def test_db_lifespan_runs_migrations_on_startup(self): mock_app = MagicMock() - with patch("main.bm25_adapter", None): - with patch("main.asyncio.to_thread") as mock_to_thread: - mock_to_thread.return_value = None - async with db_lifespan(mock_app): - pass - mock_to_thread.assert_called_once() + with patch("main.bm25_adapter", 
None), patch("main.asyncio.to_thread") as mock_to_thread: + mock_to_thread.return_value = None + async with db_lifespan(mock_app): + pass + mock_to_thread.assert_called_once() @pytest.mark.asyncio async def test_db_lifespan_closes_bm25_pool_on_shutdown(self): @@ -31,11 +29,10 @@ async def test_db_lifespan_closes_bm25_pool_on_shutdown(self): mock_app = MagicMock() mock_bm25 = AsyncMock() - with patch("main.bm25_adapter", mock_bm25): - with patch("main.asyncio.to_thread"): - async with db_lifespan(mock_app): - pass - mock_bm25.close.assert_called_once() + with patch("main.bm25_adapter", mock_bm25), patch("main.asyncio.to_thread"): + async with db_lifespan(mock_app): + pass + mock_bm25.close.assert_called_once() @pytest.mark.asyncio async def test_db_lifespan_handles_no_bm25_adapter(self): @@ -44,10 +41,9 @@ async def test_db_lifespan_handles_no_bm25_adapter(self): mock_app = MagicMock() - with patch("main.bm25_adapter", None): - with patch("main.asyncio.to_thread"): - async with db_lifespan(mock_app): - pass + with patch("main.bm25_adapter", None), patch("main.asyncio.to_thread"): + async with db_lifespan(mock_app): + pass @pytest.mark.asyncio async def test_db_lifespan_handles_migration_failure(self): @@ -56,12 +52,11 @@ async def test_db_lifespan_handles_migration_failure(self): mock_app = MagicMock() - with patch("main.bm25_adapter", None): - with patch("main.asyncio.to_thread") as mock_to_thread: - mock_to_thread.side_effect = Exception("Migration failed") - async with db_lifespan(mock_app): - pass - mock_to_thread.assert_called_once() + with patch("main.bm25_adapter", None), patch("main.asyncio.to_thread") as mock_to_thread: + mock_to_thread.side_effect = Exception("Migration failed") + async with db_lifespan(mock_app): + pass + mock_to_thread.assert_called_once() @pytest.mark.asyncio async def test_db_lifespan_handles_close_failure(self): @@ -72,20 +67,18 @@ async def test_db_lifespan_handles_close_failure(self): mock_bm25 = AsyncMock() mock_bm25.close = 
AsyncMock(side_effect=Exception("Close failed")) - with patch("main.bm25_adapter", mock_bm25): - with patch("main.asyncio.to_thread"): - async with db_lifespan(mock_app): - pass - mock_bm25.close.assert_called_once() + with patch("main.bm25_adapter", mock_bm25), patch("main.asyncio.to_thread"): + async with db_lifespan(mock_app): + pass + mock_bm25.close.assert_called_once() @pytest.mark.asyncio async def test_run_alembic_upgrade_calls_command(self): """Should call alembic command.upgrade with head.""" - with patch("main.command.upgrade") as mock_upgrade: - with patch("main.Config") as mock_config_cls: - mock_cfg = MagicMock() - mock_config_cls.return_value = mock_cfg - from main import _run_alembic_upgrade - - _run_alembic_upgrade() - mock_upgrade.assert_called_once_with(mock_cfg, "head") \ No newline at end of file + with patch("main.command.upgrade") as mock_upgrade, patch("main.Config") as mock_config_cls: + mock_cfg = MagicMock() + mock_config_cls.return_value = mock_cfg + from main import _run_alembic_upgrade + + _run_alembic_upgrade() + mock_upgrade.assert_called_once_with(mock_cfg, "head") diff --git a/tests/unit/test_query_use_case.py b/tests/unit/test_query_use_case.py index 3d443dd..44f6e62 100644 --- a/tests/unit/test_query_use_case.py +++ b/tests/unit/test_query_use_case.py @@ -101,16 +101,30 @@ async def test_execute_with_mix_mode( working_dir="/tmp/rag/test", ) - async def test_execute_hybrid_plus_with_bm25(self, mock_rag_engine: AsyncMock) -> None: + async def test_execute_hybrid_plus_with_bm25( + self, mock_rag_engine: AsyncMock + ) -> None: """hybrid+ mode should execute parallel BM25 + vector search.""" mock_bm25 = AsyncMock() mock_bm25.search.return_value = [ BM25SearchResult( - chunk_id="1", content="bm25 result", file_path="/a.pdf", score=5.0, metadata={} + chunk_id="1", + content="bm25 result", + file_path="/a.pdf", + score=5.0, + metadata={}, ) ] mock_rag_engine.query.return_value = { - "data": {"chunks": [{"reference_id": "2", "content": 
"vector result", "file_path": "/b.pdf"}]} + "data": { + "chunks": [ + { + "reference_id": "2", + "content": "vector result", + "file_path": "/b.pdf", + } + ] + } } use_case = QueryUseCase(rag_engine=mock_rag_engine, bm25_engine=mock_bm25) @@ -123,7 +137,9 @@ async def test_execute_hybrid_plus_with_bm25(self, mock_rag_engine: AsyncMock) - assert result["status"] == "success" assert result["metadata"]["query_mode"] == "hybrid+" - async def test_execute_hybrid_plus_without_bm25_falls_back(self, mock_rag_engine: AsyncMock) -> None: + async def test_execute_hybrid_plus_without_bm25_falls_back( + self, mock_rag_engine: AsyncMock + ) -> None: """hybrid+ mode without BM25 should fall back to naive vector search.""" mock_rag_engine.query.return_value = {"status": "success", "data": {}} use_case = QueryUseCase(rag_engine=mock_rag_engine, bm25_engine=None) @@ -155,7 +171,9 @@ async def test_execute_bm25_only_mode(self, mock_rag_engine: AsyncMock) -> None: assert result["status"] == "success" assert result["metadata"]["query_mode"] == "bm25" - async def test_execute_bm25_mode_without_bm25_returns_error(self, mock_rag_engine: AsyncMock) -> None: + async def test_execute_bm25_mode_without_bm25_returns_error( + self, mock_rag_engine: AsyncMock + ) -> None: """bm25 mode without BM25 engine should return error.""" use_case = QueryUseCase(rag_engine=mock_rag_engine, bm25_engine=None) diff --git a/uv.lock b/uv.lock index 43a4a31..c8cfc55 100644 --- a/uv.lock +++ b/uv.lock @@ -166,6 +166,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8e/64/013409c451a44b61310fb757af4527f3de57fc98a00f40448de28b864290/albumentations-2.0.8-py3-none-any.whl", hash = "sha256:c4c4259aaf04a7386ad85c7fdcb73c6c7146ca3057446b745cc035805acb1017", size = 369423, upload-time = "2025-05-27T21:23:15.609Z" }, ] +[[package]] +name = "alembic" +version = "1.18.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mako" }, + { name = "sqlalchemy" }, + { name = 
"typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/94/13/8b084e0f2efb0275a1d534838844926f798bd766566b1375174e2448cd31/alembic-1.18.4.tar.gz", hash = "sha256:cb6e1fd84b6174ab8dbb2329f86d631ba9559dd78df550b57804d607672cedbc", size = 2056725, upload-time = "2026-02-10T16:00:47.195Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/29/6533c317b74f707ea28f8d633734dbda2119bbadfc61b2f3640ba835d0f7/alembic-1.18.4-py3-none-any.whl", hash = "sha256:a5ed4adcf6d8a4cb575f3d759f071b03cd6e5c7618eb796cb52497be25bfe19a", size = 263893, upload-time = "2026-02-10T16:00:49.997Z" }, +] + [[package]] name = "annotated-doc" version = "0.0.4" @@ -2285,6 +2299,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1a/12/185a8822994a2f7b5e7d88d19a88d80637917bbb0a6f3f59a2564aabc125/magika-1.0.2-py3-none-win_amd64.whl", hash = "sha256:4937e876d55642423d6416e5db4e5ca7523ab7f855cbc5389efdeac1d149df04", size = 13099543, upload-time = "2026-02-25T16:07:01.942Z" }, ] +[[package]] +name = "mako" +version = "1.3.10" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9e/38/bd5b78a920a64d708fe6bc8e0a2c075e1389d53bef8413725c63ba041535/mako-1.3.10.tar.gz", hash = "sha256:99579a6f39583fa7e5630a28c3c1f440e4e97a414b80372649c0ce338da2ea28", size = 392474, upload-time = "2025-04-10T12:44:31.16Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/fb/99f81ac72ae23375f22b7afdb7642aba97c00a713c217124420147681a2f/mako-1.3.10-py3-none-any.whl", hash = "sha256:baef24a52fc4fc514a0887ac600f9f1cff3d82c61d4d700a1fa84d597b88db59", size = 78509, upload-time = "2025-04-10T12:50:53.297Z" }, +] + [[package]] name = "mammoth" version = "1.12.0" @@ -2410,6 +2436,7 @@ version = "0.1.0" source = { virtual = "." 
} dependencies = [ { name = "aiofiles" }, + { name = "alembic" }, { name = "asyncpg" }, { name = "authlib" }, { name = "cryptography" }, @@ -2442,6 +2469,7 @@ dev = [ [package.metadata] requires-dist = [ { name = "aiofiles", specifier = ">=24.1.0" }, + { name = "alembic", specifier = ">=1.13.0" }, { name = "asyncpg", specifier = ">=0.31.0" }, { name = "authlib", specifier = ">=1.6.9" }, { name = "cryptography", specifier = ">=46.0.5" }, From 050e18b21bb1ee1e1bae696f4559d1e04712d5d9 Mon Sep 17 00:00:00 2001 From: Kaiohz Date: Tue, 7 Apr 2026 20:51:39 +0200 Subject: [PATCH 06/17] fix: Address code review critical and high-severity issues (BRIC-7) Critical: - C1: Lifespan now raises on migration failure instead of silently continuing - C2: Add asyncio.Lock to prevent race condition in BM25 pool initialization High: - H2: Handle asyncio.gather exceptions in hybrid+ mode with return_exceptions=True - H4: Add WARNING comment about backfill on large tables in migration Suggestions: - S5: Add ge=1 validation on BM25_RRF_K config field - S7: Add close() method assertion in BM25EnginePort test --- src/alembic/versions/001_add_bm25_support.py | 3 +- src/application/use_cases/query_use_case.py | 31 ++++++++++++------- src/config.py | 4 ++- .../bm25/pg_textsearch_adapter.py | 11 +++++-- src/main.py | 4 +-- tests/domain/ports/test_bm25_engine.py | 1 + tests/unit/test_lifespan.py | 24 +++++++++----- 7 files changed, 53 insertions(+), 25 deletions(-) diff --git a/src/alembic/versions/001_add_bm25_support.py b/src/alembic/versions/001_add_bm25_support.py index fdcfc85..2f00500 100644 --- a/src/alembic/versions/001_add_bm25_support.py +++ b/src/alembic/versions/001_add_bm25_support.py @@ -63,7 +63,8 @@ def upgrade() -> None: """ ) - # Backfill existing documents + # WARNING: This UPDATE scans the entire table. For tables with >100K rows, + # consider running this as a separate manual batch operation instead. 
op.execute( "UPDATE chunks SET content_tsv = to_tsvector('english', COALESCE(content, '')) WHERE content_tsv IS NULL" ) diff --git a/src/application/use_cases/query_use_case.py b/src/application/use_cases/query_use_case.py index 3467adf..85d0407 100644 --- a/src/application/use_cases/query_use_case.py +++ b/src/application/use_cases/query_use_case.py @@ -1,12 +1,15 @@ """Query use case with hybrid+ mode support.""" import asyncio +import logging from typing import Literal -from domain.ports.bm25_engine import BM25EnginePort +from domain.ports.bm25_engine import BM25EnginePort, BM25SearchResult from domain.ports.rag_engine import RAGEnginePort from infrastructure.hybrid.rrf_combiner import RRFCombiner +logger = logging.getLogger(__name__) + class QueryUseCase: """Use case for querying the RAG knowledge base.""" @@ -56,10 +59,8 @@ async def execute( Returns: Search results """ - # Initialize RAG engine self.rag_engine.init_project(working_dir) - # Handle BM25-only mode if mode == "bm25": if self.bm25_engine is None: return { @@ -71,15 +72,12 @@ async def execute( results = await self.bm25_engine.search(query, working_dir, top_k) return self._format_bm25_results(results) - # Handle hybrid+ mode (parallel BM25 + vector) if mode == "hybrid+": if self.bm25_engine is None: - # Fall back to regular vector search return await self.rag_engine.query( query=query, mode="naive", top_k=top_k, working_dir=working_dir ) - # Execute BM25 and vector search in parallel bm25_task = asyncio.create_task( self.bm25_engine.search(query, working_dir, top_k=top_k * 2) ) @@ -89,12 +87,24 @@ async def execute( ) ) - # Wait for both to complete - bm25_results, vector_results = await asyncio.gather( - bm25_task, vector_task, return_exceptions=False + bm25_results_raw, vector_results_raw = await asyncio.gather( + bm25_task, vector_task, return_exceptions=True + ) + + bm25_results: list[BM25SearchResult] = ( + bm25_results_raw if isinstance(bm25_results_raw, list) else [] ) + if 
isinstance(bm25_results_raw, Exception): + logger.error("BM25 search failed in hybrid+ mode: %s", bm25_results_raw) + + if isinstance(vector_results_raw, Exception): + logger.error( + "Vector search failed in hybrid+ mode: %s", vector_results_raw + ) + raise vector_results_raw + + vector_results: dict = vector_results_raw - # Combine using RRF combined_results = self.rrf_combiner.combine( bm25_results=bm25_results, vector_results=vector_results, @@ -103,7 +113,6 @@ async def execute( return self._format_hybrid_results(combined_results) - # Default: use RAG engine return await self.rag_engine.query( query=query, mode=mode, top_k=top_k, working_dir=working_dir ) diff --git a/src/config.py b/src/config.py index 6e52b07..e5989d1 100644 --- a/src/config.py +++ b/src/config.py @@ -116,7 +116,9 @@ class BM25Config(BaseSettings): BM25_TEXT_CONFIG: str = Field( default="english", description="PostgreSQL text search configuration" ) - BM25_RRF_K: int = Field(default=60, description="RRF constant K for hybrid search") + BM25_RRF_K: int = Field( + default=60, ge=1, description="RRF constant K for hybrid search" + ) class MinioConfig(BaseSettings): diff --git a/src/infrastructure/bm25/pg_textsearch_adapter.py b/src/infrastructure/bm25/pg_textsearch_adapter.py index db74c4f..ee3c8e7 100644 --- a/src/infrastructure/bm25/pg_textsearch_adapter.py +++ b/src/infrastructure/bm25/pg_textsearch_adapter.py @@ -1,5 +1,6 @@ """PostgreSQL BM25 adapter using pg_textsearch extension.""" +import asyncio import logging from typing import Any @@ -28,13 +29,17 @@ def __init__(self, db_url: str): """ self.db_url = db_url self._pool: asyncpg.Pool | None = None + self._pool_lock = asyncio.Lock() async def _get_pool(self) -> asyncpg.Pool: - """Get or create database connection pool.""" - if self._pool is None: + """Get or create database connection pool with double-checked locking.""" + if self._pool is not None: + return self._pool + async with self._pool_lock: + if self._pool is not None: + return 
self._pool self._pool = await asyncpg.create_pool(self.db_url) - # Validate pg_textsearch extension async with self._pool.acquire() as conn: try: result = await conn.fetchval( diff --git a/src/main.py b/src/main.py index 534fcd3..217c21b 100644 --- a/src/main.py +++ b/src/main.py @@ -45,13 +45,13 @@ async def db_lifespan(_app: FastAPI): """ logger.info("Application startup initiated") - # Run database migrations try: logger.info("Running database migrations...") await asyncio.to_thread(_run_alembic_upgrade) logger.info("Database migrations completed") except Exception: - logger.exception("Failed to run migrations") + logger.exception("Failed to run migrations — refusing to start") + raise yield diff --git a/tests/domain/ports/test_bm25_engine.py b/tests/domain/ports/test_bm25_engine.py index ee9cdc4..f752c02 100644 --- a/tests/domain/ports/test_bm25_engine.py +++ b/tests/domain/ports/test_bm25_engine.py @@ -17,6 +17,7 @@ def test_bm25_engine_port_has_required_methods(): assert hasattr(BM25EnginePort, "index_document") assert hasattr(BM25EnginePort, "create_index") assert hasattr(BM25EnginePort, "drop_index") + assert hasattr(BM25EnginePort, "close") def test_bm25_search_result_dataclass(): diff --git a/tests/unit/test_lifespan.py b/tests/unit/test_lifespan.py index 9e09580..cd89212 100644 --- a/tests/unit/test_lifespan.py +++ b/tests/unit/test_lifespan.py @@ -15,7 +15,10 @@ async def test_db_lifespan_runs_migrations_on_startup(self): mock_app = MagicMock() - with patch("main.bm25_adapter", None), patch("main.asyncio.to_thread") as mock_to_thread: + with ( + patch("main.bm25_adapter", None), + patch("main.asyncio.to_thread") as mock_to_thread, + ): mock_to_thread.return_value = None async with db_lifespan(mock_app): pass @@ -46,16 +49,20 @@ async def test_db_lifespan_handles_no_bm25_adapter(self): pass @pytest.mark.asyncio - async def test_db_lifespan_handles_migration_failure(self): - """Should not crash if migrations fail.""" + async def 
test_db_lifespan_raises_on_migration_failure(self): + """Should raise if migrations fail — refusing to start with broken schema.""" from main import db_lifespan mock_app = MagicMock() - with patch("main.bm25_adapter", None), patch("main.asyncio.to_thread") as mock_to_thread: + with ( + patch("main.bm25_adapter", None), + patch("main.asyncio.to_thread") as mock_to_thread, + ): mock_to_thread.side_effect = Exception("Migration failed") - async with db_lifespan(mock_app): - pass + with pytest.raises(Exception, match="Migration failed"): + async with db_lifespan(mock_app): + pass mock_to_thread.assert_called_once() @pytest.mark.asyncio @@ -75,7 +82,10 @@ async def test_db_lifespan_handles_close_failure(self): @pytest.mark.asyncio async def test_run_alembic_upgrade_calls_command(self): """Should call alembic command.upgrade with head.""" - with patch("main.command.upgrade") as mock_upgrade, patch("main.Config") as mock_config_cls: + with ( + patch("main.command.upgrade") as mock_upgrade, + patch("main.Config") as mock_config_cls, + ): mock_cfg = MagicMock() mock_config_cls.return_value = mock_cfg from main import _run_alembic_upgrade From 9c9f72279715d520287df4153ad74db2808f7bd5 Mon Sep 17 00:00:00 2001 From: Kaiohz Date: Tue, 7 Apr 2026 21:01:05 +0200 Subject: [PATCH 07/17] refactor: Simplify code after code review (BRIC-7) - Reduce verbose docstrings in adapter, use cleaner type annotations - Simplify dependencies.py (remove redundant comments) - Streamline query_use_case.py (reduce nested conditionals) - Simplify main.py lifespan (remove redundant comments) - Clean up env.py and config.py --- src/alembic/env.py | 12 +-- src/alembic/versions/001_add_bm25_support.py | 2 +- src/application/use_cases/query_use_case.py | 34 +++------ src/config.py | 12 +-- src/dependencies.py | 7 -- .../bm25/pg_textsearch_adapter.py | 73 ++++++------------- src/infrastructure/hybrid/rrf_combiner.py | 16 +--- src/main.py | 15 +--- 8 files changed, 47 insertions(+), 124 deletions(-) diff 
--git a/src/alembic/env.py b/src/alembic/env.py index 927f90f..d375951 100644 --- a/src/alembic/env.py +++ b/src/alembic/env.py @@ -19,17 +19,9 @@ def get_url() -> str: - """Build the database URL from application settings. - - Returns synchronous URL for Alembic (remove +asyncpg driver). - """ + """Build the database URL from application settings (sync driver for Alembic).""" db_config = DatabaseConfig() - # Convert async URL to sync URL for Alembic - # postgresql+asyncpg:// -> postgresql:// - url = db_config.DATABASE_URL - if "+asyncpg" in url: - url = url.replace("+asyncpg", "") - return url + return db_config.DATABASE_URL.replace("+asyncpg", "") def run_migrations_offline() -> None: diff --git a/src/alembic/versions/001_add_bm25_support.py b/src/alembic/versions/001_add_bm25_support.py index 2f00500..2559bf3 100644 --- a/src/alembic/versions/001_add_bm25_support.py +++ b/src/alembic/versions/001_add_bm25_support.py @@ -64,7 +64,7 @@ def upgrade() -> None: ) # WARNING: This UPDATE scans the entire table. For tables with >100K rows, - # consider running this as a separate manual batch operation instead. + # consider running as a separate manual batch operation instead. 
op.execute( "UPDATE chunks SET content_tsv = to_tsvector('english', COALESCE(content, '')) WHERE content_tsv IS NULL" ) diff --git a/src/application/use_cases/query_use_case.py b/src/application/use_cases/query_use_case.py index 85d0407..0f10ad9 100644 --- a/src/application/use_cases/query_use_case.py +++ b/src/application/use_cases/query_use_case.py @@ -78,35 +78,26 @@ async def execute( query=query, mode="naive", top_k=top_k, working_dir=working_dir ) - bm25_task = asyncio.create_task( - self.bm25_engine.search(query, working_dir, top_k=top_k * 2) - ) - vector_task = asyncio.create_task( + bm25_results, vector_results = await asyncio.gather( + self.bm25_engine.search(query, working_dir, top_k=top_k * 2), self.rag_engine.query( query=query, mode="naive", top_k=top_k * 2, working_dir=working_dir - ) + ), + return_exceptions=True, ) - bm25_results_raw, vector_results_raw = await asyncio.gather( - bm25_task, vector_task, return_exceptions=True + bm25_hits: list[BM25SearchResult] = ( + bm25_results if isinstance(bm25_results, list) else [] ) + if isinstance(bm25_results, Exception): + logger.error("BM25 search failed in hybrid+ mode: %s", bm25_results) - bm25_results: list[BM25SearchResult] = ( - bm25_results_raw if isinstance(bm25_results_raw, list) else [] - ) - if isinstance(bm25_results_raw, Exception): - logger.error("BM25 search failed in hybrid+ mode: %s", bm25_results_raw) - - if isinstance(vector_results_raw, Exception): - logger.error( - "Vector search failed in hybrid+ mode: %s", vector_results_raw - ) - raise vector_results_raw - - vector_results: dict = vector_results_raw + if isinstance(vector_results, Exception): + logger.error("Vector search failed in hybrid+ mode: %s", vector_results) + raise vector_results combined_results = self.rrf_combiner.combine( - bm25_results=bm25_results, + bm25_results=bm25_hits, vector_results=vector_results, top_k=top_k, ) @@ -145,7 +136,6 @@ def _format_bm25_results(self, results: list) -> dict: def 
_format_hybrid_results(self, results: list) -> dict: """Format hybrid results to match API response format.""" - return { "status": "success", "message": "", diff --git a/src/config.py b/src/config.py index e5989d1..6b1f89a 100644 --- a/src/config.py +++ b/src/config.py @@ -25,9 +25,7 @@ class AppConfig(BaseSettings): class DatabaseConfig(BaseSettings): - """ - Database connection configuration. - """ + """Database connection configuration.""" POSTGRES_USER: str = Field(default="raganything") POSTGRES_PASSWORD: str = Field(default="raganything") @@ -42,9 +40,7 @@ def DATABASE_URL(self) -> str: class LLMConfig(BaseSettings): - """ - Large Language Model configuration. - """ + """Large Language Model configuration.""" OPEN_ROUTER_API_KEY: str | None = Field(default=None) OPENROUTER_API_KEY: str | None = Field(default=None) @@ -82,9 +78,7 @@ def api_base_url(self) -> str: class RAGConfig(BaseSettings): - """ - RAG-specific configuration for LightRAG. - """ + """RAG-specific configuration for LightRAG.""" COSINE_THRESHOLD: float = Field( default=0.2, description="Similarity threshold for vector search (0.0-1.0)" diff --git a/src/dependencies.py b/src/dependencies.py index 7e1c7f7..ad9115a 100644 --- a/src/dependencies.py +++ b/src/dependencies.py @@ -19,8 +19,6 @@ from infrastructure.rag.lightrag_adapter import LightRAGAdapter from infrastructure.storage.minio_adapter import MinioAdapter -# ============= CONFIG ============= - app_config = AppConfig() # type: ignore llm_config = LLMConfig() # type: ignore rag_config = RAGConfig() # type: ignore @@ -30,8 +28,6 @@ os.makedirs(app_config.OUTPUT_DIR, exist_ok=True) -# ============= ADAPTERS ============= - rag_adapter = LightRAGAdapter(llm_config, rag_config) minio_adapter = MinioAdapter( host=minio_config.MINIO_HOST, @@ -40,7 +36,6 @@ secure=minio_config.MINIO_SECURE, ) -# BM25 adapter (optional) bm25_adapter: BM25EnginePort | None = None if bm25_config.BM25_ENABLED: try: @@ -51,8 +46,6 @@ print(f"WARNING: BM25 adapter 
initialization failed: {e}") bm25_adapter = None -# ============= USE CASE PROVIDERS ============= - def get_index_file_use_case() -> IndexFileUseCase: return IndexFileUseCase( diff --git a/src/infrastructure/bm25/pg_textsearch_adapter.py b/src/infrastructure/bm25/pg_textsearch_adapter.py index ee3c8e7..6449bb2 100644 --- a/src/infrastructure/bm25/pg_textsearch_adapter.py +++ b/src/infrastructure/bm25/pg_textsearch_adapter.py @@ -22,11 +22,6 @@ class PostgresBM25Adapter(BM25EnginePort): """ def __init__(self, db_url: str): - """Initialize adapter with database URL. - - Args: - db_url: PostgreSQL connection string - """ self.db_url = db_url self._pool: asyncpg.Pool | None = None self._pool_lock = asyncio.Lock() @@ -39,22 +34,24 @@ async def _get_pool(self) -> asyncpg.Pool: if self._pool is not None: return self._pool self._pool = await asyncpg.create_pool(self.db_url) + await self._check_extension() + return self._pool - async with self._pool.acquire() as conn: - try: - result = await conn.fetchval( - "SELECT EXISTS(SELECT 1 FROM pg_extension WHERE extname='pg_textsearch')" + async def _check_extension(self) -> None: + """Warn if pg_textsearch extension is not installed.""" + async with self._pool.acquire() as conn: + try: + result = await conn.fetchval( + "SELECT EXISTS(SELECT 1 FROM pg_extension WHERE extname='pg_textsearch')" + ) + if not result: + logger.warning( + "pg_textsearch extension not installed. " + "BM25 ranking <@> operator will not work. " + "Run: CREATE EXTENSION pg_textsearch;" ) - if not result: - logger.warning( - "pg_textsearch extension not installed. " - "BM25 ranking <@> operator will not work. 
" - "Run: CREATE EXTENSION pg_textsearch;" - ) - except Exception as e: - logger.warning(f"Could not check pg_textsearch extension: {e}") - - return self._pool + except Exception as e: + logger.warning("Could not check pg_textsearch extension: %s", e) async def close(self) -> None: """Close connection pool on shutdown.""" @@ -85,10 +82,6 @@ async def search( try: async with pool.acquire() as conn: - # Use websearch_to_tsquery for user-friendly query syntax - # and <@> operator for BM25 ranking - # Note: <@> returns negative scores (lower is better) - # We convert to positive and sort ASC sql = """ SELECT chunk_id, @@ -102,25 +95,20 @@ async def search( ORDER BY score LIMIT $3 """ - results = await conn.fetch(sql, query, working_dir, top_k) - # Convert negative scores to positive (lower negative -> higher relevance) return [ BM25SearchResult( chunk_id=row["chunk_id"], content=row["content"], file_path=row["file_path"], - score=abs(row["score"]), # Convert to positive + score=abs(row["score"]), metadata=row["metadata"] or {}, ) for row in results ] except Exception as e: - logger.error( - f"BM25 search failed: {e}", - extra={"query": query, "working_dir": working_dir}, - ) + logger.error("BM25 search failed: %s", e, extra={"query": query, "working_dir": working_dir}) raise async def index_document( @@ -163,17 +151,13 @@ async def index_document( metadata or {}, ) except Exception as e: - logger.error( - f"BM25 document indexing failed: {e}", extra={"chunk_id": chunk_id} - ) + logger.error("BM25 document indexing failed: %s", e, extra={"chunk_id": chunk_id}) raise async def create_index(self, working_dir: str) -> None: """Create BM25 index for workspace. - Note: The index is created automatically via the trigger - defined in the migration. This method is for explicit - re-indexing if needed. + The index is auto-updated via trigger; this method is for explicit re-indexing. 
Args: working_dir: Project/workspace directory @@ -182,8 +166,6 @@ async def create_index(self, working_dir: str) -> None: try: async with pool.acquire() as conn: - # Index is created automatically via trigger - # This is just for explicit re-indexing await conn.execute( """ UPDATE chunks @@ -193,28 +175,19 @@ async def create_index(self, working_dir: str) -> None: working_dir, ) except Exception as e: - logger.error( - f"BM25 index creation failed: {e}", extra={"working_dir": working_dir} - ) + logger.error("BM25 index creation failed: %s", e, extra={"working_dir": working_dir}) raise async def drop_index(self, working_dir: str) -> None: - """Drop BM25 index for workspace. - - Args: - working_dir: Project/workspace directory - """ + """Drop BM25 index for workspace.""" pool = await self._get_pool() try: async with pool.acquire() as conn: - # Clear tsvector for this workspace await conn.execute( "UPDATE chunks SET content_tsv = NULL WHERE working_dir = $1", working_dir, ) except Exception as e: - logger.error( - f"BM25 index drop failed: {e}", extra={"working_dir": working_dir} - ) + logger.error("BM25 index drop failed: %s", e, extra={"working_dir": working_dir}) raise diff --git a/src/infrastructure/hybrid/rrf_combiner.py b/src/infrastructure/hybrid/rrf_combiner.py index 631ebe9..1e5eb51 100644 --- a/src/infrastructure/hybrid/rrf_combiner.py +++ b/src/infrastructure/hybrid/rrf_combiner.py @@ -71,12 +71,9 @@ def combine( "vector_rank": None, } else: - # If chunk already exists, keep the best rank (smallest number) scores[chunk_id]["bm25_rank"] = min(scores[chunk_id]["bm25_rank"], rank) - # Calculate BM25 RRF score - actual_rank = scores[chunk_id]["bm25_rank"] - scores[chunk_id]["bm25_score"] = 1.0 / (self.k + actual_rank) + scores[chunk_id]["bm25_score"] = 1.0 / (self.k + scores[chunk_id]["bm25_rank"]) # Process vector results chunks = vector_results.get("data", {}).get("chunks", []) @@ -96,14 +93,9 @@ def combine( "vector_rank": rank, } else: - # If chunk already 
exists, keep the best rank (smallest number) - existing_rank = scores[chunk_id]["vector_rank"] - if existing_rank is not None: - scores[chunk_id]["vector_rank"] = min(existing_rank, rank) - else: - scores[chunk_id]["vector_rank"] = rank - - # Calculate vector RRF score + existing = scores[chunk_id]["vector_rank"] + scores[chunk_id]["vector_rank"] = min(existing, rank) if existing is not None else rank + actual_rank = scores[chunk_id]["vector_rank"] if actual_rank is not None: scores[chunk_id]["vector_score"] = 1.0 / (self.k + actual_rank) diff --git a/src/main.py b/src/main.py index 217c21b..8abb43e 100644 --- a/src/main.py +++ b/src/main.py @@ -1,6 +1,4 @@ -"""Main entry point for the RAGAnything API. -Simplified following hexagonal architecture pattern from pickpro_indexing_api. -""" +"""Main entry point for the RAGAnything API.""" import asyncio import logging @@ -26,10 +24,7 @@ def _run_alembic_upgrade() -> None: - """Run Alembic migrations to head synchronously. - - Designed to be called via asyncio.to_thread() during startup. 
- """ + """Run Alembic migrations to head (called via asyncio.to_thread).""" alembic_dir = Path(__file__).parent cfg = Config(str(alembic_dir / "alembic.ini")) cfg.set_main_option("script_location", str(alembic_dir / "alembic")) @@ -60,7 +55,6 @@ async def db_lifespan(_app: FastAPI): if bm25_adapter is not None: try: await bm25_adapter.close() - logger.info("BM25 connection pool closed") except Exception: logger.exception("Failed to close BM25 adapter") logger.info("Application shutdown complete") @@ -95,16 +89,11 @@ async def combined_lifespan(app: FastAPI): allow_headers=["*"], ) -# ============= REST API ROUTES ============= - REST_PATH = "/api/v1" - app.include_router(indexing_router, prefix=REST_PATH) app.include_router(health_router, prefix=REST_PATH) app.include_router(query_router, prefix=REST_PATH) -# ============= MAIN ============= - def run_fastapi(): """Run FastAPI server with uvicorn.""" From c3e5cb316291d7fc0082899f8ae23523e8cbb82e Mon Sep 17 00:00:00 2001 From: Kaiohz Date: Tue, 7 Apr 2026 21:04:59 +0200 Subject: [PATCH 08/17] refactor: Reduce cognitive complexity in RRF combiner (sonar S3776) (BRIC-7) --- src/infrastructure/hybrid/rrf_combiner.py | 108 ++++++++++------------ 1 file changed, 47 insertions(+), 61 deletions(-) diff --git a/src/infrastructure/hybrid/rrf_combiner.py b/src/infrastructure/hybrid/rrf_combiner.py index 1e5eb51..db8ceae 100644 --- a/src/infrastructure/hybrid/rrf_combiner.py +++ b/src/infrastructure/hybrid/rrf_combiner.py @@ -26,81 +26,69 @@ class RRFCombiner: RRF formula: score = Σ (1 / (k + rank_i)) where k is a constant (default 60) and rank_i is the rank in list i. - - This is a simple and effective method for combining ranked lists - that doesn't require score normalization. """ def __init__(self, k: int = 60): - """Initialize RRF combiner. 
- - Args: - k: RRF constant (default 60, industry standard) - """ self.k = k + def _add_bm25_result( + self, scores: dict[str, dict[str, Any]], rank: int, result: BM25SearchResult + ) -> None: + chunk_id = result.chunk_id + if chunk_id not in scores: + scores[chunk_id] = { + "content": result.content, + "file_path": result.file_path, + "metadata": result.metadata, + "bm25_score": 0.0, + "vector_score": 0.0, + "bm25_rank": rank, + "vector_rank": None, + } + else: + scores[chunk_id]["bm25_rank"] = min(scores[chunk_id]["bm25_rank"], rank) + scores[chunk_id]["bm25_score"] = 1.0 / (self.k + scores[chunk_id]["bm25_rank"]) + + def _add_vector_result( + self, scores: dict[str, dict[str, Any]], rank: int, chunk: dict[str, Any] + ) -> None: + chunk_id = chunk.get("reference_id") or chunk.get("chunk_id") + if chunk_id is None: + return + if chunk_id not in scores: + scores[chunk_id] = { + "content": chunk.get("content", ""), + "file_path": chunk.get("file_path", ""), + "metadata": chunk.get("metadata", {}), + "bm25_score": 0.0, + "vector_score": 0.0, + "bm25_rank": None, + "vector_rank": rank, + } + else: + existing = scores[chunk_id]["vector_rank"] + scores[chunk_id]["vector_rank"] = min(existing, rank) if existing is not None else rank + + actual_rank = scores[chunk_id]["vector_rank"] + if actual_rank is not None: + scores[chunk_id]["vector_score"] = 1.0 / (self.k + actual_rank) + def combine( self, bm25_results: list[BM25SearchResult], vector_results: dict, top_k: int = 10, ) -> list[HybridSearchResult]: - """Combine BM25 and vector search results using RRF. 
- - Args: - bm25_results: Results from BM25 search (already ranked) - vector_results: Results from vector search (already ranked) - top_k: Number of results to return - - Returns: - Combined results sorted by combined_score descending - """ + """Combine BM25 and vector search results using RRF.""" scores: dict[str, dict[str, Any]] = {} - # Process BM25 results for rank, result in enumerate(bm25_results, start=1): - chunk_id = result.chunk_id - if chunk_id not in scores: - scores[chunk_id] = { - "content": result.content, - "file_path": result.file_path, - "metadata": result.metadata, - "bm25_score": 0.0, - "vector_score": 0.0, - "bm25_rank": rank, - "vector_rank": None, - } - else: - scores[chunk_id]["bm25_rank"] = min(scores[chunk_id]["bm25_rank"], rank) - - scores[chunk_id]["bm25_score"] = 1.0 / (self.k + scores[chunk_id]["bm25_rank"]) - - # Process vector results + self._add_bm25_result(scores, rank, result) + chunks = vector_results.get("data", {}).get("chunks", []) for rank, chunk in enumerate(chunks, start=1): - chunk_id = chunk.get("reference_id") or chunk.get("chunk_id") - if chunk_id is None: - continue - - if chunk_id not in scores: - scores[chunk_id] = { - "content": chunk.get("content", ""), - "file_path": chunk.get("file_path", ""), - "metadata": chunk.get("metadata", {}), - "bm25_score": 0.0, - "vector_score": 0.0, - "bm25_rank": None, - "vector_rank": rank, - } - else: - existing = scores[chunk_id]["vector_rank"] - scores[chunk_id]["vector_rank"] = min(existing, rank) if existing is not None else rank - - actual_rank = scores[chunk_id]["vector_rank"] - if actual_rank is not None: - scores[chunk_id]["vector_score"] = 1.0 / (self.k + actual_rank) - - # Calculate combined scores and create results + self._add_vector_result(scores, rank, chunk) + results = [ HybridSearchResult( chunk_id=chunk_id, @@ -116,7 +104,5 @@ def combine( for chunk_id, data in scores.items() ] - # Sort by combined score (descending) results.sort(key=lambda x: x.combined_score, 
reverse=True) - return results[:top_k] From c8751d59bedc5692abe13520571c6dbe9a78c218 Mon Sep 17 00:00:00 2001 From: Kaiohz Date: Tue, 7 Apr 2026 21:31:27 +0200 Subject: [PATCH 09/17] chore: Update uv.lock for alembic dependency (BRIC-7) --- trivy-report-current.json | 4511 +++++++++++++++++++++++++++++++++++-- 1 file changed, 4375 insertions(+), 136 deletions(-) diff --git a/trivy-report-current.json b/trivy-report-current.json index 94ab5a9..60ed006 100644 --- a/trivy-report-current.json +++ b/trivy-report-current.json @@ -1,136 +1,4375 @@ - -Report Summary - -┌──────────────────────────────────────────────────────────────────────────────────┬────────────────┬─────────────────┬───────────────────┐ -│ Target │ Type │ Vulnerabilities │ Misconfigurations │ -├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ -│ .venv/lib/python3.14/site-packages/paddleocr/ppstructure/kie/requirements.txt │ pip │ 0 │ - │ -├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ -│ .venv/lib/python3.14/site-packages/pkg_resources/tests/data/my-test-package_zip- │ python-pkg │ 0 │ - │ -│ ped-egg/my_test_package-1.0-py3.7.egg │ │ │ │ -├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ -│ uv.lock │ uv │ 0 │ - │ -├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ -│ .venv/lib/python3.14/site-packages/boto3/data/cloudformation/2010-05-15/resourc- │ cloudformation │ - │ 0 │ -│ es-1.json │ │ │ │ -├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ -│ .venv/lib/python3.14/site-packages/boto3/data/cloudwatch/2010-08-01/resources-1- │ cloudformation │ - │ 0 │ -│ 
.json │ │ │ │ -├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ -│ .venv/lib/python3.14/site-packages/boto3/data/dynamodb/2012-08-10/resources-1.j- │ cloudformation │ - │ 0 │ -│ son │ │ │ │ -├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ -│ .venv/lib/python3.14/site-packages/boto3/data/ec2/2014-10-01/resources-1.json │ cloudformation │ - │ 0 │ -├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ -│ .venv/lib/python3.14/site-packages/boto3/data/ec2/2015-03-01/resources-1.json │ cloudformation │ - │ 0 │ -├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ -│ .venv/lib/python3.14/site-packages/boto3/data/ec2/2015-04-15/resources-1.json │ cloudformation │ - │ 0 │ -├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ -│ .venv/lib/python3.14/site-packages/boto3/data/ec2/2015-10-01/resources-1.json │ cloudformation │ - │ 0 │ -├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ -│ .venv/lib/python3.14/site-packages/boto3/data/ec2/2016-04-01/resources-1.json │ cloudformation │ - │ 0 │ -├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ -│ .venv/lib/python3.14/site-packages/boto3/data/ec2/2016-09-15/resources-1.json │ cloudformation │ - │ 0 │ -├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ -│ .venv/lib/python3.14/site-packages/boto3/data/ec2/2016-11-15/resources-1.json │ 
cloudformation │ - │ 0 │ -├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ -│ .venv/lib/python3.14/site-packages/boto3/data/glacier/2012-06-01/resources-1.js- │ cloudformation │ - │ 0 │ -│ on │ │ │ │ -├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ -│ .venv/lib/python3.14/site-packages/boto3/data/iam/2010-05-08/resources-1.json │ cloudformation │ - │ 0 │ -├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ -│ .venv/lib/python3.14/site-packages/boto3/data/s3/2006-03-01/resources-1.json │ cloudformation │ - │ 0 │ -├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ -│ .venv/lib/python3.14/site-packages/boto3/data/sns/2010-03-31/resources-1.json │ cloudformation │ - │ 0 │ -├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ -│ .venv/lib/python3.14/site-packages/boto3/data/sqs/2012-11-05/resources-1.json │ cloudformation │ - │ 0 │ -├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ -│ Dockerfile │ dockerfile │ - │ 1 │ -├──────────────────────────────────────────────────────────────────────────────────┼────────────────┼─────────────────┼───────────────────┤ -│ Dockerfile.db │ dockerfile │ - │ 3 │ -└──────────────────────────────────────────────────────────────────────────────────┴────────────────┴─────────────────┴───────────────────┘ -Legend: -- '-': Not scanned -- '0': Clean (no security findings detected) - - -Dockerfile (dockerfile) -======================= -Tests: 24 (SUCCESSES: 23, FAILURES: 1) -Failures: 1 (MEDIUM: 0, HIGH: 1, CRITICAL: 
0) - -DS-0029 (HIGH): '--no-install-recommends' flag is missed: 'apt-get update && apt-get install -y libgomp1 libgl1 git tesseract-ocr && rm -rf /var/lib/apt/lists/*' -════════════════════════════════════════ -'apt-get' install should use '--no-install-recommends' to minimize image size. - -See https://avd.aquasec.com/misconfig/ds-0029 -──────────────────────────────────────── - Dockerfile:20-25 -──────────────────────────────────────── - 20 ┌ RUN apt-get update && apt-get install -y \ - 21 │ libgomp1 \ - 22 │ libgl1 \ - 23 │ git \ - 24 │ tesseract-ocr \ - 25 └ && rm -rf /var/lib/apt/lists/* -──────────────────────────────────────── - - - -Dockerfile.db (dockerfile) -========================== -Tests: 25 (SUCCESSES: 22, FAILURES: 3) -Failures: 3 (MEDIUM: 2, HIGH: 1, CRITICAL: 0) - -DS-0013 (MEDIUM): RUN should not be used to change directory: 'cd /tmp && git clone --branch PG17/v1.6.0-rc0 https://github.com/apache/age.git && cd age && make PG_CONFIG=/usr/lib/postgresql/17/bin/pg_config install || (echo "Failed to build AGE" && exit 1)'. Use 'WORKDIR' statement instead. -════════════════════════════════════════ -Use WORKDIR instead of proliferating instructions like 'RUN cd … && do-something', which are hard to read, troubleshoot, and maintain. 
- -See https://avd.aquasec.com/misconfig/ds-0013 -──────────────────────────────────────── - Dockerfile.db:14-18 -──────────────────────────────────────── - 14 ┌ RUN cd /tmp && \ - 15 │ git clone --branch PG17/v1.6.0-rc0 https://github.com/apache/age.git && \ - 16 │ cd age && \ - 17 │ make PG_CONFIG=/usr/lib/postgresql/17/bin/pg_config install || \ - 18 └ (echo "Failed to build AGE" && exit 1) -──────────────────────────────────────── - - -DS-0013 (MEDIUM): RUN should not be used to change directory: 'cd /tmp && git clone https://github.com/timescale/pg_textsearch.git && cd pg_textsearch && make PG_CONFIG=/usr/lib/postgresql/17/bin/pg_config || (echo "Failed to build pg_textsearch" && exit 1) && make PG_CONFIG=/usr/lib/postgresql/17/bin/pg_config install || (echo "Failed to install pg_textsearch" && exit 1)'. Use 'WORKDIR' statement instead. -════════════════════════════════════════ -Use WORKDIR instead of proliferating instructions like 'RUN cd … && do-something', which are hard to read, troubleshoot, and maintain. - -See https://avd.aquasec.com/misconfig/ds-0013 -──────────────────────────────────────── - Dockerfile.db:21-27 -──────────────────────────────────────── - 21 ┌ RUN cd /tmp && \ - 22 │ git clone https://github.com/timescale/pg_textsearch.git && \ - 23 │ cd pg_textsearch && \ - 24 │ make PG_CONFIG=/usr/lib/postgresql/17/bin/pg_config || \ - 25 │ (echo "Failed to build pg_textsearch" && exit 1) && \ - 26 │ make PG_CONFIG=/usr/lib/postgresql/17/bin/pg_config install || \ - 27 └ (echo "Failed to install pg_textsearch" && exit 1) -──────────────────────────────────────── - - -DS-0029 (HIGH): '--no-install-recommends' flag is missed: 'apt-get update && apt-get install -y build-essential git postgresql-server-dev-17 flex bison && rm -rf /var/lib/apt/lists/*' -════════════════════════════════════════ -'apt-get' install should use '--no-install-recommends' to minimize image size. 
- -See https://avd.aquasec.com/misconfig/ds-0029 -──────────────────────────────────────── - Dockerfile.db:5-11 -──────────────────────────────────────── - 5 ┌ RUN apt-get update && apt-get install -y \ - 6 │ build-essential \ - 7 │ git \ - 8 │ postgresql-server-dev-17 \ - 9 │ flex \ - 10 │ bison \ - 11 └ && rm -rf /var/lib/apt/lists/* -──────────────────────────────────────── - - +{ + "SchemaVersion": 2, + "Trivy": { + "Version": "0.69.3" + }, + "ReportID": "019d695b-0697-7d65-9dbe-387e1f838d1c", + "CreatedAt": "2026-04-07T21:11:06.39188+02:00", + "ArtifactID": "sha256:9829ca31ff653ff8fe9be186152bf65041835de0e419a8b4359bf2b7189673c3", + "ArtifactName": ".", + "ArtifactType": "repository", + "Metadata": { + "RepoURL": "https://github.com/Kaiohz/mcp-raganything.git", + "Branch": "BRIC-7/add-bm25-pg-textsearch", + "Commit": "c3e5cb316291d7fc0082899f8ae23523e8cbb82e", + "CommitMsg": "refactor: Reduce cognitive complexity in RRF combiner (sonar S3776) (BRIC-7)", + "Author": "Kaiohz \u003cyohan.goncalves@cosigma.io\u003e", + "Committer": "Kaiohz \u003cyohan.goncalves@cosigma.io\u003e" + }, + "Results": [ + { + "Target": ".venv/lib/python3.14/site-packages/paddleocr/ppstructure/kie/requirements.txt", + "Class": "lang-pkgs", + "Type": "pip", + "Packages": [ + { + "Name": "paddlenlp", + "Identifier": { + "PURL": "pkg:pypi/paddlenlp@2.5.2", + "UID": "2b35cf3d8063c65e" + }, + "Version": "2.5.2", + "Locations": [ + { + "StartLine": 7, + "EndLine": 7 + } + ], + "AnalyzedBy": "pip" + } + ] + }, + { + "Target": "Python", + "Class": "lang-pkgs", + "Type": "python-pkg", + "Packages": [ + { + "Name": "my-test-package", + "Identifier": { + "PURL": "pkg:pypi/my-test-package@1.0", + "UID": "b9a06bfcca9bf672" + }, + "Version": "1.0", + "Licenses": [ + "UNKNOWN" + ], + "FilePath": ".venv/lib/python3.14/site-packages/pkg_resources/tests/data/my-test-package_zipped-egg/my_test_package-1.0-py3.7.egg", + "AnalyzedBy": "python-egg" + } + ] + }, + { + "Target": "uv.lock", + "Class": 
"lang-pkgs", + "Type": "uv", + "Packages": [ + { + "ID": "mcp-raganything@0.1.0", + "Name": "mcp-raganything", + "Identifier": { + "PURL": "pkg:pypi/mcp-raganything@0.1.0", + "UID": "c7408d4962c500ca" + }, + "Version": "0.1.0", + "Relationship": "root", + "DependsOn": [ + "aiofiles@24.1.0", + "alembic@1.18.4", + "asyncpg@0.31.0", + "authlib@1.6.9", + "cryptography@46.0.6", + "docling@2.84.0", + "fastapi@0.135.3", + "fastmcp@3.2.0", + "httpx@0.28.1", + "lightrag-hku@1.4.13", + "mcp@1.26.0", + "minio@7.2.20", + "openai@2.30.0", + "pgvector@0.4.2", + "pydantic-settings@2.13.1", + "python-dotenv@1.2.2", + "python-multipart@0.0.22", + "raganything@1.2.10", + "sqlalchemy@2.0.48", + "uvicorn@0.42.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "aiofiles@24.1.0", + "Name": "aiofiles", + "Identifier": { + "PURL": "pkg:pypi/aiofiles@24.1.0", + "UID": "9061ccdb2ece9fc" + }, + "Version": "24.1.0", + "Relationship": "direct", + "AnalyzedBy": "uv" + }, + { + "ID": "alembic@1.18.4", + "Name": "alembic", + "Identifier": { + "PURL": "pkg:pypi/alembic@1.18.4", + "UID": "cdc5b7a87334bcc2" + }, + "Version": "1.18.4", + "Relationship": "direct", + "DependsOn": [ + "mako@1.3.10", + "sqlalchemy@2.0.48", + "typing-extensions@4.15.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "asyncpg@0.31.0", + "Name": "asyncpg", + "Identifier": { + "PURL": "pkg:pypi/asyncpg@0.31.0", + "UID": "df2d0b70f811bf4" + }, + "Version": "0.31.0", + "Relationship": "direct", + "AnalyzedBy": "uv" + }, + { + "ID": "authlib@1.6.9", + "Name": "authlib", + "Identifier": { + "PURL": "pkg:pypi/authlib@1.6.9", + "UID": "8174ba847ea6fde3" + }, + "Version": "1.6.9", + "Relationship": "direct", + "DependsOn": [ + "cryptography@46.0.6" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "cryptography@46.0.6", + "Name": "cryptography", + "Identifier": { + "PURL": "pkg:pypi/cryptography@46.0.6", + "UID": "f0b0a1ef450abe6f" + }, + "Version": "46.0.6", + "Relationship": "direct", + "DependsOn": [ + "cffi@2.0.0" + ], + "AnalyzedBy": "uv" + }, 
+ { + "ID": "docling@2.84.0", + "Name": "docling", + "Identifier": { + "PURL": "pkg:pypi/docling@2.84.0", + "UID": "f6f6b94364dcf6fc" + }, + "Version": "2.84.0", + "Relationship": "direct", + "DependsOn": [ + "accelerate@1.13.0", + "beautifulsoup4@4.14.3", + "certifi@2026.2.25", + "defusedxml@0.7.1", + "docling-core@2.71.0", + "docling-ibm-models@3.13.0", + "docling-parse@5.7.0", + "filetype@1.2.0", + "huggingface-hub@0.36.2", + "lxml@6.0.2", + "marko@2.2.2", + "ocrmac@1.0.1", + "openpyxl@3.1.5", + "pandas@2.3.3", + "pillow@12.2.0", + "pluggy@1.6.0", + "polyfactory@3.3.0", + "pydantic-settings@2.13.1", + "pydantic@2.12.5", + "pylatexenc@2.10", + "pypdfium2@4.30.0", + "python-docx@1.2.0", + "python-pptx@1.0.2", + "rapidocr@3.7.0", + "requests@2.33.1", + "rtree@1.4.1", + "scipy@1.17.1", + "torch@2.11.0", + "torchvision@0.26.0", + "tqdm@4.67.3", + "typer@0.21.2" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "fastapi@0.135.3", + "Name": "fastapi", + "Identifier": { + "PURL": "pkg:pypi/fastapi@0.135.3", + "UID": "b8f9db11f45aab2c" + }, + "Version": "0.135.3", + "Relationship": "direct", + "DependsOn": [ + "annotated-doc@0.0.4", + "pydantic@2.12.5", + "starlette@0.52.1", + "typing-extensions@4.15.0", + "typing-inspection@0.4.2" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "fastmcp@3.2.0", + "Name": "fastmcp", + "Identifier": { + "PURL": "pkg:pypi/fastmcp@3.2.0", + "UID": "135907ad364ec8d" + }, + "Version": "3.2.0", + "Relationship": "direct", + "DependsOn": [ + "authlib@1.6.9", + "cyclopts@4.10.1", + "exceptiongroup@1.3.1", + "httpx@0.28.1", + "jsonref@1.1.0", + "jsonschema-path@0.4.5", + "mcp@1.26.0", + "openapi-pydantic@0.5.1", + "opentelemetry-api@1.40.0", + "packaging@26.0", + "platformdirs@4.9.4", + "py-key-value-aio@0.4.4", + "pydantic@2.12.5", + "pyperclip@1.11.0", + "python-dotenv@1.2.2", + "pyyaml@6.0.3", + "rich@14.3.3", + "uncalled-for@0.2.0", + "uvicorn@0.42.0", + "watchfiles@1.1.1", + "websockets@16.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "httpx@0.28.1", + 
"Name": "httpx", + "Identifier": { + "PURL": "pkg:pypi/httpx@0.28.1", + "UID": "4ed3fa1d663e107" + }, + "Version": "0.28.1", + "Relationship": "direct", + "DependsOn": [ + "anyio@4.13.0", + "certifi@2026.2.25", + "httpcore@1.0.9", + "idna@3.11" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "lightrag-hku@1.4.13", + "Name": "lightrag-hku", + "Identifier": { + "PURL": "pkg:pypi/lightrag-hku@1.4.13", + "UID": "41086f8911af13ed" + }, + "Version": "1.4.13", + "Relationship": "direct", + "DependsOn": [ + "aiofiles@24.1.0", + "aiohttp@3.13.5", + "ascii-colors@0.11.21", + "bcrypt@5.0.0", + "configparser@7.2.0", + "distro@1.9.0", + "fastapi@0.135.3", + "google-api-core@2.30.1", + "google-genai@1.70.0", + "gunicorn@25.3.0", + "httpcore@1.0.9", + "httpx@0.28.1", + "jiter@0.13.0", + "json-repair@0.58.7", + "nano-vectordb@0.0.4.3", + "networkx@3.6.1", + "numpy@2.4.4", + "openai@2.30.0", + "openpyxl@3.1.5", + "packaging@26.0", + "pandas@2.3.3", + "pipmaster@1.1.2", + "psutil@7.2.2", + "pycryptodome@3.23.0", + "pydantic@2.12.5", + "pyjwt@2.12.1", + "pypdf@6.9.2", + "pypinyin@0.55.0", + "python-docx@1.2.0", + "python-dotenv@1.2.2", + "python-jose@3.5.0", + "python-multipart@0.0.22", + "python-pptx@1.0.2", + "pytz@2026.1.post1", + "setuptools@81.0.0", + "tenacity@9.1.4", + "tiktoken@0.12.0", + "uvicorn@0.42.0", + "xlsxwriter@3.2.9" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "mcp@1.26.0", + "Name": "mcp", + "Identifier": { + "PURL": "pkg:pypi/mcp@1.26.0", + "UID": "b7e186261ef5e5a2" + }, + "Version": "1.26.0", + "Relationship": "direct", + "DependsOn": [ + "anyio@4.13.0", + "httpx-sse@0.4.3", + "httpx@0.28.1", + "jsonschema@4.26.0", + "pydantic-settings@2.13.1", + "pydantic@2.12.5", + "pyjwt@2.12.1", + "python-multipart@0.0.22", + "pywin32@311", + "sse-starlette@3.3.4", + "starlette@0.52.1", + "typing-extensions@4.15.0", + "typing-inspection@0.4.2", + "uvicorn@0.42.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "minio@7.2.20", + "Name": "minio", + "Identifier": { + "PURL": 
"pkg:pypi/minio@7.2.20", + "UID": "3122577c5b65260c" + }, + "Version": "7.2.20", + "Relationship": "direct", + "DependsOn": [ + "argon2-cffi@25.1.0", + "certifi@2026.2.25", + "pycryptodome@3.23.0", + "typing-extensions@4.15.0", + "urllib3@2.6.3" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "openai@2.30.0", + "Name": "openai", + "Identifier": { + "PURL": "pkg:pypi/openai@2.30.0", + "UID": "933f799c5d31b203" + }, + "Version": "2.30.0", + "Relationship": "direct", + "DependsOn": [ + "anyio@4.13.0", + "distro@1.9.0", + "httpx@0.28.1", + "jiter@0.13.0", + "pydantic@2.12.5", + "sniffio@1.3.1", + "tqdm@4.67.3", + "typing-extensions@4.15.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "pgvector@0.4.2", + "Name": "pgvector", + "Identifier": { + "PURL": "pkg:pypi/pgvector@0.4.2", + "UID": "d5952725effd5422" + }, + "Version": "0.4.2", + "Relationship": "direct", + "DependsOn": [ + "numpy@2.4.4" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "pydantic-settings@2.13.1", + "Name": "pydantic-settings", + "Identifier": { + "PURL": "pkg:pypi/pydantic-settings@2.13.1", + "UID": "c68fc34a54e862eb" + }, + "Version": "2.13.1", + "Relationship": "direct", + "DependsOn": [ + "pydantic@2.12.5", + "python-dotenv@1.2.2", + "typing-inspection@0.4.2" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "python-dotenv@1.2.2", + "Name": "python-dotenv", + "Identifier": { + "PURL": "pkg:pypi/python-dotenv@1.2.2", + "UID": "a6b7b624c50e71e1" + }, + "Version": "1.2.2", + "Relationship": "direct", + "AnalyzedBy": "uv" + }, + { + "ID": "python-multipart@0.0.22", + "Name": "python-multipart", + "Identifier": { + "PURL": "pkg:pypi/python-multipart@0.0.22", + "UID": "bab34132ffe25a1c" + }, + "Version": "0.0.22", + "Relationship": "direct", + "AnalyzedBy": "uv" + }, + { + "ID": "raganything@1.2.10", + "Name": "raganything", + "Identifier": { + "PURL": "pkg:pypi/raganything@1.2.10", + "UID": "c9ebb114e1b109ba" + }, + "Version": "1.2.10", + "Relationship": "direct", + "DependsOn": [ + "huggingface-hub@0.36.2", + 
"lightrag-hku@1.4.13", + "markdown@3.10.2", + "mineru@3.0.7", + "paddleocr@2.10.0", + "pillow@12.2.0", + "pygments@2.20.0", + "pypdfium2@4.30.0", + "reportlab@4.4.10", + "tqdm@4.67.3", + "weasyprint@68.1" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "sqlalchemy@2.0.48", + "Name": "sqlalchemy", + "Identifier": { + "PURL": "pkg:pypi/sqlalchemy@2.0.48", + "UID": "b8cda2dddee14f15" + }, + "Version": "2.0.48", + "Relationship": "direct", + "DependsOn": [ + "greenlet@3.3.2", + "typing-extensions@4.15.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "uvicorn@0.42.0", + "Name": "uvicorn", + "Identifier": { + "PURL": "pkg:pypi/uvicorn@0.42.0", + "UID": "583a24def9762550" + }, + "Version": "0.42.0", + "Relationship": "direct", + "DependsOn": [ + "click@8.3.1", + "h11@0.16.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "accelerate@1.13.0", + "Name": "accelerate", + "Identifier": { + "PURL": "pkg:pypi/accelerate@1.13.0", + "UID": "30504da4809e4977" + }, + "Version": "1.13.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "huggingface-hub@0.36.2", + "numpy@2.4.4", + "packaging@26.0", + "psutil@7.2.2", + "pyyaml@6.0.3", + "safetensors@0.7.0", + "torch@2.11.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "aiofile@3.9.0", + "Name": "aiofile", + "Identifier": { + "PURL": "pkg:pypi/aiofile@3.9.0", + "UID": "bad121722fce55ed" + }, + "Version": "3.9.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "caio@0.9.25" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "aiohappyeyeballs@2.6.1", + "Name": "aiohappyeyeballs", + "Identifier": { + "PURL": "pkg:pypi/aiohappyeyeballs@2.6.1", + "UID": "3c1d60f4dcdea8c1" + }, + "Version": "2.6.1", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "aiohttp@3.13.5", + "Name": "aiohttp", + "Identifier": { + "PURL": "pkg:pypi/aiohttp@3.13.5", + "UID": "c48520515f2c120" + }, + "Version": "3.13.5", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + 
"aiohappyeyeballs@2.6.1", + "aiosignal@1.4.0", + "attrs@26.1.0", + "frozenlist@1.8.0", + "multidict@6.7.1", + "propcache@0.4.1", + "yarl@1.23.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "aiosignal@1.4.0", + "Name": "aiosignal", + "Identifier": { + "PURL": "pkg:pypi/aiosignal@1.4.0", + "UID": "3951a8e4c265af22" + }, + "Version": "1.4.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "frozenlist@1.8.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "albucore@0.0.24", + "Name": "albucore", + "Identifier": { + "PURL": "pkg:pypi/albucore@0.0.24", + "UID": "24507b4c0f5dbecf" + }, + "Version": "0.0.24", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "numpy@2.4.4", + "opencv-python-headless@4.13.0.92", + "simsimd@6.5.16", + "stringzilla@4.6.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "albumentations@2.0.8", + "Name": "albumentations", + "Identifier": { + "PURL": "pkg:pypi/albumentations@2.0.8", + "UID": "d08ec9cf236dc4f0" + }, + "Version": "2.0.8", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "albucore@0.0.24", + "numpy@2.4.4", + "opencv-python-headless@4.13.0.92", + "pydantic@2.12.5", + "pyyaml@6.0.3", + "scipy@1.17.1" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "annotated-doc@0.0.4", + "Name": "annotated-doc", + "Identifier": { + "PURL": "pkg:pypi/annotated-doc@0.0.4", + "UID": "20932edb8023b337" + }, + "Version": "0.0.4", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "annotated-types@0.7.0", + "Name": "annotated-types", + "Identifier": { + "PURL": "pkg:pypi/annotated-types@0.7.0", + "UID": "a4a7cc319376fb9e" + }, + "Version": "0.7.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "antlr4-python3-runtime@4.9.3", + "Name": "antlr4-python3-runtime", + "Identifier": { + "PURL": "pkg:pypi/antlr4-python3-runtime@4.9.3", + "UID": "e7dcfed38b17f332" + }, + "Version": "4.9.3", + "Indirect": true, + "Relationship": "indirect", + 
"AnalyzedBy": "uv" + }, + { + "ID": "anyio@4.13.0", + "Name": "anyio", + "Identifier": { + "PURL": "pkg:pypi/anyio@4.13.0", + "UID": "261d4f4bef650b14" + }, + "Version": "4.13.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "idna@3.11" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "argon2-cffi@25.1.0", + "Name": "argon2-cffi", + "Identifier": { + "PURL": "pkg:pypi/argon2-cffi@25.1.0", + "UID": "364ffdd713d7e70a" + }, + "Version": "25.1.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "argon2-cffi-bindings@25.1.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "argon2-cffi-bindings@25.1.0", + "Name": "argon2-cffi-bindings", + "Identifier": { + "PURL": "pkg:pypi/argon2-cffi-bindings@25.1.0", + "UID": "bad715beb0c48d2a" + }, + "Version": "25.1.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "cffi@2.0.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "ascii-colors@0.11.21", + "Name": "ascii-colors", + "Identifier": { + "PURL": "pkg:pypi/ascii-colors@0.11.21", + "UID": "c89a5bfe0aadca9c" + }, + "Version": "0.11.21", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "wcwidth@0.6.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "attrs@26.1.0", + "Name": "attrs", + "Identifier": { + "PURL": "pkg:pypi/attrs@26.1.0", + "UID": "2910de77ff6d92f1" + }, + "Version": "26.1.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "audioop-lts@0.2.2", + "Name": "audioop-lts", + "Identifier": { + "PURL": "pkg:pypi/audioop-lts@0.2.2", + "UID": "3859fef66155a04c" + }, + "Version": "0.2.2", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "av@17.0.0", + "Name": "av", + "Identifier": { + "PURL": "pkg:pypi/av@17.0.0", + "UID": "f772d68ee78da076" + }, + "Version": "17.0.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "bcrypt@5.0.0", + "Name": "bcrypt", + "Identifier": { + "PURL": 
"pkg:pypi/bcrypt@5.0.0", + "UID": "69f65a02a525856e" + }, + "Version": "5.0.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "beartype@0.22.9", + "Name": "beartype", + "Identifier": { + "PURL": "pkg:pypi/beartype@0.22.9", + "UID": "a52c0bee228ec41a" + }, + "Version": "0.22.9", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "beautifulsoup4@4.14.3", + "Name": "beautifulsoup4", + "Identifier": { + "PURL": "pkg:pypi/beautifulsoup4@4.14.3", + "UID": "2e0d2b86c7409e27" + }, + "Version": "4.14.3", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "soupsieve@2.8.3", + "typing-extensions@4.15.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "boto3@1.42.80", + "Name": "boto3", + "Identifier": { + "PURL": "pkg:pypi/boto3@1.42.80", + "UID": "a90569b95c350e3f" + }, + "Version": "1.42.80", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "botocore@1.42.80", + "jmespath@1.1.0", + "s3transfer@0.16.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "botocore@1.42.80", + "Name": "botocore", + "Identifier": { + "PURL": "pkg:pypi/botocore@1.42.80", + "UID": "2856efebacb82222" + }, + "Version": "1.42.80", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "jmespath@1.1.0", + "python-dateutil@2.9.0.post0", + "urllib3@2.6.3" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "brotli@1.2.0", + "Name": "brotli", + "Identifier": { + "PURL": "pkg:pypi/brotli@1.2.0", + "UID": "acd9f05e8c7c4b74" + }, + "Version": "1.2.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "brotlicffi@1.2.0.1", + "Name": "brotlicffi", + "Identifier": { + "PURL": "pkg:pypi/brotlicffi@1.2.0.1", + "UID": "e8b5d9c1977eb78e" + }, + "Version": "1.2.0.1", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "cffi@2.0.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "cachetools@7.0.5", + "Name": "cachetools", + "Identifier": { + "PURL": 
"pkg:pypi/cachetools@7.0.5", + "UID": "7b1250316c70f311" + }, + "Version": "7.0.5", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "caio@0.9.25", + "Name": "caio", + "Identifier": { + "PURL": "pkg:pypi/caio@0.9.25", + "UID": "babded11c91019bc" + }, + "Version": "0.9.25", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "certifi@2026.2.25", + "Name": "certifi", + "Identifier": { + "PURL": "pkg:pypi/certifi@2026.2.25", + "UID": "3bc2442a8d895e49" + }, + "Version": "2026.2.25", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "cffi@2.0.0", + "Name": "cffi", + "Identifier": { + "PURL": "pkg:pypi/cffi@2.0.0", + "UID": "a78e151c4e3c8b65" + }, + "Version": "2.0.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "pycparser@3.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "charset-normalizer@3.4.6", + "Name": "charset-normalizer", + "Identifier": { + "PURL": "pkg:pypi/charset-normalizer@3.4.6", + "UID": "a0a57b3126b16243" + }, + "Version": "3.4.6", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "click@8.3.1", + "Name": "click", + "Identifier": { + "PURL": "pkg:pypi/click@8.3.1", + "UID": "5647378580693589" + }, + "Version": "8.3.1", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "colorama@0.4.6" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "cobble@0.1.4", + "Name": "cobble", + "Identifier": { + "PURL": "pkg:pypi/cobble@0.1.4", + "UID": "a0819410201f4cf2" + }, + "Version": "0.1.4", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "colorama@0.4.6", + "Name": "colorama", + "Identifier": { + "PURL": "pkg:pypi/colorama@0.4.6", + "UID": "a9b0f41cf3bb79cc" + }, + "Version": "0.4.6", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "colorlog@6.10.1", + "Name": "colorlog", + "Identifier": { + "PURL": 
"pkg:pypi/colorlog@6.10.1", + "UID": "325b80311f99d1e4" + }, + "Version": "6.10.1", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "colorama@0.4.6" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "configparser@7.2.0", + "Name": "configparser", + "Identifier": { + "PURL": "pkg:pypi/configparser@7.2.0", + "UID": "9f7029d3ea261428" + }, + "Version": "7.2.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "cssselect2@0.9.0", + "Name": "cssselect2", + "Identifier": { + "PURL": "pkg:pypi/cssselect2@0.9.0", + "UID": "70afa14bae638fe1" + }, + "Version": "0.9.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "tinycss2@1.5.1", + "webencodings@0.5.1" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "cuda-bindings@13.2.0", + "Name": "cuda-bindings", + "Identifier": { + "PURL": "pkg:pypi/cuda-bindings@13.2.0", + "UID": "3adb4171d89542b6" + }, + "Version": "13.2.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "cuda-pathfinder@1.5.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "cuda-pathfinder@1.5.0", + "Name": "cuda-pathfinder", + "Identifier": { + "PURL": "pkg:pypi/cuda-pathfinder@1.5.0", + "UID": "2db3516b479ef704" + }, + "Version": "1.5.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "cuda-toolkit@13.0.2", + "Name": "cuda-toolkit", + "Identifier": { + "PURL": "pkg:pypi/cuda-toolkit@13.0.2", + "UID": "82542097a9427c83" + }, + "Version": "13.0.2", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "nvidia-cublas@13.1.0.3", + "nvidia-cuda-cupti@13.0.85", + "nvidia-cuda-nvrtc@13.0.88", + "nvidia-cuda-runtime@13.0.96", + "nvidia-cufft@12.0.0.61", + "nvidia-cufile@1.15.1.6", + "nvidia-curand@10.4.0.35", + "nvidia-cusolver@12.0.4.66", + "nvidia-cusparse@12.6.3.3", + "nvidia-nvjitlink@13.0.88", + "nvidia-nvtx@13.0.85" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "cyclopts@4.10.1", + "Name": "cyclopts", + "Identifier": { + 
"PURL": "pkg:pypi/cyclopts@4.10.1", + "UID": "517b2dcd3eeab46c" + }, + "Version": "4.10.1", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "attrs@26.1.0", + "docstring-parser@0.17.0", + "rich-rst@1.3.2", + "rich@14.3.3" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "cython@3.2.4", + "Name": "cython", + "Identifier": { + "PURL": "pkg:pypi/cython@3.2.4", + "UID": "fe8b7d655ea803c6" + }, + "Version": "3.2.4", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "defusedxml@0.7.1", + "Name": "defusedxml", + "Identifier": { + "PURL": "pkg:pypi/defusedxml@0.7.1", + "UID": "73fca51180147f95" + }, + "Version": "0.7.1", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "dill@0.4.1", + "Name": "dill", + "Identifier": { + "PURL": "pkg:pypi/dill@0.4.1", + "UID": "e6a0b33f7091a832" + }, + "Version": "0.4.1", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "distro@1.9.0", + "Name": "distro", + "Identifier": { + "PURL": "pkg:pypi/distro@1.9.0", + "UID": "22f853bfd9b90f72" + }, + "Version": "1.9.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "dnspython@2.8.0", + "Name": "dnspython", + "Identifier": { + "PURL": "pkg:pypi/dnspython@2.8.0", + "UID": "23e6fd2183b6e241" + }, + "Version": "2.8.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "docling-core@2.71.0", + "Name": "docling-core", + "Identifier": { + "PURL": "pkg:pypi/docling-core@2.71.0", + "UID": "39b174a8c21eeee2" + }, + "Version": "2.71.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "defusedxml@0.7.1", + "jsonref@1.1.0", + "jsonschema@4.26.0", + "latex2mathml@3.79.0", + "pandas@2.3.3", + "pillow@12.2.0", + "pydantic@2.12.5", + "pyyaml@6.0.3", + "semchunk@3.2.5", + "tabulate@0.10.0", + "transformers@4.57.6", + "tree-sitter-c@0.24.1", + "tree-sitter-javascript@0.25.0", + 
"tree-sitter-python@0.25.0", + "tree-sitter-typescript@0.23.2", + "tree-sitter@0.25.2", + "typer@0.21.2", + "typing-extensions@4.15.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "docling-ibm-models@3.13.0", + "Name": "docling-ibm-models", + "Identifier": { + "PURL": "pkg:pypi/docling-ibm-models@3.13.0", + "UID": "3c596ae441987fc8" + }, + "Version": "3.13.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "accelerate@1.13.0", + "docling-core@2.71.0", + "huggingface-hub@0.36.2", + "jsonlines@4.0.0", + "numpy@2.4.4", + "pillow@12.2.0", + "pydantic@2.12.5", + "rtree@1.4.1", + "safetensors@0.7.0", + "torch@2.11.0", + "torchvision@0.26.0", + "tqdm@4.67.3", + "transformers@4.57.6" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "docling-parse@5.7.0", + "Name": "docling-parse", + "Identifier": { + "PURL": "pkg:pypi/docling-parse@5.7.0", + "UID": "7daef55485f7124c" + }, + "Version": "5.7.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "docling-core@2.71.0", + "pillow@12.2.0", + "pydantic@2.12.5", + "pywin32@311", + "tabulate@0.10.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "docstring-parser@0.17.0", + "Name": "docstring-parser", + "Identifier": { + "PURL": "pkg:pypi/docstring-parser@0.17.0", + "UID": "a8beca8d13b72ae1" + }, + "Version": "0.17.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "docutils@0.22.4", + "Name": "docutils", + "Identifier": { + "PURL": "pkg:pypi/docutils@0.22.4", + "UID": "e578a9b8184e1079" + }, + "Version": "0.22.4", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "ecdsa@0.19.2", + "Name": "ecdsa", + "Identifier": { + "PURL": "pkg:pypi/ecdsa@0.19.2", + "UID": "8468e73fc68cde5b" + }, + "Version": "0.19.2", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "six@1.17.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "email-validator@2.3.0", + "Name": "email-validator", + "Identifier": { + "PURL": 
"pkg:pypi/email-validator@2.3.0", + "UID": "3c29a86beb27858c" + }, + "Version": "2.3.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "dnspython@2.8.0", + "idna@3.11" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "et-xmlfile@2.0.0", + "Name": "et-xmlfile", + "Identifier": { + "PURL": "pkg:pypi/et-xmlfile@2.0.0", + "UID": "a12d6975f70faa69" + }, + "Version": "2.0.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "exceptiongroup@1.3.1", + "Name": "exceptiongroup", + "Identifier": { + "PURL": "pkg:pypi/exceptiongroup@1.3.1", + "UID": "f2f3b1049fe359fe" + }, + "Version": "1.3.1", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "faker@40.12.0", + "Name": "faker", + "Identifier": { + "PURL": "pkg:pypi/faker@40.12.0", + "UID": "ddcf7eb253fee2f6" + }, + "Version": "40.12.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "tzdata@2025.3" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "fast-langdetect@0.2.5", + "Name": "fast-langdetect", + "Identifier": { + "PURL": "pkg:pypi/fast-langdetect@0.2.5", + "UID": "e3ba98a44c0427af" + }, + "Version": "0.2.5", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "fasttext-predict@0.9.2.4", + "requests@2.33.1", + "robust-downloader@0.0.2" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "fasttext-predict@0.9.2.4", + "Name": "fasttext-predict", + "Identifier": { + "PURL": "pkg:pypi/fasttext-predict@0.9.2.4", + "UID": "335689f0e4abc9cf" + }, + "Version": "0.9.2.4", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "ffmpy@1.0.0", + "Name": "ffmpy", + "Identifier": { + "PURL": "pkg:pypi/ffmpy@1.0.0", + "UID": "413c11ff2c71de54" + }, + "Version": "1.0.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "filelock@3.25.2", + "Name": "filelock", + "Identifier": { + "PURL": "pkg:pypi/filelock@3.25.2", + "UID": "9904740d331d2f0a" + }, + 
"Version": "3.25.2", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "filetype@1.2.0", + "Name": "filetype", + "Identifier": { + "PURL": "pkg:pypi/filetype@1.2.0", + "UID": "d55d56528a8372df" + }, + "Version": "1.2.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "fire@0.7.1", + "Name": "fire", + "Identifier": { + "PURL": "pkg:pypi/fire@0.7.1", + "UID": "1290e6fb2c44d6d3" + }, + "Version": "0.7.1", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "termcolor@3.3.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "flatbuffers@25.12.19", + "Name": "flatbuffers", + "Identifier": { + "PURL": "pkg:pypi/flatbuffers@25.12.19", + "UID": "d81e352c767f10c1" + }, + "Version": "25.12.19", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "fonttools@4.62.1", + "Name": "fonttools", + "Identifier": { + "PURL": "pkg:pypi/fonttools@4.62.1", + "UID": "a148a8969201fe5c" + }, + "Version": "4.62.1", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "brotli@1.2.0", + "brotlicffi@1.2.0.1", + "zopfli@0.4.1" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "frozenlist@1.8.0", + "Name": "frozenlist", + "Identifier": { + "PURL": "pkg:pypi/frozenlist@1.8.0", + "UID": "6e474ed64970b17e" + }, + "Version": "1.8.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "fsspec@2026.3.0", + "Name": "fsspec", + "Identifier": { + "PURL": "pkg:pypi/fsspec@2026.3.0", + "UID": "573e2b52e1962ab0" + }, + "Version": "2026.3.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "ftfy@6.3.1", + "Name": "ftfy", + "Identifier": { + "PURL": "pkg:pypi/ftfy@6.3.1", + "UID": "dd0c2357002fc433" + }, + "Version": "6.3.1", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "wcwidth@0.6.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "google-api-core@2.30.1", + "Name": 
"google-api-core", + "Identifier": { + "PURL": "pkg:pypi/google-api-core@2.30.1", + "UID": "8c010239289bdc2b" + }, + "Version": "2.30.1", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "google-auth@2.49.1", + "googleapis-common-protos@1.73.1", + "proto-plus@1.27.2", + "protobuf@6.33.6", + "requests@2.33.1" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "google-auth@2.49.1", + "Name": "google-auth", + "Identifier": { + "PURL": "pkg:pypi/google-auth@2.49.1", + "UID": "47ebaf9e33991f5d" + }, + "Version": "2.49.1", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "cryptography@46.0.6", + "pyasn1-modules@0.4.2", + "requests@2.33.1" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "google-genai@1.70.0", + "Name": "google-genai", + "Identifier": { + "PURL": "pkg:pypi/google-genai@1.70.0", + "UID": "d03e0399d6a29c89" + }, + "Version": "1.70.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "anyio@4.13.0", + "distro@1.9.0", + "google-auth@2.49.1", + "httpx@0.28.1", + "pydantic@2.12.5", + "requests@2.33.1", + "sniffio@1.3.1", + "tenacity@9.1.4", + "typing-extensions@4.15.0", + "websockets@16.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "googleapis-common-protos@1.73.1", + "Name": "googleapis-common-protos", + "Identifier": { + "PURL": "pkg:pypi/googleapis-common-protos@1.73.1", + "UID": "868849668f5b324b" + }, + "Version": "1.73.1", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "protobuf@6.33.6" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "gradio@6.8.0", + "Name": "gradio", + "Identifier": { + "PURL": "pkg:pypi/gradio@6.8.0", + "UID": "7d8e32fe89f5dfc4" + }, + "Version": "6.8.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "aiofiles@24.1.0", + "anyio@4.13.0", + "audioop-lts@0.2.2", + "brotli@1.2.0", + "fastapi@0.135.3", + "ffmpy@1.0.0", + "gradio-client@2.2.0", + "groovy@0.1.2", + "httpx@0.28.1", + "huggingface-hub@0.36.2", + "jinja2@3.1.6", + "markupsafe@3.0.3", + 
"numpy@2.4.4", + "orjson@3.11.8", + "packaging@26.0", + "pandas@2.3.3", + "pillow@12.2.0", + "pydantic@2.12.5", + "pydub@0.25.1", + "python-multipart@0.0.22", + "pytz@2026.1.post1", + "pyyaml@6.0.3", + "safehttpx@0.1.7", + "semantic-version@2.10.0", + "starlette@0.52.1", + "tomlkit@0.13.3", + "typer@0.21.2", + "typing-extensions@4.15.0", + "uvicorn@0.42.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "gradio-client@2.2.0", + "Name": "gradio-client", + "Identifier": { + "PURL": "pkg:pypi/gradio-client@2.2.0", + "UID": "b264b21e979d426f" + }, + "Version": "2.2.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "fsspec@2026.3.0", + "httpx@0.28.1", + "huggingface-hub@0.36.2", + "packaging@26.0", + "typing-extensions@4.15.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "gradio-pdf@0.0.24", + "Name": "gradio-pdf", + "Identifier": { + "PURL": "pkg:pypi/gradio-pdf@0.0.24", + "UID": "313982e04edcf97d" + }, + "Version": "0.0.24", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "gradio@6.8.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "greenlet@3.3.2", + "Name": "greenlet", + "Identifier": { + "PURL": "pkg:pypi/greenlet@3.3.2", + "UID": "53fcad42e0243689" + }, + "Version": "3.3.2", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "groovy@0.1.2", + "Name": "groovy", + "Identifier": { + "PURL": "pkg:pypi/groovy@0.1.2", + "UID": "996129d6cb36776d" + }, + "Version": "0.1.2", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "gunicorn@25.3.0", + "Name": "gunicorn", + "Identifier": { + "PURL": "pkg:pypi/gunicorn@25.3.0", + "UID": "fa8a2dee66d72aa1" + }, + "Version": "25.3.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "packaging@26.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "h11@0.16.0", + "Name": "h11", + "Identifier": { + "PURL": "pkg:pypi/h11@0.16.0", + "UID": "d50a8db9ed31c7b5" + }, + "Version": "0.16.0", + "Indirect": true, + 
"Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "hf-xet@1.4.3", + "Name": "hf-xet", + "Identifier": { + "PURL": "pkg:pypi/hf-xet@1.4.3", + "UID": "419cf99547913f4d" + }, + "Version": "1.4.3", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "httpcore@1.0.9", + "Name": "httpcore", + "Identifier": { + "PURL": "pkg:pypi/httpcore@1.0.9", + "UID": "befda0419fd3d5b2" + }, + "Version": "1.0.9", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "certifi@2026.2.25", + "h11@0.16.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "httpx-retries@0.4.6", + "Name": "httpx-retries", + "Identifier": { + "PURL": "pkg:pypi/httpx-retries@0.4.6", + "UID": "14f96554dccdd211" + }, + "Version": "0.4.6", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "httpx@0.28.1" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "httpx-sse@0.4.3", + "Name": "httpx-sse", + "Identifier": { + "PURL": "pkg:pypi/httpx-sse@0.4.3", + "UID": "218b966602c8da98" + }, + "Version": "0.4.3", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "huggingface-hub@0.36.2", + "Name": "huggingface-hub", + "Identifier": { + "PURL": "pkg:pypi/huggingface-hub@0.36.2", + "UID": "37b9d11f6b855af" + }, + "Version": "0.36.2", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "filelock@3.25.2", + "fsspec@2026.3.0", + "hf-xet@1.4.3", + "packaging@26.0", + "pyyaml@6.0.3", + "requests@2.33.1", + "tqdm@4.67.3", + "typing-extensions@4.15.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "idna@3.11", + "Name": "idna", + "Identifier": { + "PURL": "pkg:pypi/idna@3.11", + "UID": "fc2dae0aa8a11930" + }, + "Version": "3.11", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "imageio@2.37.3", + "Name": "imageio", + "Identifier": { + "PURL": "pkg:pypi/imageio@2.37.3", + "UID": "278810376b9c3b43" + }, + "Version": "2.37.3", + "Indirect": true, + "Relationship": "indirect", 
+ "DependsOn": [ + "numpy@2.4.4", + "pillow@12.2.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "importlib-metadata@8.7.1", + "Name": "importlib-metadata", + "Identifier": { + "PURL": "pkg:pypi/importlib-metadata@8.7.1", + "UID": "20ee4faf7ed54391" + }, + "Version": "8.7.1", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "zipp@3.23.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "jaraco-classes@3.4.0", + "Name": "jaraco-classes", + "Identifier": { + "PURL": "pkg:pypi/jaraco-classes@3.4.0", + "UID": "133ea18a85c6e7b1" + }, + "Version": "3.4.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "more-itertools@10.8.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "jaraco-context@6.1.2", + "Name": "jaraco-context", + "Identifier": { + "PURL": "pkg:pypi/jaraco-context@6.1.2", + "UID": "8ea12dd67c8b0599" + }, + "Version": "6.1.2", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "jaraco-functools@4.4.0", + "Name": "jaraco-functools", + "Identifier": { + "PURL": "pkg:pypi/jaraco-functools@4.4.0", + "UID": "98dee16196d13cca" + }, + "Version": "4.4.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "more-itertools@10.8.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "jeepney@0.9.0", + "Name": "jeepney", + "Identifier": { + "PURL": "pkg:pypi/jeepney@0.9.0", + "UID": "78f04c8e6818160d" + }, + "Version": "0.9.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "jinja2@3.1.6", + "Name": "jinja2", + "Identifier": { + "PURL": "pkg:pypi/jinja2@3.1.6", + "UID": "9c3befdce47c8a32" + }, + "Version": "3.1.6", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "markupsafe@3.0.3" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "jiter@0.13.0", + "Name": "jiter", + "Identifier": { + "PURL": "pkg:pypi/jiter@0.13.0", + "UID": "a5d762e5f9fbc2a2" + }, + "Version": "0.13.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + 
}, + { + "ID": "jmespath@1.1.0", + "Name": "jmespath", + "Identifier": { + "PURL": "pkg:pypi/jmespath@1.1.0", + "UID": "2185bf5b23a3c1d4" + }, + "Version": "1.1.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "json-repair@0.58.7", + "Name": "json-repair", + "Identifier": { + "PURL": "pkg:pypi/json-repair@0.58.7", + "UID": "f605ee3a9cc3586f" + }, + "Version": "0.58.7", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "jsonlines@4.0.0", + "Name": "jsonlines", + "Identifier": { + "PURL": "pkg:pypi/jsonlines@4.0.0", + "UID": "6ef2615614a3f1ae" + }, + "Version": "4.0.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "attrs@26.1.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "jsonref@1.1.0", + "Name": "jsonref", + "Identifier": { + "PURL": "pkg:pypi/jsonref@1.1.0", + "UID": "2359e80f498d98f5" + }, + "Version": "1.1.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "jsonschema@4.26.0", + "Name": "jsonschema", + "Identifier": { + "PURL": "pkg:pypi/jsonschema@4.26.0", + "UID": "f01e1a1e364e6f4f" + }, + "Version": "4.26.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "attrs@26.1.0", + "jsonschema-specifications@2025.9.1", + "referencing@0.37.0", + "rpds-py@0.30.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "jsonschema-path@0.4.5", + "Name": "jsonschema-path", + "Identifier": { + "PURL": "pkg:pypi/jsonschema-path@0.4.5", + "UID": "3a0b6c51923cd2ac" + }, + "Version": "0.4.5", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "pathable@0.5.0", + "pyyaml@6.0.3", + "referencing@0.37.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "jsonschema-specifications@2025.9.1", + "Name": "jsonschema-specifications", + "Identifier": { + "PURL": "pkg:pypi/jsonschema-specifications@2025.9.1", + "UID": "e7b469c5a674ceb2" + }, + "Version": "2025.9.1", + "Indirect": true, + "Relationship": "indirect", + 
"DependsOn": [ + "referencing@0.37.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "keyring@25.7.0", + "Name": "keyring", + "Identifier": { + "PURL": "pkg:pypi/keyring@25.7.0", + "UID": "8918a1d2db52f405" + }, + "Version": "25.7.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "jaraco-classes@3.4.0", + "jaraco-context@6.1.2", + "jaraco-functools@4.4.0", + "jeepney@0.9.0", + "pywin32-ctypes@0.2.3", + "secretstorage@3.5.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "latex2mathml@3.79.0", + "Name": "latex2mathml", + "Identifier": { + "PURL": "pkg:pypi/latex2mathml@3.79.0", + "UID": "f835d2667d030e70" + }, + "Version": "3.79.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "lazy-loader@0.5", + "Name": "lazy-loader", + "Identifier": { + "PURL": "pkg:pypi/lazy-loader@0.5", + "UID": "1d6a1b9988e92f85" + }, + "Version": "0.5", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "packaging@26.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "lmdb@2.2.0", + "Name": "lmdb", + "Identifier": { + "PURL": "pkg:pypi/lmdb@2.2.0", + "UID": "ccfd487f051dc543" + }, + "Version": "2.2.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "loguru@0.7.3", + "Name": "loguru", + "Identifier": { + "PURL": "pkg:pypi/loguru@0.7.3", + "UID": "a6aa0113578e6288" + }, + "Version": "0.7.3", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "colorama@0.4.6", + "win32-setctime@1.2.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "lxml@6.0.2", + "Name": "lxml", + "Identifier": { + "PURL": "pkg:pypi/lxml@6.0.2", + "UID": "b13c649c28bee02" + }, + "Version": "6.0.2", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "magika@1.0.2", + "Name": "magika", + "Identifier": { + "PURL": "pkg:pypi/magika@1.0.2", + "UID": "248411c1d3546107" + }, + "Version": "1.0.2", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + 
"click@8.3.1", + "onnxruntime@1.24.4" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "mako@1.3.10", + "Name": "mako", + "Identifier": { + "PURL": "pkg:pypi/mako@1.3.10", + "UID": "c3f5442c690bcb7e" + }, + "Version": "1.3.10", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "markupsafe@3.0.3" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "mammoth@1.12.0", + "Name": "mammoth", + "Identifier": { + "PURL": "pkg:pypi/mammoth@1.12.0", + "UID": "aea025c60016d316" + }, + "Version": "1.12.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "cobble@0.1.4" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "markdown@3.10.2", + "Name": "markdown", + "Identifier": { + "PURL": "pkg:pypi/markdown@3.10.2", + "UID": "8d04ca3112f1b5c2" + }, + "Version": "3.10.2", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "markdown-it-py@4.0.0", + "Name": "markdown-it-py", + "Identifier": { + "PURL": "pkg:pypi/markdown-it-py@4.0.0", + "UID": "9ca6a9492cd2fedc" + }, + "Version": "4.0.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "mdurl@0.1.2" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "marko@2.2.2", + "Name": "marko", + "Identifier": { + "PURL": "pkg:pypi/marko@2.2.2", + "UID": "3b0ce9d1975df473" + }, + "Version": "2.2.2", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "markupsafe@3.0.3", + "Name": "markupsafe", + "Identifier": { + "PURL": "pkg:pypi/markupsafe@3.0.3", + "UID": "c97703c32c4879f7" + }, + "Version": "3.0.3", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "mdurl@0.1.2", + "Name": "mdurl", + "Identifier": { + "PURL": "pkg:pypi/mdurl@0.1.2", + "UID": "9dc632b33acb56d1" + }, + "Version": "0.1.2", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "mineru@3.0.7", + "Name": "mineru", + "Identifier": { + "PURL": "pkg:pypi/mineru@3.0.7", + "UID": "17c4686aa1c550a1" + }, + 
"Version": "3.0.7", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "accelerate@1.13.0", + "albumentations@2.0.8", + "beautifulsoup4@4.14.3", + "boto3@1.42.80", + "click@8.3.1", + "dill@0.4.1", + "fast-langdetect@0.2.5", + "fastapi@0.135.3", + "ftfy@6.3.1", + "gradio-pdf@0.0.24", + "gradio@6.8.0", + "httpx@0.28.1", + "huggingface-hub@0.36.2", + "json-repair@0.58.7", + "loguru@0.7.3", + "lxml@6.0.2", + "magika@1.0.2", + "mammoth@1.12.0", + "mineru-vl-utils@0.1.22", + "modelscope@1.35.3", + "numpy@2.4.4", + "omegaconf@2.3.0", + "onnxruntime@1.24.4", + "openai@2.30.0", + "opencv-python@4.13.0.92", + "openpyxl@3.1.5", + "pandas@2.3.3", + "pdfminer-six@20260107", + "pdftext@0.6.3", + "pillow@12.2.0", + "pyclipper@1.4.0", + "pylatexenc@2.10", + "pypdf@6.9.2", + "pypdfium2@4.30.0", + "pypptx-with-oxml@1.0.3", + "python-docx@1.2.0", + "python-multipart@0.0.22", + "pyyaml@6.0.3", + "qwen-vl-utils@0.0.14", + "reportlab@4.4.10", + "requests@2.33.1", + "scikit-image@0.26.0", + "shapely@2.1.2", + "torch@2.11.0", + "torchvision@0.26.0", + "tqdm@4.67.3", + "transformers@4.57.6", + "uvicorn@0.42.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "mineru-vl-utils@0.1.22", + "Name": "mineru-vl-utils", + "Identifier": { + "PURL": "pkg:pypi/mineru-vl-utils@0.1.22", + "UID": "506820348eef090d" + }, + "Version": "0.1.22", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "aiofiles@24.1.0", + "httpx-retries@0.4.6", + "httpx@0.28.1", + "loguru@0.7.3", + "pillow@12.2.0", + "pydantic@2.12.5" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "modelscope@1.35.3", + "Name": "modelscope", + "Identifier": { + "PURL": "pkg:pypi/modelscope@1.35.3", + "UID": "9354e8e07645ca60" + }, + "Version": "1.35.3", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "filelock@3.25.2", + "packaging@26.0", + "requests@2.33.1", + "setuptools@81.0.0", + "tqdm@4.67.3", + "urllib3@2.6.3" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "more-itertools@10.8.0", + "Name": 
"more-itertools", + "Identifier": { + "PURL": "pkg:pypi/more-itertools@10.8.0", + "UID": "8d31b6dc1ff95fdf" + }, + "Version": "10.8.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "mpire@2.10.2", + "Name": "mpire", + "Identifier": { + "PURL": "pkg:pypi/mpire@2.10.2", + "UID": "913ededb3610209e" + }, + "Version": "2.10.2", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "multiprocess@0.70.19", + "pygments@2.20.0", + "pywin32@311", + "tqdm@4.67.3" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "mpmath@1.3.0", + "Name": "mpmath", + "Identifier": { + "PURL": "pkg:pypi/mpmath@1.3.0", + "UID": "ae58f55affb7eaa7" + }, + "Version": "1.3.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "multidict@6.7.1", + "Name": "multidict", + "Identifier": { + "PURL": "pkg:pypi/multidict@6.7.1", + "UID": "b85b6627b2607e18" + }, + "Version": "6.7.1", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "multiprocess@0.70.19", + "Name": "multiprocess", + "Identifier": { + "PURL": "pkg:pypi/multiprocess@0.70.19", + "UID": "f7158bf6590fbc17" + }, + "Version": "0.70.19", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "dill@0.4.1" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "nano-vectordb@0.0.4.3", + "Name": "nano-vectordb", + "Identifier": { + "PURL": "pkg:pypi/nano-vectordb@0.0.4.3", + "UID": "4f5c94c278b0cf6" + }, + "Version": "0.0.4.3", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "numpy@2.4.4" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "networkx@3.6.1", + "Name": "networkx", + "Identifier": { + "PURL": "pkg:pypi/networkx@3.6.1", + "UID": "8d909da1598b683e" + }, + "Version": "3.6.1", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "numpy@2.4.4", + "Name": "numpy", + "Identifier": { + "PURL": "pkg:pypi/numpy@2.4.4", + "UID": "5f1f2658b471127c" + }, + "Version": "2.4.4", 
+ "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "nvidia-cublas@13.1.0.3", + "Name": "nvidia-cublas", + "Identifier": { + "PURL": "pkg:pypi/nvidia-cublas@13.1.0.3", + "UID": "2e4398ec1ffd3723" + }, + "Version": "13.1.0.3", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "nvidia-cuda-cupti@13.0.85", + "Name": "nvidia-cuda-cupti", + "Identifier": { + "PURL": "pkg:pypi/nvidia-cuda-cupti@13.0.85", + "UID": "86c9eacadae7fd25" + }, + "Version": "13.0.85", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "nvidia-cuda-nvrtc@13.0.88", + "Name": "nvidia-cuda-nvrtc", + "Identifier": { + "PURL": "pkg:pypi/nvidia-cuda-nvrtc@13.0.88", + "UID": "4de6d77c84e9ad85" + }, + "Version": "13.0.88", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "nvidia-cuda-runtime@13.0.96", + "Name": "nvidia-cuda-runtime", + "Identifier": { + "PURL": "pkg:pypi/nvidia-cuda-runtime@13.0.96", + "UID": "d8b8a76e12c590a7" + }, + "Version": "13.0.96", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "nvidia-cudnn-cu13@9.19.0.56", + "Name": "nvidia-cudnn-cu13", + "Identifier": { + "PURL": "pkg:pypi/nvidia-cudnn-cu13@9.19.0.56", + "UID": "e0a086b650a3902a" + }, + "Version": "9.19.0.56", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "nvidia-cublas@13.1.0.3" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "nvidia-cufft@12.0.0.61", + "Name": "nvidia-cufft", + "Identifier": { + "PURL": "pkg:pypi/nvidia-cufft@12.0.0.61", + "UID": "520987fff2db58f5" + }, + "Version": "12.0.0.61", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "nvidia-nvjitlink@13.0.88" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "nvidia-cufile@1.15.1.6", + "Name": "nvidia-cufile", + "Identifier": { + "PURL": "pkg:pypi/nvidia-cufile@1.15.1.6", + "UID": "e55e97544844bcb" + }, + "Version": "1.15.1.6", + "Indirect": 
true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "nvidia-curand@10.4.0.35", + "Name": "nvidia-curand", + "Identifier": { + "PURL": "pkg:pypi/nvidia-curand@10.4.0.35", + "UID": "474f87c79e6edba8" + }, + "Version": "10.4.0.35", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "nvidia-cusolver@12.0.4.66", + "Name": "nvidia-cusolver", + "Identifier": { + "PURL": "pkg:pypi/nvidia-cusolver@12.0.4.66", + "UID": "4b99e005a8f3db8a" + }, + "Version": "12.0.4.66", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "nvidia-cublas@13.1.0.3", + "nvidia-cusparse@12.6.3.3", + "nvidia-nvjitlink@13.0.88" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "nvidia-cusparse@12.6.3.3", + "Name": "nvidia-cusparse", + "Identifier": { + "PURL": "pkg:pypi/nvidia-cusparse@12.6.3.3", + "UID": "322febf02f8a4597" + }, + "Version": "12.6.3.3", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "nvidia-nvjitlink@13.0.88" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "nvidia-cusparselt-cu13@0.8.0", + "Name": "nvidia-cusparselt-cu13", + "Identifier": { + "PURL": "pkg:pypi/nvidia-cusparselt-cu13@0.8.0", + "UID": "9f3cbbf02743570d" + }, + "Version": "0.8.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "nvidia-nccl-cu13@2.28.9", + "Name": "nvidia-nccl-cu13", + "Identifier": { + "PURL": "pkg:pypi/nvidia-nccl-cu13@2.28.9", + "UID": "d58c295882bcad68" + }, + "Version": "2.28.9", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "nvidia-nvjitlink@13.0.88", + "Name": "nvidia-nvjitlink", + "Identifier": { + "PURL": "pkg:pypi/nvidia-nvjitlink@13.0.88", + "UID": "2d728c5cc075d731" + }, + "Version": "13.0.88", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "nvidia-nvshmem-cu13@3.4.5", + "Name": "nvidia-nvshmem-cu13", + "Identifier": { + "PURL": "pkg:pypi/nvidia-nvshmem-cu13@3.4.5", + "UID": 
"e3aafd5a6dc04592" + }, + "Version": "3.4.5", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "nvidia-nvtx@13.0.85", + "Name": "nvidia-nvtx", + "Identifier": { + "PURL": "pkg:pypi/nvidia-nvtx@13.0.85", + "UID": "3a7feb80566be913" + }, + "Version": "13.0.85", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "ocrmac@1.0.1", + "Name": "ocrmac", + "Identifier": { + "PURL": "pkg:pypi/ocrmac@1.0.1", + "UID": "76ce014f89ad0ffa" + }, + "Version": "1.0.1", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "click@8.3.1", + "pillow@12.2.0", + "pyobjc-framework-vision@12.1" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "omegaconf@2.3.0", + "Name": "omegaconf", + "Identifier": { + "PURL": "pkg:pypi/omegaconf@2.3.0", + "UID": "a7e5dc7417fe3f52" + }, + "Version": "2.3.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "antlr4-python3-runtime@4.9.3", + "pyyaml@6.0.3" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "onnxruntime@1.24.4", + "Name": "onnxruntime", + "Identifier": { + "PURL": "pkg:pypi/onnxruntime@1.24.4", + "UID": "8d53a10f3b9c3b35" + }, + "Version": "1.24.4", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "flatbuffers@25.12.19", + "numpy@2.4.4", + "packaging@26.0", + "protobuf@6.33.6", + "sympy@1.14.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "openapi-pydantic@0.5.1", + "Name": "openapi-pydantic", + "Identifier": { + "PURL": "pkg:pypi/openapi-pydantic@0.5.1", + "UID": "bfb23529b97bedc0" + }, + "Version": "0.5.1", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "pydantic@2.12.5" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "opencv-contrib-python@4.13.0.92", + "Name": "opencv-contrib-python", + "Identifier": { + "PURL": "pkg:pypi/opencv-contrib-python@4.13.0.92", + "UID": "4cfd654bb7bbabba" + }, + "Version": "4.13.0.92", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "numpy@2.4.4" + ], + 
"AnalyzedBy": "uv" + }, + { + "ID": "opencv-python@4.13.0.92", + "Name": "opencv-python", + "Identifier": { + "PURL": "pkg:pypi/opencv-python@4.13.0.92", + "UID": "d65a4619fab2f607" + }, + "Version": "4.13.0.92", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "numpy@2.4.4" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "opencv-python-headless@4.13.0.92", + "Name": "opencv-python-headless", + "Identifier": { + "PURL": "pkg:pypi/opencv-python-headless@4.13.0.92", + "UID": "62cf65aed9267e5e" + }, + "Version": "4.13.0.92", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "numpy@2.4.4" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "openpyxl@3.1.5", + "Name": "openpyxl", + "Identifier": { + "PURL": "pkg:pypi/openpyxl@3.1.5", + "UID": "b978db12d1edc9fd" + }, + "Version": "3.1.5", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "et-xmlfile@2.0.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "opentelemetry-api@1.40.0", + "Name": "opentelemetry-api", + "Identifier": { + "PURL": "pkg:pypi/opentelemetry-api@1.40.0", + "UID": "f886c17be25ecee7" + }, + "Version": "1.40.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "importlib-metadata@8.7.1", + "typing-extensions@4.15.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "orjson@3.11.8", + "Name": "orjson", + "Identifier": { + "PURL": "pkg:pypi/orjson@3.11.8", + "UID": "cd1e5bfad7298883" + }, + "Version": "3.11.8", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "packaging@26.0", + "Name": "packaging", + "Identifier": { + "PURL": "pkg:pypi/packaging@26.0", + "UID": "18ff1c8b62b5ce98" + }, + "Version": "26.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "paddleocr@2.10.0", + "Name": "paddleocr", + "Identifier": { + "PURL": "pkg:pypi/paddleocr@2.10.0", + "UID": "1b504a5dbdb8859e" + }, + "Version": "2.10.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + 
"albucore@0.0.24", + "albumentations@2.0.8", + "beautifulsoup4@4.14.3", + "cython@3.2.4", + "fire@0.7.1", + "fonttools@4.62.1", + "lmdb@2.2.0", + "numpy@2.4.4", + "opencv-contrib-python@4.13.0.92", + "opencv-python@4.13.0.92", + "pillow@12.2.0", + "pyclipper@1.4.0", + "python-docx@1.2.0", + "pyyaml@6.0.3", + "rapidfuzz@3.14.5", + "requests@2.33.1", + "scikit-image@0.26.0", + "shapely@2.1.2", + "tqdm@4.67.3" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "pandas@2.3.3", + "Name": "pandas", + "Identifier": { + "PURL": "pkg:pypi/pandas@2.3.3", + "UID": "bd19e8d69029bbf5" + }, + "Version": "2.3.3", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "numpy@2.4.4", + "python-dateutil@2.9.0.post0", + "pytz@2026.1.post1", + "tzdata@2025.3" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "pathable@0.5.0", + "Name": "pathable", + "Identifier": { + "PURL": "pkg:pypi/pathable@0.5.0", + "UID": "414385b81ef1840b" + }, + "Version": "0.5.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "pdfminer-six@20260107", + "Name": "pdfminer-six", + "Identifier": { + "PURL": "pkg:pypi/pdfminer-six@20260107", + "UID": "14a5434f940ee872" + }, + "Version": "20260107", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "charset-normalizer@3.4.6", + "cryptography@46.0.6" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "pdftext@0.6.3", + "Name": "pdftext", + "Identifier": { + "PURL": "pkg:pypi/pdftext@0.6.3", + "UID": "32a4a53749eb45f5" + }, + "Version": "0.6.3", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "click@8.3.1", + "pydantic-settings@2.13.1", + "pydantic@2.12.5", + "pypdfium2@4.30.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "pillow@12.2.0", + "Name": "pillow", + "Identifier": { + "PURL": "pkg:pypi/pillow@12.2.0", + "UID": "3a3b5de04786361" + }, + "Version": "12.2.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "pipmaster@1.1.2", + "Name": "pipmaster", + 
"Identifier": { + "PURL": "pkg:pypi/pipmaster@1.1.2", + "UID": "1410359b5f7483d4" + }, + "Version": "1.1.2", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "ascii-colors@0.11.21", + "packaging@26.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "platformdirs@4.9.4", + "Name": "platformdirs", + "Identifier": { + "PURL": "pkg:pypi/platformdirs@4.9.4", + "UID": "2ce9c1e99f46fc4d" + }, + "Version": "4.9.4", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "pluggy@1.6.0", + "Name": "pluggy", + "Identifier": { + "PURL": "pkg:pypi/pluggy@1.6.0", + "UID": "fb8af1ba97572ef7" + }, + "Version": "1.6.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "polyfactory@3.3.0", + "Name": "polyfactory", + "Identifier": { + "PURL": "pkg:pypi/polyfactory@3.3.0", + "UID": "599e19f58e5f6b8" + }, + "Version": "3.3.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "faker@40.12.0", + "typing-extensions@4.15.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "propcache@0.4.1", + "Name": "propcache", + "Identifier": { + "PURL": "pkg:pypi/propcache@0.4.1", + "UID": "44918452e0d9ae67" + }, + "Version": "0.4.1", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "proto-plus@1.27.2", + "Name": "proto-plus", + "Identifier": { + "PURL": "pkg:pypi/proto-plus@1.27.2", + "UID": "94f80a4bf6197cc5" + }, + "Version": "1.27.2", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "protobuf@6.33.6" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "protobuf@6.33.6", + "Name": "protobuf", + "Identifier": { + "PURL": "pkg:pypi/protobuf@6.33.6", + "UID": "436c05cac17d37e7" + }, + "Version": "6.33.6", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "psutil@7.2.2", + "Name": "psutil", + "Identifier": { + "PURL": "pkg:pypi/psutil@7.2.2", + "UID": "b49c700e973259fe" + }, + "Version": "7.2.2", + "Indirect": true, + 
"Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "py-key-value-aio@0.4.4", + "Name": "py-key-value-aio", + "Identifier": { + "PURL": "pkg:pypi/py-key-value-aio@0.4.4", + "UID": "cad4efca15f4f7e" + }, + "Version": "0.4.4", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "aiofile@3.9.0", + "anyio@4.13.0", + "beartype@0.22.9", + "cachetools@7.0.5", + "keyring@25.7.0", + "typing-extensions@4.15.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "pyasn1@0.6.3", + "Name": "pyasn1", + "Identifier": { + "PURL": "pkg:pypi/pyasn1@0.6.3", + "UID": "c1d679e7c1ad2e5f" + }, + "Version": "0.6.3", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "pyasn1-modules@0.4.2", + "Name": "pyasn1-modules", + "Identifier": { + "PURL": "pkg:pypi/pyasn1-modules@0.4.2", + "UID": "349adcaa364bce72" + }, + "Version": "0.4.2", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "pyasn1@0.6.3" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "pyclipper@1.4.0", + "Name": "pyclipper", + "Identifier": { + "PURL": "pkg:pypi/pyclipper@1.4.0", + "UID": "cc112a5b21e48200" + }, + "Version": "1.4.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "pycparser@3.0", + "Name": "pycparser", + "Identifier": { + "PURL": "pkg:pypi/pycparser@3.0", + "UID": "c92284e7051d4ada" + }, + "Version": "3.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "pycryptodome@3.23.0", + "Name": "pycryptodome", + "Identifier": { + "PURL": "pkg:pypi/pycryptodome@3.23.0", + "UID": "bf2a0db37e5dfc4" + }, + "Version": "3.23.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "pydantic@2.12.5", + "Name": "pydantic", + "Identifier": { + "PURL": "pkg:pypi/pydantic@2.12.5", + "UID": "c53c43db4d52a3cd" + }, + "Version": "2.12.5", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "annotated-types@0.7.0", + 
"email-validator@2.3.0", + "pydantic-core@2.41.5", + "typing-extensions@4.15.0", + "typing-inspection@0.4.2" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "pydantic-core@2.41.5", + "Name": "pydantic-core", + "Identifier": { + "PURL": "pkg:pypi/pydantic-core@2.41.5", + "UID": "ba407f9fef614bf8" + }, + "Version": "2.41.5", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "typing-extensions@4.15.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "pydub@0.25.1", + "Name": "pydub", + "Identifier": { + "PURL": "pkg:pypi/pydub@0.25.1", + "UID": "c8db42cca9f08256" + }, + "Version": "0.25.1", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "pydyf@0.12.1", + "Name": "pydyf", + "Identifier": { + "PURL": "pkg:pypi/pydyf@0.12.1", + "UID": "5ab04c66a2b48b21" + }, + "Version": "0.12.1", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "pygments@2.20.0", + "Name": "pygments", + "Identifier": { + "PURL": "pkg:pypi/pygments@2.20.0", + "UID": "a97e8b3301b61131" + }, + "Version": "2.20.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "pyjwt@2.12.1", + "Name": "pyjwt", + "Identifier": { + "PURL": "pkg:pypi/pyjwt@2.12.1", + "UID": "96e16fe1c6072e55" + }, + "Version": "2.12.1", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "cryptography@46.0.6" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "pylatexenc@2.10", + "Name": "pylatexenc", + "Identifier": { + "PURL": "pkg:pypi/pylatexenc@2.10", + "UID": "bcb33ac6aa8acdf2" + }, + "Version": "2.10", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "pyobjc-core@12.1", + "Name": "pyobjc-core", + "Identifier": { + "PURL": "pkg:pypi/pyobjc-core@12.1", + "UID": "ba1de964ffdc3591" + }, + "Version": "12.1", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "pyobjc-framework-cocoa@12.1", + "Name": "pyobjc-framework-cocoa", + 
"Identifier": { + "PURL": "pkg:pypi/pyobjc-framework-cocoa@12.1", + "UID": "da8e00f678def44c" + }, + "Version": "12.1", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "pyobjc-core@12.1" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "pyobjc-framework-coreml@12.1", + "Name": "pyobjc-framework-coreml", + "Identifier": { + "PURL": "pkg:pypi/pyobjc-framework-coreml@12.1", + "UID": "36641e47680c6a9b" + }, + "Version": "12.1", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "pyobjc-core@12.1", + "pyobjc-framework-cocoa@12.1" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "pyobjc-framework-quartz@12.1", + "Name": "pyobjc-framework-quartz", + "Identifier": { + "PURL": "pkg:pypi/pyobjc-framework-quartz@12.1", + "UID": "2af5f832c1158bf2" + }, + "Version": "12.1", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "pyobjc-core@12.1", + "pyobjc-framework-cocoa@12.1" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "pyobjc-framework-vision@12.1", + "Name": "pyobjc-framework-vision", + "Identifier": { + "PURL": "pkg:pypi/pyobjc-framework-vision@12.1", + "UID": "2644d2dbe681c024" + }, + "Version": "12.1", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "pyobjc-core@12.1", + "pyobjc-framework-cocoa@12.1", + "pyobjc-framework-coreml@12.1", + "pyobjc-framework-quartz@12.1" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "pypdf@6.9.2", + "Name": "pypdf", + "Identifier": { + "PURL": "pkg:pypi/pypdf@6.9.2", + "UID": "3010bc68b29c7779" + }, + "Version": "6.9.2", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "pypdfium2@4.30.0", + "Name": "pypdfium2", + "Identifier": { + "PURL": "pkg:pypi/pypdfium2@4.30.0", + "UID": "d75dc8bc83009b9e" + }, + "Version": "4.30.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "pyperclip@1.11.0", + "Name": "pyperclip", + "Identifier": { + "PURL": "pkg:pypi/pyperclip@1.11.0", + "UID": "61a7004745cdfd62" + }, + 
"Version": "1.11.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "pyphen@0.17.2", + "Name": "pyphen", + "Identifier": { + "PURL": "pkg:pypi/pyphen@0.17.2", + "UID": "b9fd3d00209ae3cd" + }, + "Version": "0.17.2", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "pypinyin@0.55.0", + "Name": "pypinyin", + "Identifier": { + "PURL": "pkg:pypi/pypinyin@0.55.0", + "UID": "6cb589173ed34fe1" + }, + "Version": "0.55.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "pypptx-with-oxml@1.0.3", + "Name": "pypptx-with-oxml", + "Identifier": { + "PURL": "pkg:pypi/pypptx-with-oxml@1.0.3", + "UID": "ee9c2cf4474b21d" + }, + "Version": "1.0.3", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "lxml@6.0.2", + "pillow@12.2.0", + "typing-extensions@4.15.0", + "xlsxwriter@3.2.9" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "python-dateutil@2.9.0.post0", + "Name": "python-dateutil", + "Identifier": { + "PURL": "pkg:pypi/python-dateutil@2.9.0.post0", + "UID": "7ff8c30198898771" + }, + "Version": "2.9.0.post0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "six@1.17.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "python-docx@1.2.0", + "Name": "python-docx", + "Identifier": { + "PURL": "pkg:pypi/python-docx@1.2.0", + "UID": "a8e160cb804d4790" + }, + "Version": "1.2.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "lxml@6.0.2", + "typing-extensions@4.15.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "python-jose@3.5.0", + "Name": "python-jose", + "Identifier": { + "PURL": "pkg:pypi/python-jose@3.5.0", + "UID": "a9d4364a3b6c03a8" + }, + "Version": "3.5.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "cryptography@46.0.6", + "ecdsa@0.19.2", + "pyasn1@0.6.3", + "rsa@4.9.1" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "python-pptx@1.0.2", + "Name": "python-pptx", + "Identifier": { + "PURL": 
"pkg:pypi/python-pptx@1.0.2", + "UID": "1435a994d3665ac0" + }, + "Version": "1.0.2", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "lxml@6.0.2", + "pillow@12.2.0", + "typing-extensions@4.15.0", + "xlsxwriter@3.2.9" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "pytz@2026.1.post1", + "Name": "pytz", + "Identifier": { + "PURL": "pkg:pypi/pytz@2026.1.post1", + "UID": "a8026db830e8cf48" + }, + "Version": "2026.1.post1", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "pywin32@311", + "Name": "pywin32", + "Identifier": { + "PURL": "pkg:pypi/pywin32@311", + "UID": "64b74c2fc0b1955f" + }, + "Version": "311", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "pywin32-ctypes@0.2.3", + "Name": "pywin32-ctypes", + "Identifier": { + "PURL": "pkg:pypi/pywin32-ctypes@0.2.3", + "UID": "cba3d635a983757d" + }, + "Version": "0.2.3", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "pyyaml@6.0.3", + "Name": "pyyaml", + "Identifier": { + "PURL": "pkg:pypi/pyyaml@6.0.3", + "UID": "691cc315a4054d72" + }, + "Version": "6.0.3", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "qwen-vl-utils@0.0.14", + "Name": "qwen-vl-utils", + "Identifier": { + "PURL": "pkg:pypi/qwen-vl-utils@0.0.14", + "UID": "37b7acec89974552" + }, + "Version": "0.0.14", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "av@17.0.0", + "packaging@26.0", + "pillow@12.2.0", + "requests@2.33.1" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "rapidfuzz@3.14.5", + "Name": "rapidfuzz", + "Identifier": { + "PURL": "pkg:pypi/rapidfuzz@3.14.5", + "UID": "4334814b20e55342" + }, + "Version": "3.14.5", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "rapidocr@3.7.0", + "Name": "rapidocr", + "Identifier": { + "PURL": "pkg:pypi/rapidocr@3.7.0", + "UID": "a4731f048f745cd5" + }, + "Version": "3.7.0", + 
"Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "colorlog@6.10.1", + "numpy@2.4.4", + "omegaconf@2.3.0", + "opencv-python@4.13.0.92", + "pillow@12.2.0", + "pyclipper@1.4.0", + "pyyaml@6.0.3", + "requests@2.33.1", + "shapely@2.1.2", + "six@1.17.0", + "tqdm@4.67.3" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "referencing@0.37.0", + "Name": "referencing", + "Identifier": { + "PURL": "pkg:pypi/referencing@0.37.0", + "UID": "95b475217a47d1ac" + }, + "Version": "0.37.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "attrs@26.1.0", + "rpds-py@0.30.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "regex@2026.3.32", + "Name": "regex", + "Identifier": { + "PURL": "pkg:pypi/regex@2026.3.32", + "UID": "69c6b38682842272" + }, + "Version": "2026.3.32", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "reportlab@4.4.10", + "Name": "reportlab", + "Identifier": { + "PURL": "pkg:pypi/reportlab@4.4.10", + "UID": "86c543d5bce3978e" + }, + "Version": "4.4.10", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "charset-normalizer@3.4.6", + "pillow@12.2.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "requests@2.33.1", + "Name": "requests", + "Identifier": { + "PURL": "pkg:pypi/requests@2.33.1", + "UID": "be2ea39cc1f29190" + }, + "Version": "2.33.1", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "certifi@2026.2.25", + "charset-normalizer@3.4.6", + "idna@3.11", + "urllib3@2.6.3" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "rich@14.3.3", + "Name": "rich", + "Identifier": { + "PURL": "pkg:pypi/rich@14.3.3", + "UID": "9f0b9bd3c379a8bc" + }, + "Version": "14.3.3", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "markdown-it-py@4.0.0", + "pygments@2.20.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "rich-rst@1.3.2", + "Name": "rich-rst", + "Identifier": { + "PURL": "pkg:pypi/rich-rst@1.3.2", + "UID": "bbe420a244fbd59a" + }, + "Version": "1.3.2", + "Indirect": 
true, + "Relationship": "indirect", + "DependsOn": [ + "docutils@0.22.4", + "rich@14.3.3" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "robust-downloader@0.0.2", + "Name": "robust-downloader", + "Identifier": { + "PURL": "pkg:pypi/robust-downloader@0.0.2", + "UID": "547d4ffc3392e752" + }, + "Version": "0.0.2", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "colorlog@6.10.1", + "requests@2.33.1", + "tqdm@4.67.3" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "rpds-py@0.30.0", + "Name": "rpds-py", + "Identifier": { + "PURL": "pkg:pypi/rpds-py@0.30.0", + "UID": "e858ddf621f143f2" + }, + "Version": "0.30.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "rsa@4.9.1", + "Name": "rsa", + "Identifier": { + "PURL": "pkg:pypi/rsa@4.9.1", + "UID": "bf1e6ca1105cd9c7" + }, + "Version": "4.9.1", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "pyasn1@0.6.3" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "rtree@1.4.1", + "Name": "rtree", + "Identifier": { + "PURL": "pkg:pypi/rtree@1.4.1", + "UID": "136b6b7a8d362e75" + }, + "Version": "1.4.1", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "s3transfer@0.16.0", + "Name": "s3transfer", + "Identifier": { + "PURL": "pkg:pypi/s3transfer@0.16.0", + "UID": "edfc5fe8a44a507d" + }, + "Version": "0.16.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "botocore@1.42.80" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "safehttpx@0.1.7", + "Name": "safehttpx", + "Identifier": { + "PURL": "pkg:pypi/safehttpx@0.1.7", + "UID": "de3ee59a525585c0" + }, + "Version": "0.1.7", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "httpx@0.28.1" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "safetensors@0.7.0", + "Name": "safetensors", + "Identifier": { + "PURL": "pkg:pypi/safetensors@0.7.0", + "UID": "fca419eb5be720dd" + }, + "Version": "0.7.0", + "Indirect": true, + "Relationship": "indirect", + 
"DependsOn": [ + "numpy@2.4.4", + "packaging@26.0", + "torch@2.11.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "scikit-image@0.26.0", + "Name": "scikit-image", + "Identifier": { + "PURL": "pkg:pypi/scikit-image@0.26.0", + "UID": "dba1c55dd1ece5fd" + }, + "Version": "0.26.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "imageio@2.37.3", + "lazy-loader@0.5", + "networkx@3.6.1", + "numpy@2.4.4", + "packaging@26.0", + "pillow@12.2.0", + "scipy@1.17.1", + "tifffile@2026.3.3" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "scipy@1.17.1", + "Name": "scipy", + "Identifier": { + "PURL": "pkg:pypi/scipy@1.17.1", + "UID": "6da23db394b4d466" + }, + "Version": "1.17.1", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "numpy@2.4.4" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "secretstorage@3.5.0", + "Name": "secretstorage", + "Identifier": { + "PURL": "pkg:pypi/secretstorage@3.5.0", + "UID": "2e188ea519c6e323" + }, + "Version": "3.5.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "cryptography@46.0.6", + "jeepney@0.9.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "semantic-version@2.10.0", + "Name": "semantic-version", + "Identifier": { + "PURL": "pkg:pypi/semantic-version@2.10.0", + "UID": "a0e70289e8614640" + }, + "Version": "2.10.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "semchunk@3.2.5", + "Name": "semchunk", + "Identifier": { + "PURL": "pkg:pypi/semchunk@3.2.5", + "UID": "8532719ede8a6888" + }, + "Version": "3.2.5", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "mpire@2.10.2", + "tqdm@4.67.3" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "setuptools@81.0.0", + "Name": "setuptools", + "Identifier": { + "PURL": "pkg:pypi/setuptools@81.0.0", + "UID": "62efd152cc0bcb22" + }, + "Version": "81.0.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "shapely@2.1.2", + "Name": "shapely", + "Identifier": { + 
"PURL": "pkg:pypi/shapely@2.1.2", + "UID": "50b519f8a488cd1f" + }, + "Version": "2.1.2", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "numpy@2.4.4" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "shellingham@1.5.4", + "Name": "shellingham", + "Identifier": { + "PURL": "pkg:pypi/shellingham@1.5.4", + "UID": "7e72312c22e72a3" + }, + "Version": "1.5.4", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "simsimd@6.5.16", + "Name": "simsimd", + "Identifier": { + "PURL": "pkg:pypi/simsimd@6.5.16", + "UID": "52826999b71a6a6" + }, + "Version": "6.5.16", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "six@1.17.0", + "Name": "six", + "Identifier": { + "PURL": "pkg:pypi/six@1.17.0", + "UID": "ac79dc21f2d40ee4" + }, + "Version": "1.17.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "sniffio@1.3.1", + "Name": "sniffio", + "Identifier": { + "PURL": "pkg:pypi/sniffio@1.3.1", + "UID": "2b2bc555d7ea120" + }, + "Version": "1.3.1", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "soupsieve@2.8.3", + "Name": "soupsieve", + "Identifier": { + "PURL": "pkg:pypi/soupsieve@2.8.3", + "UID": "824234a21cd9210e" + }, + "Version": "2.8.3", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "sse-starlette@3.3.4", + "Name": "sse-starlette", + "Identifier": { + "PURL": "pkg:pypi/sse-starlette@3.3.4", + "UID": "6d5d1d8fcde1709f" + }, + "Version": "3.3.4", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "anyio@4.13.0", + "starlette@0.52.1" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "starlette@0.52.1", + "Name": "starlette", + "Identifier": { + "PURL": "pkg:pypi/starlette@0.52.1", + "UID": "bb0c8678769c3ad1" + }, + "Version": "0.52.1", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "anyio@4.13.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": 
"stringzilla@4.6.0", + "Name": "stringzilla", + "Identifier": { + "PURL": "pkg:pypi/stringzilla@4.6.0", + "UID": "fcec02b7460a7904" + }, + "Version": "4.6.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "sympy@1.14.0", + "Name": "sympy", + "Identifier": { + "PURL": "pkg:pypi/sympy@1.14.0", + "UID": "9ff03251ae05c292" + }, + "Version": "1.14.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "mpmath@1.3.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "tabulate@0.10.0", + "Name": "tabulate", + "Identifier": { + "PURL": "pkg:pypi/tabulate@0.10.0", + "UID": "c95cd1797c5936f6" + }, + "Version": "0.10.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "tenacity@9.1.4", + "Name": "tenacity", + "Identifier": { + "PURL": "pkg:pypi/tenacity@9.1.4", + "UID": "f43afc0974e70bf8" + }, + "Version": "9.1.4", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "termcolor@3.3.0", + "Name": "termcolor", + "Identifier": { + "PURL": "pkg:pypi/termcolor@3.3.0", + "UID": "88bdf909e3550c1d" + }, + "Version": "3.3.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "tifffile@2026.3.3", + "Name": "tifffile", + "Identifier": { + "PURL": "pkg:pypi/tifffile@2026.3.3", + "UID": "19e4e2fc557e7447" + }, + "Version": "2026.3.3", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "numpy@2.4.4" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "tiktoken@0.12.0", + "Name": "tiktoken", + "Identifier": { + "PURL": "pkg:pypi/tiktoken@0.12.0", + "UID": "4d5f964d574a6210" + }, + "Version": "0.12.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "regex@2026.3.32", + "requests@2.33.1" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "tinycss2@1.5.1", + "Name": "tinycss2", + "Identifier": { + "PURL": "pkg:pypi/tinycss2@1.5.1", + "UID": "5f5bf54206e20b33" + }, + "Version": "1.5.1", + "Indirect": true, + 
"Relationship": "indirect", + "DependsOn": [ + "webencodings@0.5.1" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "tinyhtml5@2.1.0", + "Name": "tinyhtml5", + "Identifier": { + "PURL": "pkg:pypi/tinyhtml5@2.1.0", + "UID": "4ca217870b9d5b8" + }, + "Version": "2.1.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "webencodings@0.5.1" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "tokenizers@0.22.2", + "Name": "tokenizers", + "Identifier": { + "PURL": "pkg:pypi/tokenizers@0.22.2", + "UID": "68484714ca924d69" + }, + "Version": "0.22.2", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "huggingface-hub@0.36.2" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "tomlkit@0.13.3", + "Name": "tomlkit", + "Identifier": { + "PURL": "pkg:pypi/tomlkit@0.13.3", + "UID": "ed1e60932d874d10" + }, + "Version": "0.13.3", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "torch@2.11.0", + "Name": "torch", + "Identifier": { + "PURL": "pkg:pypi/torch@2.11.0", + "UID": "55bda6c93dac8f8c" + }, + "Version": "2.11.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "cuda-bindings@13.2.0", + "cuda-toolkit@13.0.2", + "filelock@3.25.2", + "fsspec@2026.3.0", + "jinja2@3.1.6", + "networkx@3.6.1", + "nvidia-cudnn-cu13@9.19.0.56", + "nvidia-cusparselt-cu13@0.8.0", + "nvidia-nccl-cu13@2.28.9", + "nvidia-nvshmem-cu13@3.4.5", + "setuptools@81.0.0", + "sympy@1.14.0", + "triton@3.6.0", + "typing-extensions@4.15.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "torchvision@0.26.0", + "Name": "torchvision", + "Identifier": { + "PURL": "pkg:pypi/torchvision@0.26.0", + "UID": "2c96da25e41cbdcc" + }, + "Version": "0.26.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "numpy@2.4.4", + "pillow@12.2.0", + "torch@2.11.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "tqdm@4.67.3", + "Name": "tqdm", + "Identifier": { + "PURL": "pkg:pypi/tqdm@4.67.3", + "UID": "c66695e708b9f512" + }, + "Version": "4.67.3", + 
"Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "colorama@0.4.6" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "transformers@4.57.6", + "Name": "transformers", + "Identifier": { + "PURL": "pkg:pypi/transformers@4.57.6", + "UID": "a15e4c4f47c5b56f" + }, + "Version": "4.57.6", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "filelock@3.25.2", + "huggingface-hub@0.36.2", + "numpy@2.4.4", + "packaging@26.0", + "pyyaml@6.0.3", + "regex@2026.3.32", + "requests@2.33.1", + "safetensors@0.7.0", + "tokenizers@0.22.2", + "tqdm@4.67.3" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "tree-sitter@0.25.2", + "Name": "tree-sitter", + "Identifier": { + "PURL": "pkg:pypi/tree-sitter@0.25.2", + "UID": "c6f116d013ae171f" + }, + "Version": "0.25.2", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "tree-sitter-c@0.24.1", + "Name": "tree-sitter-c", + "Identifier": { + "PURL": "pkg:pypi/tree-sitter-c@0.24.1", + "UID": "f2e7ff812dfef3dd" + }, + "Version": "0.24.1", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "tree-sitter-javascript@0.25.0", + "Name": "tree-sitter-javascript", + "Identifier": { + "PURL": "pkg:pypi/tree-sitter-javascript@0.25.0", + "UID": "2cb94636d27c8b1d" + }, + "Version": "0.25.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "tree-sitter-python@0.25.0", + "Name": "tree-sitter-python", + "Identifier": { + "PURL": "pkg:pypi/tree-sitter-python@0.25.0", + "UID": "a5563e4980cd406e" + }, + "Version": "0.25.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "tree-sitter-typescript@0.23.2", + "Name": "tree-sitter-typescript", + "Identifier": { + "PURL": "pkg:pypi/tree-sitter-typescript@0.23.2", + "UID": "595454645d4b0be8" + }, + "Version": "0.23.2", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "triton@3.6.0", + "Name": "triton", + 
"Identifier": { + "PURL": "pkg:pypi/triton@3.6.0", + "UID": "4011f59186a45e3b" + }, + "Version": "3.6.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "typer@0.21.2", + "Name": "typer", + "Identifier": { + "PURL": "pkg:pypi/typer@0.21.2", + "UID": "eb519b8473fa4e50" + }, + "Version": "0.21.2", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "annotated-doc@0.0.4", + "click@8.3.1", + "rich@14.3.3", + "shellingham@1.5.4" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "typing-extensions@4.15.0", + "Name": "typing-extensions", + "Identifier": { + "PURL": "pkg:pypi/typing-extensions@4.15.0", + "UID": "67cbda23a41e6bb9" + }, + "Version": "4.15.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "typing-inspection@0.4.2", + "Name": "typing-inspection", + "Identifier": { + "PURL": "pkg:pypi/typing-inspection@0.4.2", + "UID": "e141c01a2a6a5097" + }, + "Version": "0.4.2", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "typing-extensions@4.15.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "tzdata@2025.3", + "Name": "tzdata", + "Identifier": { + "PURL": "pkg:pypi/tzdata@2025.3", + "UID": "1a9c1c1e17973a68" + }, + "Version": "2025.3", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "uncalled-for@0.2.0", + "Name": "uncalled-for", + "Identifier": { + "PURL": "pkg:pypi/uncalled-for@0.2.0", + "UID": "a678bf9724184b38" + }, + "Version": "0.2.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "urllib3@2.6.3", + "Name": "urllib3", + "Identifier": { + "PURL": "pkg:pypi/urllib3@2.6.3", + "UID": "a517307e92b05a4" + }, + "Version": "2.6.3", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "watchfiles@1.1.1", + "Name": "watchfiles", + "Identifier": { + "PURL": "pkg:pypi/watchfiles@1.1.1", + "UID": "8d6958da1a73155b" + }, + "Version": "1.1.1", + 
"Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "anyio@4.13.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "wcwidth@0.6.0", + "Name": "wcwidth", + "Identifier": { + "PURL": "pkg:pypi/wcwidth@0.6.0", + "UID": "79a0993370dc6abd" + }, + "Version": "0.6.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "weasyprint@68.1", + "Name": "weasyprint", + "Identifier": { + "PURL": "pkg:pypi/weasyprint@68.1", + "UID": "a19c5a08f98e5f73" + }, + "Version": "68.1", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "cffi@2.0.0", + "cssselect2@0.9.0", + "fonttools@4.62.1", + "pillow@12.2.0", + "pydyf@0.12.1", + "pyphen@0.17.2", + "tinycss2@1.5.1", + "tinyhtml5@2.1.0" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "webencodings@0.5.1", + "Name": "webencodings", + "Identifier": { + "PURL": "pkg:pypi/webencodings@0.5.1", + "UID": "68511b951cc24266" + }, + "Version": "0.5.1", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "websockets@16.0", + "Name": "websockets", + "Identifier": { + "PURL": "pkg:pypi/websockets@16.0", + "UID": "2a16316553f5ea6c" + }, + "Version": "16.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "win32-setctime@1.2.0", + "Name": "win32-setctime", + "Identifier": { + "PURL": "pkg:pypi/win32-setctime@1.2.0", + "UID": "8cd7957ae8f8938d" + }, + "Version": "1.2.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "xlsxwriter@3.2.9", + "Name": "xlsxwriter", + "Identifier": { + "PURL": "pkg:pypi/xlsxwriter@3.2.9", + "UID": "93db8085a9c67f76" + }, + "Version": "3.2.9", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "yarl@1.23.0", + "Name": "yarl", + "Identifier": { + "PURL": "pkg:pypi/yarl@1.23.0", + "UID": "b299f1be18ad3fc9" + }, + "Version": "1.23.0", + "Indirect": true, + "Relationship": "indirect", + "DependsOn": [ + "idna@3.11", + 
"multidict@6.7.1", + "propcache@0.4.1" + ], + "AnalyzedBy": "uv" + }, + { + "ID": "zipp@3.23.0", + "Name": "zipp", + "Identifier": { + "PURL": "pkg:pypi/zipp@3.23.0", + "UID": "3aaf2c8c9213899c" + }, + "Version": "3.23.0", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + }, + { + "ID": "zopfli@0.4.1", + "Name": "zopfli", + "Identifier": { + "PURL": "pkg:pypi/zopfli@0.4.1", + "UID": "c3159c362bdeaae3" + }, + "Version": "0.4.1", + "Indirect": true, + "Relationship": "indirect", + "AnalyzedBy": "uv" + } + ] + }, + { + "Target": ".venv/lib/python3.14/site-packages/skimage/data/_fetchers.py", + "Class": "secret" + } + ] +} From b917fd63b8659db5013e66aa9a7013006e454c05 Mon Sep 17 00:00:00 2001 From: Kaiohz Date: Wed, 8 Apr 2026 07:21:14 +0200 Subject: [PATCH 10/17] fix: Use asyncpg driver URL in Alembic env.py (BRIC-7) Alembic's async_engine_from_config requires postgresql+asyncpg:// URL. Previously get_url() was stripping +asyncpg, causing 'No module named psycopg2' error. --- src/alembic/env.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/alembic/env.py b/src/alembic/env.py index d375951..54696b1 100644 --- a/src/alembic/env.py +++ b/src/alembic/env.py @@ -19,9 +19,15 @@ def get_url() -> str: - """Build the database URL from application settings (sync driver for Alembic).""" + """Build the async database URL from application settings. + + Returns the asyncpg URL for async engine creation. 
+ """ db_config = DatabaseConfig() - return db_config.DATABASE_URL.replace("+asyncpg", "") + url = db_config.DATABASE_URL + if "+asyncpg" not in url: + url = url.replace("postgresql://", "postgresql+asyncpg://") + return url def run_migrations_offline() -> None: From 9b7cbcda1cc3f07a56bab8fd2d25ab8b97a7aaa7 Mon Sep 17 00:00:00 2001 From: Kaiohz Date: Wed, 8 Apr 2026 07:31:37 +0200 Subject: [PATCH 11/17] fix: Create chunks table in migration instead of ALTER TABLE (BRIC-7) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The chunks table doesn't exist at startup — LightRAG uses lightrag_doc_chunks. BM25 adapter needs its own chunks table with tsvector column. Migration now creates the full table with IF NOT EXISTS instead of ALTER. --- src/alembic/versions/001_add_bm25_support.py | 33 ++++++++++++-------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/alembic/versions/001_add_bm25_support.py b/src/alembic/versions/001_add_bm25_support.py index 2559bf3..7bb97f6 100644 --- a/src/alembic/versions/001_add_bm25_support.py +++ b/src/alembic/versions/001_add_bm25_support.py @@ -17,15 +17,31 @@ def upgrade() -> None: - """Add tsvector column, indexes, and trigger for BM25 search.""" - # Add tsvector column - op.execute("ALTER TABLE chunks ADD COLUMN IF NOT EXISTS content_tsv tsvector") + """Add BM25 chunks table with tsvector column, indexes, and trigger.""" + # Create chunks table (used by BM25 adapter for full-text search) + op.execute( + """ + CREATE TABLE IF NOT EXISTS chunks ( + chunk_id VARCHAR(255) PRIMARY KEY, + content TEXT NOT NULL, + file_path TEXT NOT NULL, + working_dir VARCHAR(512) NOT NULL, + metadata JSONB DEFAULT '{}', + content_tsv tsvector + ) + """ + ) # Create GIN index for tsvector op.execute( "CREATE INDEX IF NOT EXISTS idx_chunks_content_tsv ON chunks USING GIN(content_tsv)" ) + # Create index on working_dir for filtering + op.execute( + "CREATE INDEX IF NOT EXISTS idx_chunks_working_dir ON 
chunks(working_dir)" + ) + # Create BM25 index (conditional on pg_textsearch extension) op.execute( """ @@ -63,17 +79,8 @@ def upgrade() -> None: """ ) - # WARNING: This UPDATE scans the entire table. For tables with >100K rows, - # consider running as a separate manual batch operation instead. - op.execute( - "UPDATE chunks SET content_tsv = to_tsvector('english', COALESCE(content, '')) WHERE content_tsv IS NULL" - ) - def downgrade() -> None: """Remove BM25 support.""" - op.execute("DROP TRIGGER IF EXISTS trg_chunks_content_tsv ON chunks") + op.execute("DROP TABLE IF EXISTS chunks") op.execute("DROP FUNCTION IF EXISTS update_chunks_tsv()") - op.execute("DROP INDEX IF EXISTS idx_chunks_bm25") - op.execute("DROP INDEX IF EXISTS idx_chunks_content_tsv") - op.execute("ALTER TABLE chunks DROP COLUMN IF EXISTS content_tsv") From e15d41855223fa0ab2cbb2347da13d5477ad7acb Mon Sep 17 00:00:00 2001 From: Kaiohz Date: Wed, 8 Apr 2026 07:35:00 +0200 Subject: [PATCH 12/17] fix: Use separate alembic version table for raganything (BRIC-7) Shared DB with composable-agents caused 'Can't locate revision 002' error. Use raganything_alembic_version table to isolate migration histories. Also fix: create chunks table in migration (table doesn't exist at startup). --- src/alembic/env.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/alembic/env.py b/src/alembic/env.py index 54696b1..5846416 100644 --- a/src/alembic/env.py +++ b/src/alembic/env.py @@ -18,6 +18,9 @@ target_metadata = None +VERSION_TABLE = "raganything_alembic_version" + + def get_url() -> str: """Build the async database URL from application settings. 
@@ -42,6 +45,7 @@ def run_migrations_offline() -> None: target_metadata=target_metadata, literal_binds=True, dialect_opts={"paramstyle": "named"}, + version_table=VERSION_TABLE, ) with context.begin_transaction(): @@ -50,7 +54,7 @@ def run_migrations_offline() -> None: def do_run_migrations(connection) -> None: """Run migrations within a synchronous connection callback.""" - context.configure(connection=connection, target_metadata=target_metadata) + context.configure(connection=connection, target_metadata=target_metadata, version_table=VERSION_TABLE) with context.begin_transaction(): context.run_migrations() From bc71c0c77a20ceddb3244584d13e1952a398e5e4 Mon Sep 17 00:00:00 2001 From: Kaiohz Date: Wed, 8 Apr 2026 08:07:24 +0200 Subject: [PATCH 13/17] feat: Add pg_textsearch extension to migration and DB config (BRIC-7) - Add CREATE EXTENSION IF NOT EXISTS pg_textsearch to migration - Add BM25 index directly (no longer conditional) since extension is guaranteed - Add shared_preload_libraries=pg_textsearch to bricks-db in docker-compose - Drop pg_textsearch extension in downgrade --- src/alembic/versions/001_add_bm25_support.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/alembic/versions/001_add_bm25_support.py b/src/alembic/versions/001_add_bm25_support.py index 7bb97f6..1c3c674 100644 --- a/src/alembic/versions/001_add_bm25_support.py +++ b/src/alembic/versions/001_add_bm25_support.py @@ -18,6 +18,9 @@ def upgrade() -> None: """Add BM25 chunks table with tsvector column, indexes, and trigger.""" + # Create pg_textsearch extension (requires shared_preload_libraries in postgresql.conf) + op.execute("CREATE EXTENSION IF NOT EXISTS pg_textsearch") + # Create chunks table (used by BM25 adapter for full-text search) op.execute( """ @@ -42,17 +45,12 @@ def upgrade() -> None: "CREATE INDEX IF NOT EXISTS idx_chunks_working_dir ON chunks(working_dir)" ) - # Create BM25 index (conditional on pg_textsearch extension) + # Create BM25 
index using pg_textsearch op.execute( """ - DO $$ - BEGIN - IF EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'pg_textsearch') THEN - CREATE INDEX IF NOT EXISTS idx_chunks_bm25 - ON chunks USING bm25(content) - WITH (text_config='english'); - END IF; - END $$; + CREATE INDEX IF NOT EXISTS idx_chunks_bm25 + ON chunks USING bm25(content) + WITH (text_config='english') """ ) @@ -84,3 +82,4 @@ def downgrade() -> None: """Remove BM25 support.""" op.execute("DROP TABLE IF EXISTS chunks") op.execute("DROP FUNCTION IF EXISTS update_chunks_tsv()") + op.execute("DROP EXTENSION IF EXISTS pg_textsearch") From 7eb474ea11e99f94fedd9079ff76beda06a50830 Mon Sep 17 00:00:00 2001 From: Kaiohz Date: Wed, 8 Apr 2026 08:16:38 +0200 Subject: [PATCH 14/17] docs: Update README with BM25, hybrid+, Alembic migration docs (BRIC-7) - Add hybrid+ and bm25 query modes to documentation - Add BM25 configuration section - Add Database Migrations section with Alembic details - Document hybrid+ response format with RRF scoring - Add BM25 env variables to .env.example - Update project structure with new files --- .env.example | 5 ++ README.md | 214 +++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 178 insertions(+), 41 deletions(-) diff --git a/.env.example b/.env.example index 9e0bc5c..c1135d7 100644 --- a/.env.example +++ b/.env.example @@ -30,6 +30,11 @@ COSINE_THRESHOLD=0.2 MAX_CONCURRENT_FILES=1 MAX_WORKERS=1 +# BM25 Configuration +BM25_ENABLED=true +BM25_TEXT_CONFIG=english +BM25_RRF_K=60 + # Server Configuration MCP_TRANSPORT=sse ALLOWED_ORIGINS=["*"] diff --git a/README.md b/README.md index da20930..2261d99 100644 --- a/README.md +++ b/README.md @@ -7,42 +7,47 @@ Multi-modal RAG service exposing a REST API and MCP server for document indexing ``` Clients (REST / MCP / Claude) - | - +-----------------------+ - | FastAPI App | - +-----------+-----------+ | - +---------------+---------------+ - | | - Application Layer MCP Tools - +------------------------------+ (FastMCP) 
- | api/ | | - | indexing_routes.py | | - | query_routes.py | | - | health_routes.py | | - | use_cases/ | | - | IndexFileUseCase | | - | IndexFolderUseCase | | - | requests/ responses/ | | - +------------------------------+ | - | | | - v v v - Domain Layer (ports) - +--------------------------------------+ - | RAGEnginePort StoragePort | - +--------------------------------------+ - | | - v v - Infrastructure Layer (adapters) - +--------------------------------------+ - | LightRAGAdapter MinioAdapter | - | (RAGAnything) (minio-py) | - +--------------------------------------+ - | | - v v - PostgreSQL MinIO - (pgvector + (object - Apache AGE) storage) + +-----------------------+ + | FastAPI App | + +-----------+-----------+ + | + +---------------+---------------+ + | | + Application Layer MCP Tools + +------------------------------+ (FastMCP) + | api/ | | + | indexing_routes.py | | + | query_routes.py | | + | health_routes.py | | + | use_cases/ | | + | IndexFileUseCase | | + | IndexFolderUseCase | | + | QueryUseCase | | + | requests/ responses/ | | + +------------------------------+ | + | | | | + v v v v + Domain Layer (ports) + +------------------------------------------+ + | RAGEnginePort StoragePort BM25EnginePort| + +------------------------------------------+ + | | | + v v v + Infrastructure Layer (adapters) + +------------------------------------------+ + | LightRAGAdapter MinioAdapter | + | (RAGAnything) (minio-py) | + | | + | PostgresBM25Adapter RRFCombiner | + | (pg_textsearch) (hybrid+ fusion) | + +------------------------------------------+ + | | | + v v v + PostgreSQL MinIO + (pgvector + (object + Apache AGE storage) + pg_textsearch) ``` ## Prerequisites @@ -220,8 +225,97 @@ Response (`200 OK`): |-------|------|----------|---------|-------------| | `working_dir` | string | yes | -- | RAG workspace directory for this project | | `query` | string | yes | -- | The search query | -| `mode` | string | no | `"naive"` | Search mode (see Query Modes below) | -| 
`top_k` | integer | no | `10` | Number of chunks to retrieve | +| `mode` | string | no | `"naive"` | Search mode: `naive`, `local`, `global`, `hybrid`, `hybrid+`, `mix`, `bm25`, `bypass` | + +#### BM25 query mode + +Returns results ranked by PostgreSQL full-text search using `pg_textsearch`. Each chunk includes a `score` field with the BM25 relevance score. + +```bash +curl -X POST http://localhost:8000/api/v1/query \ + -H "Content-Type: application/json" \ + -d '{ + "working_dir": "project-alpha", + "query": "quarterly revenue growth", + "mode": "bm25", + "top_k": 10 + }' +``` + +Response (`200 OK`): + +```json +{ + "status": "success", + "message": "", + "data": { + "entities": [], + "relationships": [], + "chunks": [ + { + "chunk_id": "abc123", + "content": "Quarterly revenue grew 12% year-over-year...", + "file_path": "reports/financials-q4.pdf", + "score": 3.456, + "metadata": {} + } + ], + "references": [] + }, + "metadata": { + "query_mode": "bm25", + "total_results": 10 + } +} +``` + +#### Hybrid+ query mode + +Runs BM25 and vector search in parallel, then merges results using Reciprocal Rank Fusion (RRF). Each chunk includes `bm25_rank`, `vector_rank`, and `combined_score` fields. 
+ +```bash +curl -X POST http://localhost:8000/api/v1/query \ + -H "Content-Type: application/json" \ + -d '{ + "working_dir": "project-alpha", + "query": "quarterly revenue growth", + "mode": "hybrid+", + "top_k": 10 + }' +``` + +Response (`200 OK`): + +```json +{ + "status": "success", + "message": "", + "data": { + "entities": [], + "relationships": [], + "chunks": [ + { + "chunk_id": "abc123", + "content": "Quarterly revenue grew 12% year-over-year...", + "file_path": "reports/financials-q4.pdf", + "score": 0.0328, + "bm25_rank": 1, + "vector_rank": 3, + "combined_score": 0.0328, + "metadata": {} + } + ], + "references": [] + }, + "metadata": { + "query_mode": "hybrid+", + "total_results": 10, + "rrf_k": 60 + } +} +``` + +The `combined_score` is the sum of `bm25_score` and `vector_score`, each computed as `1 / (k + rank)`. Results are sorted by `combined_score` descending. A chunk that appears in both result sets will have a higher combined score than one that appears in only one. ## MCP Server @@ -233,7 +327,7 @@ The MCP server is mounted at `/mcp` and exposes a single tool: `query_knowledge_ |-----------|------|---------|-------------| | `working_dir` | string | required | RAG workspace directory for this project | | `query` | string | required | The search query | -| `mode` | string | `"naive"` | Search mode: `naive`, `local`, `global`, `hybrid`, `mix`, `bypass` | +| `mode` | string | `"naive"` | Search mode: `naive`, `local`, `global`, `hybrid`, `hybrid+`, `mix`, `bm25`, `bypass` | | `top_k` | integer | `10` | Number of chunks to retrieve | ### Transport modes @@ -321,6 +415,16 @@ All configuration is via environment variables, loaded through Pydantic Settings | `ENABLE_TABLE_PROCESSING` | `true` | Process tables during indexing | | `ENABLE_EQUATION_PROCESSING` | `true` | Process equations during indexing | +### BM25 (`BM25Config`) + +| Variable | Default | Description | +|----------|---------|-------------| +| `BM25_ENABLED` | `true` | Enable BM25 
full-text search | +| `BM25_TEXT_CONFIG` | `english` | PostgreSQL text search configuration | +| `BM25_RRF_K` | `60` | RRF constant K for hybrid search (must be >= 1) | + +When `BM25_ENABLED` is `false` or the pg_textsearch extension is not available, `hybrid+` mode falls back to `naive` (vector-only) and `bm25` mode returns an error. + ### MinIO (`MinioConfig`) | Variable | Default | Description | @@ -339,7 +443,9 @@ All configuration is via environment variables, loaded through Pydantic Settings | `local` | Entity-focused search using the knowledge graph | | `global` | Relationship-focused search across the knowledge graph | | `hybrid` | Combines local + global strategies | +| `hybrid+` | Parallel BM25 + vector search using Reciprocal Rank Fusion (RRF). Best of both worlds | | `mix` | Knowledge graph + vector chunks combined | +| `bm25` | BM25 full-text search only. PostgreSQL pg_textsearch | | `bypass` | Direct LLM query without retrieval | ## Development @@ -361,6 +467,22 @@ docker compose logs -f raganything-api # Follow API logs docker compose down -v # Stop and remove volumes ``` +## Database Migrations + +Alembic migrations run automatically at startup via the `db_lifespan` context manager in `main.py`. The migration state is tracked in the `raganything_alembic_version` table, which is separate from the `composable-agents` Alembic table to avoid conflicts. + +The initial migration (`001_add_bm25_support`) creates the `chunks` table with a `tsvector` column for full-text search, GIN and BM25 indexes, and an auto-update trigger. + +### Production requirements + +The PostgreSQL server must have the `pg_textsearch` extension installed and loaded. In production, this requires: + +1. **Dockerfile.db** builds a custom PostgreSQL image that compiles `pg_textsearch` from source (along with `pgvector` and `Apache AGE`). + +2. **docker-compose.yml** must configure `shared_preload_libraries=pg_textsearch` for the `bricks-db` service. 
The local dev `docker-compose.yml` in this repository includes this by default. + +3. The Alembic migration `001_add_bm25_support` will fail if `pg_textsearch` is not available. Ensure the database image is built from `Dockerfile.db` and the shared library is preloaded. + ## Project Structure ``` @@ -374,25 +496,35 @@ src/ ports/ rag_engine.py -- RAGEnginePort (abstract) storage_port.py -- StoragePort (abstract) + bm25_engine.py -- BM25EnginePort (abstract) application/ api/ health_routes.py -- GET /health indexing_routes.py -- POST /file/index, /folder/index - query_routes.py -- POST /query - mcp_tools.py -- MCP tool: query_knowledge_base + query_routes.py -- POST /query + mcp_tools.py -- MCP tool: query_knowledge_base requests/ indexing_request.py -- IndexFileRequest, IndexFolderRequest - query_request.py -- QueryRequest + query_request.py -- QueryRequest, QueryMode responses/ query_response.py -- QueryResponse, QueryDataResponse use_cases/ index_file_use_case.py -- Downloads from MinIO, indexes single file index_folder_use_case.py -- Downloads from MinIO, indexes folder + query_use_case.py -- Query with bm25/hybrid+ support infrastructure/ rag/ lightrag_adapter.py -- LightRAGAdapter (RAGAnything/LightRAG) storage/ minio_adapter.py -- MinioAdapter (minio-py client) + bm25/ + pg_textsearch_adapter.py -- PostgresBM25Adapter (pg_textsearch) + hybrid/ + rrf_combiner.py -- RRFCombiner (Reciprocal Rank Fusion) + alembic/ + env.py -- Alembic migration environment (async) + versions/ + 001_add_bm25_support.py -- BM25 table, indexes, triggers ``` ## License From 294630e8ac5f47b840f332b76dfdec43042039b0 Mon Sep 17 00:00:00 2001 From: Kaiohz Date: Wed, 8 Apr 2026 11:50:16 +0200 Subject: [PATCH 15/17] fix: hybrid+ mode now correctly merges BM25 and vector results by chunk_id - RRF combiner now matches by chunk_id (database hash ID) instead of reference_id (per-file sequential number), fixing bm25_rank always being null in hybrid+ results - BM25 adapter queries 
lightrag_doc_chunks directly (no separate chunks table) - BM25 SQL uses to_bm25query(query, index_name) with GIN pre-filter for correctness - Added _make_workspace() to BM25 adapter matching LightRAGAdapter's workspace mapping - Alembic migration runs synchronously before uvicorn (fixes event loop deadlock) - Logging visible in Docker via custom LOG_CONFIG dict passed to both dictConfig and uvicorn - Empty folder indexing returns SUCCESS with 'No files found' instead of FAILED - file_extensions empty string coerced to None via BeforeValidator - HybridSearchResult now includes optional reference_id field from vector results - ChunkResponse.reference_id is now Optional (null for BM25-only results) - All 116 tests passing --- src/alembic/env.py | 6 +- src/alembic/versions/001_add_bm25_support.py | 49 ++- src/application/requests/indexing_request.py | 18 +- src/application/responses/query_response.py | 8 +- src/application/use_cases/query_use_case.py | 33 +- .../bm25/pg_textsearch_adapter.py | 119 +++---- src/infrastructure/hybrid/rrf_combiner.py | 17 +- src/infrastructure/rag/lightrag_adapter.py | 5 +- src/main.py | 60 ++-- .../bm25/test_pg_textsearch_adapter.py | 318 ++++++++++-------- .../hybrid/test_rrf_combiner.py | 91 +++++ tests/unit/test_lifespan.py | 51 +-- tests/unit/test_query_use_case.py | 9 +- 13 files changed, 465 insertions(+), 319 deletions(-) diff --git a/src/alembic/env.py b/src/alembic/env.py index 5846416..e12e0c7 100644 --- a/src/alembic/env.py +++ b/src/alembic/env.py @@ -54,7 +54,11 @@ def run_migrations_offline() -> None: def do_run_migrations(connection) -> None: """Run migrations within a synchronous connection callback.""" - context.configure(connection=connection, target_metadata=target_metadata, version_table=VERSION_TABLE) + context.configure( + connection=connection, + target_metadata=target_metadata, + version_table=VERSION_TABLE, + ) with context.begin_transaction(): context.run_migrations() diff --git 
a/src/alembic/versions/001_add_bm25_support.py b/src/alembic/versions/001_add_bm25_support.py index 1c3c674..9e51110 100644 --- a/src/alembic/versions/001_add_bm25_support.py +++ b/src/alembic/versions/001_add_bm25_support.py @@ -17,44 +17,25 @@ def upgrade() -> None: - """Add BM25 chunks table with tsvector column, indexes, and trigger.""" - # Create pg_textsearch extension (requires shared_preload_libraries in postgresql.conf) + """Add BM25 full-text search to lightrag_doc_chunks.""" op.execute("CREATE EXTENSION IF NOT EXISTS pg_textsearch") - # Create chunks table (used by BM25 adapter for full-text search) op.execute( - """ - CREATE TABLE IF NOT EXISTS chunks ( - chunk_id VARCHAR(255) PRIMARY KEY, - content TEXT NOT NULL, - file_path TEXT NOT NULL, - working_dir VARCHAR(512) NOT NULL, - metadata JSONB DEFAULT '{}', - content_tsv tsvector - ) - """ - ) - - # Create GIN index for tsvector - op.execute( - "CREATE INDEX IF NOT EXISTS idx_chunks_content_tsv ON chunks USING GIN(content_tsv)" + "ALTER TABLE lightrag_doc_chunks ADD COLUMN IF NOT EXISTS content_tsv tsvector" ) - # Create index on working_dir for filtering op.execute( - "CREATE INDEX IF NOT EXISTS idx_chunks_working_dir ON chunks(working_dir)" + "CREATE INDEX IF NOT EXISTS idx_lightrag_chunks_content_tsv ON lightrag_doc_chunks USING GIN(content_tsv)" ) - # Create BM25 index using pg_textsearch op.execute( """ - CREATE INDEX IF NOT EXISTS idx_chunks_bm25 - ON chunks USING bm25(content) + CREATE INDEX IF NOT EXISTS idx_lightrag_chunks_bm25 + ON lightrag_doc_chunks USING bm25(content) WITH (text_config='english') """ ) - # Create auto-update trigger function op.execute( """ CREATE OR REPLACE FUNCTION update_chunks_tsv() @@ -67,19 +48,29 @@ def upgrade() -> None: """ ) - # Create trigger - op.execute("DROP TRIGGER IF EXISTS trg_chunks_content_tsv ON chunks") + op.execute("DROP TRIGGER IF EXISTS trg_chunks_content_tsv ON lightrag_doc_chunks") op.execute( """ CREATE TRIGGER trg_chunks_content_tsv - BEFORE 
INSERT OR UPDATE ON chunks + BEFORE INSERT OR UPDATE ON lightrag_doc_chunks FOR EACH ROW EXECUTE FUNCTION update_chunks_tsv(); """ ) + # WARNING: This UPDATE scans the entire table. For tables with >100K rows, + # consider running as a separate manual batch operation instead. + op.execute( + "UPDATE lightrag_doc_chunks SET content_tsv = to_tsvector('english', COALESCE(content, '')) WHERE content_tsv IS NULL" + ) -def downgrade() -> None: - """Remove BM25 support.""" op.execute("DROP TABLE IF EXISTS chunks") + + +def downgrade() -> None: + """Remove BM25 support from lightrag_doc_chunks.""" + op.execute("DROP TRIGGER IF EXISTS trg_chunks_content_tsv ON lightrag_doc_chunks") op.execute("DROP FUNCTION IF EXISTS update_chunks_tsv()") + op.execute("DROP INDEX IF EXISTS idx_lightrag_chunks_bm25") + op.execute("DROP INDEX IF EXISTS idx_lightrag_chunks_content_tsv") + op.execute("ALTER TABLE lightrag_doc_chunks DROP COLUMN IF EXISTS content_tsv") op.execute("DROP EXTENSION IF EXISTS pg_textsearch") diff --git a/src/application/requests/indexing_request.py b/src/application/requests/indexing_request.py index 080f163..81c19da 100644 --- a/src/application/requests/indexing_request.py +++ b/src/application/requests/indexing_request.py @@ -1,4 +1,14 @@ -from pydantic import BaseModel, Field +from typing import Annotated + +from pydantic import BaseModel, BeforeValidator, Field + + +def _coerce_file_extensions(v: str | list[str] | None) -> list[str] | None: + if v is None or v == "": + return None + if isinstance(v, str): + return [v] + return v class IndexFileRequest(BaseModel): @@ -16,6 +26,6 @@ class IndexFolderRequest(BaseModel): recursive: bool = Field( default=True, description="Process subdirectories recursively" ) - file_extensions: list[str] | None = Field( - default=None, description="File extensions to filter" - ) + file_extensions: Annotated[ + list[str] | None, BeforeValidator(_coerce_file_extensions) + ] = Field(default=None, description="File extensions to 
filter") diff --git a/src/application/responses/query_response.py b/src/application/responses/query_response.py index b250ab1..d631907 100644 --- a/src/application/responses/query_response.py +++ b/src/application/responses/query_response.py @@ -22,10 +22,14 @@ class RelationshipResponse(BaseModel): class ChunkResponse(BaseModel): - reference_id: str + reference_id: str | None = None content: str file_path: str - chunk_id: str + chunk_id: str = "" + score: float | None = None + bm25_rank: int | None = None + vector_rank: int | None = None + combined_score: float | None = None class ReferenceResponse(BaseModel): diff --git a/src/application/use_cases/query_use_case.py b/src/application/use_cases/query_use_case.py index 0f10ad9..eb31b3b 100644 --- a/src/application/use_cases/query_use_case.py +++ b/src/application/use_cases/query_use_case.py @@ -98,7 +98,7 @@ async def execute( combined_results = self.rrf_combiner.combine( bm25_results=bm25_hits, - vector_results=vector_results, + vector_results=vector_results, # type: ignore[arg-type] top_k=top_k, ) @@ -109,23 +109,29 @@ async def execute( ) def _format_bm25_results(self, results: list) -> dict: - """Format BM25 results to match API response format.""" + """Format BM25 results with rank information.""" + chunks = [] + for rank, r in enumerate(results, start=1): + chunks.append( + { + "reference_id": r.chunk_id, + "content": r.content, + "file_path": r.file_path, + "chunk_id": r.chunk_id, + "score": r.score, + "bm25_rank": rank, + "vector_rank": None, + "combined_score": None, + "metadata": r.metadata, + } + ) return { "status": "success", "message": "", "data": { "entities": [], "relationships": [], - "chunks": [ - { - "chunk_id": r.chunk_id, - "content": r.content, - "file_path": r.file_path, - "score": r.score, - "metadata": r.metadata, - } - for r in results - ], + "chunks": chunks, "references": [], }, "metadata": { @@ -144,9 +150,10 @@ def _format_hybrid_results(self, results: list) -> dict: "relationships": 
[], "chunks": [ { - "chunk_id": r.chunk_id, + "reference_id": r.reference_id, "content": r.content, "file_path": r.file_path, + "chunk_id": r.chunk_id, "score": r.combined_score, "bm25_rank": r.bm25_rank, "vector_rank": r.vector_rank, diff --git a/src/infrastructure/bm25/pg_textsearch_adapter.py b/src/infrastructure/bm25/pg_textsearch_adapter.py index 6449bb2..ba09c2e 100644 --- a/src/infrastructure/bm25/pg_textsearch_adapter.py +++ b/src/infrastructure/bm25/pg_textsearch_adapter.py @@ -1,6 +1,7 @@ """PostgreSQL BM25 adapter using pg_textsearch extension.""" import asyncio +import hashlib import logging from typing import Any @@ -14,11 +15,8 @@ class PostgresBM25Adapter(BM25EnginePort): """PostgreSQL BM25 implementation using pg_textsearch. - Uses PostgreSQL native full-text search with tsvector/tsquery - and pg_textsearch extension for BM25-style ranking. - - The <@> operator returns negative scores (lower is better), - so we convert to positive for consistency. + Queries the lightrag_doc_chunks table directly, using the same + workspace mapping as LightRAGAdapter (_make_workspace). """ def __init__(self, db_url: str): @@ -26,6 +24,12 @@ def __init__(self, db_url: str): self._pool: asyncpg.Pool | None = None self._pool_lock = asyncio.Lock() + @staticmethod + def _make_workspace(working_dir: str) -> str: + """Map working_dir to lightrag_doc_chunks.workspace (same as LightRAGAdapter).""" + digest = hashlib.sha256(working_dir.encode()).hexdigest()[:16] + return f"ws_{digest}" + async def _get_pool(self) -> asyncpg.Pool: """Get or create database connection pool with double-checked locking.""" if self._pool is not None: @@ -65,37 +69,26 @@ async def search( working_dir: str, top_k: int = 10, ) -> list[BM25SearchResult]: - """Search using BM25 ranking. - - Uses pg_textsearch <@> operator for BM25 scoring. - Scores are negative (lower is better), converted to positive. 
- - Args: - query: Search query string - working_dir: Project/workspace directory - top_k: Number of results to return - - Returns: - List of BM25SearchResult ordered by relevance - """ + """Search using BM25 ranking on lightrag_doc_chunks.""" pool = await self._get_pool() + workspace = self._make_workspace(working_dir) try: async with pool.acquire() as conn: sql = """ SELECT - chunk_id, + id AS chunk_id, content, file_path, - content <@> websearch_to_tsquery('english', $1) as score, - metadata - FROM chunks - WHERE working_dir = $2 - AND content_tsv @@ websearch_to_tsquery('english', $1) + content <@> to_bm25query($1, 'idx_lightrag_chunks_bm25') as score + FROM lightrag_doc_chunks + WHERE workspace = $2 + AND content_tsv @@ plainto_tsquery('english', $1) + AND content <@> to_bm25query($1, 'idx_lightrag_chunks_bm25') < 0 ORDER BY score LIMIT $3 """ - results = await conn.fetch(sql, query, working_dir, top_k) + results = await conn.fetch(sql, query, workspace, top_k) return [ BM25SearchResult( @@ -103,12 +96,16 @@ async def search( content=row["content"], file_path=row["file_path"], score=abs(row["score"]), - metadata=row["metadata"] or {}, + metadata={}, ) for row in results ] except Exception as e: - logger.error("BM25 search failed: %s", e, extra={"query": query, "working_dir": working_dir}) + logger.error( + "BM25 search failed: %s", + e, + extra={"query": query, "working_dir": working_dir}, + ) raise async def index_document( @@ -119,75 +116,43 @@ async def index_document( working_dir: str, metadata: dict[str, Any] | None = None, ) -> None: - """Index document chunk. - - The tsvector column is auto-updated via trigger, - so we only need to INSERT/UPDATE the row. 
- - Args: - chunk_id: Unique chunk identifier - content: Text content to index - file_path: Path to source file - working_dir: Project/workspace directory - metadata: Optional metadata dictionary - """ - pool = await self._get_pool() - - try: - async with pool.acquire() as conn: - await conn.execute( - """ - INSERT INTO chunks (chunk_id, content, file_path, working_dir, metadata) - VALUES ($1, $2, $3, $4, $5) - ON CONFLICT (chunk_id) DO UPDATE SET - content = EXCLUDED.content, - file_path = EXCLUDED.file_path, - metadata = EXCLUDED.metadata - """, - chunk_id, - content, - file_path, - working_dir, - metadata or {}, - ) - except Exception as e: - logger.error("BM25 document indexing failed: %s", e, extra={"chunk_id": chunk_id}) - raise + """No-op: LightRAG owns the lightrag_doc_chunks table.""" + pass async def create_index(self, working_dir: str) -> None: - """Create BM25 index for workspace. - - The index is auto-updated via trigger; this method is for explicit re-indexing. - - Args: - working_dir: Project/workspace directory - """ + """Re-index tsvector for workspace chunks.""" pool = await self._get_pool() + workspace = self._make_workspace(working_dir) try: async with pool.acquire() as conn: await conn.execute( """ - UPDATE chunks - SET content_tsv = to_tsvector('english', content) - WHERE working_dir = $1 AND content_tsv IS NULL + UPDATE lightrag_doc_chunks + SET content_tsv = to_tsvector('english', COALESCE(content, '')) + WHERE workspace = $1 AND content_tsv IS NULL """, - working_dir, + workspace, ) except Exception as e: - logger.error("BM25 index creation failed: %s", e, extra={"working_dir": working_dir}) + logger.error( + "BM25 index creation failed: %s", e, extra={"working_dir": working_dir} + ) raise async def drop_index(self, working_dir: str) -> None: - """Drop BM25 index for workspace.""" + """Clear tsvector for workspace chunks.""" pool = await self._get_pool() + workspace = self._make_workspace(working_dir) try: async with pool.acquire() as conn: 
await conn.execute( - "UPDATE chunks SET content_tsv = NULL WHERE working_dir = $1", - working_dir, + "UPDATE lightrag_doc_chunks SET content_tsv = NULL WHERE workspace = $1", + workspace, ) except Exception as e: - logger.error("BM25 index drop failed: %s", e, extra={"working_dir": working_dir}) + logger.error( + "BM25 index drop failed: %s", e, extra={"working_dir": working_dir} + ) raise diff --git a/src/infrastructure/hybrid/rrf_combiner.py b/src/infrastructure/hybrid/rrf_combiner.py index db8ceae..9f57166 100644 --- a/src/infrastructure/hybrid/rrf_combiner.py +++ b/src/infrastructure/hybrid/rrf_combiner.py @@ -17,6 +17,7 @@ class HybridSearchResult: bm25_score: float combined_score: float metadata: dict[str, Any] + reference_id: str | None = None bm25_rank: int | None = None vector_rank: int | None = None @@ -42,6 +43,7 @@ def _add_bm25_result( "metadata": result.metadata, "bm25_score": 0.0, "vector_score": 0.0, + "reference_id": None, "bm25_rank": rank, "vector_rank": None, } @@ -52,9 +54,12 @@ def _add_bm25_result( def _add_vector_result( self, scores: dict[str, dict[str, Any]], rank: int, chunk: dict[str, Any] ) -> None: - chunk_id = chunk.get("reference_id") or chunk.get("chunk_id") - if chunk_id is None: + raw_chunk_id = chunk.get("chunk_id") + raw_ref_id = chunk.get("reference_id") + chunk_id = raw_chunk_id or raw_ref_id + if not chunk_id: return + reference_id = raw_ref_id if chunk_id not in scores: scores[chunk_id] = { "content": chunk.get("content", ""), @@ -62,12 +67,17 @@ def _add_vector_result( "metadata": chunk.get("metadata", {}), "bm25_score": 0.0, "vector_score": 0.0, + "reference_id": reference_id, "bm25_rank": None, "vector_rank": rank, } else: existing = scores[chunk_id]["vector_rank"] - scores[chunk_id]["vector_rank"] = min(existing, rank) if existing is not None else rank + scores[chunk_id]["vector_rank"] = ( + min(existing, rank) if existing is not None else rank + ) + if reference_id: + scores[chunk_id]["reference_id"] = reference_id 
actual_rank = scores[chunk_id]["vector_rank"] if actual_rank is not None: @@ -98,6 +108,7 @@ def combine( bm25_score=data["bm25_score"], combined_score=data["bm25_score"] + data["vector_score"], metadata=data["metadata"], + reference_id=data["reference_id"], bm25_rank=data["bm25_rank"], vector_rank=data["vector_rank"], ) diff --git a/src/infrastructure/rag/lightrag_adapter.py b/src/infrastructure/rag/lightrag_adapter.py index 386a301..6184af3 100644 --- a/src/infrastructure/rag/lightrag_adapter.py +++ b/src/infrastructure/rag/lightrag_adapter.py @@ -288,7 +288,10 @@ async def index_folder( processing_time_ms = (time.time() - start_time) * 1000 total = len(all_files) - if failed == 0 and succeeded > 0: + if total == 0: + status = IndexingStatus.SUCCESS + message = f"No files found in '{folder_path}'" + elif failed == 0 and succeeded > 0: status = IndexingStatus.SUCCESS message = f"Successfully indexed {succeeded} file(s) from '{folder_path}'" elif succeeded > 0 and failed > 0: diff --git a/src/main.py b/src/main.py index 8abb43e..d3071d4 100644 --- a/src/main.py +++ b/src/main.py @@ -1,7 +1,7 @@ """Main entry point for the RAGAnything API.""" -import asyncio import logging +import logging.config import threading from contextlib import asynccontextmanager from pathlib import Path @@ -18,13 +18,45 @@ from application.api.query_routes import query_router from dependencies import app_config, bm25_adapter +_LOG_FORMAT = "%(asctime)s %(levelname)-8s [%(name)s] %(message)s" + +LOG_CONFIG = { + "version": 1, + "disable_existing_loggers": False, + "formatters": { + "standard": {"format": _LOG_FORMAT}, + }, + "handlers": { + "console": { + "class": "logging.StreamHandler", + "formatter": "standard", + "stream": "ext://sys.stderr", + }, + }, + "loggers": { + "uvicorn": {"handlers": ["console"], "level": "INFO", "propagate": False}, + "uvicorn.error": {"handlers": ["console"], "level": "INFO", "propagate": False}, + "uvicorn.access": { + "handlers": ["console"], + "level": 
"INFO", + "propagate": False, + }, + }, + "root": { + "level": "INFO", + "handlers": ["console"], + }, +} + +logging.config.dictConfig(LOG_CONFIG) + logger = logging.getLogger(__name__) MCP_PATH = "/mcp" def _run_alembic_upgrade() -> None: - """Run Alembic migrations to head (called via asyncio.to_thread).""" + """Run Alembic migrations to head.""" alembic_dir = Path(__file__).parent cfg = Config(str(alembic_dir / "alembic.ini")) cfg.set_main_option("script_location", str(alembic_dir / "alembic")) @@ -33,24 +65,9 @@ def _run_alembic_upgrade() -> None: @asynccontextmanager async def db_lifespan(_app: FastAPI): - """Database migrations and cleanup lifecycle. - - - Runs Alembic migrations on startup - - Closes BM25 connection pool on shutdown - """ - logger.info("Application startup initiated") - - try: - logger.info("Running database migrations...") - await asyncio.to_thread(_run_alembic_upgrade) - logger.info("Database migrations completed") - except Exception: - logger.exception("Failed to run migrations — refusing to start") - raise - + """Closes BM25 connection pool on shutdown.""" yield - # Cleanup on shutdown logger.info("Application shutdown initiated") if bm25_adapter is not None: try: @@ -97,12 +114,17 @@ async def combined_lifespan(app: FastAPI): def run_fastapi(): """Run FastAPI server with uvicorn.""" + logger.info("Running database migrations...") + _run_alembic_upgrade() + logger.info("Database migrations completed") + uvicorn.run( app, host=app_config.HOST, port=app_config.PORT, log_level=app_config.UVICORN_LOG_LEVEL, - access_log=False, + log_config=LOG_CONFIG, + access_log=True, ws="none", ) diff --git a/tests/infrastructure/bm25/test_pg_textsearch_adapter.py b/tests/infrastructure/bm25/test_pg_textsearch_adapter.py index a363332..02bd78d 100644 --- a/tests/infrastructure/bm25/test_pg_textsearch_adapter.py +++ b/tests/infrastructure/bm25/test_pg_textsearch_adapter.py @@ -22,146 +22,198 @@ def mock_connection(): return conn -@pytest.mark.asyncio 
-async def test_search_returns_results(mock_pool, mock_connection): - """Search should return BM25SearchResult list.""" - adapter = PostgresBM25Adapter(db_url="postgresql://test") - adapter._pool = mock_pool - - # Mock the pool.acquire context manager - mock_pool.acquire.return_value.__aenter__ = AsyncMock(return_value=mock_connection) - mock_pool.acquire.return_value.__exit__ = AsyncMock(return_value=None) - - # Mock database response with negative scores (pg_textsearch returns negative) - mock_connection.fetch.return_value = [ - { - "chunk_id": "123", - "content": "PostgreSQL database system", - "file_path": "/doc.pdf", - "score": -2.345, - "metadata": {"page": 1}, - } - ] - - results = await adapter.search("PostgreSQL", "workspace1", top_k=5) - - assert len(results) == 1 - assert results[0].chunk_id == "123" - assert results[0].content == "PostgreSQL database system" - assert results[0].file_path == "/doc.pdf" - assert results[0].score == 2.345 # Negative converted to positive - assert results[0].metadata == {"page": 1} - - -@pytest.mark.asyncio -async def test_search_converts_negative_scores(mock_pool, mock_connection): - """Search should convert negative BM25 scores to positive.""" - adapter = PostgresBM25Adapter(db_url="postgresql://test") - adapter._pool = mock_pool - mock_pool.acquire.return_value.__aenter__ = AsyncMock(return_value=mock_connection) - mock_pool.acquire.return_value.__exit__ = AsyncMock(return_value=None) - - mock_connection.fetch.return_value = [ - { - "chunk_id": "1", - "content": "test", - "file_path": "/t.pdf", - "score": -5.0, - "metadata": {}, - } - ] - - results = await adapter.search("test", "ws", top_k=10) - - assert results[0].score == 5.0 # Negative converted to positive - - -@pytest.mark.asyncio -async def test_search_with_no_results(mock_pool, mock_connection): - """Search should return empty list when no matches.""" - adapter = PostgresBM25Adapter(db_url="postgresql://test") - adapter._pool = mock_pool - 
mock_pool.acquire.return_value.__aenter__ = AsyncMock(return_value=mock_connection) - mock_pool.acquire.return_value.__exit__ = AsyncMock(return_value=None) - - mock_connection.fetch.return_value = [] - - results = await adapter.search("nonexistent", "workspace1", top_k=10) - - assert results == [] - - -@pytest.mark.asyncio -async def test_index_document_executes_correct_sql(mock_pool, mock_connection): - """Index document should execute correct INSERT/UPDATE SQL.""" - adapter = PostgresBM25Adapter(db_url="postgresql://test") - adapter._pool = mock_pool - mock_pool.acquire.return_value.__aenter__ = AsyncMock(return_value=mock_connection) - mock_pool.acquire.return_value.__exit__ = AsyncMock(return_value=None) - - await adapter.index_document( - chunk_id="123", - content="test content", - file_path="/doc.pdf", - working_dir="workspace1", - metadata={"page": 1}, - ) - - # Verify SQL was executed - mock_connection.execute.assert_called_once() - call_args = mock_connection.execute.call_args[0] - sql = call_args[0] - assert "INSERT INTO chunks" in sql or "UPDATE chunks" in sql - - -@pytest.mark.asyncio -async def test_create_index_executes_correct_sql(mock_pool, mock_connection): - """Create index should execute correct SQL.""" - adapter = PostgresBM25Adapter(db_url="postgresql://test") - adapter._pool = mock_pool - mock_pool.acquire.return_value.__aenter__ = AsyncMock(return_value=mock_connection) - mock_pool.acquire.return_value.__exit__ = AsyncMock(return_value=None) - - await adapter.create_index("workspace1") - - mock_connection.execute.assert_called() - +def _acquire_mock(conn): + """Helper to create an async context manager for pool.acquire().""" + cm = AsyncMock() + cm.__aenter__ = AsyncMock(return_value=conn) + cm.__aexit__ = AsyncMock(return_value=None) + return cm + + +class TestMakeWorkspace: + """Tests for _make_workspace static method.""" + + def test_make_workspace_produces_ws_prefix(self): + """Should produce workspace with ws_ prefix.""" + result = 
PostgresBM25Adapter._make_workspace( + "36ecc1eb-dead-4000-beef-1234567890ab" + ) + assert result.startswith("ws_") + + def test_make_workspace_is_deterministic(self): + """Same input should always produce same workspace.""" + result1 = PostgresBM25Adapter._make_workspace("test-working-dir") + result2 = PostgresBM25Adapter._make_workspace("test-working-dir") + assert result1 == result2 + + def test_make_workspace_different_inputs_different_outputs(self): + """Different inputs should produce different workspaces.""" + result1 = PostgresBM25Adapter._make_workspace("dir-a") + result2 = PostgresBM25Adapter._make_workspace("dir-b") + assert result1 != result2 + + +class TestSearch: + @pytest.mark.asyncio + async def test_search_returns_results(self, mock_pool, mock_connection): + """Search should return BM25SearchResult list.""" + adapter = PostgresBM25Adapter(db_url="postgresql://test") + adapter._pool = mock_pool + mock_pool.acquire.return_value = _acquire_mock(mock_connection) + + mock_connection.fetch.return_value = [ + { + "chunk_id": "123", + "content": "PostgreSQL database system", + "file_path": "/doc.pdf", + "score": -2.345, + } + ] + + results = await adapter.search("PostgreSQL", "workspace1", top_k=5) + + assert len(results) == 1 + assert results[0].chunk_id == "123" + assert results[0].content == "PostgreSQL database system" + assert results[0].file_path == "/doc.pdf" + assert results[0].score == 2.345 + + @pytest.mark.asyncio + async def test_search_converts_negative_scores(self, mock_pool, mock_connection): + """Search should convert negative BM25 scores to positive.""" + adapter = PostgresBM25Adapter(db_url="postgresql://test") + adapter._pool = mock_pool + mock_pool.acquire.return_value = _acquire_mock(mock_connection) + + mock_connection.fetch.return_value = [ + { + "chunk_id": "1", + "content": "test", + "file_path": "/t.pdf", + "score": -5.0, + } + ] + + results = await adapter.search("test", "ws", top_k=10) + assert results[0].score == 5.0 + + 
@pytest.mark.asyncio + async def test_search_with_no_results(self, mock_pool, mock_connection): + """Search should return empty list when no matches.""" + adapter = PostgresBM25Adapter(db_url="postgresql://test") + adapter._pool = mock_pool + mock_pool.acquire.return_value = _acquire_mock(mock_connection) + + mock_connection.fetch.return_value = [] + + results = await adapter.search("nonexistent", "workspace1", top_k=10) + assert results == [] + + @pytest.mark.asyncio + async def test_search_queries_lightrag_doc_chunks(self, mock_pool, mock_connection): + """Search should query lightrag_doc_chunks with workspace mapping.""" + adapter = PostgresBM25Adapter(db_url="postgresql://test") + adapter._pool = mock_pool + mock_pool.acquire.return_value = _acquire_mock(mock_connection) + + mock_connection.fetch.return_value = [] + + await adapter.search("test query", "some-working-dir", top_k=5) + + sql = mock_connection.fetch.call_args[0][0] + assert "lightrag_doc_chunks" in sql + assert "workspace" in sql + + workspace_arg = mock_connection.fetch.call_args[0][2] + assert workspace_arg == PostgresBM25Adapter._make_workspace("some-working-dir") + + +class TestIndexDocument: + @pytest.mark.asyncio + async def test_index_document_is_noop(self, mock_pool, mock_connection): + """Index document should be a no-op since LightRAG owns the table.""" + adapter = PostgresBM25Adapter(db_url="postgresql://test") + adapter._pool = mock_pool + + await adapter.index_document( + chunk_id="123", + content="test content", + file_path="/doc.pdf", + working_dir="workspace1", + metadata={"page": 1}, + ) + + mock_pool.acquire.assert_not_called() + mock_connection.execute.assert_not_called() + + +class TestCreateIndex: + @pytest.mark.asyncio + async def test_create_index_updates_lightrag_doc_chunks( + self, mock_pool, mock_connection + ): + """Create index should update lightrag_doc_chunks tsvector.""" + adapter = PostgresBM25Adapter(db_url="postgresql://test") + adapter._pool = mock_pool + 
mock_pool.acquire.return_value = _acquire_mock(mock_connection) + + await adapter.create_index("some-working-dir") + + mock_connection.execute.assert_called_once() + sql = mock_connection.execute.call_args[0][0] + assert "lightrag_doc_chunks" in sql + assert "content_tsv" in sql + + workspace_arg = mock_connection.execute.call_args[0][1] + assert workspace_arg == PostgresBM25Adapter._make_workspace("some-working-dir") + + +class TestDropIndex: + @pytest.mark.asyncio + async def test_drop_index_clears_tsvector(self, mock_pool, mock_connection): + """Drop index should clear tsvector for workspace.""" + adapter = PostgresBM25Adapter(db_url="postgresql://test") + adapter._pool = mock_pool + mock_pool.acquire.return_value = _acquire_mock(mock_connection) + + await adapter.drop_index("workspace1") + + mock_connection.execute.assert_called_once() + sql = mock_connection.execute.call_args[0][0] + assert "lightrag_doc_chunks" in sql + assert "content_tsv = NULL" in sql -@pytest.mark.asyncio -async def test_drop_index_clears_tsvector(mock_pool, mock_connection): - """Drop index should clear tsvector for workspace.""" - adapter = PostgresBM25Adapter(db_url="postgresql://test") - adapter._pool = mock_pool - mock_pool.acquire.return_value.__aenter__ = AsyncMock(return_value=mock_connection) - mock_pool.acquire.return_value.__aexit__ = AsyncMock(return_value=None) + @pytest.mark.asyncio + async def test_drop_index_uses_workspace_mapping(self, mock_pool, mock_connection): + """Drop index should map working_dir to workspace.""" + adapter = PostgresBM25Adapter(db_url="postgresql://test") + adapter._pool = mock_pool + mock_pool.acquire.return_value = _acquire_mock(mock_connection) - await adapter.drop_index("workspace1") + await adapter.drop_index("my-working-dir") - # Verify SQL was executed - mock_connection.execute.assert_called_once() - call_args = mock_connection.execute.call_args[0] - assert "UPDATE chunks" in call_args[0] - assert "content_tsv = NULL" in call_args[0] + 
workspace_arg = mock_connection.execute.call_args[0][1] + assert workspace_arg == PostgresBM25Adapter._make_workspace("my-working-dir") -@pytest.mark.asyncio -async def test_close_closes_pool(mock_pool): - """Close should close connection pool.""" - adapter = PostgresBM25Adapter(db_url="postgresql://test") - adapter._pool = mock_pool +class TestClose: + @pytest.mark.asyncio + async def test_close_closes_pool(self, mock_pool): + """Close should close connection pool.""" + adapter = PostgresBM25Adapter(db_url="postgresql://test") + adapter._pool = mock_pool - await adapter.close() + await adapter.close() - mock_pool.close.assert_called_once() - assert adapter._pool is None + mock_pool.close.assert_called_once() + assert adapter._pool is None + @pytest.mark.asyncio + async def test_close_with_no_pool(self): + """Close should handle None pool gracefully.""" + adapter = PostgresBM25Adapter(db_url="postgresql://test") + adapter._pool = None -@pytest.mark.asyncio -async def test_close_with_no_pool(): - """Close should handle None pool gracefully.""" - adapter = PostgresBM25Adapter(db_url="postgresql://test") - adapter._pool = None + await adapter.close() - await adapter.close() - - assert adapter._pool is None + assert adapter._pool is None diff --git a/tests/infrastructure/hybrid/test_rrf_combiner.py b/tests/infrastructure/hybrid/test_rrf_combiner.py index a430bab..f0cc270 100644 --- a/tests/infrastructure/hybrid/test_rrf_combiner.py +++ b/tests/infrastructure/hybrid/test_rrf_combiner.py @@ -221,3 +221,94 @@ def test_combine_only_vector_results(): assert combined[0].chunk_id == "1" assert combined[0].bm25_score == 0 assert combined[0].vector_score > 0 + + +def test_combine_uses_chunk_id_not_reference_id(): + """RRF should match by chunk_id, not reference_id. + + Vector results include both chunk_id (e.g. 'chunk-abc123') and + reference_id (e.g. '1'). BM25 results use the same chunk_id. + The combiner must match by chunk_id so overlapping results merge. 
+ """ + combiner = RRFCombiner(k=60) + + bm25_results = [ + BM25SearchResult( + chunk_id="chunk-abc123", + content="shared result", + file_path="/doc.pdf", + score=5.0, + metadata={}, + ), + ] + + vector_results = { + "data": { + "chunks": [ + { + "chunk_id": "chunk-abc123", + "reference_id": "1", + "content": "shared result", + "file_path": "/doc.pdf", + }, + ] + } + } + + combined = combiner.combine(bm25_results, vector_results, top_k=10) + + assert len(combined) == 1 + assert combined[0].chunk_id == "chunk-abc123" + assert combined[0].bm25_rank == 1 + assert combined[0].vector_rank == 1 + assert combined[0].reference_id == "1" + assert combined[0].combined_score == 1 / (60 + 1) + 1 / (60 + 1) + + +def test_combine_preserves_reference_id_from_vector(): + """RRF should preserve reference_id from vector results.""" + combiner = RRFCombiner() + + bm25_results = [] + + vector_results = { + "data": { + "chunks": [ + { + "chunk_id": "chunk-xyz", + "reference_id": "3", + "content": "Vector result", + "file_path": "/c.pdf", + }, + ] + } + } + + combined = combiner.combine(bm25_results, vector_results, top_k=10) + + assert len(combined) == 1 + assert combined[0].reference_id == "3" + + +def test_combine_no_chunk_id_falls_back_to_reference_id(): + """If vector results lack chunk_id, use reference_id as fallback.""" + combiner = RRFCombiner() + + bm25_results = [] + + vector_results = { + "data": { + "chunks": [ + { + "reference_id": "5", + "content": "Old format vector result", + "file_path": "/d.pdf", + }, + ] + } + } + + combined = combiner.combine(bm25_results, vector_results, top_k=10) + + assert len(combined) == 1 + assert combined[0].chunk_id == "5" diff --git a/tests/unit/test_lifespan.py b/tests/unit/test_lifespan.py index cd89212..f59b9e7 100644 --- a/tests/unit/test_lifespan.py +++ b/tests/unit/test_lifespan.py @@ -8,22 +8,6 @@ class TestLifespan: """Tests for lifespan context managers in main.py.""" - @pytest.mark.asyncio - async def 
test_db_lifespan_runs_migrations_on_startup(self): - """Should run Alembic migrations on startup.""" - from main import db_lifespan - - mock_app = MagicMock() - - with ( - patch("main.bm25_adapter", None), - patch("main.asyncio.to_thread") as mock_to_thread, - ): - mock_to_thread.return_value = None - async with db_lifespan(mock_app): - pass - mock_to_thread.assert_called_once() - @pytest.mark.asyncio async def test_db_lifespan_closes_bm25_pool_on_shutdown(self): """Should close BM25 adapter connection pool on shutdown.""" @@ -32,7 +16,7 @@ async def test_db_lifespan_closes_bm25_pool_on_shutdown(self): mock_app = MagicMock() mock_bm25 = AsyncMock() - with patch("main.bm25_adapter", mock_bm25), patch("main.asyncio.to_thread"): + with patch("main.bm25_adapter", mock_bm25): async with db_lifespan(mock_app): pass mock_bm25.close.assert_called_once() @@ -44,27 +28,10 @@ async def test_db_lifespan_handles_no_bm25_adapter(self): mock_app = MagicMock() - with patch("main.bm25_adapter", None), patch("main.asyncio.to_thread"): + with patch("main.bm25_adapter", None): async with db_lifespan(mock_app): pass - @pytest.mark.asyncio - async def test_db_lifespan_raises_on_migration_failure(self): - """Should raise if migrations fail — refusing to start with broken schema.""" - from main import db_lifespan - - mock_app = MagicMock() - - with ( - patch("main.bm25_adapter", None), - patch("main.asyncio.to_thread") as mock_to_thread, - ): - mock_to_thread.side_effect = Exception("Migration failed") - with pytest.raises(Exception, match="Migration failed"): - async with db_lifespan(mock_app): - pass - mock_to_thread.assert_called_once() - @pytest.mark.asyncio async def test_db_lifespan_handles_close_failure(self): """Should not crash if BM25 close fails.""" @@ -74,7 +41,7 @@ async def test_db_lifespan_handles_close_failure(self): mock_bm25 = AsyncMock() mock_bm25.close = AsyncMock(side_effect=Exception("Close failed")) - with patch("main.bm25_adapter", mock_bm25), 
patch("main.asyncio.to_thread"): + with patch("main.bm25_adapter", mock_bm25): async with db_lifespan(mock_app): pass mock_bm25.close.assert_called_once() @@ -92,3 +59,15 @@ async def test_run_alembic_upgrade_calls_command(self): _run_alembic_upgrade() mock_upgrade.assert_called_once_with(mock_cfg, "head") + + def test_run_fastapi_runs_migrations_before_uvicorn(self): + """Should run migrations synchronously before starting uvicorn.""" + with ( + patch("main._run_alembic_upgrade") as mock_migrate, + patch("main.uvicorn.run") as mock_uvicorn, + ): + from main import run_fastapi + + run_fastapi() + mock_migrate.assert_called_once() + mock_uvicorn.assert_called_once() diff --git a/tests/unit/test_query_use_case.py b/tests/unit/test_query_use_case.py index 44f6e62..3679425 100644 --- a/tests/unit/test_query_use_case.py +++ b/tests/unit/test_query_use_case.py @@ -108,7 +108,7 @@ async def test_execute_hybrid_plus_with_bm25( mock_bm25 = AsyncMock() mock_bm25.search.return_value = [ BM25SearchResult( - chunk_id="1", + chunk_id="chunk-abc123", content="bm25 result", file_path="/a.pdf", score=5.0, @@ -119,6 +119,7 @@ async def test_execute_hybrid_plus_with_bm25( "data": { "chunks": [ { + "chunk_id": "chunk-abc123", "reference_id": "2", "content": "vector result", "file_path": "/b.pdf", @@ -137,6 +138,12 @@ async def test_execute_hybrid_plus_with_bm25( assert result["status"] == "success" assert result["metadata"]["query_mode"] == "hybrid+" + chunk = result["data"]["chunks"][0] + assert chunk["chunk_id"] == "chunk-abc123" + assert chunk["bm25_rank"] == 1 + assert chunk["vector_rank"] == 1 + assert chunk["reference_id"] == "2" + async def test_execute_hybrid_plus_without_bm25_falls_back( self, mock_rag_engine: AsyncMock ) -> None: From 0791dcfbb2789ff6a715ca5276ac528430070c3a Mon Sep 17 00:00:00 2001 From: Kaiohz Date: Wed, 8 Apr 2026 12:26:36 +0200 Subject: [PATCH 16/17] fix: use configurable French text_config for BM25 search - BM25 adapter now accepts text_config parameter 
(default: english, env: BM25_TEXT_CONFIG) - Creates text-config-specific BM25 index (e.g. idx_lightrag_chunks_bm25_french) - Auto-rebuilds content_tsv and trigger function when text_config changes - Removed GIN tsvector pre-filter (was too strict with AND matching for multi-word queries) - BM25 ranking via to_bm25query handles relevance scoring directly - Updated .env.raganything-api to BM25_TEXT_CONFIG=french - All 121 tests passing --- src/alembic/versions/001_add_bm25_support.py | 13 +-- src/dependencies.py | 3 +- .../bm25/pg_textsearch_adapter.py | 102 ++++++++++++++++-- .../bm25/test_pg_textsearch_adapter.py | 37 ++++++- 4 files changed, 134 insertions(+), 21 deletions(-) diff --git a/src/alembic/versions/001_add_bm25_support.py b/src/alembic/versions/001_add_bm25_support.py index 9e51110..d25aa2f 100644 --- a/src/alembic/versions/001_add_bm25_support.py +++ b/src/alembic/versions/001_add_bm25_support.py @@ -28,14 +28,6 @@ def upgrade() -> None: "CREATE INDEX IF NOT EXISTS idx_lightrag_chunks_content_tsv ON lightrag_doc_chunks USING GIN(content_tsv)" ) - op.execute( - """ - CREATE INDEX IF NOT EXISTS idx_lightrag_chunks_bm25 - ON lightrag_doc_chunks USING bm25(content) - WITH (text_config='english') - """ - ) - op.execute( """ CREATE OR REPLACE FUNCTION update_chunks_tsv() @@ -57,8 +49,6 @@ def upgrade() -> None: """ ) - # WARNING: This UPDATE scans the entire table. For tables with >100K rows, - # consider running as a separate manual batch operation instead. 
op.execute( "UPDATE lightrag_doc_chunks SET content_tsv = to_tsvector('english', COALESCE(content, '')) WHERE content_tsv IS NULL" ) @@ -70,7 +60,8 @@ def downgrade() -> None: """Remove BM25 support from lightrag_doc_chunks.""" op.execute("DROP TRIGGER IF EXISTS trg_chunks_content_tsv ON lightrag_doc_chunks") op.execute("DROP FUNCTION IF EXISTS update_chunks_tsv()") - op.execute("DROP INDEX IF EXISTS idx_lightrag_chunks_bm25") + for suffix in ("english", "french"): + op.execute(f"DROP INDEX IF EXISTS idx_lightrag_chunks_bm25_{suffix}") op.execute("DROP INDEX IF EXISTS idx_lightrag_chunks_content_tsv") op.execute("ALTER TABLE lightrag_doc_chunks DROP COLUMN IF EXISTS content_tsv") op.execute("DROP EXTENSION IF EXISTS pg_textsearch") diff --git a/src/dependencies.py b/src/dependencies.py index ad9115a..b311ef9 100644 --- a/src/dependencies.py +++ b/src/dependencies.py @@ -40,7 +40,8 @@ if bm25_config.BM25_ENABLED: try: bm25_adapter = PostgresBM25Adapter( - db_url=db_config.DATABASE_URL.replace("+asyncpg", "") + db_url=db_config.DATABASE_URL.replace("+asyncpg", ""), + text_config=bm25_config.BM25_TEXT_CONFIG, ) except Exception as e: print(f"WARNING: BM25 adapter initialization failed: {e}") diff --git a/src/infrastructure/bm25/pg_textsearch_adapter.py b/src/infrastructure/bm25/pg_textsearch_adapter.py index ba09c2e..905915a 100644 --- a/src/infrastructure/bm25/pg_textsearch_adapter.py +++ b/src/infrastructure/bm25/pg_textsearch_adapter.py @@ -19,11 +19,18 @@ class PostgresBM25Adapter(BM25EnginePort): workspace mapping as LightRAGAdapter (_make_workspace). 
""" - def __init__(self, db_url: str): + _BM25_INDEX_PREFIX = "idx_lightrag_chunks_bm25" + + def __init__(self, db_url: str, text_config: str = "english"): self.db_url = db_url + self.text_config = text_config self._pool: asyncpg.Pool | None = None self._pool_lock = asyncio.Lock() + @property + def bm25_index_name(self) -> str: + return f"{self._BM25_INDEX_PREFIX}_{self.text_config}" + @staticmethod def _make_workspace(working_dir: str) -> str: """Map working_dir to lightrag_doc_chunks.workspace (same as LightRAGAdapter).""" @@ -54,8 +61,85 @@ async def _check_extension(self) -> None: "BM25 ranking <@> operator will not work. " "Run: CREATE EXTENSION pg_textsearch;" ) + return except Exception as e: logger.warning("Could not check pg_textsearch extension: %s", e) + return + + await self._ensure_bm25_index(conn) + await self._rebuild_tsv_if_config_changed(conn) + + async def _ensure_bm25_index(self, conn) -> None: + """Create or recreate the BM25 index for the configured text_config. + + Drops any stale BM25 index from a different text_config. 
+ """ + index_name = self.bm25_index_name + try: + existing = await conn.fetchval( + "SELECT indexname FROM pg_indexes WHERE indexname = $1", + index_name, + ) + if existing: + logger.info( + "BM25 index '%s' already exists for text_config='%s'", + index_name, + self.text_config, + ) + return + + for suffix in ("english", "french"): + stale = f"{self._BM25_INDEX_PREFIX}_{suffix}" + if stale != index_name: + await conn.execute(f"DROP INDEX IF EXISTS {stale}") + + await conn.execute( + f""" + CREATE INDEX {index_name} + ON lightrag_doc_chunks USING bm25(content) + WITH (text_config='{self.text_config}') + """ + ) + logger.info( + "Created BM25 index '%s' with text_config='%s'", + index_name, + self.text_config, + ) + except Exception as e: + logger.error("Failed to ensure BM25 index: %s", e) + + async def _rebuild_tsv_if_config_changed(self, conn) -> None: + """Rebuild content_tsv if trigger function uses a different text_config.""" + try: + func_def = await conn.fetchval( + "SELECT prosrc FROM pg_proc WHERE proname = 'update_chunks_tsv'" + ) + if func_def and f"'{self.text_config}'" not in func_def: + logger.info( + "Updating trigger function from old text_config to '%s'", + self.text_config, + ) + await conn.execute( + f""" + CREATE OR REPLACE FUNCTION update_chunks_tsv() + RETURNS TRIGGER AS $$ + BEGIN + NEW.content_tsv := to_tsvector('{self.text_config}', COALESCE(NEW.content, '')); + RETURN NEW; + END; + $$ LANGUAGE plpgsql; + """ + ) + status = await conn.execute( + f""" + UPDATE lightrag_doc_chunks + SET content_tsv = to_tsvector('{self.text_config}', COALESCE(content, '')) + WHERE content_tsv IS NOT NULL + """ + ) + logger.info("Rebuilt content_tsv: %s with text_config='%s'", status, self.text_config) + except Exception as e: + logger.warning("Could not check/rebuild trigger function: %s", e) async def close(self) -> None: """Close connection pool on shutdown.""" @@ -72,6 +156,7 @@ async def search( """Search using BM25 ranking on lightrag_doc_chunks.""" pool 
= await self._get_pool() workspace = self._make_workspace(working_dir) + bm25_index = f"idx_lightrag_chunks_bm25_{self.text_config}" try: async with pool.acquire() as conn: @@ -80,15 +165,16 @@ async def search( id AS chunk_id, content, file_path, - content <@> to_bm25query($1, 'idx_lightrag_chunks_bm25') as score + content <@> to_bm25query($1, $3) as score FROM lightrag_doc_chunks WHERE workspace = $2 - AND content_tsv @@ plainto_tsquery('english', $1) - AND content <@> to_bm25query($1, 'idx_lightrag_chunks_bm25') < 0 + AND content <@> to_bm25query($1, $3) < 0 ORDER BY score - LIMIT $3 + LIMIT $4 """ - results = await conn.fetch(sql, query, workspace, top_k) + results = await conn.fetch( + sql, query, workspace, bm25_index, top_k + ) return [ BM25SearchResult( @@ -127,9 +213,9 @@ async def create_index(self, working_dir: str) -> None: try: async with pool.acquire() as conn: await conn.execute( - """ + f""" UPDATE lightrag_doc_chunks - SET content_tsv = to_tsvector('english', COALESCE(content, '')) + SET content_tsv = to_tsvector('{self.text_config}', COALESCE(content, '')) WHERE workspace = $1 AND content_tsv IS NULL """, workspace, diff --git a/tests/infrastructure/bm25/test_pg_textsearch_adapter.py b/tests/infrastructure/bm25/test_pg_textsearch_adapter.py index 02bd78d..5dbe999 100644 --- a/tests/infrastructure/bm25/test_pg_textsearch_adapter.py +++ b/tests/infrastructure/bm25/test_pg_textsearch_adapter.py @@ -53,6 +53,26 @@ def test_make_workspace_different_inputs_different_outputs(self): assert result1 != result2 +class TestTextConfig: + """Tests for text_config and BM25 index naming.""" + + def test_default_text_config_is_english(self): + adapter = PostgresBM25Adapter(db_url="postgresql://test") + assert adapter.text_config == "english" + + def test_custom_text_config(self): + adapter = PostgresBM25Adapter(db_url="postgresql://test", text_config="french") + assert adapter.text_config == "french" + + def test_bm25_index_name_includes_text_config(self): + 
adapter = PostgresBM25Adapter(db_url="postgresql://test", text_config="french") + assert adapter.bm25_index_name == "idx_lightrag_chunks_bm25_french" + + def test_bm25_index_name_english(self): + adapter = PostgresBM25Adapter(db_url="postgresql://test", text_config="english") + assert adapter.bm25_index_name == "idx_lightrag_chunks_bm25_english" + + class TestSearch: @pytest.mark.asyncio async def test_search_returns_results(self, mock_pool, mock_connection): @@ -127,6 +147,20 @@ async def test_search_queries_lightrag_doc_chunks(self, mock_pool, mock_connecti workspace_arg = mock_connection.fetch.call_args[0][2] assert workspace_arg == PostgresBM25Adapter._make_workspace("some-working-dir") + @pytest.mark.asyncio + async def test_search_uses_bm25_index_with_text_config(self, mock_pool, mock_connection): + """Search should use text_config-specific BM25 index.""" + adapter = PostgresBM25Adapter(db_url="postgresql://test", text_config="french") + adapter._pool = mock_pool + mock_pool.acquire.return_value = _acquire_mock(mock_connection) + + mock_connection.fetch.return_value = [] + + await adapter.search("test query", "some-working-dir", top_k=5) + + bm25_index_arg = mock_connection.fetch.call_args[0][3] + assert bm25_index_arg == "idx_lightrag_chunks_bm25_french" + class TestIndexDocument: @pytest.mark.asyncio @@ -153,7 +187,7 @@ async def test_create_index_updates_lightrag_doc_chunks( self, mock_pool, mock_connection ): """Create index should update lightrag_doc_chunks tsvector.""" - adapter = PostgresBM25Adapter(db_url="postgresql://test") + adapter = PostgresBM25Adapter(db_url="postgresql://test", text_config="french") adapter._pool = mock_pool mock_pool.acquire.return_value = _acquire_mock(mock_connection) @@ -163,6 +197,7 @@ async def test_create_index_updates_lightrag_doc_chunks( sql = mock_connection.execute.call_args[0][0] assert "lightrag_doc_chunks" in sql assert "content_tsv" in sql + assert "french" in sql workspace_arg = 
mock_connection.execute.call_args[0][1] assert workspace_arg == PostgresBM25Adapter._make_workspace("some-working-dir") From 4c725a5ad787f964c3bbcaccfbdbc752fa6ab292 Mon Sep 17 00:00:00 2001 From: Kaiohz Date: Wed, 8 Apr 2026 13:32:34 +0200 Subject: [PATCH 17/17] refactor: remove score/bm25_rank/vector_rank/combined_score from chunk response These are internal ranking details not useful for API consumers. --- src/application/responses/query_response.py | 4 --- src/application/use_cases/query_use_case.py | 31 +++++++-------------- tests/unit/test_query_use_case.py | 2 -- 3 files changed, 10 insertions(+), 27 deletions(-) diff --git a/src/application/responses/query_response.py b/src/application/responses/query_response.py index d631907..0c84989 100644 --- a/src/application/responses/query_response.py +++ b/src/application/responses/query_response.py @@ -26,10 +26,6 @@ class ChunkResponse(BaseModel): content: str file_path: str chunk_id: str = "" - score: float | None = None - bm25_rank: int | None = None - vector_rank: int | None = None - combined_score: float | None = None class ReferenceResponse(BaseModel): diff --git a/src/application/use_cases/query_use_case.py b/src/application/use_cases/query_use_case.py index eb31b3b..ace4dcd 100644 --- a/src/application/use_cases/query_use_case.py +++ b/src/application/use_cases/query_use_case.py @@ -109,22 +109,16 @@ async def execute( ) def _format_bm25_results(self, results: list) -> dict: - """Format BM25 results with rank information.""" - chunks = [] - for rank, r in enumerate(results, start=1): - chunks.append( - { - "reference_id": r.chunk_id, - "content": r.content, - "file_path": r.file_path, - "chunk_id": r.chunk_id, - "score": r.score, - "bm25_rank": rank, - "vector_rank": None, - "combined_score": None, - "metadata": r.metadata, - } - ) + """Format BM25 results to match API response format.""" + chunks = [ + { + "reference_id": r.chunk_id, + "content": r.content, + "file_path": r.file_path, + "chunk_id": 
r.chunk_id, + } + for r in results + ] return { "status": "success", "message": "", @@ -154,11 +148,6 @@ def _format_hybrid_results(self, results: list) -> dict: "content": r.content, "file_path": r.file_path, "chunk_id": r.chunk_id, - "score": r.combined_score, - "bm25_rank": r.bm25_rank, - "vector_rank": r.vector_rank, - "combined_score": r.combined_score, - "metadata": r.metadata, } for r in results ], diff --git a/tests/unit/test_query_use_case.py b/tests/unit/test_query_use_case.py index 3679425..46e9bd3 100644 --- a/tests/unit/test_query_use_case.py +++ b/tests/unit/test_query_use_case.py @@ -140,8 +140,6 @@ async def test_execute_hybrid_plus_with_bm25( chunk = result["data"]["chunks"][0] assert chunk["chunk_id"] == "chunk-abc123" - assert chunk["bm25_rank"] == 1 - assert chunk["vector_rank"] == 1 assert chunk["reference_id"] == "2" async def test_execute_hybrid_plus_without_bm25_falls_back(