diff --git a/pyproject.toml b/pyproject.toml index 56e1426..bec2c99 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -90,6 +90,24 @@ ignore = [] fixable = ["ALL"] unfixable = [] +# Per-file ignores for special cases +[tool.ruff.lint.per-file-ignores] +"tests/**/*.py" = [ + "ANN", # Ignore all missing type annotations (ANN001, ANN201, etc.) + "T201", # Allow print statements +] + +"src/models/request_models.py" = ["N815"] # camelCase fields required for API contract +"src/optimization/optimized_module_loader.py" = ["N815"] # Pydantic model fields +"src/optimization/optimizers/generator_optimizer.py" = ["N815"] # Pydantic model fields +"src/response_generator/response_generate.py" = ["N815", "ANN401"] # Pydantic model fields + DSPy streamify Any type + +# Library interface patterns - legitimate Any usage +"src/contextual_retrieval/contextual_retrieval_api_client.py" = ["ANN401"] # httpx **kwargs pass-through +"src/guardrails/dspy_nemo_adapter.py" = ["ANN401"] # LangChain LLM interface + DSPy dynamic types +"src/llm_orchestrator_config/context_manager.py" = ["ANN401"] # MockResponse with dynamic attributes +"src/optimization/metrics/*.py" = ["ANN401"] # DSPy optimizer trace parameter (internal type) +"byk-stack-setup/script.py" = ["T201"] # CLI script uses print [tool.ruff.format] # Like Black, use double quotes for strings. @@ -123,4 +141,4 @@ exclude = [ ] # --- Global strictness --- -typeCheckingMode = "standard" # Standard typechecking mode +typeCheckingMode = "standard" # Standard typechecking mode \ No newline at end of file diff --git a/src/contextual_retrieval/bm25_search.py b/src/contextual_retrieval/bm25_search.py index 2be66e4..7ec8ea9 100644 --- a/src/contextual_retrieval/bm25_search.py +++ b/src/contextual_retrieval/bm25_search.py @@ -5,7 +5,7 @@ when collection data changes. """ -from typing import List, Dict, Any, Optional, Set +from typing import List, Dict, Any, Optional, Set, TYPE_CHECKING from loguru import logger from rank_bm25 import BM25Okapi import re @@ -20,13 +20,16 @@ ) from contextual_retrieval.config import ConfigLoader, ContextualRetrievalConfig +if TYPE_CHECKING: + from contextual_retrieval.contextual_retrieval_api_client import HTTPClientManager + class SmartBM25Search: """In-memory BM25 search with smart refresh capabilities.""" def __init__( self, qdrant_url: str, config: Optional["ContextualRetrievalConfig"] = None - ): + ) -> None: self.qdrant_url = qdrant_url self._config = config if config is not None else ConfigLoader.load_config() self._http_client_manager = None @@ -40,7 +43,7 @@ def __init__( # Strong references to background tasks to prevent premature GC self._background_tasks: Set[asyncio.Task[None]] = set() - async def _get_http_client_manager(self): + async def _get_http_client_manager(self) -> "HTTPClientManager": """Get the HTTP client manager instance.""" if self._http_client_manager is None: self._http_client_manager = await get_http_client_manager() @@ -356,7 +359,7 @@ def _tokenize_text(self, text: str) -> List[str]: tokens = self.tokenizer_pattern.findall(text.lower()) return tokens - async def close(self): + async def close(self) -> None: """Close HTTP client.""" if self._http_client_manager: await self._http_client_manager.close() diff --git a/src/contextual_retrieval/constants.py b/src/contextual_retrieval/constants.py index cb7c49c..b009ebd 100644 --- a/src/contextual_retrieval/constants.py +++ b/src/contextual_retrieval/constants.py @@ -15,7 +15,7 @@ class HttpClientConstants: DEFAULT_FAILURE_THRESHOLD = 5 DEFAULT_RECOVERY_TIMEOUT = 60.0 - # Timeouts (seconds) + # Timeouts in seconds DEFAULT_READ_TIMEOUT = 30.0 DEFAULT_CONNECT_TIMEOUT = 10.0 DEFAULT_WRITE_TIMEOUT = 10.0 diff --git a/src/contextual_retrieval/contextual_retrieval_api_client.py b/src/contextual_retrieval/contextual_retrieval_api_client.py index 3b82e1c..0de1455 100644 --- a/src/contextual_retrieval/contextual_retrieval_api_client.py +++ b/src/contextual_retrieval/contextual_retrieval_api_client.py @@ -24,7 +24,7 @@ class ServiceResilienceManager: """Service resilience manager with circuit breaker functionality for HTTP requests.""" - def __init__(self, config: Optional["ContextualRetrievalConfig"] = None): + def __init__(self, config: Optional["ContextualRetrievalConfig"] = None) -> None: # Load configuration if not provided if config is None: config = ConfigLoader.load_config() @@ -81,7 +81,7 @@ class HTTPClientManager: _instance: Optional["HTTPClientManager"] = None _lock = asyncio.Lock() - def __init__(self, config: Optional["ContextualRetrievalConfig"] = None): + def __init__(self, config: Optional["ContextualRetrievalConfig"] = None) -> None: """Initialize HTTP client manager.""" # Load configuration if not provided self._config = config if config is not None else ConfigLoader.load_config() @@ -169,7 +169,7 @@ async def get_client( SecureErrorHandler.sanitize_error_message( e, "HTTP client initialization" ) - ) + ) from e return self._client diff --git a/src/contextual_retrieval/contextual_retriever.py b/src/contextual_retrieval/contextual_retriever.py index 048c131..bdb61eb 100644 --- a/src/contextual_retrieval/contextual_retriever.py +++ b/src/contextual_retrieval/contextual_retriever.py @@ -43,7 +43,7 @@ def __init__( config_path: Optional[str] = None, llm_service: Optional["LLMOrchestrationService"] = None, shared_bm25: Optional[SmartBM25Search] = None, - ): + ) -> None: """ Initialize contextual retriever. @@ -120,7 +120,7 @@ async def initialize(self) -> bool: logger.error(f"Failed to initialize Contextual Retriever: {e}") return False - def _get_session_llm_service(self): + def _get_session_llm_service(self) -> "LLMOrchestrationService": """ Get cached LLM service for current retrieval session. Uses injected service if available, creates new instance as fallback. @@ -140,7 +140,7 @@ def _get_session_llm_service(self): return self._session_llm_service - def _clear_session_cache(self): + def _clear_session_cache(self) -> None: """Clear cached connections at end of retrieval session.""" if self._session_llm_service is not None: logger.debug("Clearing session LLM service cache") @@ -374,7 +374,9 @@ async def _execute_batch_query_searches( self._search_single_query_with_embedding( query, i, embedding, collections, limit ) - for i, (query, embedding) in enumerate(zip(queries, batch_embeddings)) + for i, (query, embedding) in enumerate( + zip(queries, batch_embeddings, strict=True) + ) ] # Execute all searches in parallel @@ -621,7 +623,7 @@ async def health_check(self) -> Dict[str, Any]: return health_status - async def close(self): + async def close(self) -> None: """Clean up resources.""" try: await self.provider_detection.close() diff --git a/src/contextual_retrieval/provider_detection.py b/src/contextual_retrieval/provider_detection.py index de75090..8abb4d1 100644 --- a/src/contextual_retrieval/provider_detection.py +++ b/src/contextual_retrieval/provider_detection.py @@ -7,7 +7,7 @@ - No hardcoded weights or preferences """ -from typing import List, Optional, Dict, Any +from typing import List, Optional, Dict, Any, TYPE_CHECKING from loguru import logger from contextual_retrieval.contextual_retrieval_api_client import get_http_client_manager from contextual_retrieval.error_handler import SecureErrorHandler @@ -18,18 +18,21 @@ ) from contextual_retrieval.config import ConfigLoader, ContextualRetrievalConfig +if TYPE_CHECKING: + from contextual_retrieval.contextual_retrieval_api_client import HTTPClientManager + class DynamicProviderDetection: """Dynamic collection selection without hardcoded preferences.""" def __init__( self, qdrant_url: str, config: Optional["ContextualRetrievalConfig"] = None - ): + ) -> None: self.qdrant_url = qdrant_url self._config = config if config is not None else ConfigLoader.load_config() self._http_client_manager = None - async def _get_http_client_manager(self): + async def _get_http_client_manager(self) -> "HTTPClientManager": """Get the HTTP client manager instance.""" if self._http_client_manager is None: self._http_client_manager = await get_http_client_manager() @@ -212,7 +215,7 @@ async def get_collection_stats(self) -> Dict[str, Any]: return stats - async def close(self): + async def close(self) -> None: """Close HTTP client.""" if self._http_client_manager: await self._http_client_manager.close() diff --git a/src/contextual_retrieval/qdrant_search.py b/src/contextual_retrieval/qdrant_search.py index 2c7d260..31515f3 100644 --- a/src/contextual_retrieval/qdrant_search.py +++ b/src/contextual_retrieval/qdrant_search.py @@ -5,7 +5,7 @@ existing contextual embeddings created by the vector indexer. """ -from typing import List, Dict, Any, Optional, Protocol +from typing import List, Dict, Any, Optional, Protocol, TYPE_CHECKING from loguru import logger import asyncio from contextual_retrieval.contextual_retrieval_api_client import get_http_client_manager @@ -17,6 +17,9 @@ ) from contextual_retrieval.config import ConfigLoader, ContextualRetrievalConfig +if TYPE_CHECKING: + from contextual_retrieval.contextual_retrieval_api_client import HTTPClientManager + class LLMServiceProtocol(Protocol): """Protocol defining the interface required from LLM service for embedding operations.""" @@ -47,12 +50,12 @@ class QdrantContextualSearch: def __init__( self, qdrant_url: str, config: Optional["ContextualRetrievalConfig"] = None - ): + ) -> None: self.qdrant_url = qdrant_url self._config = config if config is not None else ConfigLoader.load_config() self._http_client_manager = None - async def _get_http_client_manager(self): + async def _get_http_client_manager(self) -> "HTTPClientManager": """Get the HTTP client manager instance.""" if self._http_client_manager is None: self._http_client_manager = await get_http_client_manager() @@ -345,7 +348,7 @@ def get_embeddings_for_queries_batch( logger.error(f"Failed to get batch embeddings: {e}") return None - async def close(self): + async def close(self) -> None: """Close HTTP client.""" if self._http_client_manager: await self._http_client_manager.close() diff --git a/src/contextual_retrieval/rank_fusion.py b/src/contextual_retrieval/rank_fusion.py index c53f89a..acea0aa 100644 --- a/src/contextual_retrieval/rank_fusion.py +++ b/src/contextual_retrieval/rank_fusion.py @@ -14,7 +14,7 @@ class DynamicRankFusion: """Dynamic score fusion without hardcoded collection weights.""" - def __init__(self, config: Optional["ContextualRetrievalConfig"] = None): + def __init__(self, config: Optional["ContextualRetrievalConfig"] = None) -> None: """ Initialize rank fusion with configuration. @@ -184,7 +184,7 @@ def _reciprocal_rank_fusion( # Calculate final fused scores fused_results: List[Dict[str, Any]] = [] - for chunk_id, data in chunk_scores.items(): + for data in chunk_scores.values(): chunk = data["chunk"].copy() # Calculate fused RRF score