diff --git a/.jules/bolt.md b/.jules/bolt.md index 19955b2a..cf5e60c4 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -31,3 +31,7 @@ ## 2024-05-24 - Pass Local State to Avoid Redundant Reads **Learning:** When a process involves modifying remote state (e.g. deleting folders) and then querying it (e.g. getting rules from remaining folders), maintaining a local replica of the state avoids redundant API calls. If you know what you deleted, you don't need to ask the server "what's left?". **Action:** Identify sequences of "Read -> Modify -> Read" and optimize to "Read -> Modify (update local) -> Use local". + +## 2024-05-24 - Parallelize DNS Validation +**Learning:** DNS lookups (`socket.getaddrinfo`) are blocking I/O operations. Performing them sequentially in a list comprehension (e.g., to filter URLs) can be a major bottleneck. Parallelizing them alongside the fetch operation can significantly reduce startup time. +**Action:** Move validation logic that involves network I/O into the parallel worker thread instead of pre-filtering sequentially. diff --git a/main.py b/main.py index c7810580..4ea1e1cc 100644 --- a/main.py +++ b/main.py @@ -469,7 +469,8 @@ def fetch_folder_data(url: str) -> Dict[str, Any]: def warm_up_cache(urls: Sequence[str]) -> None: urls = list(set(urls)) - urls_to_fetch = [u for u in urls if u not in _cache and validate_folder_url(u)] + # Optimization: only skip URLs already present in the cache here; URL validation is deferred to the worker threads + urls_to_fetch = [u for u in urls if u not in _cache] if not urls_to_fetch: return @@ -477,9 +478,16 @@ def warm_up_cache(urls: Sequence[str]) -> None: if not USE_COLORS: log.info(f"Warming up cache for {total} URLs...") + # Helper function to validate AND fetch in the worker thread + # Validation involves DNS lookups (blocking I/O), so parallelization is critical. 
+ def _validate_and_fetch(url: str) -> None: + if validate_folder_url(url): + _gh_get(url) + completed = 0 with concurrent.futures.ThreadPoolExecutor() as executor: - futures = {executor.submit(_gh_get, url): url for url in urls_to_fetch} + # Submit task that does both validation and fetch + futures = {executor.submit(_validate_and_fetch, url): url for url in urls_to_fetch} if USE_COLORS: sys.stderr.write(f"\r{Colors.CYAN}⏳ Warming up cache: 0/{total}...{Colors.ENDC}")