From 4fc315f581543c3cf696fd78e5975513863e7132 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Tue, 24 Feb 2026 15:12:55 +0000 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Skip=20redundant=20DNS=20va?= =?UTF-8?q?lidation=20for=20cached=20blocklists?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: abhimehro <84992105+abhimehro@users.noreply.github.com> --- main.py | 46 +++++++++++---- tests/test_warm_up_cache_perf.py | 98 ++++++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+), 11 deletions(-) create mode 100644 tests/test_warm_up_cache_perf.py diff --git a/main.py b/main.py index 7e758616..d845c35b 100644 --- a/main.py +++ b/main.py @@ -1131,19 +1131,23 @@ def validate_folder_data(data: Dict[str, Any], url: str) -> bool: ) return False if "rules" in rg: - if not isinstance (rg["rules"], list): - log. error ( - f"Invalid data from {sanitize_for_log(url)} : rule_groups[fil].rules must be a list." + if not isinstance(rg["rules"], list): + log.error( + f"Invalid data from {sanitize_for_log(url)}: rule_groups[{i}].rules must be a list." ) return False -# Ensure each rule within the group is an object (dict), -# because later code treats each rule as a mapping (e.g., rule.get(...)). -for j, rule in enumerate (rgi"rules"1): -if not isinstance (rule, dict): - log. error ( - f"Invalid data from {sanitize_for_log(u rl)}: rule_groups[fiłl.rules[kił] must be an object." - ) - return False + + # Ensure each rule within the group is an object (dict), + # because later code treats each rule as a mapping (e.g., rule.get(...)). + for j, rule in enumerate(rg["rules"]): + if not isinstance(rule, dict): + log.error( + f"Invalid data from {sanitize_for_log(url)}: rule_groups[{i}].rules[{j}] must be an object." + ) + return False + + return True + # Lock to protect updates to _api_stats in multi-threaded contexts. 
# Without this, concurrent increments can lose updates because `+=` is not atomic.
@@ -1767,6 +1771,36 @@ def fetch_folder_data(url: str) -> Dict[str, Any]:
     return js
 
 
+def _is_cache_fresh(url: str) -> bool:
+    """Return True if cached data for ``url`` can be used without re-validation.
+
+    A URL is fresh when it is already in the in-memory cache, or when its
+    persistent cache entry was validated less than CACHE_TTL_SECONDS ago.
+    Entries with a missing, non-numeric, or future ``last_validated``
+    timestamp are treated as stale so the caller falls back to validation.
+    """
+    # Check in-memory cache first (though warm_up_cache filters these out,
+    # having it here makes the helper more robust)
+    with _cache_lock:
+        if url in _cache:
+            return True
+
+    entry = _disk_cache.get(url)
+    if not isinstance(entry, dict):
+        return False
+
+    # A timestamp that is absent, null, or not a number must not raise here:
+    # this runs inside warm_up_cache's per-URL task, so fail closed instead.
+    last_validated = entry.get("last_validated")
+    if not isinstance(last_validated, (int, float)):
+        return False
+
+    # Require a non-negative age so a timestamp in the future (clock change,
+    # corrupted entry) cannot keep the URL "fresh" indefinitely.
+    age = time.time() - last_validated
+    return 0 <= age < CACHE_TTL_SECONDS
+
+
 def warm_up_cache(urls: Sequence[str]) -> None:
     """
     Pre-fetches and caches folder data from multiple URLs in parallel.
@@ -1788,6 +1822,10 @@ def warm_up_cache(urls: Sequence[str]) -> None:
     # OPTIMIZATION: Combine validation (DNS) and fetching (HTTP) in one task
     # to allow validation latency to be parallelized.
     def _validate_and_fetch(url: str):
+        # Optimization: Skip DNS validation if cache is fresh
+        if _is_cache_fresh(url):
+            return _gh_get(url)
+
         if validate_folder_url(url):
             return _gh_get(url)
         return None
diff --git a/tests/test_warm_up_cache_perf.py b/tests/test_warm_up_cache_perf.py
new file mode 100644
index 00000000..b258d205
--- /dev/null
+++ b/tests/test_warm_up_cache_perf.py
@@ -0,0 +1,127 @@
+import time
+import unittest
+from unittest.mock import patch, MagicMock
+import sys
+import os
+
+# Add root to path to import main
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+import main
+
+class TestWarmUpCachePerf(unittest.TestCase):
+    def setUp(self):
+        main._cache.clear()
+        main._disk_cache.clear()
+        main.validate_folder_url.cache_clear()
+
+    def tearDown(self):
+        main._cache.clear()
+        main._disk_cache.clear()
+        main.validate_folder_url.cache_clear()
+
+    def test_warm_up_skips_validation_for_fresh_cache(self):
+        """
+        Test that warm_up_cache skips validate_folder_url if the URL is in disk cache and fresh.
+        """
+        test_url = "https://example.com/test.json"
+        test_data = {"group": {"group": "Test Folder"}, "domains": ["example.com"]}
+
+        # Populate disk cache with fresh entry
+        main._disk_cache[test_url] = {
+            "data": test_data,
+            "last_validated": time.time(),  # Fresh
+            "fetched_at": time.time(),
+        }
+
+        # Mock validate_folder_url to ensure it is NOT called
+        # Mock _gh_get to verify it IS called (which will use the cache)
+        with patch('main.validate_folder_url') as mock_validate:
+            with patch('main._gh_get', return_value=test_data) as mock_gh_get:
+
+                main.warm_up_cache([test_url])
+
+                # Verify _gh_get was called (it handles cache retrieval)
+                mock_gh_get.assert_called_with(test_url)
+
+                # Verify validate_folder_url was NOT called
+                # This assertion will FAIL before the fix
+                mock_validate.assert_not_called()
+
+    def test_warm_up_calls_validation_for_stale_cache(self):
+        """
+        Test that warm_up_cache calls validate_folder_url if the URL is stale in disk cache.
+        """
+        test_url = "https://example.com/test.json"
+        test_data = {"group": {"group": "Test Folder"}, "domains": ["example.com"]}
+
+        # Populate disk cache with STALE entry
+        stale_time = time.time() - (main.CACHE_TTL_SECONDS + 100)
+        main._disk_cache[test_url] = {
+            "data": test_data,
+            "last_validated": stale_time,
+            "fetched_at": stale_time,
+        }
+
+        with patch('main.validate_folder_url', return_value=True) as mock_validate:
+            with patch('main._gh_get', return_value=test_data) as mock_gh_get:
+
+                main.warm_up_cache([test_url])
+
+                # Verify validate_folder_url WAS called
+                mock_validate.assert_called_with(test_url)
+
+                # Verify _gh_get was called
+                mock_gh_get.assert_called_with(test_url)
+
+    def test_warm_up_calls_validation_for_missing_cache(self):
+        """
+        Test that warm_up_cache calls validate_folder_url if the URL is not in disk cache.
+        """
+        test_url = "https://example.com/test.json"
+        test_data = {"group": {"group": "Test Folder"}, "domains": ["example.com"]}
+
+        # Cache is empty
+
+        with patch('main.validate_folder_url', return_value=True) as mock_validate:
+            with patch('main._gh_get', return_value=test_data) as mock_gh_get:
+
+                main.warm_up_cache([test_url])
+
+                # Verify validate_folder_url WAS called
+                mock_validate.assert_called_with(test_url)
+
+                # Verify _gh_get was called
+                mock_gh_get.assert_called_with(test_url)
+
+    def test_warm_up_calls_validation_for_unusable_timestamp(self):
+        """
+        Test that warm_up_cache calls validate_folder_url if the cached timestamp
+        is null, non-numeric, or in the future.
+        """
+        test_url = "https://example.com/test.json"
+        test_data = {"group": {"group": "Test Folder"}, "domains": ["example.com"]}
+        future_time = time.time() + main.CACHE_TTL_SECONDS
+
+        for bad_timestamp in (None, "not-a-number", future_time):
+            with self.subTest(last_validated=bad_timestamp):
+                main._cache.clear()
+                main._disk_cache[test_url] = {
+                    "data": test_data,
+                    "last_validated": bad_timestamp,
+                    "fetched_at": time.time(),
+                }
+
+                with patch('main.validate_folder_url', return_value=True) as mock_validate:
+                    with patch('main._gh_get', return_value=test_data) as mock_gh_get:
+
+                        main.warm_up_cache([test_url])
+
+                        # Verify validate_folder_url WAS called
+                        mock_validate.assert_called_with(test_url)
+
+                        # Verify _gh_get was called
+                        mock_gh_get.assert_called_with(test_url)
+
+if __name__ == '__main__':
+    unittest.main()