Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,31 @@ https://controld.com/dashboard/profiles/741861frakbm/filters
- Python 3.13+
- Runtime dependencies (install with `pip install -r requirements.txt` or `uv sync`)

## Blocklist Cache

ctrld-sync maintains a persistent on-disk cache of downloaded blocklist data to speed up subsequent syncs.

### Cache location

| Platform | Default path |
|----------|-------------|
| Linux / Unix | `~/.cache/ctrld-sync/blocklists.json` (or `$XDG_CACHE_HOME/ctrld-sync/`) |
| macOS | `~/Library/Caches/ctrld-sync/blocklists.json` |
| Windows | `%LOCALAPPDATA%\ctrld-sync\cache\blocklists.json` |

### How it works

1. **Within TTL (24 hours):** cached data is returned immediately—no HTTP request is made.
2. **TTL expired:** a conditional request is sent using `If-None-Match` (ETag) or `If-Modified-Since`. A `304 Not Modified` response reuses the cached data with no download.
3. **Changed or missing:** the full blocklist is downloaded and the cache is updated.

### Cache CLI flags

```bash
python main.py --clear-cache # delete the cache file and exit
python main.py --no-cache # disable the cache for this run (data is fetched fresh, cache is not updated)
```

## Testing

This project includes a comprehensive test suite to ensure code quality and correctness.
Expand Down
47 changes: 31 additions & 16 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -665,6 +665,7 @@ def _api_client() -> httpx.Client:
CACHE_TTL_SECONDS = 24 * 60 * 60  # 24 hours: within TTL, serve from disk without HTTP request
_disk_cache: Dict[str, Dict[str, Any]] = {}  # Loaded from disk on startup
# Counters for cache behavior — presumably reported at end of run; TODO confirm where they are logged.
_cache_stats = {"hits": 0, "misses": 0, "validations": 0, "errors": 0}
_no_cache: bool = False  # Set to True when --no-cache flag is passed
# Counters for outbound HTTP activity (Control D API calls vs. raw blocklist downloads).
_api_stats = {"control_d_api_calls": 0, "blocklist_fetches": 0}

# --------------------------------------------------------------------------- #
Expand Down Expand Up @@ -788,7 +789,11 @@ def save_disk_cache() -> None:

SECURITY: Creates cache directory with user-only permissions (0o700)
to prevent other users from reading cached blocklist data.

No-op when --no-cache is active.
"""
if _no_cache:
return
try:
cache_dir = get_cache_dir()
cache_dir.mkdir(parents=True, exist_ok=True)
Expand Down Expand Up @@ -1377,13 +1382,14 @@ def _gh_get(url: str) -> Dict:
last_modified = r_retry.headers.get("Last-Modified")

# Update disk cache with new data and headers
_disk_cache[url] = {
"data": data,
"etag": etag,
"last_modified": last_modified,
"fetched_at": time.time(),
"last_validated": time.time(),
}
if not _no_cache:
_disk_cache[url] = {
"data": data,
"etag": etag,
"last_modified": last_modified,
"fetched_at": time.time(),
"last_validated": time.time(),
}

_cache_stats["misses"] += 1
return data
Expand Down Expand Up @@ -1444,13 +1450,14 @@ def _gh_get(url: str) -> Dict:
last_modified = r.headers.get("Last-Modified")

# Update disk cache with new data and headers
_disk_cache[url] = {
"data": data,
"etag": etag,
"last_modified": last_modified,
"fetched_at": time.time(),
"last_validated": time.time(),
}
if not _no_cache:
_disk_cache[url] = {
"data": data,
"etag": etag,
"last_modified": last_modified,
"fetched_at": time.time(),
"last_validated": time.time(),
}

_cache_stats["misses"] += 1

Expand Down Expand Up @@ -2421,6 +2428,9 @@ def parse_args() -> argparse.Namespace:
parser.add_argument(
"--clear-cache", action="store_true", help="Clear the persistent blocklist cache and exit"
)
parser.add_argument(
"--no-cache", action="store_true", help="Disable the persistent blocklist cache for this run"
)
return parser.parse_args()


Expand All @@ -2438,7 +2448,7 @@ def main():
check_env_permissions()
load_dotenv()

global TOKEN
global TOKEN, _no_cache
# Re-initialize TOKEN to pick up values from .env (since load_dotenv was delayed)
TOKEN = _clean_env_kv(os.getenv("TOKEN"), "TOKEN")

Expand All @@ -2449,9 +2459,14 @@ def main():
# argument errors do not perform unnecessary filesystem I/O or logging.
load_disk_cache()

# Handle --no-cache: disable disk cache for this run
if args.no_cache:
_no_cache = True
_disk_cache.clear()
log.info("Persistent disk cache disabled for this run (--no-cache)")

# Handle --clear-cache: delete cache file and exit immediately
if args.clear_cache:
global _disk_cache
cache_file = get_cache_dir() / "blocklists.json"
if cache_file.exists():
try:
Expand Down
61 changes: 61 additions & 0 deletions tests/test_disk_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ def setUp(self):
main.validate_folder_url.cache_clear()
# Reset stats
main._cache_stats = {"hits": 0, "misses": 0, "validations": 0, "errors": 0}
# Ensure no-cache flag is off for each test
main._no_cache = False

# Create temporary cache directory for testing
self.temp_dir = tempfile.mkdtemp()
Expand All @@ -45,6 +47,8 @@ def tearDown(self):
main._disk_cache.clear()
main.validate_folder_url.cache_clear()
main._cache_stats = {"hits": 0, "misses": 0, "validations": 0, "errors": 0}
# Restore no-cache flag
main._no_cache = False

# Clean up temp directory
import shutil
Expand Down Expand Up @@ -401,6 +405,63 @@ def test_clear_cache_deletes_file(self):
# In-memory disk cache should be empty
self.assertEqual(len(main._disk_cache), 0)

def test_no_cache_skips_disk_cache_write(self):
"""Test that --no-cache prevents writing to disk cache."""
test_url = "https://example.com/no-cache-test.json"
test_data = {"group": {"group": "Test"}, "domains": ["example.com"]}

original_no_cache = main._no_cache
try:
main._no_cache = True

def mock_stream(method, url, headers=None):
mock_response = MagicMock()
mock_response.status_code = 200
mock_response.raise_for_status = MagicMock()
mock_response.headers = {
"Content-Type": "application/json",
"ETag": "etag999",
}
json_bytes = json.dumps(test_data).encode()
mock_response.iter_bytes = MagicMock(return_value=[json_bytes])
mock_response.__enter__ = MagicMock(return_value=mock_response)
mock_response.__exit__ = MagicMock(return_value=False)
return mock_response

with patch.object(main._gh, 'stream', side_effect=mock_stream):
result = main._gh_get(test_url)

# Data should be returned correctly
self.assertEqual(result, test_data)
# Disk cache should NOT have been updated
self.assertNotIn(test_url, main._disk_cache)
finally:
main._no_cache = original_no_cache

def test_no_cache_skips_save(self):
"""Test that save_disk_cache() is a no-op when --no-cache is active."""
cache_dir = Path(self.temp_dir)
main.get_cache_dir = lambda: cache_dir

main._disk_cache["https://example.com/test.json"] = {
"data": {"group": {"group": "Test"}, "domains": ["test.com"]},
"etag": "xyz",
"last_modified": None,
"fetched_at": 1234567890.0,
"last_validated": 1234567890.0,
}

original_no_cache = main._no_cache
try:
main._no_cache = True
main.save_disk_cache()
finally:
main._no_cache = original_no_cache

# Cache file should NOT have been created
cache_file = cache_dir / "blocklists.json"
self.assertFalse(cache_file.exists())


# Allow running this test module directly, e.g. `python tests/test_disk_cache.py`.
if __name__ == '__main__':
    unittest.main()
Loading