Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 27 additions & 8 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,7 @@
BATCH_KEYS = [f"hostnames[{i}]" for i in range(BATCH_SIZE)]
MAX_RETRIES = 10
RETRY_DELAY = 1
MAX_RETRY_DELAY = 60.0 # Maximum retry delay in seconds (caps exponential growth)
FOLDER_CREATION_DELAY = 5 # <--- CHANGED: Increased from 2 to 5 for patience
MAX_RESPONSE_SIZE = 10 * 1024 * 1024 # 10MB limit

Expand Down Expand Up @@ -795,7 +796,7 @@
# This prevents other users from reading cached data
if platform.system() != "Windows":
cache_dir.chmod(0o700)

Check warning

Code scanning / Pylint (reported by Codacy)

Trailing whitespace Warning

Trailing whitespace
cache_file = cache_dir / "blocklists.json"

# Write atomically: write to temp file, then rename
Expand All @@ -810,7 +811,7 @@

# Atomic rename (POSIX guarantees atomicity)
temp_file.replace(cache_file)

Check warning

Code scanning / Pylint (reported by Codacy)

Trailing whitespace Warning

Trailing whitespace
log.debug(f"Saved {len(_disk_cache):,} entries to disk cache")

except Exception as e:
Expand Down Expand Up @@ -985,7 +986,7 @@
def is_valid_profile_id_format(profile_id: str) -> bool:
"""
Checks if a profile ID matches the expected format.

Check warning

Code scanning / Pylintpython3 (reported by Codacy)

Trailing whitespace Warning

Trailing whitespace
Validates against PROFILE_ID_PATTERN and enforces maximum length of 64 characters.
"""
if not PROFILE_ID_PATTERN.match(profile_id):
Expand Down Expand Up @@ -1151,9 +1152,31 @@
)


def retry_with_jitter(
    attempt: int,
    base_delay: float = 1.0,
    max_delay: float = MAX_RETRY_DELAY,
) -> float:
    """Calculate retry delay with exponential backoff and full jitter.

    Full jitter draws uniformly from [0, min(base_delay * 2^attempt, max_delay))
    to spread retries evenly across the full window and prevent thundering herd.

    Args:
        attempt: Retry attempt number (0-indexed)
        base_delay: Base delay in seconds (default: 1.0)
        max_delay: Maximum delay cap in seconds (default: MAX_RETRY_DELAY)

    Returns:
        Delay in seconds with full jitter applied
    """
    try:
        exponential_delay = min(base_delay * (2 ** attempt), max_delay)
    except OverflowError:
        # 2 ** attempt is an arbitrary-precision int; multiplying it by a float
        # fails once it no longer fits in a double (attempt >= 1024). For a
        # positive base delay the uncapped value is far beyond any cap, so the
        # cap is the correct result. Other inputs keep the original error.
        if not base_delay > 0:
            raise
        exponential_delay = max_delay
    # random.random() is in [0.0, 1.0), so the result stays strictly below the cap.
    # Jitter only spreads load; it is not security-sensitive.
    return exponential_delay * random.random()  # nosec B311

Check notice

Code scanning / Bandit

Standard pseudo-random generators are not suitable for security/cryptographic purposes. Note

Standard pseudo-random generators are not suitable for security/cryptographic purposes.

Check notice

Code scanning / Bandit (reported by Codacy)

Standard pseudo-random generators are not suitable for security/cryptographic purposes. Note

Standard pseudo-random generators are not suitable for security/cryptographic purposes.


def _retry_request(request_func, max_retries=MAX_RETRIES, delay=RETRY_DELAY):
"""
Retry request with exponential backoff.
Retry request with exponential backoff and full jitter.

RETRY STRATEGY:
- Uses retry_with_jitter() for full jitter: delay drawn from
[0, min(delay * 2^attempt, MAX_RETRY_DELAY))

Check warning

Code scanning / Pylintpython3 (reported by Codacy)

Line too long (107/100) Warning

Line too long (107/100)

Check warning

Code scanning / Pylint (reported by Codacy)

Line too long (107/100) Warning

Line too long (107/100)
- Full jitter prevents thundering herd when multiple clients fail simultaneously

RATE LIMIT HANDLING:
- Parses X-RateLimit-* headers from all API responses
Expand Down Expand Up @@ -1219,13 +1242,9 @@
log.debug(f"Response content: {sanitize_for_log(e.response.text)}")
raise

# Exponential backoff with jitter to prevent thundering herd
# Base delay: delay * (2^attempt) gives exponential growth
# Jitter: multiply by random factor in range [0.5, 1.5] to spread retries
# This prevents multiple failed requests from retrying simultaneously
base_wait = delay * (2**attempt)
jitter_factor = 0.5 + random.random() # Random value between 0.5 and 1.5
wait_time = base_wait * jitter_factor
# Full jitter exponential backoff: delay drawn from [0, min(delay * 2^attempt, MAX_RETRY_DELAY))
# Spreads retries evenly across the full window to prevent thundering herd
wait_time = retry_with_jitter(attempt, base_delay=delay)

log.warning(
f"Request failed (attempt {attempt + 1}/{max_retries}): "
Expand Down Expand Up @@ -1418,7 +1437,7 @@
"data": data,
"etag": etag,
"last_modified": last_modified,
"fetched_at": time.time(),

Check warning

Code scanning / Pylintpython3 (reported by Codacy)

Trailing whitespace Warning

Trailing whitespace
"last_validated": time.time(),
}

Expand Down
6 changes: 3 additions & 3 deletions tests/test_rate_limit.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ def test_failed_request_parses_headers(self):
with main._rate_limit_lock:
assert main._rate_limit_info["remaining"] == 50

@patch('random.random', return_value=0.5)
@patch('random.random', return_value=1.0)
def test_429_without_retry_after_uses_exponential_backoff(self, mock_random):
"""Test that 429 without Retry-After falls back to exponential backoff."""
mock_request = MagicMock()
Expand All @@ -260,8 +260,8 @@ def test_429_without_retry_after_uses_exponential_backoff(self, mock_random):

request_func = MagicMock(side_effect=[error, error, success_response])

# With delay=1, backoff should be: 1s, 2s
# Total wait should be >= 3 seconds (assuming jitter factor 1.0)
# With delay=1 and random.random()=1.0 (full jitter), backoff is: 1s, 2s
# Total wait should be >= 3 seconds
start_time = time.time()
result = main._retry_request(request_func, max_retries=3, delay=1)
elapsed = time.time() - start_time
Expand Down
23 changes: 11 additions & 12 deletions tests/test_retry_jitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
"Jitter should produce different wait times across runs"

def test_jitter_stays_within_bounds(self):
"""Verify jitter keeps delays within expected range (0.5x to 1.5x base)."""
"""Verify jitter keeps delays within expected range (0 to 1x base, full jitter)."""
request_func = Mock(side_effect=httpx.TimeoutException("Connection timeout"))

with patch('time.sleep') as mock_sleep:
Expand All @@ -66,11 +66,11 @@

wait_times = [call.args[0] for call in mock_sleep.call_args_list]

# Verify each wait time is within jitter bounds
# Verify each wait time is within full-jitter bounds [0, min(base, MAX_RETRY_DELAY)]
for attempt, wait_time in enumerate(wait_times):
base_delay = 1 * (2 ** attempt) # Exponential backoff formula
min_expected = base_delay * 0.5
max_expected = base_delay * 1.5
min_expected = 0.0 # Full jitter can produce 0
max_expected = min(base_delay, main.MAX_RETRY_DELAY)

assert min_expected <= wait_time <= max_expected, \
f"Attempt {attempt}: wait time {wait_time:.2f}s outside jitter bounds " \
Expand All @@ -80,13 +80,12 @@
"""Verify that despite jitter, the exponential base scaling is correct.

We fix random.random() to a constant so that jitter becomes deterministic,
and then assert that each delay matches delay * 2**attempt * jitter_factor.
and then assert that each delay matches delay * 2**attempt * random_factor.
"""
request_func = Mock(side_effect=httpx.TimeoutException("Connection timeout"))

# Use a fixed random.random() so jitter multiplier is stable across attempts.
# Assuming jitter is implemented as: base_delay * (0.5 + random.random()),
# a fixed return_value of 0.5 yields a jitter_factor of 1.0.
# Full jitter is implemented as: min(base_delay * 2**attempt, MAX_RETRY_DELAY) * random.random()

Check warning

Code scanning / Pylintpython3 (reported by Codacy)

Line too long (104/100) Warning test

Line too long (104/100)

Check warning

Code scanning / Pylint (reported by Codacy)

Line too long (104/100) Warning test

Line too long (104/100)
# With random.random() fixed at 0.5, each delay = exponential_delay * 0.5.
with patch('time.sleep') as mock_sleep, patch('random.random', return_value=0.5):
try:
main._retry_request(request_func, max_retries=5, delay=1)
Expand All @@ -95,10 +94,10 @@

wait_times = [call.args[0] for call in mock_sleep.call_args_list]

jitter_factor = 0.5 + 0.5 # Matches the patched random.random() above
for attempt, wait_time in enumerate(wait_times):
base_delay = 1 * (2 ** attempt)
expected_delay = base_delay * jitter_factor
exponential_delay = min(base_delay, main.MAX_RETRY_DELAY)
expected_delay = exponential_delay * 0.5 # random.random() fixed at 0.5
# Use approx to avoid brittle float equality while still being strict.
assert wait_time == pytest.approx(expected_delay), (
f"Attempt {attempt}: expected {expected_delay:.2f}s, "
Expand Down Expand Up @@ -143,8 +142,8 @@
wait_times = [call.args[0] for call in mock_sleep.call_args_list]
assert len(wait_times) == 2

# First retry: base=1, range=[0.5, 1.5]
assert 0.5 <= wait_times[0] <= 1.5
# First retry: full jitter, base=1, range=[0, 1.0) since random.random() < 1.0
assert 0.0 <= wait_times[0] < 1.0

Check notice

Code scanning / Bandit

Use of assert detected. The enclosed code will be removed when compiling to optimised byte code. Note test

Use of assert detected. The enclosed code will be removed when compiling to optimised byte code.

Check notice

Code scanning / Bandit (reported by Codacy)

Use of assert detected. The enclosed code will be removed when compiling to optimised byte code. Note test

Use of assert detected. The enclosed code will be removed when compiling to optimised byte code.

def test_successful_retry_after_transient_failure(self):
"""Verify successful request after transient failures works correctly."""
Expand Down
3 changes: 0 additions & 3 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading