1 change: 1 addition & 0 deletions main.py
@@ -937,6 +937,7 @@
f"Failed to resolve/validate domain {sanitize_for_log(hostname)}: {sanitize_for_log(e)}"
)
return False
return True


@lru_cache(maxsize=128)
@@ -1233,207 +1234,207 @@
# Don't retry other 4xx errors (auth failures, bad requests, etc.)
if 400 <= code < 500 and code != 429:
if hasattr(e, "response") and e.response is not None:
log.debug(
f"Response content: {sanitize_for_log(e.response.text)}"
)
raise

if attempt == max_retries - 1:
if hasattr(e, "response") and e.response is not None:
log.debug(f"Response content: {sanitize_for_log(e.response.text)}")
raise

# Full jitter exponential backoff: delay drawn from [0, min(delay * 2^attempt, MAX_RETRY_DELAY)]
# Spreads retries evenly across the full window to prevent thundering herd
wait_time = retry_with_jitter(attempt, base_delay=delay)
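            # Minimal sketch of what retry_with_jitter is assumed to compute, per
            # the full-jitter formula described above:
            #   capped = min(base_delay * (2 ** attempt), MAX_RETRY_DELAY)
            #   wait_time = random.uniform(0, capped)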

log.warning(
f"Request failed (attempt {attempt + 1}/{max_retries}): "
f"{sanitize_for_log(e)}. Retrying in {wait_time:.2f}s..."
)
time.sleep(wait_time)


def _gh_get(url: str) -> Dict:
"""
Fetch blocklist data from URL with HTTP cache header support.

CACHING STRATEGY:
1. Check in-memory cache first (fastest)
2. Check disk cache and send conditional request (If-None-Match/If-Modified-Since)
3. If 304 Not Modified: reuse cached data (cache validation)
4. If 200 OK: download new data and update cache

SECURITY: Validates data structure regardless of cache source
"""
global _cache_stats, _api_stats

    # First check: quick in-memory lookup without holding the lock for long
with _cache_lock:
if url in _cache:
_cache_stats["hits"] += 1
return _cache[url]

# Track that we're about to make a blocklist fetch
with _cache_lock:
_api_stats["blocklist_fetches"] += 1

# Check disk cache for TTL-based hit or conditional request headers
headers = {}
cached_entry = _disk_cache.get(url)
if cached_entry:
last_validated = cached_entry.get("last_validated", 0)
if time.time() - last_validated < CACHE_TTL_SECONDS:
# Within TTL: return cached data directly without any HTTP request
data = cached_entry["data"]
with _cache_lock:
_cache[url] = data
_cache_stats["hits"] += 1
log.debug(f"Disk cache hit (within TTL) for {sanitize_for_log(url)}")
return data
# Beyond TTL: send conditional request using cached ETag/Last-Modified
# Server returns 304 if content hasn't changed
# NOTE: Cached values may be None if the server didn't send these headers.
# httpx requires header values to be str/bytes, so we only add headers
# when the cached value is truthy.
etag = cached_entry.get("etag")
if etag:
headers["If-None-Match"] = etag
last_modified = cached_entry.get("last_modified")
if last_modified:
headers["If-Modified-Since"] = last_modified

# Fetch data (or validate cache)
# Explicitly let HTTPError propagate (no need to catch just to re-raise)
try:
with _gh.stream("GET", url, headers=headers) as r:
# Handle 304 Not Modified - cached data is still valid
if r.status_code == 304:
if cached_entry and "data" in cached_entry:
log.debug(f"Cache validated (304) for {sanitize_for_log(url)}")
_cache_stats["validations"] += 1

# Update in-memory cache with validated data
data = cached_entry["data"]
with _cache_lock:
_cache[url] = data

# Update timestamp in disk cache to track last validation
cached_entry["last_validated"] = time.time()
return data
else:
# Shouldn't happen, but handle gracefully
log.warning(f"Got 304 but no cached data for {sanitize_for_log(url)}, re-fetching")
_cache_stats["errors"] += 1
# Close the original streaming response before retrying
r.close()
# Retry without conditional headers using streaming again so that
# MAX_RESPONSE_SIZE and related protections still apply.
headers = {}
with _gh.stream("GET", url, headers=headers) as r_retry:
r_retry.raise_for_status()

# 1. Check Content-Length header if present
cl = r_retry.headers.get("Content-Length")
if cl:
try:
if int(cl) > MAX_RESPONSE_SIZE:
raise ValueError(
f"Response too large from {sanitize_for_log(url)} "
f"({int(cl) / (1024 * 1024):.2f} MB)"
)
except ValueError as e:
# Only catch the conversion error, let the size error propagate
if "Response too large" in str(e):
raise e
log.warning(
f"Malformed Content-Length header from {sanitize_for_log(url)}: {cl!r}. "
"Falling back to streaming size check."
)

# 2. Stream and check actual size
chunks = []
current_size = 0
for chunk in r_retry.iter_bytes():
current_size += len(chunk)
if current_size > MAX_RESPONSE_SIZE:
raise ValueError(
f"Response too large from {sanitize_for_log(url)} "
f"(> {MAX_RESPONSE_SIZE / (1024 * 1024):.2f} MB)"
)
chunks.append(chunk)

try:
data = json.loads(b"".join(chunks))
except json.JSONDecodeError as e:
raise ValueError(
f"Invalid JSON response from {sanitize_for_log(url)}"
) from e

# Store cache headers for future conditional requests
# ETag is preferred over Last-Modified (more reliable)
etag = r_retry.headers.get("ETag")
last_modified = r_retry.headers.get("Last-Modified")

# Update disk cache with new data and headers
_disk_cache[url] = {
"data": data,
"etag": etag,
"last_modified": last_modified,
"fetched_at": time.time(),
"last_validated": time.time(),
}

_cache_stats["misses"] += 1
return data

r.raise_for_status()

# Security: Validate Content-Type
# Prevent processing of unexpected content types (e.g., HTML/XML from captive portals or attack sites)
content_type = r.headers.get("Content-Type", "").lower()
allowed_types = ["application/json", "text/json", "text/plain"]
if not any(t in content_type for t in allowed_types):
raise ValueError(
f"Invalid Content-Type from {sanitize_for_log(url)}: {content_type}. "
f"Expected one of: {', '.join(allowed_types)}"
)
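            # e.g. a captive portal answering "text/html; charset=utf-8" is rejected
            # here, while "application/json; charset=utf-8" passes the substring check.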

# 1. Check Content-Length header if present
cl = r.headers.get("Content-Length")
if cl:
try:
if int(cl) > MAX_RESPONSE_SIZE:
raise ValueError(
f"Response too large from {sanitize_for_log(url)} "
f"({int(cl) / (1024 * 1024):.2f} MB)"
)
except ValueError as e:
# Only catch the conversion error, let the size error propagate
if "Response too large" in str(e):
raise e
log.warning(
f"Malformed Content-Length header from {sanitize_for_log(url)}: {cl!r}. "
"Falling back to streaming size check."
)

# 2. Stream and check actual size
chunks = []
current_size = 0
# Optimization: Use 16KB chunks to reduce loop overhead/appends for large files
for chunk in r.iter_bytes(chunk_size=16 * 1024):
current_size += len(chunk)
if current_size > MAX_RESPONSE_SIZE:
raise ValueError(
f"Response too large from {sanitize_for_log(url)} "
f"(> {MAX_RESPONSE_SIZE / (1024 * 1024):.2f} MB)"
)
chunks.append(chunk)

try:
data = json.loads(b"".join(chunks))
except json.JSONDecodeError as e:
                        raise ValueError(
                            f"Invalid JSON response from {sanitize_for_log(url)}"
                        ) from e

@@ -1499,125 +1500,125 @@
f"{Colors.FAIL} Please verify the Profile ID from your Control D Dashboard URL.{Colors.ENDC}"
)
else:
log.error(f"API Access Check Failed ({code}): {sanitize_for_log(e)}")
return False
except httpx.RequestError as e:
log.error(f"Network Error during access check: {sanitize_for_log(e)}")
return False


def list_existing_folders(client: httpx.Client, profile_id: str) -> Dict[str, str]:
"""
Retrieves all existing folders (groups) for a given profile.

Returns a dictionary mapping folder names to their IDs.
Returns empty dict on error.
"""
try:
data = _api_get(client, f"{API_BASE}/{profile_id}/groups").json()
folders = data.get("body", {}).get("groups", [])
result = {}
for f in folders:
if not f.get("group") or not f.get("PK"):
continue
pk = str(f["PK"])
if validate_folder_id(pk):
result[f["group"].strip()] = pk
return result
except (httpx.HTTPError, KeyError) as e:
log.error(f"Failed to list existing folders: {sanitize_for_log(e)}")
return {}


def verify_access_and_get_folders(
client: httpx.Client, profile_id: str
) -> Optional[Dict[str, str]]:
"""Combine access check and folder listing into a single API request.

Returns:
Dict of {folder_name: folder_id} on success.
None if access is denied or the request fails after retries.
"""
url = f"{API_BASE}/{profile_id}/groups"

for attempt in range(MAX_RETRIES):
try:
resp = client.get(url)
resp.raise_for_status()

try:
data = resp.json()

                # Ensure we got the expected top-level JSON structure.
                # We defensively validate types here so that unexpected but valid
                # JSON (e.g., a list or a scalar) doesn't raise AttributeError/
                # TypeError and make the operation fail unexpectedly.
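                # Expected shape (illustrative, reconstructed from the checks below):
                #   {"body": {"groups": [{"PK": 123456, "group": "Ads", ...}, ...]}}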
if not isinstance(data, dict):
log.error(
"Failed to parse folders data: expected JSON object at top level, "
f"got {type(data).__name__}"
)
return None

body = data.get("body")
if not isinstance(body, dict):
log.error(
"Failed to parse folders data: expected 'body' to be an object, "
f"got {type(body).__name__ if body is not None else 'None'}"
)
return None

folders = body.get("groups", [])
if not isinstance(folders, list):
log.error(
"Failed to parse folders data: expected 'body[\"groups\"]' to be a list, "
f"got {type(folders).__name__}"
)
return None

# Only process entries that are dicts and have the required keys.
result: Dict[str, str] = {}
for f in folders:
if not isinstance(f, dict):
# Skip non-dict entries instead of crashing; this protects
# against partial data corruption or unexpected API changes.
continue
name = f.get("group")
pk = f.get("PK")
# Skip entries with empty or None values for required fields
if not name or not pk:
continue

pk_str = str(pk)
if not validate_folder_id(pk_str):
continue

result[str(name).strip()] = pk_str

return result
except (ValueError, TypeError, AttributeError) as err:
# As a final safeguard, catch any remaining parsing/shape errors so
# that a malformed response cannot crash the caller.
log.error("Failed to parse folders data: %s", sanitize_for_log(err))
return None

except httpx.HTTPStatusError as e:
code = e.response.status_code
if code in (401, 403, 404):
if code == 401:
log.critical(
f"{Colors.FAIL}❌ Authentication Failed: The API Token is invalid.{Colors.ENDC}"
)
log.critical(
f"{Colors.FAIL} Please check your token at: https://controld.com/account/manage-account{Colors.ENDC}"
)
elif code == 403:
log.critical(
"%s🚫 Access Denied: Token lacks permission for "
"Profile %s.%s",
Colors.FAIL,
sanitize_for_log(profile_id),
                        Colors.ENDC,
                    )
elif code == 404:
log.critical(
@@ -1787,273 +1788,273 @@
# Restore progress bar after warning
render_progress_bar(completed, total, "Warming up cache", prefix="⏳")

if USE_COLORS:
sys.stderr.write(
f"\r\033[K{Colors.GREEN}✅ Warming up cache: Done!{Colors.ENDC}\n"
)
sys.stderr.flush()


def delete_folder(
client: httpx.Client, profile_id: str, name: str, folder_id: str
) -> bool:
"""
Deletes a folder (group) from a Control D profile.

Returns True on success, False on failure. Logs detailed error information.
"""
try:
_api_delete(client, f"{API_BASE}/{profile_id}/groups/{folder_id}")
log.info(
"Deleted folder %s (ID %s)",
sanitize_for_log(name),
sanitize_for_log(folder_id),
)
return True
except httpx.HTTPError as e:
log.error(
f"Failed to delete folder {sanitize_for_log(name)} (ID {sanitize_for_log(folder_id)}): {sanitize_for_log(e)}"
)
return False


def create_folder(
client: httpx.Client, profile_id: str, name: str, do: int, status: int
) -> Optional[str]:
"""
Create a new folder and return its ID.
Attempts to read ID from response first, then falls back to polling.
"""
try:
# 1. Send the Create Request
response = _api_post(
client,
f"{API_BASE}/{profile_id}/groups",
data={"name": name, "do": do, "status": status},
)

# OPTIMIZATION: Try to grab ID directly from response to avoid the wait loop
try:
resp_data = response.json()
body = resp_data.get("body", {})

# Check if it returned a single group object
if isinstance(body, dict) and "group" in body and "PK" in body["group"]:
pk = str(body["group"]["PK"])
if not validate_folder_id(pk, log_errors=False):
log.error(f"API returned invalid folder ID: {sanitize_for_log(pk)}")
return None
log.info(
"Created folder %s (ID %s) [Direct]",
sanitize_for_log(name),
sanitize_for_log(pk),
)
return pk

# Check if it returned a list containing our group
if isinstance(body, dict) and "groups" in body:
for grp in body["groups"]:
if grp.get("group") == name:
pk = str(grp["PK"])
if not validate_folder_id(pk, log_errors=False):
log.error(f"API returned invalid folder ID: {sanitize_for_log(pk)}")
continue
log.info(
"Created folder %s (ID %s) [Direct]",
sanitize_for_log(name),
sanitize_for_log(pk),
)
return pk
except Exception as e:
log.debug(
f"Could not extract ID from POST response: " f"{sanitize_for_log(e)}"
)

        # 2. Fallback: poll for the new folder (robust retry logic)
for attempt in range(MAX_RETRIES + 1):
try:
data = _api_get(client, f"{API_BASE}/{profile_id}/groups").json()
groups = data.get("body", {}).get("groups", [])

for grp in groups:
if grp["group"].strip() == name.strip():
pk = str(grp["PK"])
                    if not validate_folder_id(pk, log_errors=False):
                        log.error(f"API returned invalid folder ID: {sanitize_for_log(pk)}")
return None
log.info(
"Created folder %s (ID %s) [Polled]",
sanitize_for_log(name),
sanitize_for_log(pk),
)
return pk
except Exception as e:
log.warning(
f"Error fetching groups on attempt {attempt}: {sanitize_for_log(e)}"
)

if attempt < MAX_RETRIES:
wait_time = FOLDER_CREATION_DELAY * (attempt + 1)
log.info(
f"Folder '{sanitize_for_log(name)}' not found yet. Retrying in {wait_time}s..."
)
time.sleep(wait_time)

log.error(
f"Folder {sanitize_for_log(name)} was not found after creation and retries."
)
return None

except (httpx.HTTPError, KeyError) as e:
log.error(
f"Failed to create folder {sanitize_for_log(name)}: {sanitize_for_log(e)}"
)
return None


def push_rules(
profile_id: str,
folder_name: str,
folder_id: str,
do: int,
status: int,
hostnames: List[str],
existing_rules: Set[str],
client: httpx.Client,
batch_executor: Optional[concurrent.futures.Executor] = None,
) -> bool:
"""
Pushes rules to a folder in batches, filtering duplicates and invalid rules.

Deduplicates input, validates rules against RULE_PATTERN, and sends batches
in parallel for optimal performance. Updates existing_rules set with newly
added rules. Returns True if all batches succeed.
"""
if not hostnames:
log.info("Folder %s - no rules to push", sanitize_for_log(folder_name))
return True

original_count = len(hostnames)

# Optimization 1: Deduplicate input list while preserving order using dict.fromkeys()
# This is significantly faster than using a 'seen' set in the loop for large lists.
# It also naturally deduplicates invalid rules, preventing log spam.
unique_hostnames = dict.fromkeys(hostnames)
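    # e.g. list(dict.fromkeys(["a.com", "b.com", "a.com"])) == ["a.com", "b.com"];
    # dicts preserve insertion order (Python 3.7+), so order survives the dedup.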

filtered_hostnames = []
skipped_unsafe = 0

# Optimization 2: Inline regex match and check existence
# Using a local reference to the match method avoids function call overhead
# in the hot loop. This provides a measurable speedup for large lists.
match_rule = RULE_PATTERN.match

for h in unique_hostnames:
if h in existing_rules:
continue

if not match_rule(h):
log.warning(
f"Skipping unsafe rule in {sanitize_for_log(folder_name)}: {sanitize_for_log(h)}"
)
skipped_unsafe += 1
continue

filtered_hostnames.append(h)

if skipped_unsafe > 0:
log.warning(
f"Folder {sanitize_for_log(folder_name)}: skipped {skipped_unsafe} unsafe rules"
)

duplicates_count = original_count - len(filtered_hostnames) - skipped_unsafe

if duplicates_count > 0:
log.info(
f"Folder {sanitize_for_log(folder_name)}: skipping {duplicates_count} duplicate rules"
)

if not filtered_hostnames:
log.info(
f"Folder {sanitize_for_log(folder_name)} - no new rules to push after filtering duplicates"
)
return True

successful_batches = 0

# Prepare batches
batches = []
for start in range(0, len(filtered_hostnames), BATCH_SIZE):
batches.append(filtered_hostnames[start : start + BATCH_SIZE])

total_batches = len(batches)

# Optimization: Hoist loop invariants to avoid redundant computations
str_do = str(do)
str_status = str(status)
str_group = str(folder_id)
sanitized_folder_name = sanitize_for_log(folder_name)
progress_label = f"Folder {sanitized_folder_name}"

def process_batch(batch_idx: int, batch_data: List[str]) -> Optional[List[str]]:
"""Processes a single batch of rules by sending API request."""
data = {
"do": str_do,
"status": str_status,
"group": str_group,
}
# Optimization: Use pre-calculated keys and zip for faster dict update
        # strict=False is intentional: batch_data may be shorter than BATCH_KEYS for the final batch
data.update(zip(BATCH_KEYS, batch_data, strict=False))
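        # Illustrative: assuming BATCH_KEYS is a pre-built tuple of form-field
        # names (e.g. something like ("hostnames[0]", "hostnames[1]", ...)), zip
        # pairs each field name with one hostname so a whole batch travels in a
        # single form POST.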

try:
_api_post_form(client, f"{API_BASE}/{profile_id}/rules", data=data)
if not USE_COLORS:
log.info(
"Folder %s – batch %d: added %d rules",
sanitized_folder_name,
batch_idx,
len(batch_data),
)
return batch_data
except httpx.HTTPError as e:
if USE_COLORS:
sys.stderr.write("\n")
log.error(
f"Failed to push batch {batch_idx} for folder {sanitized_folder_name}: {sanitize_for_log(e)}"
)
if hasattr(e, "response") and e.response is not None:
log.debug(f"Response content: {sanitize_for_log(e.response.text)}")
return None

# Optimization 3: Parallelize batch processing
# Using 3 workers to speed up writes without hitting aggressive rate limits.
# If only 1 batch, run it synchronously to avoid ThreadPoolExecutor overhead.
if total_batches == 1:
result = process_batch(1, batches[0])
if result:
successful_batches += 1
existing_rules.update(result)

render_progress_bar(
successful_batches,
total_batches,
progress_label,
)
else:
# Use provided executor or create a local one (fallback)
if batch_executor:
executor_ctx = contextlib.nullcontext(batch_executor)
else:
executor_ctx = concurrent.futures.ThreadPoolExecutor(max_workers=3)
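        # Design note: contextlib.nullcontext yields the shared executor without
        # shutting it down on exit, whereas a locally created ThreadPoolExecutor
        # is shut down when the `with` block below exits.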

with executor_ctx as executor:
futures = {
executor.submit(process_batch, i, batch): i
for i, batch in enumerate(batches, 1)
}

            for future in concurrent.futures.as_completed(futures):
                result = future.result()
if result:
successful_batches += 1
@@ -2114,211 +2115,211 @@
if not push_rules(
profile_id,
name,
folder_id,
do,
status,
hostnames,
existing_rules,
client,
batch_executor=batch_executor,
):
folder_success = False
else:
hostnames = [r["PK"] for r in folder_data.get("rules", []) if r.get("PK")]
if not push_rules(
profile_id,
name,
folder_id,
main_do,
main_status,
hostnames,
existing_rules,
client,
batch_executor=batch_executor,
):
folder_success = False

return folder_success


# --------------------------------------------------------------------------- #
# 4. Main workflow
# --------------------------------------------------------------------------- #
def sync_profile(
profile_id: str,
folder_urls: Sequence[str],
dry_run: bool = False,
no_delete: bool = False,
plan_accumulator: Optional[List[Dict[str, Any]]] = None,
) -> bool:
"""
Synchronizes Control D folders from remote blocklist URLs.

Fetches folder data, optionally deletes existing folders with same names,
creates new folders, and pushes rules in batches. In dry-run mode, only
generates a plan without making API changes. Returns True if all folders
sync successfully.
"""
# SECURITY: Clear cached DNS validations at the start of each sync run.
# This prevents TOCTOU issues where a domain's IP could change between runs.
validate_folder_url.cache_clear()

try:
# Fetch all folder data first
folder_data_list = []

# OPTIMIZATION: Move validation inside the thread pool to parallelize DNS lookups.
# Previously, sequential validation blocked the main thread.
def _fetch_if_valid(url: str):
# Optimization: If we already have the content in cache, return it directly.
# The content was validated at the time of fetch (warm_up_cache).
# Read directly from cache to avoid calling fetch_folder_data while holding lock.
with _cache_lock:
if url in _cache:
return _cache[url]

if validate_folder_url(url):
return fetch_folder_data(url)
return None

with concurrent.futures.ThreadPoolExecutor() as executor:
future_to_url = {
executor.submit(_fetch_if_valid, url): url for url in folder_urls
}

for future in concurrent.futures.as_completed(future_to_url):
url = future_to_url[future]
try:
result = future.result()
if result:
folder_data_list.append(result)
except (httpx.HTTPError, KeyError, ValueError) as e:
log.error(
f"Failed to fetch folder data from {sanitize_for_log(url)}: {sanitize_for_log(e)}"
)
continue

if not folder_data_list:
log.error("No valid folder data found")
return False

# Build plan entries
plan_entry = {"profile": profile_id, "folders": []}
for folder_data in folder_data_list:
grp = folder_data["group"]
name = grp["group"].strip()

if "rule_groups" in folder_data:
# Multi-action format
total_rules = sum(
len(rg.get("rules", [])) for rg in folder_data["rule_groups"]
)
plan_entry["folders"].append(
{
"name": name,
"rules": total_rules,
"rule_groups": [
{
"rules": len(rg.get("rules", [])),
"action": rg.get("action", {}).get("do"),
"status": rg.get("action", {}).get("status"),
}
for rg in folder_data["rule_groups"]
],
}
)
else:
# Legacy single-action format
hostnames = [
r["PK"] for r in folder_data.get("rules", []) if r.get("PK")
]
plan_entry["folders"].append(
{
"name": name,
"rules": len(hostnames),
"action": grp.get("action", {}).get("do"),
"status": grp.get("action", {}).get("status"),
}
)

if plan_accumulator is not None:
plan_accumulator.append(plan_entry)

if dry_run:
print_plan_details(plan_entry)
log.info("Dry-run complete: no API calls were made.")
return True

# Create new folders and push rules
success_count = 0

# CRITICAL FIX: Switch to Serial Processing (1 worker)
# This prevents API rate limits and ensures stability for large folders.
max_workers = 1

# Shared executor for rate-limited operations (DELETE, push_rules batches)
# Reusing this executor prevents thread churn and enforces global rate limits.
with concurrent.futures.ThreadPoolExecutor(
max_workers=DELETE_WORKERS
) as shared_executor, _api_client() as client:
# Verify access and list existing folders in one request
existing_folders = verify_access_and_get_folders(client, profile_id)
if existing_folders is None:
return False

if not no_delete:
deletion_occurred = False

# Identify folders to delete
folders_to_delete = []
for folder_data in folder_data_list:
name = folder_data["group"]["group"].strip()
if name in existing_folders:
folders_to_delete.append((name, existing_folders[name]))

if folders_to_delete:
# Parallel delete to speed up the "clean slate" phase
                    # Use shared_executor (DELETE_WORKERS workers)
future_to_name = {
shared_executor.submit(
delete_folder, client, profile_id, name, folder_id
): name
for name, folder_id in folders_to_delete
}

for future in concurrent.futures.as_completed(future_to_name):
name = future_to_name[future]
try:
if future.result():
del existing_folders[name]
deletion_occurred = True
except Exception as exc:
# Sanitize both name and exception to prevent log injection
log.error(
"Failed to delete folder %s: %s",
sanitize_for_log(name),
sanitize_for_log(exc),
)

# CRITICAL FIX: Increased wait time for massive folders to clear
if deletion_occurred:
if not USE_COLORS:
log.info(
"Waiting 60s for deletions to propagate (prevents 'Badware Hoster' zombie state)..."
)
countdown_timer(60, "Waiting for deletions to propagate")

# Optimization: Pass the updated existing_folders to avoid redundant API call
existing_rules = get_all_existing_rules(
client, profile_id, known_folders=existing_folders
)

with concurrent.futures.ThreadPoolExecutor(
max_workers=max_workers
) as executor:
future_to_folder = {
executor.submit(
                    _process_single_folder,
                    folder_data,
profile_id,
existing_rules,
@@ -2387,364 +2388,364 @@
print(f"{line('├', '┬', '┤')}\n{row([f'{Colors.HEADER}Profile ID{Colors.ENDC}', f'{Colors.HEADER}Folders{Colors.ENDC}', f'{Colors.HEADER}Rules{Colors.ENDC}', f'{Colors.HEADER}Duration{Colors.ENDC}', f'{Colors.HEADER}Status{Colors.ENDC}'])}")
print(line("├", "┼", "┤"))

for r in sync_results:
sc = Colors.GREEN if r["success"] else Colors.FAIL
print(row([r["profile"], str(r["folders"]), f"{r['rules']:,}", f"{r['duration']:.1f}s", f"{sc}{r['status_label']}{Colors.ENDC}"]))

print(f"{line('├', '┼', '┤')}\n{row(['TOTAL', str(t_f), f'{t_r:,}', f'{t_d:.1f}s', f'{t_col}{t_status}{Colors.ENDC}'])}")
print(f"{line('└', '┴', '┘')}\n")


def parse_args() -> argparse.Namespace:
"""
Parses command-line arguments for the Control D sync tool.

Supports profile IDs, folder URLs, dry-run mode, no-delete flag,
and plan JSON output file path.
"""
parser = argparse.ArgumentParser(description="Control D folder sync")
parser.add_argument(
"--profiles", help="Comma-separated list of profile IDs", default=None
)
parser.add_argument(
"--folder-url", action="append", help="Folder JSON URL(s)", default=None
)
parser.add_argument("--dry-run", action="store_true", help="Plan only")
parser.add_argument(
"--no-delete", action="store_true", help="Do not delete existing folders"
)
parser.add_argument("--plan-json", help="Write plan to JSON file", default=None)
parser.add_argument(
"--clear-cache", action="store_true", help="Clear the persistent blocklist cache and exit"
)
return parser.parse_args()
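# Example invocation (illustrative values; "abc123" is a placeholder profile ID):
#   python main.py --profiles abc123 --folder-url https://example.com/folders/ads.json --dry-run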


def main():
"""
Main entry point for Control D Sync.

Loads environment configuration, validates inputs, warms up cache,
and syncs profiles. Supports interactive prompts for missing credentials
when running in a TTY. Prints summary statistics and exits with appropriate
status code.
"""
# SECURITY: Check .env permissions (after Colors is defined for NO_COLOR support)
# This must happen BEFORE load_dotenv() to prevent reading secrets from world-readable files
check_env_permissions()
load_dotenv()

global TOKEN
# Re-initialize TOKEN to pick up values from .env (since load_dotenv was delayed)
TOKEN = _clean_env_kv(os.getenv("TOKEN"), "TOKEN")

args = parse_args()

# Load persistent cache from disk (graceful degradation on any error)
# NOTE: Called only after successful argument parsing so that `--help` or
# argument errors do not perform unnecessary filesystem I/O or logging.
load_disk_cache()

# Handle --clear-cache: delete cache file and exit immediately
if args.clear_cache:
global _disk_cache
cache_file = get_cache_dir() / "blocklists.json"
if cache_file.exists():
try:
cache_file.unlink()
print(f"{Colors.GREEN}✓ Cleared blocklist cache: {cache_file}{Colors.ENDC}")
except OSError as e:
print(f"{Colors.FAIL}✗ Failed to clear cache: {e}{Colors.ENDC}")
exit(1)
else:
print(f"{Colors.CYAN}ℹ No cache file found, nothing to clear{Colors.ENDC}")
_disk_cache.clear()
exit(0)
profiles_arg = (
_clean_env_kv(args.profiles or os.getenv("PROFILE", ""), "PROFILE") or ""
)
profile_ids = [extract_profile_id(p) for p in profiles_arg.split(",") if p.strip()]
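    # Illustrative (assumed behavior of extract_profile_id, which accepts either
    # a bare ID or a dashboard URL per the interactive prompts below):
    #   extract_profile_id("abc123") -> "abc123"
    #   extract_profile_id("https://controld.com/dashboard/profiles/abc123/filters") -> "abc123"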
folder_urls = args.folder_url if args.folder_url else DEFAULT_FOLDER_URLS

# Interactive prompts for missing config
if not args.dry_run and sys.stdin.isatty():
if not profile_ids:
print(f"{Colors.CYAN}ℹ Profile ID is missing.{Colors.ENDC}")
print(
f"{Colors.CYAN} You can find this in the URL of your profile in the Control D Dashboard (or just paste the URL).{Colors.ENDC}"
)

def validate_profile_input(value: str) -> bool:
"""Validates one or more profile IDs from comma-separated input."""
ids = [extract_profile_id(p) for p in value.split(",") if p.strip()]
return bool(ids) and all(
validate_profile_id(pid, log_errors=False) for pid in ids
)

p_input = get_validated_input(
f"{Colors.BOLD}Enter Control D Profile ID:{Colors.ENDC} ",
validate_profile_input,
"Invalid ID(s) or URL(s). Must be a valid Profile ID or a Control D Profile URL. Comma-separate for multiple.",
)
profile_ids = [
extract_profile_id(p) for p in p_input.split(",") if p.strip()
]

if not TOKEN:
print(f"{Colors.CYAN}ℹ API Token is missing.{Colors.ENDC}")
print(
f"{Colors.CYAN} You can generate one at: https://controld.com/account/manage-account{Colors.ENDC}"
)

t_input = get_password(
f"{Colors.BOLD}Enter Control D API Token:{Colors.ENDC} ",
lambda x: len(x) > 8,
"Token seems too short. Please check your API token.",
)
TOKEN = t_input

if not profile_ids and not args.dry_run:
log.error(
"PROFILE missing and --dry-run not set. Provide --profiles or set PROFILE env."
)
exit(1)

if not TOKEN and not args.dry_run:
log.error("TOKEN missing and --dry-run not set. Set TOKEN env for live sync.")
exit(1)

warm_up_cache(folder_urls)

plan: List[Dict[str, Any]] = []
success_count = 0
sync_results = []

profile_id = "unknown"
start_time = time.time()

try:
for profile_id in profile_ids or ["dry-run-placeholder"]:
start_time = time.time()
# Skip validation for dry-run placeholder
if profile_id != "dry-run-placeholder" and not validate_profile_id(
profile_id
):
sync_results.append(
{
"profile": profile_id,
"folders": 0,
"rules": 0,
"status_label": "❌ Invalid Profile ID",
"success": False,
"duration": 0.0,
}
)
continue

log.info("Starting sync for profile %s", profile_id)
status = sync_profile(
profile_id,
folder_urls,
dry_run=args.dry_run,
no_delete=args.no_delete,
plan_accumulator=plan,
)
end_time = time.time()
duration = end_time - start_time

if status:
success_count += 1

# RESTORED STATS LOGIC: Calculate actual counts from the plan
entry = next((p for p in plan if p["profile"] == profile_id), None)
folder_count = len(entry["folders"]) if entry else 0
rule_count = sum(f["rules"] for f in entry["folders"]) if entry else 0

if args.dry_run:
status_text = "✅ Planned" if status else "❌ Failed (Dry)"
else:
status_text = "✅ Success" if status else "❌ Failed"

sync_results.append(
{
"profile": profile_id,
"folders": folder_count,
"rules": rule_count,
"status_label": status_text,
"success": status,
"duration": duration,
}
)
except KeyboardInterrupt:
duration = time.time() - start_time
print(
f"\n{Colors.WARNING}⚠️ Sync cancelled by user. Finishing current task...{Colors.ENDC}"
)

# Try to recover stats for the interrupted profile
entry = next((p for p in plan if p["profile"] == profile_id), None)
folder_count = len(entry["folders"]) if entry else 0
rule_count = sum(f["rules"] for f in entry["folders"]) if entry else 0

sync_results.append(
{
"profile": profile_id,
"folders": folder_count,
"rules": rule_count,
"status_label": "⛔ Cancelled",
"success": False,
"duration": duration,
}
)

if args.plan_json:
with open(args.plan_json, "w", encoding="utf-8") as f:
json.dump(plan, f, indent=2)
log.info("Plan written to %s", args.plan_json)

# Print Summary Table
# Determine the width for the Profile ID column (min 25)
max_profile_len = max((len(r["profile"]) for r in sync_results), default=25)
profile_col_width = max(25, max_profile_len)

# Column widths
w_profile = profile_col_width
w_folders = 10
w_rules = 12
w_duration = 10
w_status = 15

def make_col_separator(left, mid, right, horiz):
parts = [
horiz * (w_profile + 2),
horiz * (w_folders + 2),
horiz * (w_rules + 2),
horiz * (w_duration + 2),
horiz * (w_status + 2),
]
return left + mid.join(parts) + right

# Calculate table width using a dummy separator
dummy_sep = make_col_separator(Box.TL, Box.T, Box.TR, Box.H)
table_width = len(dummy_sep)
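    # Worked example: the minimum column widths above sum to 25+10+12+10+15 = 72;
    # padding adds 2 per column (+10) and the 6 border characters bring
    # table_width to 88.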

title_text = " DRY RUN SUMMARY " if args.dry_run else " SYNC SUMMARY "
title_color = Colors.CYAN if args.dry_run else Colors.HEADER

# Top Border (Single Cell for Title)
print("\n" + Box.TL + Box.H * (table_width - 2) + Box.TR)

# Title Row
visible_title = title_text.strip()
inner_width = table_width - 2
pad_left = (inner_width - len(visible_title)) // 2
pad_right = inner_width - len(visible_title) - pad_left
print(
f"{Box.V}{' ' * pad_left}{title_color}{visible_title}{Colors.ENDC}{' ' * pad_right}{Box.V}"
)

# Separator between Title and Headers (introduces columns)
print(make_col_separator(Box.L, Box.T, Box.R, Box.H))

# Header Row
print(
f"{Box.V} {Colors.BOLD}{'Profile ID':<{w_profile}}{Colors.ENDC} "
f"{Box.V} {Colors.BOLD}{'Folders':>{w_folders}}{Colors.ENDC} "
f"{Box.V} {Colors.BOLD}{'Rules':>{w_rules}}{Colors.ENDC} "
f"{Box.V} {Colors.BOLD}{'Duration':>{w_duration}}{Colors.ENDC} "
f"{Box.V} {Colors.BOLD}{'Status':<{w_status}}{Colors.ENDC} {Box.V}"
)

# Separator between Header and Body
print(make_col_separator(Box.L, Box.X, Box.R, Box.H))

# Rows
total_folders = 0
total_rules = 0
total_duration = 0.0

for res in sync_results:
# Use boolean success field for color logic
status_color = Colors.GREEN if res["success"] else Colors.FAIL

s_folders = f"{res['folders']:,}"
s_rules = f"{res['rules']:,}"
s_duration = f"{res['duration']:.1f}s"

print(
f"{Box.V} {res['profile']:<{w_profile}} "
f"{Box.V} {s_folders:>{w_folders}} "
f"{Box.V} {s_rules:>{w_rules}} "
f"{Box.V} {s_duration:>{w_duration}} "
f"{Box.V} {status_color}{res['status_label']:<{w_status}}{Colors.ENDC} {Box.V}"
)
total_folders += res["folders"]
total_rules += res["rules"]
total_duration += res["duration"]

# Separator between Body and Total
print(make_col_separator(Box.L, Box.X, Box.R, Box.H))

# Total Row
total = len(profile_ids or ["dry-run-placeholder"])
all_success = success_count == total

if args.dry_run:
if all_success:
total_status_text = "✅ Ready"
else:
total_status_text = "❌ Errors"
else:
if all_success:
total_status_text = "✅ All Good"
else:
total_status_text = "❌ Errors"

total_status_color = Colors.GREEN if all_success else Colors.FAIL

s_total_folders = f"{total_folders:,}"
s_total_rules = f"{total_rules:,}"
s_total_duration = f"{total_duration:.1f}s"

print(
f"{Box.V} {Colors.BOLD}{'TOTAL':<{w_profile}}{Colors.ENDC} "
f"{Box.V} {s_total_folders:>{w_folders}} "
f"{Box.V} {s_total_rules:>{w_rules}} "
f"{Box.V} {s_total_duration:>{w_duration}} "
f"{Box.V} {total_status_color}{total_status_text:<{w_status}}{Colors.ENDC} {Box.V}"
)
# Bottom Border
print(make_col_separator(Box.BL, Box.B, Box.BR, Box.H))

# Success Delight
if all_success and USE_COLORS and not args.dry_run:
success_msgs = [
"✨ All synced!",
"🚀 Ready for liftoff!",
"🎨 Beautifully done!",
"💎 Smooth operation!",
"🌈 Perfect harmony!",
]
print(f"\n{Colors.GREEN}{random.choice(success_msgs)}{Colors.ENDC}")

# Display API statistics
total_api_calls = _api_stats["control_d_api_calls"] + _api_stats["blocklist_fetches"]
if total_api_calls > 0:
print(f"{Colors.BOLD}API Statistics:{Colors.ENDC}")
print(f" • Control D API calls: {_api_stats['control_d_api_calls']:>7,}")
print(f" • Blocklist fetches: {_api_stats['blocklist_fetches']:>7,}")
print(f" • Total API requests: {total_api_calls:>7,}")
print()

# Display cache statistics if any cache activity occurred
if _cache_stats["hits"] + _cache_stats["misses"] + _cache_stats["validations"] > 0:
print(f"{Colors.BOLD}Cache Statistics:{Colors.ENDC}")
print(f" • Hits (in-memory): {_cache_stats['hits']:>7,}")
print(f" • Misses (downloaded): {_cache_stats['misses']:>7,}")
print(f" • Validations (304): {_cache_stats['validations']:>7,}")
if _cache_stats["errors"] > 0:
print(f" • Errors (non-fatal): {_cache_stats['errors']:>7,}")

# Calculate cache effectiveness
total_requests = _cache_stats["hits"] + _cache_stats["misses"] + _cache_stats["validations"]
if total_requests > 0: