From 10c28175e7ffda48e769118433ab16deddc480ea Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Wed, 14 Jan 2026 14:56:18 +0000 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Optimize=20concurrency=20an?= =?UTF-8?q?d=20deduplicate=20rules?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Minimized critical section in `get_all_existing_rules` to improve parallelism. - Used set difference in `push_rules` to deduplicate payload and reduce API calls. - Updated journal. --- .jules/bolt.md | 4 ++++ main.py | 17 ++++++++++------- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/.jules/bolt.md b/.jules/bolt.md index c717282b..b056144d 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -19,3 +19,7 @@ ## 2024-05-24 - Avoid Copying Large Sets for Membership Checks **Learning:** Copying a large set (e.g. 100k items) to create a snapshot for read-only membership checks is expensive O(N) and unnecessary. Python's set membership testing is thread-safe. **Action:** When filtering data against a shared large set, iterate and check membership directly instead of snapshotting, unless strict transactional consistency across the entire iteration is required. + +## 2024-05-24 - Minimize Critical Sections +**Learning:** Holding a lock while performing O(N) iteration (like adding items to a set one by one) serializes parallel workers, negating the benefit of concurrency. Preparing data in a thread-local structure and then merging it into the shared structure with a single operation (like `set.update`) keeps the critical section small and maximizes parallelism. +**Action:** When using locks, perform as much work as possible (data preparation, parsing) outside the lock, and only acquire it for the final merge/update. 
diff --git a/main.py b/main.py index e6aabc57..493a0785 100644 --- a/main.py +++ b/main.py @@ -333,10 +333,13 @@ def _fetch_folder_rules(folder_id: str): try: data = _api_get(client, f"{API_BASE}/{profile_id}/rules/{folder_id}").json() folder_rules = data.get("body", {}).get("rules", []) - with all_rules_lock: - for rule in folder_rules: - if rule.get("PK"): - all_rules.add(rule["PK"]) + + # Optimization: Extract PKs locally to minimize lock contention + local_pks = {rule["PK"] for rule in folder_rules if rule.get("PK")} + + if local_pks: + with all_rules_lock: + all_rules.update(local_pks) except httpx.HTTPError: pass except Exception as e: @@ -477,9 +480,9 @@ def push_rules( original_count = len(hostnames) - # Optimization: Check directly against existing_rules to avoid O(N) copy. - # Membership testing in set is thread-safe, and we don't need a strict snapshot for deduplication. - filtered_hostnames = [h for h in hostnames if h not in existing_rules] + # Optimization: Deduplicate source hostnames and check against existing_rules. + # Using set difference is cleaner, handles source duplicates, and reduces API calls. Note: the resulting order is arbitrary (the input order of hostnames is not preserved), and duplicates_count below now also counts duplicates within hostnames itself, not only entries already in existing_rules. + filtered_hostnames = list(set(hostnames) - existing_rules) duplicates_count = original_count - len(filtered_hostnames) if duplicates_count > 0: