From 10c28175e7ffda48e769118433ab16deddc480ea Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Wed, 14 Jan 2026 14:56:18 +0000
Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Optimize=20concurrency=20an?=
 =?UTF-8?q?d=20deduplicate=20rules?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Minimized critical section in `get_all_existing_rules` to improve parallelism.
- Used set difference in `push_rules` to deduplicate payload and reduce API calls.
- Updated journal.
---
 .jules/bolt.md |  4 ++++
 main.py        | 17 ++++++++++-------
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/.jules/bolt.md b/.jules/bolt.md
index c717282b..b056144d 100644
--- a/.jules/bolt.md
+++ b/.jules/bolt.md
@@ -19,3 +19,7 @@
 ## 2024-05-24 - Avoid Copying Large Sets for Membership Checks
 **Learning:** Copying a large set (e.g. 100k items) to create a snapshot for read-only membership checks is expensive O(N) and unnecessary. Python's set membership testing is thread-safe.
 **Action:** When filtering data against a shared large set, iterate and check membership directly instead of snapshotting, unless strict transactional consistency across the entire iteration is required.
+
+## 2026-01-14 - Minimize Critical Sections
+**Learning:** Holding a lock while performing O(N) iteration (like adding items to a set one by one) serializes parallel workers, negating the benefit of concurrency. Preparing data in a function-local structure (private to each worker, so it needs no lock) and then merging it into the shared structure with a single bulk operation (like `set.update`) keeps the critical section small and maximizes parallelism.
+**Action:** When using locks, perform as much work as possible (data preparation, parsing) outside the lock, and only acquire it for the final merge/update.
diff --git a/main.py b/main.py
index e6aabc57..493a0785 100644
--- a/main.py
+++ b/main.py
@@ -333,10 +333,13 @@ def _fetch_folder_rules(folder_id: str):
         try:
             data = _api_get(client, f"{API_BASE}/{profile_id}/rules/{folder_id}").json()
             folder_rules = data.get("body", {}).get("rules", [])
-            with all_rules_lock:
-                for rule in folder_rules:
-                    if rule.get("PK"):
-                        all_rules.add(rule["PK"])
+
+            # Optimization: Extract PKs locally to minimize lock contention
+            local_pks = {rule["PK"] for rule in folder_rules if rule.get("PK")}
+
+            if local_pks:
+                with all_rules_lock:
+                    all_rules.update(local_pks)
         except httpx.HTTPError:
             pass
         except Exception as e:
@@ -477,9 +480,9 @@ def push_rules(
 
     original_count = len(hostnames)
 
-    # Optimization: Check directly against existing_rules to avoid O(N) copy.
-    # Membership testing in set is thread-safe, and we don't need a strict snapshot for deduplication.
-    filtered_hostnames = [h for h in hostnames if h not in existing_rules]
+    # Optimization: Deduplicate source hostnames and check against existing_rules.
+    # Using set difference is cleaner, handles source duplicates, and reduces API calls.
+    filtered_hostnames = list(set(hostnames) - existing_rules)
     duplicates_count = original_count - len(filtered_hostnames)
 
     if duplicates_count > 0: