From 64d253a3c93c9bf9b5880a9a53da775dd63f6cec Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Wed, 27 May 2026 00:29:10 +0000
Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Optimize=20XML=20iterparse?=
 =?UTF-8?q?=20to=20prevent=20memory=20leak=20and=20speed=20up=20tag=20matc?=
 =?UTF-8?q?hing?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: xbmc4lyfe <273732874+xbmc4lyfe@users.noreply.github.com>
---
 .jules/bolt.md |  3 +++
 verify_nzb.py  | 24 +++++++++++-------------
 2 files changed, 14 insertions(+), 13 deletions(-)
 create mode 100644 .jules/bolt.md
diff --git a/.jules/bolt.md b/.jules/bolt.md
new file mode 100644
index 0000000..0290ca7
--- /dev/null
+++ b/.jules/bolt.md
@@ -0,0 +1,3 @@
+## 2026-05-27 - ElementTree.iterparse Memory Leaks and Overhead
+**Learning:** `ElementTree.iterparse` effectively leaks memory if you handle only the `end` events and call `.clear()` on each child element: the `root` element keeps a reference to every (now empty) child until the document finishes parsing, so memory still grows with document size. Also, extracting the local name from a namespace-qualified tag with a custom helper (e.g. one built on `.rsplit()`) costs an extra call and string split per element, so the overhead grows linearly with the size of the XML.
+**Action:** When parsing large XML files with `iterparse`, always request both the `start` and `end` events, capture the `root` element from the first `start` event, and call both `elem.clear()` and `root.clear()` when handling `end` events. Use simple string checks such as `tag.endswith("}segment") or tag == "segment"` to avoid the per-element string-splitting overhead.
diff --git a/verify_nzb.py b/verify_nzb.py
index 953dccd..9d5d3ce 100644
--- a/verify_nzb.py
+++ b/verify_nzb.py
@@ -80,23 +80,21 @@ class MissingArticleError(NntpError):
     """The NNTP server does not have the requested article."""
 
 
-def _local_name(tag: str) -> str:
-    if "}" in tag:
-        return tag.rsplit("}", 1)[-1]
-    return tag
-
-
 def parse_nzb_message_ids(path: str | Path) -> Iterator[str]:
     """Yield message IDs from <segment> elements in an NZB file."""
 
     with open(path, "rb") as handle:
-        for event, elem in ET.iterparse(handle, events=("end",)):
-            if _local_name(elem.tag) != "segment":
-                continue
-            text = (elem.text or "").strip()
-            if text:
-                yield text
-            elem.clear()
+        context = ET.iterparse(handle, events=("start", "end"))
+        _, root = next(context)
+        for event, elem in context:
+            if event == "end":
+                tag = elem.tag
+                if tag.endswith("}segment") or tag == "segment":
+                    text = (elem.text or "").strip()
+                    if text:
+                        yield text
+                elem.clear()
+                root.clear()
 
 
 def normalize_message_id(message_id: str) -> str: