From f7b88e9fd8e0051b7f3efc3630f79a802a87785f Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Fri, 29 May 2026 00:34:48 +0000 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Optimize=20yEnc=20decoding?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace manual byte iteration with `bytes.translate` and `bytes.find` - Handle consecutive `==` escapes correctly, which a `.split`-based approach gets wrong - Improve yEnc deep check performance - Document learnings in `.jules/bolt.md` Co-authored-by: xbmc4lyfe <273732874+xbmc4lyfe@users.noreply.github.com> --- .jules/bolt.md | 3 +++ verify_nzb.py | 38 ++++++++++++++++++++++++++++---------- 2 files changed, 31 insertions(+), 10 deletions(-) create mode 100644 .jules/bolt.md diff --git a/.jules/bolt.md b/.jules/bolt.md new file mode 100644 index 0000000..4272daa --- /dev/null +++ b/.jules/bolt.md @@ -0,0 +1,3 @@ +## 2024-05-29 - yEnc decoding optimization with bytes.find() and bytes.translate() +**Learning:** Using `bytes.split(b'=')` for yEnc decoding fails when lines contain consecutive escapes (e.g., `==`): the escaped `=` is itself consumed as a separator, so the split yields an empty segment and a naive decoder can no longer tell which byte follows an escape marker. To optimize Python byte string processing effectively while handling escapes correctly, use `bytes.translate()` for bulk decoding and `bytes.find()` to locate and apply escapes manually. +**Action:** Prioritize `bytes.translate` and `bytes.find` over `.split` when translating byte strings with multi-character escape sequences, ensuring correctness on edge cases like consecutive escapes. diff --git a/verify_nzb.py b/verify_nzb.py index 953dccd..c2d5dfc 100644 --- a/verify_nzb.py +++ b/verify_nzb.py @@ -115,19 +115,37 @@ def _parse_yenc_attrs(line: bytes) -> dict[str, str]: return attrs +# Translation tables for yEnc decoding to avoid slow byte-by-byte iteration in Python. +# Normal bytes are shifted by 42. 
Escaped bytes are shifted by 42+64=106. +_YENC_TRANS = bytes((i - 42) % 256 for i in range(256)) +_YENC_ESCAPE_TRANS = bytes((i - 106) % 256 for i in range(256)) + def _decode_yenc_lines(lines: Iterable[bytes]) -> bytes: + """ + Decodes yEnc-encoded lines into bytes. + Optimized to use C-backed bytes.translate() and bytes.find() instead of manual iteration. + """ decoded = bytearray() for line in lines: - index = 0 - while index < len(line): - byte = line[index] - if byte == 61: - index += 1 - if index >= len(line): - raise ValueError("dangling yEnc escape") - byte = (line[index] - 64) % 256 - decoded.append((byte - 42) % 256) - index += 1 + # Fast path: no escapes in the line + if b"=" not in line: + decoded.extend(line.translate(_YENC_TRANS)) + continue + + # Slow path: apply escapes manually using bytes.find() + idx = 0 + length = len(line) + while idx < length: + next_eq = line.find(b"=", idx) + if next_eq == -1: + decoded.extend(line[idx:].translate(_YENC_TRANS)) + break + decoded.extend(line[idx:next_eq].translate(_YENC_TRANS)) + idx = next_eq + 1 + if idx >= length: + raise ValueError("dangling yEnc escape") + decoded.append(_YENC_ESCAPE_TRANS[line[idx]]) + idx += 1 return bytes(decoded)