-
Notifications
You must be signed in to change notification settings - Fork 0
β‘ Bolt: [Improve yEnc decoding performance using C-backed bytes methods] #29
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. Weβll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -115,20 +115,32 @@ def _parse_yenc_attrs(line: bytes) -> dict[str, str]: | |
| return attrs | ||
|
|
||
|
|
||
| _YENC_DECODE_TABLE = bytes((i - 42) % 256 for i in range(256)) | ||
|
|
||
|
|
||
| def _decode_yenc_lines(lines: Iterable[bytes]) -> bytes: | ||
| decoded = bytearray() | ||
| for line in lines: | ||
| index = 0 | ||
| while index < len(line): | ||
| byte = line[index] | ||
| if byte == 61: | ||
| index += 1 | ||
| if index >= len(line): | ||
| raise ValueError("dangling yEnc escape") | ||
| byte = (line[index] - 64) % 256 | ||
| decoded.append((byte - 42) % 256) | ||
| index += 1 | ||
| return bytes(decoded) | ||
| """ | ||
| Decodes yEnc-encoded lines using C-backed bytes methods for significant performance gain. | ||
| Expects ~8-10x speedup by avoiding Python byte-by-byte iteration. | ||
| """ | ||
| data = b"".join(lines) | ||
| if b"=" not in data: | ||
| return data.translate(_YENC_DECODE_TABLE) | ||
|
|
||
| chunks = [] | ||
| start = 0 | ||
| while True: | ||
| idx = data.find(b"=", start) | ||
| if idx == -1: | ||
| chunks.append(data[start:].translate(_YENC_DECODE_TABLE)) | ||
| break | ||
| chunks.append(data[start:idx].translate(_YENC_DECODE_TABLE)) | ||
| if idx + 1 >= len(data): | ||
| raise ValueError("dangling yEnc escape") | ||
| escaped_byte = data[idx + 1] | ||
| chunks.append(bytes([(escaped_byte - 106) % 256])) | ||
| start = idx + 2 | ||
| return b"".join(chunks) | ||
|
Comment on lines
+126
to
+143
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Preserve dangling-escape validation at original line boundaries.
Proposed fix def _decode_yenc_lines(lines: Iterable[bytes]) -> bytes:
@@
- data = b"".join(lines)
+ buffered_lines: list[bytes] = []
+ for line in lines:
+ if line.endswith(b"="):
+ raise ValueError("dangling yEnc escape")
+ buffered_lines.append(line)
+
+ data = b"".join(buffered_lines)π€ Prompt for AI Agents |
||
|
|
||
|
|
||
| def validate_yenc_body(lines: Iterable[bytes | str]) -> YencValidationResult: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Because
validate_yenc_bodypasses the physical NNTP body lines after stripping CRLF, joining all data lines before scanning lets an escape marker at the end of one yEnc line consume the first byte of the next line. yEnc line wrapping must keep the escape marker and escaped byte on the same line, and the previous implementation rejected this asdangling yEnc escape; with inputs like[b"abc=", b"def"]this now decodes across the boundary and can report a malformed article as valid if the advertised size/CRC match the mis-decoded bytes.Useful? React with πΒ / π.