-
Notifications
You must be signed in to change notification settings - Fork 0
⚡ Bolt: Optimize yEnc decoding #28
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -106,6 +106,9 @@ def normalize_message_id(message_id: str) -> str: | |
| return f"<{text.strip('<>')}>" | ||
|
|
||
|
|
||
| YENC_TRANSLATE_TABLE = bytes((i - 42) % 256 for i in range(256)) | ||
|
|
||
|
|
||
| def _parse_yenc_attrs(line: bytes) -> dict[str, str]: | ||
| attrs: dict[str, str] = {} | ||
| for token in line.decode("latin-1", errors="replace").split()[1:]: | ||
|
|
@@ -116,19 +119,20 @@ def _parse_yenc_attrs(line: bytes) -> dict[str, str]: | |
|
|
||
|
|
||
| def _decode_yenc_lines(lines: Iterable[bytes]) -> bytes: | ||
| data = b"".join(lines) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
For large deep-check articles, … Useful? React with 👍 / 👎. |
||
| decoded = bytearray() | ||
| for line in lines: | ||
| index = 0 | ||
| while index < len(line): | ||
| byte = line[index] | ||
| if byte == 61: | ||
| index += 1 | ||
| if index >= len(line): | ||
| raise ValueError("dangling yEnc escape") | ||
| byte = (line[index] - 64) % 256 | ||
| decoded.append((byte - 42) % 256) | ||
| index += 1 | ||
| return bytes(decoded) | ||
| start = 0 | ||
| while True: | ||
| idx = data.find(b"=", start) | ||
| if idx == -1: | ||
| decoded.extend(data[start:]) | ||
| break | ||
| decoded.extend(data[start:idx]) | ||
| if idx + 1 >= len(data): | ||
| raise ValueError("dangling yEnc escape") | ||
| decoded.append((data[idx + 1] - 64) % 256) | ||
| start = idx + 2 | ||
| return bytes(decoded.translate(YENC_TRANSLATE_TABLE)) | ||
|
|
||
|
|
||
| def validate_yenc_body(lines: Iterable[bytes | str]) -> YencValidationResult: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When a deep-check body has an encoded data line ending in `=`, joining all data lines first makes the decoder consume the first byte of the next physical line as the escape continuation. For example, data lines like `b"="`, `b"x"` are decoded as one escaped byte instead of reporting `dangling yEnc escape`; if the article's size/CRC are computed for that synthesized byte, the corrupt body is reported as `ok`. The previous per-line loop rejected this malformed yEnc, so the optimized path needs to preserve line-boundary escape checks while still using the faster operations.
Useful? React with 👍 / 👎.