Skip to content

Commit b618655

Browse files
gpshead and claude committed
Add hypothesis tests for canonical encoding
- Round-trip tests: encoder always produces canonical output (base64, base32, base85, ascii85)
- Uniqueness tests: for base85/ascii85 partial groups, sweep all 85 last-digit values and verify exactly one decodes to the original payload with canonical=True

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 101edf6 commit b618655

File tree

1 file changed

+82
-0
lines changed

1 file changed

+82
-0
lines changed

Lib/test/test_binascii.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,17 @@ def test_base64_canonical(self):
415415
# Full quads with no padding have no leftover bits -- always valid
416416
binascii.a2b_base64(self.type2test(b'AAAA'), canonical=True)
417417

418+
@hypothesis.given(payload=hypothesis.strategies.binary())
419+
@hypothesis.example(b'')
420+
@hypothesis.example(b'\x00')
421+
@hypothesis.example(b'\xff\xff')
422+
@hypothesis.example(b'abc')
423+
def test_base64_canonical_roundtrip(self, payload):
424+
# The encoder must always produce canonical output.
425+
encoded = binascii.b2a_base64(payload, newline=False)
426+
decoded = binascii.a2b_base64(encoded, canonical=True)
427+
self.assertEqual(decoded, payload)
428+
418429
def test_base64_alphabet(self):
419430
alphabet = (b'!"#$%&\'()*+,-012345689@'
420431
b'ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr')
@@ -848,6 +859,38 @@ def test_base85_canonical(self):
848859
# Empty input is valid
849860
self.assertEqual(binascii.a2b_base85(b'', canonical=True), b'')
850861

862+
@hypothesis.given(payload=hypothesis.strategies.binary())
863+
@hypothesis.example(b'')
864+
@hypothesis.example(b'\x00')
865+
@hypothesis.example(b'\xff\xff')
866+
@hypothesis.example(b'abc')
867+
def test_base85_canonical_roundtrip(self, payload):
868+
encoded = binascii.b2a_base85(payload)
869+
decoded = binascii.a2b_base85(encoded, canonical=True)
870+
self.assertEqual(decoded, payload)
871+
872+
@hypothesis.given(payload=hypothesis.strategies.binary(min_size=1, max_size=3))
873+
@hypothesis.example(b'\x00')
874+
@hypothesis.example(b'\xff')
875+
@hypothesis.example(b'ab\x00')
876+
def test_base85_canonical_unique(self, payload):
877+
# For a partial group, sweeping all 85 last-digit values should
878+
# yield exactly one encoding that both decodes to the original
879+
# payload AND passes canonical=True.
880+
hypothesis.assume(len(payload) % 4 != 0)
881+
canonical_enc = binascii.b2a_base85(payload)
882+
table = binascii.BASE85_ALPHABET
883+
accepted = []
884+
for digit in table:
885+
candidate = canonical_enc[:-1] + bytes([digit])
886+
try:
887+
result = binascii.a2b_base85(candidate, canonical=True)
888+
if result == payload:
889+
accepted.append(candidate)
890+
except binascii.Error:
891+
pass
892+
self.assertEqual(accepted, [canonical_enc])
893+
851894
def test_ascii85_canonical(self):
852895
# Non-canonical encodings are accepted without canonical=True
853896
self.assertEqual(binascii.a2b_ascii85(b'@0'), b'a')
@@ -889,6 +932,35 @@ def test_ascii85_canonical(self):
889932
binascii.a2b_ascii85(b'<~@:E_W~>', canonical=True, adobe=True),
890933
b'abcd')
891934

935+
@hypothesis.given(payload=hypothesis.strategies.binary())
936+
@hypothesis.example(b'')
937+
@hypothesis.example(b'\x00')
938+
@hypothesis.example(b'\xff\xff')
939+
@hypothesis.example(b'abc')
940+
def test_ascii85_canonical_roundtrip(self, payload):
941+
encoded = binascii.b2a_ascii85(payload)
942+
decoded = binascii.a2b_ascii85(encoded, canonical=True)
943+
self.assertEqual(decoded, payload)
944+
945+
@hypothesis.given(payload=hypothesis.strategies.binary(min_size=1, max_size=3))
946+
@hypothesis.example(b'\x00')
947+
@hypothesis.example(b'\xff')
948+
@hypothesis.example(b'ab\x00')
949+
def test_ascii85_canonical_unique(self, payload):
950+
hypothesis.assume(len(payload) % 4 != 0)
951+
canonical_enc = binascii.b2a_ascii85(payload)
952+
# Ascii85 alphabet: '!' (33) through 'u' (117)
953+
accepted = []
954+
for digit in range(33, 118):
955+
candidate = canonical_enc[:-1] + bytes([digit])
956+
try:
957+
result = binascii.a2b_ascii85(candidate, canonical=True)
958+
if result == payload:
959+
accepted.append(candidate)
960+
except binascii.Error:
961+
pass
962+
self.assertEqual(accepted, [canonical_enc])
963+
892964
def test_base32_valid(self):
893965
# Test base32 with valid data
894966
lines = []
@@ -1096,6 +1168,16 @@ def test_base32_canonical(self):
10961168
# Full octet with no padding -- always valid
10971169
binascii.a2b_base32(self.type2test(b'AAAAAAAA'), canonical=True)
10981170

1171+
@hypothesis.given(payload=hypothesis.strategies.binary())
1172+
@hypothesis.example(b'')
1173+
@hypothesis.example(b'\x00')
1174+
@hypothesis.example(b'\xff\xff')
1175+
@hypothesis.example(b'abc')
1176+
def test_base32_canonical_roundtrip(self, payload):
1177+
encoded = binascii.b2a_base32(payload)
1178+
decoded = binascii.a2b_base32(encoded, canonical=True)
1179+
self.assertEqual(decoded, payload)
1180+
10991181
def test_a2b_base32_padded(self):
11001182
a2b_base32 = binascii.a2b_base32
11011183
t = self.type2test

0 commit comments

Comments
 (0)