diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst index 425dff8f2a9ad1..a02fdc1e8a0080 100644 --- a/Doc/library/base64.rst +++ b/Doc/library/base64.rst @@ -76,8 +76,8 @@ POST request. Added the *padded* and *wrapcol* parameters. -.. function:: b64decode(s, altchars=None, validate=False, *, padded=True) - b64decode(s, altchars=None, validate=True, *, ignorechars, padded=True) +.. function:: b64decode(s, altchars=None, validate=False, *, padded=True, canonical=False) + b64decode(s, altchars=None, validate=True, *, ignorechars, padded=True, canonical=False) Decode the Base64 encoded :term:`bytes-like object` or ASCII string *s* and return the decoded :class:`bytes`. @@ -112,10 +112,13 @@ POST request. If *validate* is true, these non-alphabet characters in the input result in a :exc:`binascii.Error`. + If *canonical* is true, non-zero padding bits are rejected. + See :func:`binascii.a2b_base64` for details. + For more information about the strict base64 check, see :func:`binascii.a2b_base64` .. versionchanged:: 3.15 - Added the *ignorechars* and *padded* parameters. + Added the *ignorechars*, *padded*, and *canonical* parameters. .. deprecated:: 3.15 Accepting the ``+`` and ``/`` characters with an alternative alphabet @@ -179,7 +182,7 @@ POST request. Added the *padded* and *wrapcol* parameters. -.. function:: b32decode(s, casefold=False, map01=None, *, padded=True, ignorechars=b'') +.. function:: b32decode(s, casefold=False, map01=None, *, padded=True, ignorechars=b'', canonical=False) Decode the Base32 encoded :term:`bytes-like object` or ASCII string *s* and return the decoded :class:`bytes`. @@ -203,12 +206,15 @@ POST request. *ignorechars* should be a :term:`bytes-like object` containing characters to ignore from the input. + If *canonical* is true, non-zero padding bits are rejected. + See :func:`binascii.a2b_base32` for details. + A :exc:`binascii.Error` is raised if *s* is incorrectly padded or if there are non-alphabet characters present in the input. .. versionchanged:: next - Added the *ignorechars* and *padded* parameters. + Added the *ignorechars*, *padded*, and *canonical* parameters. .. function:: b32hexencode(s, *, padded=True, wrapcol=0) @@ -222,7 +228,7 @@ POST request. Added the *padded* and *wrapcol* parameters. -.. function:: b32hexdecode(s, casefold=False, *, padded=True, ignorechars=b'') +.. function:: b32hexdecode(s, casefold=False, *, padded=True, ignorechars=b'', canonical=False) Similar to :func:`b32decode` but uses the Extended Hex Alphabet, as defined in :rfc:`4648`. @@ -235,7 +241,7 @@ POST request. .. versionadded:: 3.10 .. versionchanged:: next - Added the *ignorechars* and *padded* parameters. + Added the *ignorechars*, *padded*, and *canonical* parameters. .. function:: b16encode(s, *, wrapcol=0) @@ -315,7 +321,7 @@ Refer to the documentation of the individual functions for more information. .. versionadded:: 3.4 -.. function:: a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v') +.. function:: a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v', canonical=False) Decode the Ascii85 encoded :term:`bytes-like object` or ASCII string *b* and return the decoded :class:`bytes`. @@ -332,8 +338,16 @@ Refer to the documentation of the individual functions for more information. This should only contain whitespace characters, and by default contains all whitespace characters in ASCII. + If *canonical* is true, non-canonical encodings are rejected. + See :func:`binascii.a2b_ascii85` for details. + .. versionadded:: 3.4 + .. versionchanged:: next + Added the *canonical* parameter. + Single-character final groups are now always rejected as encoding + violations. + .. function:: b85encode(b, pad=False, *, wrapcol=0) @@ -353,7 +367,7 @@ Refer to the documentation of the individual functions for more information. Added the *wrapcol* parameter. -.. function:: b85decode(b, *, ignorechars=b'') +.. function:: b85decode(b, *, ignorechars=b'', canonical=False) Decode the base85-encoded :term:`bytes-like object` or ASCII string *b* and return the decoded :class:`bytes`. Padding is implicitly removed, if @@ -362,10 +376,15 @@ Refer to the documentation of the individual functions for more information. *ignorechars* should be a :term:`bytes-like object` containing characters to ignore from the input. + If *canonical* is true, non-canonical encodings are rejected. + See :func:`binascii.a2b_base85` for details. + .. versionadded:: 3.4 .. versionchanged:: next - Added the *ignorechars* parameter. + Added the *ignorechars* and *canonical* parameters. + Single-character final groups are now always rejected as encoding + violations. .. function:: z85encode(s, pad=False, *, wrapcol=0) @@ -390,7 +409,7 @@ Refer to the documentation of the individual functions for more information. Added the *wrapcol* parameter. -.. function:: z85decode(s, *, ignorechars=b'') +.. function:: z85decode(s, *, ignorechars=b'', canonical=False) Decode the Z85-encoded :term:`bytes-like object` or ASCII string *s* and return the decoded :class:`bytes`. See `Z85 specification @@ -399,10 +418,15 @@ Refer to the documentation of the individual functions for more information. *ignorechars* should be a :term:`bytes-like object` containing characters to ignore from the input. + If *canonical* is true, non-canonical encodings are rejected. + See :func:`binascii.a2b_base85` for details. + .. versionadded:: 3.13 .. versionchanged:: next - Added the *ignorechars* parameter. + Added the *ignorechars* and *canonical* parameters. + Single-character final groups are now always rejected as encoding + violations. .. _base64-legacy: diff --git a/Doc/library/binascii.rst b/Doc/library/binascii.rst index 4f2edb7eff8a8f..154ff770f73710 100644 --- a/Doc/library/binascii.rst +++ b/Doc/library/binascii.rst @@ -48,8 +48,8 @@ The :mod:`!binascii` module defines the following functions: Added the *backtick* parameter. -.. function:: a2b_base64(string, /, *, padded=True, alphabet=BASE64_ALPHABET, strict_mode=False) - a2b_base64(string, /, *, ignorechars, padded=True, alphabet=BASE64_ALPHABET, strict_mode=True) +.. function:: a2b_base64(string, /, *, padded=True, alphabet=BASE64_ALPHABET, strict_mode=False, canonical=False) + a2b_base64(string, /, *, ignorechars, padded=True, alphabet=BASE64_ALPHABET, strict_mode=True, canonical=False) Convert a block of base64 data back to binary and return the binary data. More than one line may be passed at a time. @@ -80,11 +80,15 @@ The :mod:`!binascii` module defines the following functions: * Contains no excess data after padding (including excess padding, newlines, etc.). * Does not start with a padding. + If *canonical* is true, non-zero padding bits in the last group are rejected + with :exc:`binascii.Error`, enforcing canonical encoding as defined in + :rfc:`4648` section 3.5. This check is independent of *strict_mode*. + .. versionchanged:: 3.11 Added the *strict_mode* parameter. .. versionchanged:: 3.15 - Added the *alphabet*, *ignorechars* and *padded* parameters. + Added the *alphabet*, *ignorechars*, *padded*, and *canonical* parameters. .. function:: b2a_base64(data, *, padded=True, alphabet=BASE64_ALPHABET, wrapcol=0, newline=True) @@ -110,16 +114,17 @@ The :mod:`!binascii` module defines the following functions: Added the *alphabet*, *padded* and *wrapcol* parameters. -.. function:: a2b_ascii85(string, /, *, foldspaces=False, adobe=False, ignorechars=b'') +.. function:: a2b_ascii85(string, /, *, foldspaces=False, adobe=False, ignorechars=b'', canonical=False) Convert Ascii85 data back to binary and return the binary data. Valid Ascii85 data contains characters from the Ascii85 alphabet in groups - of five (except for the final group, which may have from two to five + of five (except for the final group, which may have from two to four characters). Each group encodes 32 bits of binary data in the range from ``0`` to ``2 ** 32 - 1``, inclusive. The special character ``z`` is accepted as a short form of the group ``!!!!!``, which encodes four - consecutive null bytes. + consecutive null bytes. A single-character final group is always rejected + as an encoding violation. *foldspaces* is a flag that specifies whether the 'y' short sequence should be accepted as shorthand for 4 consecutive spaces (ASCII 0x20). @@ -132,10 +137,20 @@ The :mod:`!binascii` module defines the following functions: to ignore from the input. This should only contain whitespace characters. + If *canonical* is true, non-canonical encodings are rejected with + :exc:`binascii.Error`. This enforces that the ``z`` abbreviation is used + for all-zero groups (rather than ``!!!!!``), and that partial final groups + use the same padding digits the encoder would produce. + Invalid Ascii85 data will raise :exc:`binascii.Error`. .. versionadded:: 3.15 + .. versionchanged:: next + Single-character final groups are now always rejected as encoding + violations. Previously they were silently ignored, producing no output + bytes. + .. function:: b2a_ascii85(data, /, *, foldspaces=False, wrapcol=0, pad=False, adobe=False) @@ -160,15 +175,16 @@ The :mod:`!binascii` module defines the following functions: .. versionadded:: 3.15 -.. function:: a2b_base85(string, /, *, alphabet=BASE85_ALPHABET, ignorechars=b'') +.. function:: a2b_base85(string, /, *, alphabet=BASE85_ALPHABET, ignorechars=b'', canonical=False) Convert Base85 data back to binary and return the binary data. More than one line may be passed at a time. Valid Base85 data contains characters from the Base85 alphabet in groups - of five (except for the final group, which may have from two to five + of five (except for the final group, which may have from two to four characters). Each group encodes 32 bits of binary data in the range from - ``0`` to ``2 ** 32 - 1``, inclusive. + ``0`` to ``2 ** 32 - 1``, inclusive. A single-character final group is + always rejected as an encoding violation. Optional *alphabet* must be a :class:`bytes` object of length 85 which specifies an alternative alphabet. @@ -176,10 +192,19 @@ The :mod:`!binascii` module defines the following functions: *ignorechars* should be a :term:`bytes-like object` containing characters to ignore from the input. + If *canonical* is true, non-canonical encodings in partial final groups + are rejected with :exc:`binascii.Error`. This enforces that the padding + digits match what the encoder would produce. + Invalid Base85 data will raise :exc:`binascii.Error`. .. versionadded:: 3.15 + .. versionchanged:: next + Single-character final groups are now always rejected as encoding + violations. Previously they were silently ignored, producing no output + bytes. + .. function:: b2a_base85(data, /, *, alphabet=BASE85_ALPHABET, wrapcol=0, pad=False) @@ -199,7 +224,7 @@ The :mod:`!binascii` module defines the following functions: .. versionadded:: 3.15 -.. function:: a2b_base32(string, /, *, padded=True, alphabet=BASE32_ALPHABET, ignorechars=b'') +.. function:: a2b_base32(string, /, *, padded=True, alphabet=BASE32_ALPHABET, ignorechars=b'', canonical=False) Convert base32 data back to binary and return the binary data. @@ -228,6 +253,10 @@ The :mod:`!binascii` module defines the following functions: presented before the end of the encoded data and the excess pad characters will be ignored. + If *canonical* is true, non-zero padding bits in the last group are rejected + with :exc:`binascii.Error`, enforcing canonical encoding as defined in + :rfc:`4648` section 3.5. + Invalid base32 data will raise :exc:`binascii.Error`. .. versionadded:: next diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index beae65213a27b6..4fd42185d8a4a1 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -1636,6 +1636,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(callable)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(callback)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cancel)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(canonical)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(capath)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(capitals)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(category)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index bb1c6dbaf03906..f2d43c22069b92 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -359,6 +359,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(callable) STRUCT_FOR_ID(callback) STRUCT_FOR_ID(cancel) + STRUCT_FOR_ID(canonical) STRUCT_FOR_ID(capath) STRUCT_FOR_ID(capitals) STRUCT_FOR_ID(category) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 64b029797ab9b3..6ee64a461d8568 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -1634,6 +1634,7 @@ extern "C" { INIT_ID(callable), \ INIT_ID(callback), \ INIT_ID(cancel), \ + INIT_ID(canonical), \ INIT_ID(capath), \ INIT_ID(capitals), \ INIT_ID(category), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index 461ee36dcebb6d..bcb117e1091674 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1216,6 +1216,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(canonical); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(capath); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Lib/base64.py b/Lib/base64.py index a94bec4d031c52..3a60e358ee285f 100644 --- a/Lib/base64.py +++ b/Lib/base64.py @@ -68,7 +68,7 @@ def b64encode(s, altchars=None, *, padded=True, wrapcol=0): def b64decode(s, altchars=None, validate=_NOT_SPECIFIED, - *, padded=True, ignorechars=_NOT_SPECIFIED): + *, padded=True, ignorechars=_NOT_SPECIFIED, canonical=False): """Decode the Base64 encoded bytes-like object or ASCII string s. Optional altchars must be a bytes-like object or ASCII string of length 2 @@ -110,11 +110,13 @@ def b64decode(s, altchars=None, validate=_NOT_SPECIFIED, alphabet = binascii.BASE64_ALPHABET[:-2] + altchars return binascii.a2b_base64(s, strict_mode=validate, alphabet=alphabet, - padded=padded, ignorechars=ignorechars) + padded=padded, ignorechars=ignorechars, + canonical=canonical) if ignorechars is _NOT_SPECIFIED: ignorechars = b'' result = binascii.a2b_base64(s, strict_mode=validate, - padded=padded, ignorechars=ignorechars) + padded=padded, ignorechars=ignorechars, + canonical=canonical) if badchar is not None: import warnings if validate: @@ -230,7 +232,8 @@ def b32encode(s, *, padded=True, wrapcol=0): return binascii.b2a_base32(s, padded=padded, wrapcol=wrapcol) b32encode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32') -def b32decode(s, casefold=False, map01=None, *, padded=True, ignorechars=b''): +def b32decode(s, casefold=False, map01=None, *, padded=True, ignorechars=b'', + canonical=False): s = _bytes_from_decode_data(s) # Handle section 2.4 zero and one mapping. The flag map01 will be either # False, or the character to map the digit 1 (one) to. It should be @@ -241,7 +244,8 @@ def b32decode(s, casefold=False, map01=None, *, padded=True, ignorechars=b''): s = s.translate(bytes.maketrans(b'01', b'O' + map01)) if casefold: s = s.upper() - return binascii.a2b_base32(s, padded=padded, ignorechars=ignorechars) + return binascii.a2b_base32(s, padded=padded, ignorechars=ignorechars, + canonical=canonical) b32decode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32', extra_args=_B32_DECODE_MAP01_DOCSTRING) @@ -250,13 +254,15 @@ def b32hexencode(s, *, padded=True, wrapcol=0): alphabet=binascii.BASE32HEX_ALPHABET) b32hexencode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32hex') -def b32hexdecode(s, casefold=False, *, padded=True, ignorechars=b''): +def b32hexdecode(s, casefold=False, *, padded=True, ignorechars=b'', + canonical=False): s = _bytes_from_decode_data(s) # base32hex does not have the 01 mapping if casefold: s = s.upper() return binascii.a2b_base32(s, alphabet=binascii.BASE32HEX_ALPHABET, - padded=padded, ignorechars=ignorechars) + padded=padded, ignorechars=ignorechars, + canonical=canonical) b32hexdecode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32hex', extra_args='') @@ -324,7 +330,8 @@ def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False): return binascii.b2a_ascii85(b, foldspaces=foldspaces, adobe=adobe, wrapcol=wrapcol, pad=pad) -def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): +def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v', + canonical=False): """Decode the Ascii85 encoded bytes-like object or ASCII string b. foldspaces is a flag that specifies whether the 'y' short sequence should be @@ -338,10 +345,13 @@ def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'): input. This should only contain whitespace characters, and by default contains all whitespace characters in ASCII. + If canonical is true, non-canonical encodings are rejected. + The result is returned as a bytes object. """ return binascii.a2b_ascii85(b, foldspaces=foldspaces, - adobe=adobe, ignorechars=ignorechars) + adobe=adobe, ignorechars=ignorechars, + canonical=canonical) def b85encode(b, pad=False, *, wrapcol=0): """Encode bytes-like object b in base85 format and return a bytes object. @@ -354,12 +364,15 @@ def b85encode(b, pad=False, *, wrapcol=0): """ return binascii.b2a_base85(b, wrapcol=wrapcol, pad=pad) -def b85decode(b, *, ignorechars=b''): +def b85decode(b, *, ignorechars=b'', canonical=False): """Decode the base85-encoded bytes-like object or ASCII string b + If canonical is true, non-canonical encodings are rejected. + The result is returned as a bytes object. """ - return binascii.a2b_base85(b, ignorechars=ignorechars) + return binascii.a2b_base85(b, ignorechars=ignorechars, + canonical=canonical) def z85encode(s, pad=False, *, wrapcol=0): """Encode bytes-like object b in z85 format and return a bytes object. @@ -373,12 +386,15 @@ def z85encode(s, pad=False, *, wrapcol=0): return binascii.b2a_base85(s, wrapcol=wrapcol, pad=pad, alphabet=binascii.Z85_ALPHABET) -def z85decode(s, *, ignorechars=b''): +def z85decode(s, *, ignorechars=b'', canonical=False): """Decode the z85-encoded bytes-like object or ASCII string b + If canonical is true, non-canonical encodings are rejected. + The result is returned as a bytes object. """ - return binascii.a2b_base85(s, alphabet=binascii.Z85_ALPHABET, ignorechars=ignorechars) + return binascii.a2b_base85(s, alphabet=binascii.Z85_ALPHABET, + ignorechars=ignorechars, canonical=canonical) # Legacy interface. This code could be cleaned up since I don't believe # binascii has any line length limitations. It just doesn't seem worth it diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 81cdacb96241e2..6991e2ef6815e3 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -383,6 +383,49 @@ def assertInvalidLength(data, strict_mode=True): assertInvalidLength(b'A\tB\nC ??DE', # only 5 valid characters strict_mode=False) + def test_base64_canonical(self): + # https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 + # Decoders MAY reject encoded data if the pad bits are not zero. + + # Without canonical=True, non-zero padding bits are accepted + self.assertEqual(binascii.a2b_base64(self.type2test(b'AB==')), b'\x00') + self.assertEqual(binascii.a2b_base64(self.type2test(b'AB=='), + strict_mode=True), b'\x00') + + # 2 data chars + "==": last char has 4 padding bits + # 'A' = 0, 'B' = 1 -> leftover 0001 (non-zero) + with self.assertRaises(binascii.Error): + binascii.a2b_base64(self.type2test(b'AB=='), canonical=True) + # 'A' = 0, 'P' = 15 -> leftover 1111 (non-zero) + with self.assertRaises(binascii.Error): + binascii.a2b_base64(self.type2test(b'AP=='), canonical=True) + + # 3 data chars + "=": last char has 2 padding bits + # 'A' = 0, 'A' = 0, 'B' = 1 -> leftover 01 (non-zero) + with self.assertRaises(binascii.Error): + binascii.a2b_base64(self.type2test(b'AAB='), canonical=True) + # 'A' = 0, 'A' = 0, 'D' = 3 -> leftover 11 (non-zero) + with self.assertRaises(binascii.Error): + binascii.a2b_base64(self.type2test(b'AAD='), canonical=True) + + # Verify that zero padding bits are accepted + binascii.a2b_base64(self.type2test(b'AA=='), canonical=True) + binascii.a2b_base64(self.type2test(b'AAA='), canonical=True) + + # Full quads with no padding have no leftover bits -- always valid + binascii.a2b_base64(self.type2test(b'AAAA'), canonical=True) + + @hypothesis.given(payload=hypothesis.strategies.binary()) + @hypothesis.example(b'') + @hypothesis.example(b'\x00') + @hypothesis.example(b'\xff\xff') + @hypothesis.example(b'abc') + def test_base64_canonical_roundtrip(self, payload): + # The encoder must always produce canonical output. + encoded = binascii.b2a_base64(payload, newline=False) + decoded = binascii.a2b_base64(encoded, canonical=True) + self.assertEqual(decoded, payload) + def test_base64_alphabet(self): alphabet = (b'!"#$%&\'()*+,-012345689@' b'ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr') @@ -443,20 +486,22 @@ def test_ascii85_valid(self): res += b self.assertEqual(res, rawdata) - # Test decoding inputs with length 1 mod 5 - params = [ - (b"a", False, False, b"", b""), - (b"xbw", False, False, b"wx", b""), - (b"<~c~>", False, True, b"", b""), - (b"{d ~>", False, True, b" {", b""), - (b"ye", True, False, b"", b" "), - (b"z\x01y\x00f", True, False, b"\x00\x01", b"\x00\x00\x00\x00 "), - (b"<~FCfN8yg~>", True, True, b"", b"test "), - (b"FE;\x03#8zFCf\x02N8yh~>", True, True, b"\x02\x03", b"tset\x00\x00\x00\x00test "), + # Inputs with length 1 mod 5 end with a 1-char group, which is + # an encoding violation per the PLRM spec. + error_params = [ + (b"a", False, False, b""), + (b"xbw", False, False, b"wx"), + (b"<~c~>", False, True, b""), + (b"{d ~>", False, True, b" {"), + (b"ye", True, False, b""), + (b"z\x01y\x00f", True, False, b"\x00\x01"), + (b"<~FCfN8yg~>", True, True, b""), + (b"FE;\x03#8zFCf\x02N8yh~>", True, True, b"\x02\x03"), ] - for a, foldspaces, adobe, ignorechars, b in params: + for a, foldspaces, adobe, ignorechars in error_params: kwargs = {"foldspaces": foldspaces, "adobe": adobe, "ignorechars": ignorechars} - self.assertEqual(binascii.a2b_ascii85(self.type2test(a), **kwargs), b) + with self.assertRaises(binascii.Error): + binascii.a2b_ascii85(self.type2test(a), **kwargs) def test_ascii85_invalid(self): # Test Ascii85 with invalid characters interleaved @@ -670,16 +715,18 @@ def test_base85_valid(self): self.assertEqual(res, self.rawdata) # Test decoding inputs with different length - self.assertEqual(binascii.a2b_base85(self.type2test(b'a')), b'') - self.assertEqual(binascii.a2b_base85(self.type2test(b'a')), b'') + # 1-char groups are rejected (encoding violation) + with self.assertRaises(binascii.Error): + binascii.a2b_base85(self.type2test(b'a')) self.assertEqual(binascii.a2b_base85(self.type2test(b'ab')), b'q') self.assertEqual(binascii.a2b_base85(self.type2test(b'abc')), b'qa') self.assertEqual(binascii.a2b_base85(self.type2test(b'abcd')), b'qa\x9e') self.assertEqual(binascii.a2b_base85(self.type2test(b'abcde')), b'qa\x9e\xb6') - self.assertEqual(binascii.a2b_base85(self.type2test(b'abcdef')), - b'qa\x9e\xb6') + # 6-char input = full 5-char group + trailing 1-char group (rejected) + with self.assertRaises(binascii.Error): + binascii.a2b_base85(self.type2test(b'abcdef')) self.assertEqual(binascii.a2b_base85(self.type2test(b'abcdefg')), b'qa\x9e\xb6\x81') @@ -767,6 +814,169 @@ def test_base85_alphabet(self): with self.assertRaises(TypeError): binascii.a2b_base64(data, alphabet=bytearray(alphabet)) + def test_base85_canonical(self): + # Non-canonical encodings are accepted without canonical=True + self.assertEqual(binascii.a2b_base85(b'VF'), b'a') + + # 1-char partial groups are always rejected (encoding violation: + # no conforming encoder produces them) + with self.assertRaises(binascii.Error): + binascii.a2b_base85(b'V') + with self.assertRaises(binascii.Error): + binascii.a2b_base85(b'0') + + # Verify round-trip: encode then decode with canonical=True works + for data in [b'a', b'ab', b'abc', b'abcd', b'abcde', + b'\x00', b'\xff', b'\x00\x00', b'\xff\xff\xff']: + encoded = binascii.b2a_base85(data) + decoded = binascii.a2b_base85(encoded, canonical=True) + self.assertEqual(decoded, data) + + # Test non-canonical rejection for each partial group size + # (2-char/1-byte, 3-char/2-byte, 4-char/3-byte). + # Incrementing the last digit by 1 produces a non-canonical + # encoding. For 4-char groups (n_pad=1) a +1 can change the + # output byte, so we use b'ab\x00' whose canonical form allows + # a +1 that still decodes to the same 3 bytes. + for data in [b'a', b'ab', b'ab\x00']: + canonical_enc = binascii.b2a_base85(data) + non_canonical = (canonical_enc[:-1] + + bytes([canonical_enc[-1] + 1])) + # Same decoded output without canonical check + self.assertEqual(binascii.a2b_base85(non_canonical), data) + # Rejected with canonical=True + with self.assertRaises(binascii.Error): + binascii.a2b_base85(non_canonical, canonical=True) + + # Boundary bytes: \x00 and \xff for each partial group size + for data in [b'\x00', b'\x00\x00', b'\x00\x00\x00', + b'\xff', b'\xff\xff', b'\xff\xff\xff']: + canonical_enc = binascii.b2a_base85(data) + binascii.a2b_base85(canonical_enc, canonical=True) + + # Full 5-char groups are always canonical (no padding bits) + self.assertEqual( + binascii.a2b_base85(b'VPa!s', canonical=True), b'abcd') + + # Empty input is valid + self.assertEqual(binascii.a2b_base85(b'', canonical=True), b'') + + @hypothesis.given(payload=hypothesis.strategies.binary()) + @hypothesis.example(b'') + @hypothesis.example(b'\x00') + @hypothesis.example(b'\xff\xff') + @hypothesis.example(b'abc') + def test_base85_canonical_roundtrip(self, payload): + encoded = binascii.b2a_base85(payload) + decoded = binascii.a2b_base85(encoded, canonical=True) + self.assertEqual(decoded, payload) + + @hypothesis.given(payload=hypothesis.strategies.binary(min_size=1, max_size=3)) + @hypothesis.example(b'\x00') + @hypothesis.example(b'\xff') + @hypothesis.example(b'ab\x00') + def test_base85_canonical_unique(self, payload): + # For a partial group, sweeping all 85 last-digit values should + # yield exactly one encoding that both decodes to the original + # payload AND passes canonical=True. + hypothesis.assume(len(payload) % 4 != 0) + canonical_enc = binascii.b2a_base85(payload) + table = binascii.BASE85_ALPHABET + accepted = [] + for digit in table: + candidate = canonical_enc[:-1] + bytes([digit]) + try: + result = binascii.a2b_base85(candidate, canonical=True) + if result == payload: + accepted.append(candidate) + except binascii.Error: + pass + self.assertEqual(accepted, [canonical_enc]) + + def test_ascii85_canonical(self): + # Non-canonical encodings are accepted without canonical=True + self.assertEqual(binascii.a2b_ascii85(b'@0'), b'a') + + # 1-char partial groups are always rejected (PLRM encoding violation) + with self.assertRaises(binascii.Error): + binascii.a2b_ascii85(b'@') + with self.assertRaises(binascii.Error): + binascii.a2b_ascii85(b'!') + + # Verify round-trip: encode then decode with canonical=True works + for data in [b'a', b'ab', b'abc', b'abcd', b'abcde', + b'\x00', b'\xff', b'\x00\x00', b'\xff\xff\xff']: + encoded = binascii.b2a_ascii85(data) + decoded = binascii.a2b_ascii85(encoded, canonical=True) + self.assertEqual(decoded, data) + + # Test non-canonical rejection for each partial group size. + # See test_base85_canonical for why b'ab\x00' is used for 3 bytes. + for data in [b'a', b'ab', b'ab\x00']: + canonical_enc = binascii.b2a_ascii85(data) + non_canonical = (canonical_enc[:-1] + + bytes([canonical_enc[-1] + 1])) + self.assertEqual(binascii.a2b_ascii85(non_canonical), data) + with self.assertRaises(binascii.Error): + binascii.a2b_ascii85(non_canonical, canonical=True) + + # Full 5-char groups are always canonical + self.assertEqual( + binascii.a2b_ascii85(b'@:E_W', canonical=True), b'abcd') + + # 'z' is the canonical form for all-zero groups per the PLRM. + # '!!!!!' decodes identically but is non-canonical. + self.assertEqual(binascii.a2b_ascii85(b'!!!!!'), b'\x00' * 4) + self.assertEqual(binascii.a2b_ascii85(b'z'), b'\x00' * 4) + self.assertEqual( + binascii.a2b_ascii85(b'z', canonical=True), b'\x00' * 4) + with self.assertRaises(binascii.Error): + binascii.a2b_ascii85(b'!!!!!', canonical=True) + # Multiple groups: z + !!!!! should fail + with self.assertRaises(binascii.Error): + binascii.a2b_ascii85(b'z!!!!!', canonical=True) + # Multiple z groups are fine + self.assertEqual( + binascii.a2b_ascii85(b'zz', canonical=True), b'\x00' * 8) + + # Empty input is valid + self.assertEqual(binascii.a2b_ascii85(b'', canonical=True), b'') + + # Adobe-wrapped with canonical + self.assertEqual( + binascii.a2b_ascii85(b'<~@:E_W~>', canonical=True, adobe=True), + b'abcd') + + @hypothesis.given(payload=hypothesis.strategies.binary()) + @hypothesis.example(b'') + @hypothesis.example(b'\x00') + @hypothesis.example(b'\x00\x00\x00\x00') # triggers z abbreviation + @hypothesis.example(b'\xff\xff') + @hypothesis.example(b'abc') + def test_ascii85_canonical_roundtrip(self, payload): + encoded = binascii.b2a_ascii85(payload) + decoded = binascii.a2b_ascii85(encoded, canonical=True) + self.assertEqual(decoded, payload) + + @hypothesis.given(payload=hypothesis.strategies.binary(min_size=1, max_size=3)) + @hypothesis.example(b'\x00') + @hypothesis.example(b'\xff') + @hypothesis.example(b'ab\x00') + def test_ascii85_canonical_unique(self, payload): + hypothesis.assume(len(payload) % 4 != 0) + canonical_enc = binascii.b2a_ascii85(payload) + # Ascii85 alphabet: '!' (33) through 'u' (117) + accepted = [] + for digit in range(33, 118): + candidate = canonical_enc[:-1] + bytes([digit]) + try: + result = binascii.a2b_ascii85(candidate, canonical=True) + if result == payload: + accepted.append(candidate) + except binascii.Error: + pass + self.assertEqual(accepted, [canonical_enc]) + def test_base32_valid(self): # Test base32 with valid data lines = [] @@ -935,6 +1145,55 @@ def assertInvalidLength(data, *args, length=None, **kwargs): assertInvalidLength(b" ABC=====", ignorechars=b' ') assertInvalidLength(b" ABCDEF==", ignorechars=b' ') + def test_base32_canonical(self): + # https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 + # Decoders MAY reject encoded data if the pad bits are not zero. + + # Without canonical=True, non-zero padding bits are accepted + self.assertEqual(binascii.a2b_base32(self.type2test(b'AB======')), + b'\x00') + + # 2 data chars + "======": last char has 2 padding bits + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AB======'), canonical=True) + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AD======'), canonical=True) + + # 4 data chars + "====": last char has 4 padding bits + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AAAB===='), canonical=True) + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AAAP===='), canonical=True) + + # 5 data chars + "===": last char has 1 padding bit + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AAAAB==='), canonical=True) + + # 7 data chars + "=": last char has 3 padding bits + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AAAAAAB='), canonical=True) + with self.assertRaises(binascii.Error): + binascii.a2b_base32(self.type2test(b'AAAAAAH='), canonical=True) + + # Verify that zero padding bits are accepted + binascii.a2b_base32(self.type2test(b'AA======'), canonical=True) + binascii.a2b_base32(self.type2test(b'AAAA===='), canonical=True) + binascii.a2b_base32(self.type2test(b'AAAAA==='), canonical=True) + binascii.a2b_base32(self.type2test(b'AAAAAAA='), canonical=True) + + # Full octet with no padding -- always valid + binascii.a2b_base32(self.type2test(b'AAAAAAAA'), canonical=True) + + @hypothesis.given(payload=hypothesis.strategies.binary()) + @hypothesis.example(b'') + @hypothesis.example(b'\x00') + @hypothesis.example(b'\xff\xff') + @hypothesis.example(b'abc') + def test_base32_canonical_roundtrip(self, payload): + encoded = binascii.b2a_base32(payload) + decoded = binascii.a2b_base32(encoded, canonical=True) + self.assertEqual(decoded, payload) + def test_a2b_base32_padded(self): a2b_base32 = binascii.a2b_base32 t = self.type2test diff --git a/Modules/binascii.c b/Modules/binascii.c index 9193137877aef9..da91280a355440 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -244,6 +244,9 @@ static const _Py_ALIGNED_DEF(64, unsigned char) table_b2a_base85_a85[] = #define BASE85_A85_Z 0x00000000 #define BASE85_A85_Y 0x20202020 +/* 85**0 through 85**4, used for canonical encoding checks. */ +static const uint32_t pow85[] = {1, 85, 7225, 614125, 52200625}; + static const _Py_ALIGNED_DEF(64, unsigned char) table_a2b_base32[] = { -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, @@ -729,6 +732,8 @@ binascii.a2b_base64 ignorechars: Py_buffer = NULL A byte string containing characters to ignore from the input when strict_mode is true. + canonical: bool = False + When set to true, reject non-zero padding bits per RFC 4648 section 3.5. Decode a line of base64 data. [clinic start generated code]*/ @@ -736,8 +741,8 @@ Decode a line of base64 data. static PyObject * binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, int padded, PyBytesObject *alphabet, - Py_buffer *ignorechars) -/*[clinic end generated code: output=525d840a299ff132 input=74a53dd3b23474b3]*/ + Py_buffer *ignorechars, int canonical) +/*[clinic end generated code: output=77c46dcbf4239527 input=c99096d071deeec8]*/ { assert(data->len >= 0); @@ -909,6 +914,16 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, goto error_end; } + /* https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 + * Decoders MAY reject non-zero padding bits. */ + if (canonical && leftchar != 0) { + state = get_binascii_state(module); + if (state) { + PyErr_SetString(state->Error, "Non-zero padding bits"); + } + goto error_end; + } + Py_XDECREF(table_obj); return PyBytesWriter_FinishWithPointer(writer, bin_data); @@ -1037,14 +1052,16 @@ binascii.a2b_ascii85 Expect data to be wrapped in '<~' and '~>' as in Adobe Ascii85. ignorechars: Py_buffer = b'' A byte string containing characters to ignore from the input. + canonical: bool = False + When set to true, reject non-canonical encodings. Decode Ascii85 data. [clinic start generated code]*/ static PyObject * binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, - int adobe, Py_buffer *ignorechars) -/*[clinic end generated code: output=599aa3e41095a651 input=f39abd11eab4bac0]*/ + int adobe, Py_buffer *ignorechars, int canonical) +/*[clinic end generated code: output=09b35f1eac531357 input=dd050604ed30199e]*/ { const unsigned char *ascii_data = data->buf; Py_ssize_t ascii_len = data->len; @@ -1107,6 +1124,7 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, uint32_t leftchar = 0; int group_pos = 0; + int from_z = 0; /* true when current group came from 'z' shorthand */ for (; ascii_len > 0 || group_pos != 0; ascii_len--, ascii_data++) { /* Shift (in radix-85) data or padding into our buffer. */ unsigned char this_digit; @@ -1142,6 +1160,7 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, goto error; } leftchar = this_ch == 'y' ? BASE85_A85_Y : BASE85_A85_Z; + from_z = (this_ch == 'z'); group_pos = 5; } else if (!ignorechar(this_ch, ignorechars, ignorecache)) { @@ -1159,11 +1178,62 @@ binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, } /* Write current chunk. */ - Py_ssize_t chunk_len = ascii_len < 1 ? 3 + ascii_len : 4; - for (Py_ssize_t i = 0; i < chunk_len; i++) { + int chunk_len = ascii_len < 1 ? 3 + (int)ascii_len : 4; + + /* A final partial 5-tuple containing only one character is an + * encoding violation per the PLRM spec; reject unconditionally. */ + if (chunk_len == 0) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Incomplete Ascii85 group"); + } + goto error; + } + + for (int i = 0; i < chunk_len; i++) { *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; } + if (canonical) { + /* The PLRM spec requires all-zero groups to use the 'z' + * abbreviation. Reject '!!!!!' (five zero digits). */ + if (chunk_len == 4 && leftchar == 0 && !from_z) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Non-canonical encoding, " + "use 'z' for all-zero groups"); + } + goto error; + } + /* Reject non-canonical partial groups. + * + * A partial group of N chars (2-4) encodes N-1 bytes. + * The decoder pads missing chars with digit 84 (the max). + * The encoder produces the unique N chars for those bytes + * by zero-padding the bytes to a uint32 and taking the + * leading N base-85 digits. Two encodings are equivalent + * iff they yield the same quotient when divided by + * 85**(5-N). */ + if (chunk_len < 4) { + int n_pad = 4 - chunk_len; + uint32_t canonical_top = + (leftchar >> (n_pad * 8)) << (n_pad * 8); + if (canonical_top / pow85[n_pad] + != leftchar / pow85[n_pad]) + { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Non-zero padding bits"); + } + goto error; + } + } + } + + from_z = 0; group_pos = 0; leftchar = 0; } @@ -1315,14 +1385,17 @@ binascii.a2b_base85 alphabet: PyBytesObject(c_default="NULL") = BASE85_ALPHABET ignorechars: Py_buffer = b'' A byte string containing characters to ignore from the input. + canonical: bool = False + When set to true, reject non-canonical encodings. Decode a line of Base85 data. [clinic start generated code]*/ static PyObject * binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, - PyBytesObject *alphabet, Py_buffer *ignorechars) -/*[clinic end generated code: output=6a8d6eae798818d7 input=04d72a319712bdf3]*/ + PyBytesObject *alphabet, Py_buffer *ignorechars, + int canonical) +/*[clinic end generated code: output=90dfef0c6b51e5f3 input=2819dc8aeffee5a2]*/ { const unsigned char *ascii_data = data->buf; Py_ssize_t ascii_len = data->len; @@ -1403,11 +1476,41 @@ binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, } /* Write current chunk. */ - Py_ssize_t chunk_len = ascii_len < 1 ? 3 + ascii_len : 4; - for (Py_ssize_t i = 0; i < chunk_len; i++) { + int chunk_len = ascii_len < 1 ? 3 + (int)ascii_len : 4; + + /* A 1-char final group is an encoding violation (no conforming + * encoder produces it); reject unconditionally. */ + if (chunk_len == 0) { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Incomplete Base85 group"); + } + goto error; + } + + for (int i = 0; i < chunk_len; i++) { *bin_data++ = (leftchar >> (24 - 8 * i)) & 0xff; } + /* Reject non-canonical encodings in the final group. + * See the comment in a2b_ascii85 for the full explanation. */ + if (canonical && chunk_len < 4) { + int n_pad = 4 - chunk_len; + uint32_t canonical_top = + (leftchar >> (n_pad * 8)) << (n_pad * 8); + if (canonical_top / pow85[n_pad] + != leftchar / pow85[n_pad]) + { + state = get_binascii_state(module); + if (state != NULL) { + PyErr_SetString(state->Error, + "Non-zero padding bits"); + } + goto error; + } + } + group_pos = 0; leftchar = 0; } @@ -1535,14 +1638,17 @@ binascii.a2b_base32 alphabet: PyBytesObject(c_default="NULL") = BASE32_ALPHABET ignorechars: Py_buffer = b'' A byte string containing characters to ignore from the input. + canonical: bool = False + When set to true, reject non-zero padding bits per RFC 4648 section 3.5. Decode a line of base32 data. [clinic start generated code]*/ static PyObject * binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, int padded, - PyBytesObject *alphabet, Py_buffer *ignorechars) -/*[clinic end generated code: output=7dbbaa816d956b1c input=07a3721acdf9b688]*/ + PyBytesObject *alphabet, Py_buffer *ignorechars, + int canonical) +/*[clinic end generated code: output=bc70f2bb6001fb55 input=5bfe6d1ea2f30e3b]*/ { const unsigned char *ascii_data = data->buf; Py_ssize_t ascii_len = data->len; @@ -1723,6 +1829,16 @@ binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, int padded, goto error; } + /* https://datatracker.ietf.org/doc/html/rfc4648.html#section-3.5 + * Decoders MAY reject non-zero padding bits. */ + if (canonical && leftchar != 0) { + state = get_binascii_state(module); + if (state) { + PyErr_SetString(state->Error, "Non-zero padding bits"); + } + goto error; + } + Py_XDECREF(table_obj); return PyBytesWriter_FinishWithPointer(writer, bin_data); diff --git a/Modules/clinic/binascii.c.h b/Modules/clinic/binascii.c.h index 0a2d33c428d10a..ed695758ef998c 100644 --- a/Modules/clinic/binascii.c.h +++ b/Modules/clinic/binascii.c.h @@ -119,7 +119,7 @@ binascii_b2a_uu(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj PyDoc_STRVAR(binascii_a2b_base64__doc__, "a2b_base64($module, data, /, *, strict_mode=,\n" " padded=True, alphabet=BASE64_ALPHABET,\n" -" ignorechars=)\n" +" ignorechars=, canonical=False)\n" "--\n" "\n" "Decode a line of base64 data.\n" @@ -132,7 +132,9 @@ PyDoc_STRVAR(binascii_a2b_base64__doc__, " When set to false, padding in input is not required.\n" " ignorechars\n" " A byte string containing characters to ignore from the input when\n" -" strict_mode is true."); +" strict_mode is true.\n" +" canonical\n" +" When set to true, reject non-zero padding bits per RFC 4648 section 3.5."); #define BINASCII_A2B_BASE64_METHODDEF \ {"a2b_base64", _PyCFunction_CAST(binascii_a2b_base64), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_base64__doc__}, @@ -140,7 +142,7 @@ PyDoc_STRVAR(binascii_a2b_base64__doc__, static PyObject * binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode, int padded, PyBytesObject *alphabet, - Py_buffer *ignorechars); + Py_buffer *ignorechars, int canonical); static PyObject * binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -148,7 +150,7 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 4 + #define NUM_KEYWORDS 5 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -157,7 +159,7 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(strict_mode), &_Py_ID(padded), &_Py_ID(alphabet), &_Py_ID(ignorechars), }, + .ob_item = { &_Py_ID(strict_mode), &_Py_ID(padded), &_Py_ID(alphabet), &_Py_ID(ignorechars), &_Py_ID(canonical), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -166,20 +168,21 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "strict_mode", "padded", "alphabet", "ignorechars", NULL}; + static const char * const _keywords[] = {"", "strict_mode", "padded", "alphabet", "ignorechars", "canonical", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "a2b_base64", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[5]; + PyObject *argsbuf[6]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; int strict_mode = -1; int padded = 1; PyBytesObject *alphabet = NULL; Py_buffer ignorechars = {NULL, NULL}; + int canonical = 0; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -220,11 +223,20 @@ binascii_a2b_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P goto skip_optional_kwonly; } } - if (PyObject_GetBuffer(args[4], &ignorechars, PyBUF_SIMPLE) != 0) { + if (args[4]) { + if (PyObject_GetBuffer(args[4], &ignorechars, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + canonical = PyObject_IsTrue(args[5]); + if (canonical < 0) { goto exit; } skip_optional_kwonly: - return_value = binascii_a2b_base64_impl(module, &data, strict_mode, padded, alphabet, &ignorechars); + return_value = binascii_a2b_base64_impl(module, &data, strict_mode, padded, alphabet, &ignorechars, canonical); exit: /* Cleanup for data */ @@ -352,7 +364,7 @@ binascii_b2a_base64(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyDoc_STRVAR(binascii_a2b_ascii85__doc__, "a2b_ascii85($module, data, /, *, foldspaces=False, adobe=False,\n" -" ignorechars=b\'\')\n" +" ignorechars=b\'\', canonical=False)\n" "--\n" "\n" "Decode Ascii85 data.\n" @@ -362,14 +374,16 @@ PyDoc_STRVAR(binascii_a2b_ascii85__doc__, " adobe\n" " Expect data to be wrapped in \'<~\' and \'~>\' as in Adobe Ascii85.\n" " ignorechars\n" -" A byte string containing characters to ignore from the input."); +" A byte string containing characters to ignore from the input.\n" +" canonical\n" +" When set to true, reject non-canonical encodings."); #define BINASCII_A2B_ASCII85_METHODDEF \ {"a2b_ascii85", _PyCFunction_CAST(binascii_a2b_ascii85), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_ascii85__doc__}, static PyObject * binascii_a2b_ascii85_impl(PyObject *module, Py_buffer *data, int foldspaces, - int adobe, Py_buffer *ignorechars); + int adobe, Py_buffer *ignorechars, int canonical); static PyObject * binascii_a2b_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -377,7 +391,7 @@ binascii_a2b_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 3 + #define NUM_KEYWORDS 4 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -386,7 +400,7 @@ binascii_a2b_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(foldspaces), &_Py_ID(adobe), &_Py_ID(ignorechars), }, + .ob_item = { &_Py_ID(foldspaces), &_Py_ID(adobe), &_Py_ID(ignorechars), &_Py_ID(canonical), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -395,19 +409,20 @@ binascii_a2b_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "foldspaces", "adobe", "ignorechars", NULL}; + static const char * const _keywords[] = {"", "foldspaces", "adobe", "ignorechars", "canonical", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "a2b_ascii85", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[4]; + PyObject *argsbuf[5]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; int foldspaces = 0; int adobe = 0; Py_buffer ignorechars = {.buf = "", .obj = NULL, .len = 0}; + int canonical = 0; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -438,11 +453,20 @@ binascii_a2b_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, goto skip_optional_kwonly; } } - if (PyObject_GetBuffer(args[3], &ignorechars, PyBUF_SIMPLE) != 0) { + if (args[3]) { + if (PyObject_GetBuffer(args[3], &ignorechars, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + canonical = PyObject_IsTrue(args[4]); + if (canonical < 0) { goto exit; } skip_optional_kwonly: - return_value = binascii_a2b_ascii85_impl(module, &data, foldspaces, adobe, &ignorechars); + return_value = binascii_a2b_ascii85_impl(module, &data, foldspaces, adobe, &ignorechars, canonical); exit: /* Cleanup for data */ @@ -573,20 +597,23 @@ binascii_b2a_ascii85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyDoc_STRVAR(binascii_a2b_base85__doc__, "a2b_base85($module, data, /, *, alphabet=BASE85_ALPHABET,\n" -" ignorechars=b\'\')\n" +" ignorechars=b\'\', canonical=False)\n" "--\n" "\n" "Decode a line of Base85 data.\n" "\n" " ignorechars\n" -" A byte string containing characters to ignore from the input."); +" A byte string containing characters to ignore from the input.\n" +" canonical\n" +" When set to true, reject non-canonical encodings."); #define BINASCII_A2B_BASE85_METHODDEF \ {"a2b_base85", _PyCFunction_CAST(binascii_a2b_base85), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_base85__doc__}, static PyObject * binascii_a2b_base85_impl(PyObject *module, Py_buffer *data, - PyBytesObject *alphabet, Py_buffer *ignorechars); + PyBytesObject *alphabet, Py_buffer *ignorechars, + int canonical); static PyObject * binascii_a2b_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -594,7 +621,7 @@ binascii_a2b_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 2 + #define NUM_KEYWORDS 3 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -603,7 +630,7 @@ binascii_a2b_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(alphabet), &_Py_ID(ignorechars), }, + .ob_item = { &_Py_ID(alphabet), &_Py_ID(ignorechars), &_Py_ID(canonical), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -612,18 +639,19 @@ binascii_a2b_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "alphabet", "ignorechars", NULL}; + static const char * const _keywords[] = {"", "alphabet", "ignorechars", "canonical", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "a2b_base85", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[3]; + PyObject *argsbuf[4]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; PyBytesObject *alphabet = NULL; Py_buffer ignorechars = {.buf = "", .obj = NULL, .len = 0}; + int canonical = 0; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -646,11 +674,20 @@ binascii_a2b_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P goto skip_optional_kwonly; } } - if (PyObject_GetBuffer(args[2], &ignorechars, PyBUF_SIMPLE) != 0) { + if (args[2]) { + if (PyObject_GetBuffer(args[2], &ignorechars, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + canonical = PyObject_IsTrue(args[3]); + if (canonical < 0) { goto exit; } skip_optional_kwonly: - return_value = binascii_a2b_base85_impl(module, &data, alphabet, &ignorechars); + return_value = binascii_a2b_base85_impl(module, &data, alphabet, &ignorechars, canonical); exit: /* Cleanup for data */ @@ -768,7 +805,7 @@ binascii_b2a_base85(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyDoc_STRVAR(binascii_a2b_base32__doc__, "a2b_base32($module, data, /, *, padded=True, alphabet=BASE32_ALPHABET,\n" -" ignorechars=b\'\')\n" +" ignorechars=b\'\', canonical=False)\n" "--\n" "\n" "Decode a line of base32 data.\n" @@ -776,14 +813,17 @@ PyDoc_STRVAR(binascii_a2b_base32__doc__, " padded\n" " When set to false, padding in input is not required.\n" " ignorechars\n" -" A byte string containing characters to ignore from the input."); +" A byte string containing characters to ignore from the input.\n" +" canonical\n" +" When set to true, reject non-zero padding bits per RFC 4648 section 3.5."); #define BINASCII_A2B_BASE32_METHODDEF \ {"a2b_base32", _PyCFunction_CAST(binascii_a2b_base32), METH_FASTCALL|METH_KEYWORDS, binascii_a2b_base32__doc__}, static PyObject * binascii_a2b_base32_impl(PyObject *module, Py_buffer *data, int padded, - PyBytesObject *alphabet, Py_buffer *ignorechars); + PyBytesObject *alphabet, Py_buffer *ignorechars, + int canonical); static PyObject * binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -791,7 +831,7 @@ binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 3 + #define NUM_KEYWORDS 4 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -800,7 +840,7 @@ binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(padded), &_Py_ID(alphabet), &_Py_ID(ignorechars), }, + .ob_item = { &_Py_ID(padded), &_Py_ID(alphabet), &_Py_ID(ignorechars), &_Py_ID(canonical), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -809,19 +849,20 @@ binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"", "padded", "alphabet", "ignorechars", NULL}; + static const char * const _keywords[] = {"", "padded", "alphabet", "ignorechars", "canonical", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "a2b_base32", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[4]; + PyObject *argsbuf[5]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; int padded = 1; PyBytesObject *alphabet = NULL; Py_buffer ignorechars = {.buf = "", .obj = NULL, .len = 0}; + int canonical = 0; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -853,11 +894,20 @@ binascii_a2b_base32(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P goto skip_optional_kwonly; } } - if (PyObject_GetBuffer(args[3], &ignorechars, PyBUF_SIMPLE) != 0) { + if (args[3]) { + if (PyObject_GetBuffer(args[3], &ignorechars, PyBUF_SIMPLE) != 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + canonical = PyObject_IsTrue(args[4]); + if (canonical < 0) { goto exit; } skip_optional_kwonly: - return_value = binascii_a2b_base32_impl(module, &data, padded, alphabet, &ignorechars); + return_value = binascii_a2b_base32_impl(module, &data, padded, alphabet, &ignorechars, canonical); exit: /* Cleanup for data */ @@ -1634,4 +1684,4 @@ binascii_b2a_qp(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj return return_value; } -/*[clinic end generated code: output=2acab1ceb0058b1a input=a9049054013a1b77]*/ +/*[clinic end generated code: output=b41544f39b0ef681 input=a9049054013a1b77]*/