diff --git a/tornado/http1connection.py b/tornado/http1connection.py index 17ada11f6..f163bbaa1 100644 --- a/tornado/http1connection.py +++ b/tornado/http1connection.py @@ -657,12 +657,35 @@ async def _read_fixed_body( await ret async def _read_chunked_body(self, delegate: httputil.HTTPMessageDelegate) -> None: - # TODO: "chunk extensions" http://tools.ietf.org/html/rfc2616#section-3.6.1 + # Per RFC 7230 section 4.1.1, a chunk is `chunk-size [ chunk-ext ] CRLF + # chunk-data CRLF`. The chunk-size is hex digits; the chunk-ext is + # optional and starts with ';' followed by any number of ';'-separated + # `name[=value]` pairs. We only use the chunk-size, so the extension + # is dropped after a sanity check. Both with and without an extension + # share the same trailing CRLF, so we still strip the last two bytes + # of the read line. total_size = 0 while True: chunk_len_str = await self.stream.read_until(b"\r\n", max_bytes=64) + # Strip the trailing CRLF, then if a chunk extension is present + # ('5;ext=val' or '5;ext' or '5;ext=val;foo=bar'), take only the + # chunk-size prefix. Reject the line if it is empty or starts with + # ';' (no size, only an extension) or contains whitespace inside + # the size or extension portion, which would mask a smuggling + # attempt or a malformed client. + chunk_header = native_str(chunk_len_str[:-2]) + # Reject an empty chunk header or one that begins with ';' (no + # chunk-size, only an extension). A size with a trailing + # extension (`5;ext=val`) is valid; we just take the size prefix. + if not chunk_header or chunk_header.startswith(";"): + raise httputil.HTTPInputError("invalid chunk size") + chunk_size_str = chunk_header.partition(";")[0] + # The size must be all hex digits; an empty partition result + # (chunk header was just ';ext=val') is invalid. + if not chunk_size_str: + raise httputil.HTTPInputError("invalid chunk size") try: - chunk_len = parse_hex_int(native_str(chunk_len_str[:-2])) + chunk_len = parse_hex_int(chunk_size_str) except ValueError: raise httputil.HTTPInputError("invalid chunk size") if chunk_len == 0: diff --git a/tornado/test/httpserver_test.py b/tornado/test/httpserver_test.py index 02d4f4c09..b47fcd12b 100644 --- a/tornado/test/httpserver_test.py +++ b/tornado/test/httpserver_test.py @@ -485,6 +485,52 @@ def test_chunked_request_body(self): bar 0 +""".replace(b"\n", b"\r\n")) + start_line, headers, response = self.io_loop.run_sync( + lambda: read_stream_body(self.stream) + ) + self.assertEqual(json_decode(response), {"foo": ["bar"]}) + + def test_chunked_request_body_bare_extension_rejected(self): + # A line that is only an extension (no size prefix) is still malformed + # and must raise HTTPInputError. We send `;ext=val` as the chunk header + # and expect a 400 with the standard "invalid chunk size" log message. + self.stream.write(b"""\ +POST /echo HTTP/1.1 +Host: 127.0.0.1 +Transfer-Encoding: chunked + +;ext=val +foo= +3 +bar +0 + +""".replace(b"\n", b"\r\n")) + with ExpectLog(gen_log, ".*invalid chunk size", level=logging.INFO): + start_line, headers, response = self.io_loop.run_sync( + lambda: read_stream_body(self.stream) + ) + self.assertEqual(400, start_line.code) + + def test_chunked_request_body_with_extensions(self): + # Per RFC 7230 section 4.1.1 a chunk may carry zero or more chunk + # extensions after the size: `chunk-size [ ";" chunk-ext ] CRLF`. + # Tornado previously raised HTTPInputError on `5;ext=val` because the + # size parser saw the full header. Accept the extension and parse + # only the size prefix; a bare extension (no size) is still rejected. + self.stream.write(b"""\ +POST /echo HTTP/1.1 +Host: 127.0.0.1 +Transfer-Encoding: chunked +Content-Type: application/x-www-form-urlencoded + +4;ext=val +foo= +3 +bar +0 + """.replace(b"\n", b"\r\n")) start_line, headers, response = self.io_loop.run_sync( lambda: read_stream_body(self.stream)