Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 27 additions & 2 deletions tornado/curl_httpclient.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,31 @@

curl_log = logging.getLogger("tornado.curl_httpclient")


def _curl_debug_to_unicode(debug_msg: str | bytes) -> str:
"""Convert a pycurl DEBUGFUNCTION debug_msg argument to str.

pycurl passes the raw bytes curl produced (request/response framing,
TLS metadata, header dumps, etc.). Those bytes are not guaranteed to be
valid UTF-8, but native_str (to_unicode) decodes as UTF-8 and
raises UnicodeDecodeError on invalid sequences -- which previously
crashed the request entirely (see issue #3183, where a proxy that
echoed back binary bytes from the upstream response triggered the
crash).

Latin-1 round-trips every possible byte 0x00-0xFF to the matching
U+0000-U+00FF code point without raising, so a non-UTF-8 debug
message is preserved as its byte sequence (rendered as the latin-1
characters) instead of killing the request. Valid UTF-8 is decoded
losslessly; non-UTF-8 bytes are preserved as the latin-1
character rather than swallowed by errors="replace", which would
be silent and harder to debug.
"""
if isinstance(debug_msg, bytes):
return native_str(debug_msg.decode("latin1"))
return native_str(debug_msg)


CR_OR_LF_RE = re.compile(b"\r|\n")


Expand Down Expand Up @@ -551,10 +576,10 @@ def _curl_header_callback(
def _curl_debug(self, debug_type: int, debug_msg: str) -> None:
debug_types = ("I", "<", ">", "<", ">")
if debug_type == 0:
debug_msg = native_str(debug_msg)
debug_msg = _curl_debug_to_unicode(debug_msg)
curl_log.debug("%s", debug_msg.strip())
elif debug_type in (1, 2):
debug_msg = native_str(debug_msg)
debug_msg = _curl_debug_to_unicode(debug_msg)
for line in debug_msg.splitlines():
curl_log.debug("%s %s", debug_types[debug_type], line)
elif debug_type == 4:
Expand Down
20 changes: 20 additions & 0 deletions tornado/test/curl_httpclient_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,3 +237,23 @@ def test_reuse_certs(self):
)
response = self.fetch(self.get_url("/client_cert"))
self.assertEqual(response.body, b"no client cert")


@unittest.skipIf(pycurl is None, "pycurl module not present")
class CurlDebugTest(unittest.TestCase):
    """Regression tests for ``CurlAsyncHTTPClient._curl_debug``.

    Issue #3183: when a proxy echoes binary bytes from the upstream
    response, the debug message handed to the callback may not be valid
    UTF-8. Decoding it strictly as UTF-8 raised ``UnicodeDecodeError``
    and took the whole request down with it.
    """

    def test_curl_debug_handles_non_utf8_bytes(self):
        # Each entry is (debug_type, raw message). Type 0 exercises the
        # strip-and-log branch; types 1 and 2 exercise the branch that
        # logs line by line with a "<" / ">" prefix. Every payload embeds
        # 0xFF, which is never valid in UTF-8. The test passes as long as
        # none of the calls raises.
        http_client = CurlAsyncHTTPClient()
        samples = (
            (0, b"hello \xff world"),
            (1, b"header \xff value"),
            (2, b"data \xff value"),
        )
        for kind, payload in samples:
            http_client._curl_debug(kind, payload)
Loading