From 86edabadb7edf915ad0ef0dd752d1e159ec153b0 Mon Sep 17 00:00:00 2001 From: Harshith Reddy Date: Thu, 9 Apr 2026 08:28:12 -0500 Subject: [PATCH 1/7] Optimize websocket extension regex backtracking behavior Use an atomic group for Python 3.11+ in websocket extension parsing and add focused tests to validate behavior and guard against worst-case backtracking regressions. --- aiohttp/_websocket/helpers.py | 9 ++++ tests/test_ws_ext_helpers.py | 82 +++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 tests/test_ws_ext_helpers.py diff --git a/aiohttp/_websocket/helpers.py b/aiohttp/_websocket/helpers.py index f9a44cdd39b..692cdf614c6 100644 --- a/aiohttp/_websocket/helpers.py +++ b/aiohttp/_websocket/helpers.py @@ -2,6 +2,7 @@ import functools import re +import sys from re import Pattern from struct import Struct from typing import TYPE_CHECKING, Final @@ -64,12 +65,20 @@ def _websocket_mask_python(mask: bytes, data: bytearray) -> None: websocket_mask = _websocket_mask_python +# On 3.11+ use an atomic outer group to avoid backtracking over already-matched +# iterations when the tail of the string doesn't match. 
_WS_EXT_RE: Final[Pattern[str]] = re.compile( r"^(?:;\s*(?:" r"(server_no_context_takeover)|" r"(client_no_context_takeover)|" r"(server_max_window_bits(?:=(\d+))?)|" r"(client_max_window_bits(?:=(\d+))?)))*$" + if sys.version_info < (3, 11) + else r"^(?>;\s*(?:" + r"(server_no_context_takeover)|" + r"(client_no_context_takeover)|" + r"(server_max_window_bits(?:=(\d+))?)|" + r"(client_max_window_bits(?:=(\d+))?)))*$" ) _WS_EXT_RE_SPLIT: Final[Pattern[str]] = re.compile(r"permessage-deflate([^,]+)?") diff --git a/tests/test_ws_ext_helpers.py b/tests/test_ws_ext_helpers.py new file mode 100644 index 00000000000..67bc7e82d34 --- /dev/null +++ b/tests/test_ws_ext_helpers.py @@ -0,0 +1,82 @@ +import time + +import pytest + +from aiohttp._websocket.helpers import ws_ext_parse +from aiohttp.http_websocket import WSHandshakeError + + +class TestWsExtParse: + def test_empty(self) -> None: + assert ws_ext_parse(None) == (0, False) + assert ws_ext_parse("") == (0, False) + + def test_permessage_deflate_only(self) -> None: + compress, notakeover = ws_ext_parse("permessage-deflate") + assert compress == 15 + assert notakeover is False + + def test_server_no_context_takeover(self) -> None: + compress, notakeover = ws_ext_parse( + "permessage-deflate; server_no_context_takeover", isserver=True + ) + assert compress == 15 + assert notakeover is True + + def test_client_no_context_takeover(self) -> None: + compress, notakeover = ws_ext_parse( + "permessage-deflate; client_no_context_takeover", isserver=False + ) + assert compress == 15 + assert notakeover is True + + def test_server_max_window_bits(self) -> None: + compress, notakeover = ws_ext_parse( + "permessage-deflate; server_max_window_bits=12", isserver=True + ) + assert compress == 12 + assert notakeover is False + + def test_client_max_window_bits(self) -> None: + compress, notakeover = ws_ext_parse( + "permessage-deflate; client_max_window_bits=10", isserver=False + ) + assert compress == 10 + assert notakeover is False + 
+ def test_window_bits_out_of_range_server(self) -> None: + # out-of-range wbits on server side → skip, return 0 + compress, _ = ws_ext_parse( + "permessage-deflate; server_max_window_bits=8", isserver=True + ) + assert compress == 0 + + def test_window_bits_out_of_range_client(self) -> None: + with pytest.raises(WSHandshakeError): + ws_ext_parse( + "permessage-deflate; client_max_window_bits=8", isserver=False + ) + + def test_invalid_extension_client_raises(self) -> None: + with pytest.raises(WSHandshakeError): + ws_ext_parse("permessage-deflate; unknown_param", isserver=False) + + def test_no_match_server_returns_zero(self) -> None: + compress, notakeover = ws_ext_parse( + "permessage-deflate; unknown_param", isserver=True + ) + assert compress == 0 + assert notakeover is False + + def test_backtracking_performance(self) -> None: + # Crafted input: many valid tokens followed by an invalid suffix. + # Without the atomic group fix this causes exponential backtracking. + evil = "permessage-deflate" + ("; server_no_context_takeover" * 30) + ";INVALID" + start = time.perf_counter() + try: + ws_ext_parse(evil, isserver=True) + except WSHandshakeError: + pass + elapsed = time.perf_counter() - start + # Should complete in well under a second on any reasonable hardware. + assert elapsed < 1.0, f"possible backtracking regression: took {elapsed:.3f}s" From ed7bc4852f96de55ab5c740c374a69d2faf94a3e Mon Sep 17 00:00:00 2001 From: Harshith Reddy Date: Thu, 9 Apr 2026 08:33:42 -0500 Subject: [PATCH 2/7] Add contributor entry and changelog fragment Include contributor attribution and a misc changelog note for the websocket extension regex optimization change. 
--- CHANGES/XXXX.misc.rst | 3 +++ CONTRIBUTORS.txt | 1 + 2 files changed, 4 insertions(+) create mode 100644 CHANGES/XXXX.misc.rst diff --git a/CHANGES/XXXX.misc.rst b/CHANGES/XXXX.misc.rst new file mode 100644 index 00000000000..9575ffb225b --- /dev/null +++ b/CHANGES/XXXX.misc.rst @@ -0,0 +1,3 @@ +Used an atomic group in ``_WS_EXT_RE`` on Python 3.11+ to prevent +unnecessary backtracking when parsing ``Sec-WebSocket-Extensions`` headers +-- by :user:`HarshithReddy01`. diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index e61c5e8e328..2c0e7827cc3 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -159,6 +159,7 @@ Gustavo Carneiro Günther Jena Hans Adema Harmon Y. +Harshith Reddy Harry Liu Hiroshi Ogawa Hrishikesh Paranjape From 99a58485b943dbe61a82d4ddc256512c47c1d02f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 9 Apr 2026 13:40:39 +0000 Subject: [PATCH 3/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- CONTRIBUTORS.txt | 2 +- tests/test_ws_ext_helpers.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 2c0e7827cc3..4e757380d31 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -159,8 +159,8 @@ Gustavo Carneiro Günther Jena Hans Adema Harmon Y. 
-Harshith Reddy Harry Liu +Harshith Reddy Hiroshi Ogawa Hrishikesh Paranjape Hu Bo diff --git a/tests/test_ws_ext_helpers.py b/tests/test_ws_ext_helpers.py index 67bc7e82d34..e63574d8054 100644 --- a/tests/test_ws_ext_helpers.py +++ b/tests/test_ws_ext_helpers.py @@ -53,9 +53,7 @@ def test_window_bits_out_of_range_server(self) -> None: def test_window_bits_out_of_range_client(self) -> None: with pytest.raises(WSHandshakeError): - ws_ext_parse( - "permessage-deflate; client_max_window_bits=8", isserver=False - ) + ws_ext_parse("permessage-deflate; client_max_window_bits=8", isserver=False) def test_invalid_extension_client_raises(self) -> None: with pytest.raises(WSHandshakeError): From 770f7af86fac6974a57546e6a06339b46553fc3f Mon Sep 17 00:00:00 2001 From: Harshith Reddy Date: Thu, 9 Apr 2026 15:02:24 -0500 Subject: [PATCH 4/7] Address review: parametrize tests, rename CHANGES fragment to PR number --- CHANGES/{XXXX.misc.rst => 12346.misc.rst} | 0 tests/test_ws_ext_helpers.py | 116 ++++++++-------------- 2 files changed, 44 insertions(+), 72 deletions(-) rename CHANGES/{XXXX.misc.rst => 12346.misc.rst} (100%) diff --git a/CHANGES/XXXX.misc.rst b/CHANGES/12346.misc.rst similarity index 100% rename from CHANGES/XXXX.misc.rst rename to CHANGES/12346.misc.rst diff --git a/tests/test_ws_ext_helpers.py b/tests/test_ws_ext_helpers.py index e63574d8054..8adb7126ec9 100644 --- a/tests/test_ws_ext_helpers.py +++ b/tests/test_ws_ext_helpers.py @@ -6,75 +6,47 @@ from aiohttp.http_websocket import WSHandshakeError -class TestWsExtParse: - def test_empty(self) -> None: - assert ws_ext_parse(None) == (0, False) - assert ws_ext_parse("") == (0, False) - - def test_permessage_deflate_only(self) -> None: - compress, notakeover = ws_ext_parse("permessage-deflate") - assert compress == 15 - assert notakeover is False - - def test_server_no_context_takeover(self) -> None: - compress, notakeover = ws_ext_parse( - "permessage-deflate; server_no_context_takeover", isserver=True - ) - 
assert compress == 15 - assert notakeover is True - - def test_client_no_context_takeover(self) -> None: - compress, notakeover = ws_ext_parse( - "permessage-deflate; client_no_context_takeover", isserver=False - ) - assert compress == 15 - assert notakeover is True - - def test_server_max_window_bits(self) -> None: - compress, notakeover = ws_ext_parse( - "permessage-deflate; server_max_window_bits=12", isserver=True - ) - assert compress == 12 - assert notakeover is False - - def test_client_max_window_bits(self) -> None: - compress, notakeover = ws_ext_parse( - "permessage-deflate; client_max_window_bits=10", isserver=False - ) - assert compress == 10 - assert notakeover is False - - def test_window_bits_out_of_range_server(self) -> None: - # out-of-range wbits on server side → skip, return 0 - compress, _ = ws_ext_parse( - "permessage-deflate; server_max_window_bits=8", isserver=True - ) - assert compress == 0 - - def test_window_bits_out_of_range_client(self) -> None: - with pytest.raises(WSHandshakeError): - ws_ext_parse("permessage-deflate; client_max_window_bits=8", isserver=False) - - def test_invalid_extension_client_raises(self) -> None: - with pytest.raises(WSHandshakeError): - ws_ext_parse("permessage-deflate; unknown_param", isserver=False) - - def test_no_match_server_returns_zero(self) -> None: - compress, notakeover = ws_ext_parse( - "permessage-deflate; unknown_param", isserver=True - ) - assert compress == 0 - assert notakeover is False - - def test_backtracking_performance(self) -> None: - # Crafted input: many valid tokens followed by an invalid suffix. - # Without the atomic group fix this causes exponential backtracking. - evil = "permessage-deflate" + ("; server_no_context_takeover" * 30) + ";INVALID" - start = time.perf_counter() - try: - ws_ext_parse(evil, isserver=True) - except WSHandshakeError: - pass - elapsed = time.perf_counter() - start - # Should complete in well under a second on any reasonable hardware. 
- assert elapsed < 1.0, f"possible backtracking regression: took {elapsed:.3f}s" +@pytest.mark.parametrize( + ("msg", "server", "expected"), + ( + ("permessage-deflate", False, (15, False)), + ("permessage-deflate; server_no_context_takeover", True, (15, True)), + ("permessage-deflate; client_no_context_takeover", False, (15, True)), + ("permessage-deflate; server_max_window_bits=12", True, (12, False)), + ("permessage-deflate; client_max_window_bits=10", False, (10, False)), + # out-of-range wbits on server side → skip rather than fail + ("permessage-deflate; server_max_window_bits=8", True, (0, False)), + # unknown param on server side → no match, return zero + ("permessage-deflate; unknown_param", True, (0, False)), + ), +) +def test_ws_ext_parse(msg: str, server: bool, expected: tuple[int, bool]) -> None: + assert ws_ext_parse(msg, isserver=server) == expected + + +@pytest.mark.parametrize( + ("msg", "server"), + ( + ("permessage-deflate; client_max_window_bits=8", False), + ("permessage-deflate; unknown_param", False), + ), +) +def test_ws_ext_parse_raises(msg: str, server: bool) -> None: + with pytest.raises(WSHandshakeError): + ws_ext_parse(msg, isserver=server) + + +def test_ws_ext_parse_empty() -> None: + assert ws_ext_parse(None) == (0, False) + assert ws_ext_parse("") == (0, False) + + +def test_ws_ext_parse_backtracking_performance() -> None: + # Many valid tokens followed by an invalid suffix: the final mismatch makes + # the engine backtrack through every iteration of the outer repeating group.
+ evil = "permessage-deflate" + ("; server_no_context_takeover" * 30) + ";INVALID" + start = time.perf_counter() + with pytest.raises(WSHandshakeError): + ws_ext_parse(evil, isserver=False) + elapsed = time.perf_counter() - start + assert elapsed < 1.0, f"backtracking regression: took {elapsed:.3f}s" From 4e81c234d9a60c695d2b55a1e7f5c760ed06fafe Mon Sep 17 00:00:00 2001 From: Sam Bull Date: Thu, 9 Apr 2026 21:32:27 +0100 Subject: [PATCH 5/7] Rename test_ws_ext_helpers.py to test_websocket_helpers.py --- tests/{test_ws_ext_helpers.py => test_websocket_helpers.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{test_ws_ext_helpers.py => test_websocket_helpers.py} (100%) diff --git a/tests/test_ws_ext_helpers.py b/tests/test_websocket_helpers.py similarity index 100% rename from tests/test_ws_ext_helpers.py rename to tests/test_websocket_helpers.py From 604a9e9e1f8d5098b3ae94ad5806f5749b5c71b4 Mon Sep 17 00:00:00 2001 From: Sam Bull Date: Thu, 9 Apr 2026 21:33:50 +0100 Subject: [PATCH 6/7] Update 12346.misc.rst --- CHANGES/12346.misc.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES/12346.misc.rst b/CHANGES/12346.misc.rst index 9575ffb225b..eaec57183ce 100644 --- a/CHANGES/12346.misc.rst +++ b/CHANGES/12346.misc.rst @@ -1,3 +1,3 @@ -Used an atomic group in ``_WS_EXT_RE`` on Python 3.11+ to prevent -unnecessary backtracking when parsing ``Sec-WebSocket-Extensions`` headers +Improved performance of ``_WS_EXT_RE`` regular expression on Python 3.11+ +by using atomic grouping when parsing ``Sec-WebSocket-Extensions`` headers -- by :user:`HarshithReddy01`. 
From 9347ca3851416decbdbfd4717bba4c4600eeaeaf Mon Sep 17 00:00:00 2001 From: Sam Bull Date: Thu, 9 Apr 2026 21:34:59 +0100 Subject: [PATCH 7/7] Update helpers.py --- aiohttp/_websocket/helpers.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/aiohttp/_websocket/helpers.py b/aiohttp/_websocket/helpers.py index 692cdf614c6..5080b8a1712 100644 --- a/aiohttp/_websocket/helpers.py +++ b/aiohttp/_websocket/helpers.py @@ -65,8 +65,6 @@ def _websocket_mask_python(mask: bytes, data: bytearray) -> None: websocket_mask = _websocket_mask_python -# On 3.11+ use an atomic outer group to avoid backtracking over already-matched -# iterations when the tail of the string doesn't match. _WS_EXT_RE: Final[Pattern[str]] = re.compile( r"^(?:;\s*(?:" r"(server_no_context_takeover)|"