From 86dd7d372f0d189952f5a50fec8dad0a9b633550 Mon Sep 17 00:00:00 2001 From: goingforstudying-ctrl Date: Sat, 6 Jun 2026 06:56:17 -0400 Subject: [PATCH 1/3] fix(lexer): replace string regex with linear scanner The string_re regular expression caused catastrophic backtracking on unclosed strings with many escape sequences. Replace it with a manual scanner that uses str.find to locate closing quotes and verifies they are not escaped, eliminating exponential backtracking. Fixes pallets/jinja#2109 --- src/jinja2/lexer.py | 81 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 77 insertions(+), 4 deletions(-) diff --git a/src/jinja2/lexer.py b/src/jinja2/lexer.py index e35cd471e..5c59de85e 100644 --- a/src/jinja2/lexer.py +++ b/src/jinja2/lexer.py @@ -26,9 +26,43 @@ # static regular expressions whitespace_re = re.compile(r"\s+") newline_re = re.compile(r"(\r\n|\r|\n)") -string_re = re.compile( - r"('([^'\\]*(?:\\.[^'\\]*)*)'" r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S -) + + +def _match_string(source: str, pos: int) -> str | None: + """Match a quoted string starting at *pos*. + + This replaces the ``string_re`` regex to avoid catastrophic + backtracking on unclosed strings with many escape sequences. + The algorithm uses :meth:`str.find` to locate the closing quote + and then verifies that it is not escaped. + """ + if pos >= len(source): + return None + + quote = source[pos] + + if quote not in "'\"": + return None + + i = source.find(quote, pos + 1) + + while i != -1: + # Count backslashes before this quote. + bs = 0 + j = i - 1 + + while j >= pos + 1 and source[j] == "\\": + bs += 1 + j -= 1 + + if bs % 2 == 0: + # Even number of backslashes -> quote is not escaped. + return source[pos : i + 1] + + # Odd number of backslashes -> quote is escaped, keep searching. + i = source.find(quote, i + 1) + + return None integer_re = re.compile( r""" ( @@ -468,6 +502,45 @@ class _Rule(t.NamedTuple): command: str | None +class _StringMatch: + """A minimal match object returned by :class:`_StringPattern`.""" + + __slots__ = ("_text", "_pos") + + def __init__(self, text: str, pos: int) -> None: + self._text = text + self._pos = pos + + def group(self, n: int = 0) -> str: + if n == 0: + return self._text + raise IndexError("no such group") + + def end(self) -> int: + return self._pos + len(self._text) + + def groups(self) -> tuple[str, ...]: + return () + + def groupdict(self) -> dict[str, str]: + return {} + + +class _StringPattern: + """A regex-like object that matches string literals using a fast + manual scanner instead of a regex to avoid catastrophic backtracking + on unclosed strings with many escape sequences. + """ + + __slots__ = () + + def match(self, source: str, pos: int) -> _StringMatch | None: + result = _match_string(source, pos) + if result is None: + return None + return _StringMatch(result, pos) + + class Lexer: """Class that implements a lexer for a given environment. Automatically created by the environment class, usually you don't have to do that. @@ -489,7 +562,7 @@ def c(x: str) -> t.Pattern[str]: _Rule(float_re, TOKEN_FLOAT, None), _Rule(integer_re, TOKEN_INTEGER, None), _Rule(name_re, TOKEN_NAME, None), - _Rule(string_re, TOKEN_STRING, None), + _Rule(_StringPattern(), TOKEN_STRING, None), _Rule(operator_re, TOKEN_OPERATOR, None), ] From bbe3b8441a1c98f551d5b631d6889a34f6f44cd3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci-lite[bot]" <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com> Date: Sat, 6 Jun 2026 10:57:42 +0000 Subject: [PATCH 2/3] [pre-commit.ci lite] apply automatic fixes --- src/jinja2/lexer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/jinja2/lexer.py b/src/jinja2/lexer.py index 5c59de85e..475e01586 100644 --- a/src/jinja2/lexer.py +++ b/src/jinja2/lexer.py @@ -63,6 +63,8 @@ def _match_string(source: str, pos: int) -> str | None: i = source.find(quote, i + 1) return None + + integer_re = re.compile( r""" ( From e1e14e0cce0f77d8907c36c42892ba4c3241ffbe Mon Sep 17 00:00:00 2001 From: goingforstudying-ctrl Date: Sat, 6 Jun 2026 07:05:58 -0400 Subject: [PATCH 3/3] fix(lexer): add _StringPattern to _Rule type annotation for mypy compatibility --- src/jinja2/lexer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jinja2/lexer.py b/src/jinja2/lexer.py index 475e01586..061452c12 100644 --- a/src/jinja2/lexer.py +++ b/src/jinja2/lexer.py @@ -499,7 +499,7 @@ def __new__(cls, *members, **kwargs): # type: ignore class _Rule(t.NamedTuple): - pattern: t.Pattern[str] + pattern: t.Pattern[str] | "_StringPattern" tokens: str | tuple[str, ...] | tuple[Failure] command: str | None