From 5040d3c2c3451112c9ccc3910822581e8e4282b9 Mon Sep 17 00:00:00 2001
From: JB Rubinovitz
Date: Mon, 4 May 2026 02:01:59 -0400
Subject: [PATCH 1/2] Improve lexing of unclosed escaped strings

---
 src/jinja2/lexer.py     | 45 +++++++++++++++++++++++++++++++++++++++++
 tests/test_lexnparse.py |  6 ++++++
 2 files changed, 51 insertions(+)

diff --git a/src/jinja2/lexer.py b/src/jinja2/lexer.py
index e35cd471e..79a9d7d67 100644
--- a/src/jinja2/lexer.py
+++ b/src/jinja2/lexer.py
@@ -251,6 +251,29 @@ def compile_rules(environment: "Environment") -> list[tuple[str, str]]:
     return [x[1:] for x in sorted(rules, reverse=True)]
 
 
+def find_string_end(source: str, start: int) -> int | None:
+    quote = source[start]
+    pos = start + 1
+
+    while True:
+        end = source.find(quote, pos)
+
+        if end == -1:
+            return None
+
+        backslashes = 0
+        i = end - 1
+
+        while i > start and source[i] == "\\":
+            backslashes += 1
+            i -= 1
+
+        if backslashes % 2 == 0:
+            return end + 1
+
+        pos = end + 1
+
+
 class Failure:
     """Class that raises a `TemplateSyntaxError` if called.
     Used by the `Lexer` to specify known errors.
@@ -701,6 +724,28 @@ def tokeniter(
         line_starting = True
 
         while True:
+            if (
+                stack[-1]
+                in (TOKEN_VARIABLE_BEGIN, TOKEN_BLOCK_BEGIN, TOKEN_LINESTATEMENT_BEGIN)
+                and source[pos : pos + 1] in ("'", '"')
+            ):
+                pos2 = find_string_end(source, pos)
+
+                if pos2 is None:
+                    raise TemplateSyntaxError(
+                        f"unexpected char {source[pos]!r} at {pos}",
+                        lineno,
+                        name,
+                        filename,
+                    )
+
+                data = source[pos:pos2]
+                yield lineno, TOKEN_STRING, data
+                lineno += data.count("\n")
+                line_starting = data[-1:] == "\n"
+                pos = pos2
+                continue
+
             # tokenizer loop
             for regex, tokens, new_state in statetokens:
                 m = regex.match(source, pos)
diff --git a/tests/test_lexnparse.py b/tests/test_lexnparse.py
index 0f0dbf332..82bf16dd7 100644
--- a/tests/test_lexnparse.py
+++ b/tests/test_lexnparse.py
@@ -95,6 +95,12 @@ def test_string_escapes(self, env):
             assert tmpl.render() == char
         assert env.from_string('{{ "\N{HOT SPRINGS}" }}').render() == "\u2668"
 
+    def test_unclosed_string_with_many_escapes(self, env):
+        source = "{{ " + "'" + ("\\a" + "b" * 1000) * 1000 + "c"
+
+        with pytest.raises(TemplateSyntaxError):
+            env.parse(source)
+
     def test_bytefallback(self, env):
         from pprint import pformat
 

From 0026e1ad5c3e4b9a2f62eb313e8cd77d835bdc32 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci-lite[bot]" <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com>
Date: Mon, 4 May 2026 06:04:06 +0000
Subject: [PATCH 2/2] [pre-commit.ci lite] apply automatic fixes

---
 src/jinja2/lexer.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/jinja2/lexer.py b/src/jinja2/lexer.py
index 79a9d7d67..6bbedf901 100644
--- a/src/jinja2/lexer.py
+++ b/src/jinja2/lexer.py
@@ -724,11 +724,11 @@ def tokeniter(
         line_starting = True
 
         while True:
-            if (
-                stack[-1]
-                in (TOKEN_VARIABLE_BEGIN, TOKEN_BLOCK_BEGIN, TOKEN_LINESTATEMENT_BEGIN)
-                and source[pos : pos + 1] in ("'", '"')
-            ):
+            if stack[-1] in (
+                TOKEN_VARIABLE_BEGIN,
+                TOKEN_BLOCK_BEGIN,
+                TOKEN_LINESTATEMENT_BEGIN,
+            ) and source[pos : pos + 1] in ("'", '"'):
                 pos2 = find_string_end(source, pos)
 
                 if pos2 is None: