Skip to content
Closed

AI spam #2182

Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 80 additions & 5 deletions src/jinja2/lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,45 @@
# Static regular expressions, compiled once at import time.

# One or more consecutive whitespace characters.
whitespace_re = re.compile(r"\s+")

# A single line break in any of the three conventions; the capturing
# group means ``split`` keeps the separators in its result.
newline_re = re.compile(r"(\r\n|\r|\n)")

# A single- or double-quoted string literal in which a backslash escapes
# the following character. DOTALL lets an escape cover a newline too.
string_re = re.compile(
    "("
    r"'([^'\\]*(?:\\.[^'\\]*)*)'"
    "|"
    r'"([^"\\]*(?:\\.[^"\\]*)*)"'
    ")",
    flags=re.S,
)


def _match_string(source: str, pos: int) -> str | None:
"""Match a quoted string starting at *pos*.

This replaces the ``string_re`` regex to avoid catastrophic
backtracking on unclosed strings with many escape sequences.
The algorithm uses :meth:`str.find` to locate the closing quote
and then verifies that it is not escaped.
"""
if pos >= len(source):
return None

quote = source[pos]

if quote not in "'\"":
return None

i = source.find(quote, pos + 1)

while i != -1:
# Count backslashes before this quote.
bs = 0
j = i - 1

while j >= pos + 1 and source[j] == "\\":
bs += 1
j -= 1

if bs % 2 == 0:
# Even number of backslashes -> quote is not escaped.
return source[pos : i + 1]

# Odd number of backslashes -> quote is escaped, keep searching.
i = source.find(quote, i + 1)

return None


integer_re = re.compile(
r"""
(
Expand Down Expand Up @@ -463,11 +499,50 @@ def __new__(cls, *members, **kwargs): # type: ignore


class _Rule(t.NamedTuple):
    """A single lexing rule: a matcher, the token(s) it yields, a command."""

    # Matcher exposing ``match(source, pos)``: either a compiled regex
    # or a ``_StringPattern`` (quoted because it is defined further down).
    pattern: t.Pattern[str] | "_StringPattern"
    # A token type name, a tuple of names, or a one-tuple holding a Failure.
    tokens: str | tuple[str, ...] | tuple[Failure]
    # Optional command string, or None.
    # NOTE(review): its meaning is decided by the lexer loop, which is
    # not visible in this excerpt -- confirm there.
    command: str | None


class _StringMatch:
    """A minimal match object returned by :class:`_StringPattern`.

    Provides only ``group``, ``end``, ``groups`` and ``groupdict``. The
    match has no capture groups: group ``0`` (the whole matched text) is
    the only one available.
    """

    __slots__ = ("_text", "_pos")

    def __init__(self, text: str, pos: int) -> None:
        """Store the matched *text* and the index *pos* where it starts."""
        self._text = text
        self._pos = pos

    def group(self, n: int = 0) -> str:
        """Return the whole matched text for ``n == 0``.

        :raises IndexError: for any other group number.
        """
        if n == 0:
            return self._text
        raise IndexError("no such group")

    def end(self) -> int:
        """Return the index just past the last matched character."""
        return self._pos + len(self._text)

    def groups(self) -> tuple[str, ...]:
        """Return an empty tuple; there are no capture groups."""
        return ()

    def groupdict(self) -> dict[str, str]:
        """Return an empty dict; there are no named groups."""
        return {}


class _StringPattern:
    """A regex-like object that matches string literals.

    Uses the manual scanner ``_match_string`` instead of a regex to
    avoid catastrophic backtracking on unclosed strings with many escape
    sequences. Only ``match(source, pos)`` is provided.
    """

    # Stateless: instances carry no attributes.
    __slots__ = ()

    def match(self, source: str, pos: int) -> _StringMatch | None:
        """Try to match a quoted string literal starting at *pos*.

        :param source: Text being scanned.
        :param pos: Index at which the literal must begin.
        :return: A :class:`_StringMatch` wrapping the literal and its
            start index, or ``None`` when the scanner finds no literal.
        """
        literal = _match_string(source, pos)
        if literal is None:
            return None
        return _StringMatch(literal, pos)


class Lexer:
"""Class that implements a lexer for a given environment. Automatically
created by the environment class, usually you don't have to do that.
Expand All @@ -489,7 +564,7 @@ def c(x: str) -> t.Pattern[str]:
_Rule(float_re, TOKEN_FLOAT, None),
_Rule(integer_re, TOKEN_INTEGER, None),
_Rule(name_re, TOKEN_NAME, None),
_Rule(string_re, TOKEN_STRING, None),
_Rule(_StringPattern(), TOKEN_STRING, None),
_Rule(operator_re, TOKEN_OPERATOR, None),
]

Expand Down
Loading