Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/jinja2/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,10 @@ class Environment:
will reload the template. For higher performance it's possible to
disable that.

`parser_tolerate_faults`
Instruct the parser to tolerate some invalid constructs that do not
introduce much semantic ambiguity. This is useful for tools such as
linters and language servers (LSP), which need to produce output for
incomplete templates. Defaults to ``False``.

`bytecode_cache`
If set to a bytecode cache object, this object will provide a
cache for the internal Jinja bytecode so that templates don't
Expand Down Expand Up @@ -316,6 +320,7 @@ def __init__(
auto_reload: bool = True,
bytecode_cache: t.Optional["BytecodeCache"] = None,
enable_async: bool = False,
parser_tolerate_faults: bool = False,
):
# !!Important notice!!
# The constructor accepts quite a few arguments that should be
Expand Down Expand Up @@ -360,6 +365,7 @@ def __init__(
self.auto_reload = auto_reload

# configurable policies
self.parser_tolerate_faults = parser_tolerate_faults
self.policies = DEFAULT_POLICIES.copy()

# load extensions
Expand Down
91 changes: 72 additions & 19 deletions src/jinja2/lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import typing as t
from ast import literal_eval
from collections import deque
from dataclasses import dataclass
from sys import intern

from ._identifier import pattern as name_re
Expand Down Expand Up @@ -266,10 +267,12 @@ def __call__(self, lineno: int, filename: str | None) -> "te.NoReturn":
raise self.error_class(self.message, lineno, filename)


class Token(t.NamedTuple):
@dataclass
class Token:
lineno: int
type: str
value: str
linepos: int

def __str__(self) -> str:
return describe_token(self)
Expand Down Expand Up @@ -333,7 +336,7 @@ def __init__(
self.name = name
self.filename = filename
self.closed = False
self.current = Token(1, TOKEN_INITIAL, "")
self.current = Token(1, TOKEN_INITIAL, "", 0)
next(self)

def __iter__(self) -> TokenStreamIterator:
Expand Down Expand Up @@ -396,7 +399,11 @@ def __next__(self) -> Token:

def close(self) -> None:
    """Close the stream.

    Replaces ``self.current`` with an EOF token positioned just after the
    end of the token that was current when the stream was closed, then
    drops the underlying iterator and marks the stream as closed.
    """
    last = self.current
    # ``wrap`` converts token values before building the ``Token``, so the
    # value is not guaranteed to be a string here (NOTE(review): literal
    # tokens presumably carry numbers -- confirm).  Measure its text form
    # instead of calling ``str`` methods on it directly.
    text = str(last.value)
    _, newline, tail = text.rpartition("\n")
    lineno = last.lineno + text.count("\n")
    if newline:
        # The value spans several lines: the column restarts after its
        # last line break instead of extending the starting column.
        linepos = len(tail)
    else:
        linepos = last.linepos + len(text)
    self.current = Token(lineno, TOKEN_EOF, "", linepos=linepos)
    self._iter = iter(())
    self.closed = True

Expand Down Expand Up @@ -609,19 +616,23 @@ def tokenize(
state: str | None = None,
) -> TokenStream:
"""Calls tokeniter + tokenize and wraps it in a token stream."""
stream = self.tokeniter(source, name, filename, state)
stream = self.tokeniter_linepos(source, name, filename, state)
return TokenStream(self.wrap(stream, name, filename), name, filename)

def wrap(
self,
stream: t.Iterable[tuple[int, str, str]],
stream: t.Iterable[tuple[int, str, str] | tuple[int, str, str, int]],
name: str | None = None,
filename: str | None = None,
) -> t.Iterator[Token]:
"""This is called with the stream as returned by `tokenize` and wraps
every token in a :class:`Token` and converts the value.
"""
for lineno, token, value_str in stream:
for tup in stream:
if len(tup) == 3:
tup = (*tup, -1)
assert len(tup) == 4
lineno, token, value_str, linepos = tup
if token in ignored_tokens:
continue

Expand Down Expand Up @@ -664,22 +675,25 @@ def wrap(
elif token == TOKEN_OPERATOR:
token = operators[value_str]

yield Token(lineno, token, value)
yield Token(lineno, token, value, linepos)

def tokeniter(
    self,
    source: str,
    name: str | None,
    filename: str | None = None,
    state: str | None = None,
) -> t.Iterator[tuple[int, str, str]]:
    """This method tokenizes the text and returns the tokens in a
    generator. Use this method if you just want to tokenize a template.

    Each item is a ``(lineno, token, value)`` tuple. The items are taken
    from :meth:`tokeniter_linepos` with the trailing line position
    dropped, so existing callers keep receiving 3-tuples.

    .. versionchanged:: 3.0
        Only ``\\n``, ``\\r\\n`` and ``\\r`` are treated as line
        breaks.
    """
    # The parameters are spelled out (rather than ``*args, **kwargs``) so
    # the public signature stays introspectable and type-checked.
    for lineno, token, value, _linepos in self.tokeniter_linepos(
        source, name, filename, state
    ):
        yield lineno, token, value

def tokeniter_linepos(
self,
source: str,
name: str | None,
filename: str | None = None,
state: str | None = None,
) -> t.Iterator[tuple[int, str, str, int]]:
lines = newline_re.split(source)[::2]

if not self.keep_trailing_newline and lines[-1] == "":
Expand All @@ -688,6 +702,7 @@ def tokeniter(
source = "\n".join(lines)
pos = 0
lineno = 1
linepos = 0
stack = ["root"]

if state is not None and state != "root":
Expand All @@ -700,11 +715,39 @@ def tokeniter(
newlines_stripped = 0
line_starting = True

def linepos_from_str(line_or_more: str) -> int:
line = line_or_more.rsplit("\n", 1)[-1]
return len(line)

old_pos = pos

while True:
# tokenizer loop
for regex, tokens, new_state in statetokens:
m = regex.match(source, pos)
if old_pos != pos:
lineno = source[:pos].count("\n") + 1
inbetween = source[old_pos:pos]
if "\n" in inbetween:
linepos = len(inbetween.rsplit("\n", 1)[-1])
else:
for backwards_buffer_expo in range(5):
backwards_offset = 10**backwards_buffer_expo
backwards_location = max(0, pos - backwards_offset)
lookbehind = source[backwards_location:pos]
last_line = lookbehind.rsplit("\n", 1)[-1]
if (
len(last_line) != len(lookbehind)
or len(lookbehind) >= pos
):
# we found a line break
linepos = len(last_line)
break
if backwards_location <= 0:
break

old_pos = pos

m = regex.match(source, pos)
# if no match we try again with the next rule
if m is None:
continue
Expand Down Expand Up @@ -737,6 +780,7 @@ def tokeniter(
# Strip all whitespace between the text and the tag.
stripped = text.rstrip()
newlines_stripped = text[len(stripped) :].count("\n")
linepos = len(text.rsplit("\n", 1)[-1])
groups = [stripped, *groups[1:]]
elif (
# Not marked for preserving whitespace.
Expand Down Expand Up @@ -765,7 +809,10 @@ def tokeniter(
elif token == "#bygroup":
for key, value in m.groupdict().items():
if value is not None:
yield lineno, key, value
yield lineno, key, value, linepos
linepos = len(value.splitlines(keepends=False)[-1])
# linepos = pos
# pos = linepos
lineno += value.count("\n")
break
else:
Expand All @@ -778,14 +825,17 @@ def tokeniter(
data = groups[idx]

if data or token not in ignore_if_empty:
yield lineno, token, data # type: ignore[misc]
yield lineno, token, data, linepos # type: ignore[misc]

lineno += data.count("\n") + newlines_stripped
if "\n" in data:
linepos = 0
linepos += len(data.rsplit("\n")[-1])
newlines_stripped = 0

# strings as token just are yielded as it.
else:
data = m.group()
data: str = m.group()

# update brace/parentheses balance
if tokens == TOKEN_OPERATOR:
Expand Down Expand Up @@ -813,11 +863,14 @@ def tokeniter(

# yield items
if data or tokens not in ignore_if_empty:
yield lineno, tokens, data
yield lineno, tokens, data, linepos

lineno += data.count("\n")
if "\n" in data:
linepos = len(data.rsplit("\n", 1)[-1])

line_starting = m.group()[-1:] == "\n"

# fetch new position into new variable so that we can check
# if there is a internal parsing error which would result
# in an infinite loop
Expand Down
77 changes: 71 additions & 6 deletions src/jinja2/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,25 @@ class Node(metaclass=NodeType):
"""

fields: tuple[str, ...] = ()
attributes: tuple[str, ...] = ("lineno", "environment")
attributes: tuple[str, ...] = (
"lineno",
"linepos",
"environment",
"issues",
"lineno_end",
"linepos_end",
)
abstract = True

lineno: int
linepos: int
environment: t.Optional["Environment"]

# only filled in diagnostic mode
issues: list["Node"]
lineno_end: int | None
linepos_end: int | None

def __init__(self, *fields: t.Any, **attributes: t.Any) -> None:
if self.abstract:
raise TypeError("abstract nodes are not instantiable")
Expand Down Expand Up @@ -170,35 +183,47 @@ def iter_child_nodes(
self,
exclude: t.Container[str] | None = None,
only: t.Container[str] | None = None,
reverse: bool = False,
) -> t.Iterator["Node"]:
"""Iterates over all direct child nodes of the node. This iterates
over all fields and yields the values if they are nodes. If the value
of a field is a list all the nodes in that list are returned.
"""
for _, item in self.iter_fields(exclude, only):
items: t.Iterable[tuple[str, t.Any]] = self.iter_fields(exclude, only)
if reverse:
items = reversed(list(items))
for _, item in items:
if isinstance(item, list):
if reverse:
item = reversed(item)
for n in item:
if isinstance(n, Node):
yield n
elif isinstance(item, Node):
yield item

def find(
    self, node_type: type[_NodeBound], *, reverse: bool = False
) -> _NodeBound | None:
    """Find the first node of a given type. If no such node exists the
    return value is `None`.

    With ``reverse=True`` the first node produced by
    ``find_all(node_type, reverse=True)`` is returned instead.
    """
    # ``next`` with a default stops after the first match and falls back
    # to ``None`` when the search yields nothing.
    return next(self.find_all(node_type, reverse=reverse), None)

def find_all(
    self,
    node_type: type[_NodeBound] | tuple[type[_NodeBound], ...],
    *,
    reverse: bool = False,
) -> t.Iterator[_NodeBound]:
    """Find all the nodes of a given type. If the type is a tuple,
    the check is performed for any of the tuple items.

    By default every matching child is yielded before its own
    descendants. With ``reverse=True`` the nodes are yielded in exactly
    the opposite order: children are visited last to first and the
    descendants of a child are yielded before the child itself.
    """
    for child in self.iter_child_nodes(reverse=reverse):
        if reverse:
            # Mirror the forward order at every depth: the descendants
            # (themselves reversed) come before the child.
            yield from child.find_all(node_type, reverse=True)
        if isinstance(child, node_type):
            yield child  # type: ignore
        if not reverse:
            yield from child.find_all(node_type)
Expand Down Expand Up @@ -279,12 +304,28 @@ def _dump(node: Node | t.Any) -> None:
return "".join(buf)


class ParserIssue(Node):
    """Node that carries a parser issue as a ``message`` plus an optional
    ``issue_context``.

    NOTE(review): presumably these are the entries stored in
    ``Node.issues`` -- confirm against the parser.
    """

    # Both values are declared as node attributes rather than fields.
    attributes: tuple[str, ...] = ("message", "issue_context")
    # Text describing the issue.
    message: str
    # Optional additional context.  NOTE(review): its exact contents are
    # not visible from this class -- confirm against the parser.
    issue_context: str | None


class Stmt(Node):
    """Base node for all statements."""

    # ``Node.__init__`` raises ``TypeError`` for abstract node classes, so
    # ``Stmt`` itself cannot be instantiated.
    abstract = True


class EmptyStatement(Stmt):
    """Node where a statement should be but an empty statement was given.
    Returned in fault-tolerant mode only.
    """

    # Both values are declared as node attributes rather than fields.
    attributes: tuple[str, ...] = ("message", "issue_context")
    # Optional text describing the issue.
    message: str | None
    # Optional additional context.  NOTE(review): its exact contents are
    # not visible from this class -- confirm against the parser.
    issue_context: str | None


class Helper(Node):
"""Nodes that exist in a specific context only."""

Expand Down Expand Up @@ -398,10 +439,12 @@ class Block(Stmt):
"""

fields = ("name", "body", "scoped", "required")
attributes = ("endblock_with_name",)
name: str
body: list[Node]
scoped: bool
required: bool
endblock_with_name: bool | None


class Include(Stmt):
Expand Down Expand Up @@ -487,6 +530,28 @@ def can_assign(self) -> bool:
return False


class ExprIssue(Expr):
    """Expression node that carries an issue as a ``message`` plus an
    optional ``issue_context``.

    Serves as the base class of :class:`EmptyExpression` and
    :class:`InvalidExpression`.
    """

    # Both values are declared as node attributes rather than fields.
    attributes: tuple[str, ...] = ("message", "issue_context")
    # Text describing the issue.
    message: str
    # Optional additional context.  NOTE(review): its exact contents are
    # not visible from this class -- confirm against the parser.
    issue_context: str | None


class EmptyExpression(ExprIssue):
    """Node where an expression should be but an empty expression was given.
    Returned in fault-tolerant mode only.
    """


class InvalidExpression(ExprIssue):
    """Node where an expression should be but an unparsable expression was given.
    Returned in fault-tolerant mode only.
    """

    # Declared as a node attribute rather than a field.
    attributes: tuple[str, ...] = ("original_str",)

    # NOTE(review): presumably the source text that could not be parsed --
    # confirm against the parser.
    original_str: str


class BinExpr(Expr):
"""Baseclass for all binary expressions."""

Expand Down
Loading
Loading