pallets · InsanePrawn · May 4, 2025 · May 5, 2025 · May 9, 2025 · May 10, 2025
diff --git a/src/jinja2/environment.py b/src/jinja2/environment.py
@@ -252,6 +252,10 @@ class Environment:
             will reload the template.  For higher performance it's possible to
             disable that.
 
+        `parser_tolerate_faults`
+            Instruct the parser to tolerate some invalid constructs that
+            don't cause much semantic uncertainty.  This is useful for
+            linters and language servers (LSP), which need to provide
+            output on incomplete templates.  Defaults to `False`.
+
         `bytecode_cache`
             If set to a bytecode cache object, this object will provide a
             cache for the internal Jinja bytecode so that templates don't
@@ -316,6 +320,7 @@ def __init__(
         auto_reload: bool = True,
         bytecode_cache: t.Optional["BytecodeCache"] = None,
         enable_async: bool = False,
+        parser_tolerate_faults: bool = False,
     ):
         # !!Important notice!!
         #   The constructor accepts quite a few arguments that should be
@@ -360,6 +365,7 @@ def __init__(
         self.auto_reload = auto_reload
 
         # configurable policies
+        self.parser_tolerate_faults = parser_tolerate_faults
         self.policies = DEFAULT_POLICIES.copy()
 
         # load extensions

diff --git a/src/jinja2/lexer.py b/src/jinja2/lexer.py
@@ -8,6 +8,7 @@
 import typing as t
 from ast import literal_eval
 from collections import deque
+from dataclasses import dataclass
 from sys import intern
 
 from ._identifier import pattern as name_re
@@ -266,10 +267,12 @@ def __call__(self, lineno: int, filename: str | None) -> "te.NoReturn":
         raise self.error_class(self.message, lineno, filename)
 
 
-class Token(t.NamedTuple):
+@dataclass
+class Token:
     lineno: int
     type: str
     value: str
+    linepos: int
 
     def __str__(self) -> str:
         return describe_token(self)
@@ -333,7 +336,7 @@ def __init__(
         self.name = name
         self.filename = filename
         self.closed = False
-        self.current = Token(1, TOKEN_INITIAL, "")
+        self.current = Token(1, TOKEN_INITIAL, "", 0)
         next(self)
 
     def __iter__(self) -> TokenStreamIterator:
@@ -396,7 +399,11 @@ def __next__(self) -> Token:
 
     def close(self) -> None:
-        """Close the stream."""
-        self.current = Token(self.current.lineno, TOKEN_EOF, "")
+        """Close the stream.
+
+        The current token is replaced by an EOF token located just past the
+        end of the token that was current, and the stream is marked closed.
+        """
+        last = self.current
+        lineno, linepos = last.lineno, last.linepos
+        # Lexer.wrap() converts token values, so the last value is not
+        # guaranteed to be a str; only text can move the position forward.
+        if isinstance(last.value, str):
+            _, newline, tail = last.value.rpartition("\n")
+            if newline:
+                # The value spans lines: the column restarts on its last line
+                # instead of being added to the token's start column.
+                lineno += last.value.count("\n")
+                linepos = len(tail)
+            else:
+                linepos += len(last.value)
+        self.current = Token(lineno, TOKEN_EOF, "", linepos=linepos)
         self._iter = iter(())
         self.closed = True
 
@@ -609,19 +616,23 @@ def tokenize(
         state: str | None = None,
     ) -> TokenStream:
         """Calls tokeniter + tokenize and wraps it in a token stream."""
-        stream = self.tokeniter(source, name, filename, state)
+        stream = self.tokeniter_linepos(source, name, filename, state)
         return TokenStream(self.wrap(stream, name, filename), name, filename)
 
     def wrap(
         self,
-        stream: t.Iterable[tuple[int, str, str]],
+        stream: t.Iterable[tuple[int, str, str] | tuple[int, str, str, int]],
         name: str | None = None,
         filename: str | None = None,
     ) -> t.Iterator[Token]:
         """This is called with the stream as returned by `tokenize` and wraps
         every token in a :class:`Token` and converts the value.
         """
-        for lineno, token, value_str in stream:
+        for tup in stream:
+            if len(tup) == 3:
+                tup = (*tup, -1)
+            assert len(tup) == 4
+            lineno, token, value_str, linepos = tup
             if token in ignored_tokens:
                 continue
 
@@ -664,22 +675,25 @@ def wrap(
             elif token == TOKEN_OPERATOR:
                 token = operators[value_str]
 
-            yield Token(lineno, token, value)
+            yield Token(lineno, token, value, linepos)
 
-    def tokeniter(
-        self,
-        source: str,
-        name: str | None,
-        filename: str | None = None,
-        state: str | None = None,
-    ) -> t.Iterator[tuple[int, str, str]]:
+    def tokeniter(self, *kargs, **kwargs) -> t.Iterator[tuple[int, str, str]]:
         """This method tokenizes the text and returns the tokens in a
         generator. Use this method if you just want to tokenize a template.
 
         .. versionchanged:: 3.0
             Only ``\\n``, ``\\r\\n`` and ``\\r`` are treated as line
             breaks.
         """
+        yield from (tup[0:3] for tup in self.tokeniter_linepos(*kargs, **kwargs))
+
+    def tokeniter_linepos(
+        self,
+        source: str,
+        name: str | None,
+        filename: str | None = None,
+        state: str | None = None,
+    ) -> t.Iterator[tuple[int, str, str, int]]:
         lines = newline_re.split(source)[::2]
 
         if not self.keep_trailing_newline and lines[-1] == "":
@@ -688,6 +702,7 @@ def tokeniter(
         source = "\n".join(lines)
         pos = 0
         lineno = 1
+        linepos = 0
         stack = ["root"]
 
         if state is not None and state != "root":
@@ -700,11 +715,39 @@ def tokeniter(
         newlines_stripped = 0
         line_starting = True
 
+        def linepos_from_str(line_or_more: str) -> int:
+            line = line_or_more.rsplit("\n", 1)[-1]
+            return len(line)
+
+        old_pos = pos
+
         while True:
             # tokenizer loop
             for regex, tokens, new_state in statetokens:
-                m = regex.match(source, pos)
+                if old_pos != pos:
+                    lineno = source[:pos].count("\n") + 1
+                    inbetween = source[old_pos:pos]
+                    if "\n" in inbetween:
+                        linepos = len(inbetween.rsplit("\n", 1)[-1])
+                    else:
+                        for backwards_buffer_expo in range(5):
+                            backwards_offset = 10**backwards_buffer_expo
+                            backwards_location = max(0, pos - backwards_offset)
+                            lookbehind = source[backwards_location:pos]
+                            last_line = lookbehind.rsplit("\n", 1)[-1]
+                            if (
+                                len(last_line) != len(lookbehind)
+                                or len(lookbehind) >= pos
+                            ):
+                                # we found a line break
+                                linepos = len(last_line)
+                                break
+                            if backwards_location <= 0:
+                                break
+
+                old_pos = pos
 
+                m = regex.match(source, pos)
                 # if no match we try again with the next rule
                 if m is None:
                     continue
@@ -737,6 +780,7 @@ def tokeniter(
                             # Strip all whitespace between the text and the tag.
                             stripped = text.rstrip()
                             newlines_stripped = text[len(stripped) :].count("\n")
+                            linepos = len(text.rsplit("\n", 1)[-1])
                             groups = [stripped, *groups[1:]]
                         elif (
                             # Not marked for preserving whitespace.
@@ -765,7 +809,10 @@ def tokeniter(
                         elif token == "#bygroup":
                             for key, value in m.groupdict().items():
                                 if value is not None:
-                                    yield lineno, key, value
+                                    yield lineno, key, value, linepos
+                                    linepos = len(value.splitlines(keepends=False)[-1])
+                                    # linepos = pos
+                                    # pos = linepos
                                     lineno += value.count("\n")
                                     break
                             else:
@@ -778,14 +825,17 @@ def tokeniter(
                             data = groups[idx]
 
                             if data or token not in ignore_if_empty:
-                                yield lineno, token, data  # type: ignore[misc]
+                                yield lineno, token, data, linepos  # type: ignore[misc]
 
                             lineno += data.count("\n") + newlines_stripped
+                            if "\n" in data:
+                                linepos = 0
+                            linepos += len(data.rsplit("\n")[-1])
                             newlines_stripped = 0
 
                 # strings as token just are yielded as it.
                 else:
-                    data = m.group()
+                    data: str = m.group()
 
                     # update brace/parentheses balance
                     if tokens == TOKEN_OPERATOR:
@@ -813,11 +863,14 @@ def tokeniter(
 
                     # yield items
                     if data or tokens not in ignore_if_empty:
-                        yield lineno, tokens, data
+                        yield lineno, tokens, data, linepos
 
                     lineno += data.count("\n")
+                    if "\n" in data:
+                        linepos = len(data.rsplit("\n", 1)[-1])
 
                 line_starting = m.group()[-1:] == "\n"
+
                 # fetch new position into new variable so that we can check
                 # if there is a internal parsing error which would result
                 # in an infinite loop

diff --git a/src/jinja2/nodes.py b/src/jinja2/nodes.py
@@ -120,12 +120,25 @@ class Node(metaclass=NodeType):
     """
 
     fields: tuple[str, ...] = ()
-    attributes: tuple[str, ...] = ("lineno", "environment")
+    attributes: tuple[str, ...] = (
+        "lineno",
+        "linepos",
+        "environment",
+        "issues",
+        "lineno_end",
+        "linepos_end",
+    )
     abstract = True
 
     lineno: int
+    linepos: int
     environment: t.Optional["Environment"]
 
+    # only filled in diagnostic mode
+    issues: list["Node"]
+    lineno_end: int | None
+    linepos_end: int | None
+
     def __init__(self, *fields: t.Any, **attributes: t.Any) -> None:
         if self.abstract:
             raise TypeError("abstract nodes are not instantiable")
@@ -170,35 +183,47 @@ def iter_child_nodes(
         self,
         exclude: t.Container[str] | None = None,
         only: t.Container[str] | None = None,
+        reverse: bool = False,
     ) -> t.Iterator["Node"]:
         """Iterates over all direct child nodes of the node.  This iterates
         over all fields and yields the values of they are nodes.  If the value
         of a field is a list all the nodes in that list are returned.
         """
-        for _, item in self.iter_fields(exclude, only):
+        items: t.Iterable[tuple[str, t.Any]] = self.iter_fields(exclude, only)
+        if reverse:
+            items = reversed(list(items))
+        for _, item in items:
             if isinstance(item, list):
+                if reverse:
+                    item = reversed(item)
                 for n in item:
                     if isinstance(n, Node):
                         yield n
             elif isinstance(item, Node):
                 yield item
 
-    def find(self, node_type: type[_NodeBound]) -> _NodeBound | None:
+    def find(
+        self, node_type: type[_NodeBound], *, reverse: bool = False
+    ) -> _NodeBound | None:
-        """Find the first node of a given type.  If no such node exists the
-        return value is `None`.
-        """
-        for result in self.find_all(node_type):
-            return result
-
-        return None
+        """Return the first node of ``node_type`` yielded by :meth:`find_all`,
+        or `None` when there is no such node.  ``reverse`` is forwarded to
+        :meth:`find_all` unchanged.
+        """
+        return next(self.find_all(node_type, reverse=reverse), None)
 
     def find_all(
-        self, node_type: type[_NodeBound] | tuple[type[_NodeBound], ...]
+        self,
+        node_type: type[_NodeBound] | tuple[type[_NodeBound], ...],
+        *,
+        reverse: bool = False,
     ) -> t.Iterator[_NodeBound]:
         """Find all the nodes of a given type.  If the type is a tuple,
         the check is performed for any of the tuple items.
+
+        By default the search is depth-first and yields each node before
+        its descendants.  With ``reverse=True`` that order is exactly
+        reversed, so the last matching node is yielded first.
         """
-        for child in self.iter_child_nodes():
+        for child in self.iter_child_nodes(reverse=reverse):
+            if reverse:
+                # In the reversed order the descendants (last one first)
+                # come before the node that contains them.
+                yield from child.find_all(node_type, reverse=True)
             if isinstance(child, node_type):
                 yield child  # type: ignore
-            yield from child.find_all(node_type)
+            if not reverse:
+                yield from child.find_all(node_type)
@@ -279,12 +304,28 @@ def _dump(node: Node | t.Any) -> None:
         return "".join(buf)
 
 
+class ParserIssue(Node):
+    """Node carrying a diagnostic ``message`` and an optional
+    ``issue_context``.
+
+    NOTE(review): presumably these are the entries collected in
+    ``Node.issues`` in fault-tolerant mode -- confirm against the parser.
+    """
+
+    attributes: tuple[str, ...] = ("message", "issue_context")
+    message: str
+    issue_context: str | None
+
+
 class Stmt(Node):
     """Base node for all statements."""
 
     abstract = True
 
 
+class EmptyStatement(Stmt):
+    """Placeholder node for a position where a statement was expected but
+    an empty statement was given.  Only returned in fault-tolerant mode.
+    """
+
+    attributes: tuple[str, ...] = ("message", "issue_context")
+    # NOTE(review): presumably a description of the problem and the template
+    # text around it -- confirm against the parser that creates these nodes.
+    message: str | None
+    issue_context: str | None
+
+
 class Helper(Node):
     """Nodes that exist in a specific context only."""
 
@@ -398,10 +439,12 @@ class Block(Stmt):
     """
 
     fields = ("name", "body", "scoped", "required")
+    attributes = ("endblock_with_name",)
     name: str
     body: list[Node]
     scoped: bool
     required: bool
+    endblock_with_name: bool | None
 
 
 class Include(Stmt):
@@ -487,6 +530,28 @@ def can_assign(self) -> bool:
         return False
 
 
+class ExprIssue(Expr):
+    """Base class for expression nodes that stand in for a faulty
+    expression, such as :class:`EmptyExpression` and
+    :class:`InvalidExpression`.
+    """
+
+    attributes: tuple[str, ...] = ("message", "issue_context")
+    message: str
+    issue_context: str | None
+
+
+class EmptyExpression(ExprIssue):
+    """Placeholder node for a position where an expression was expected but
+    an empty expression was given.  Only returned in fault-tolerant mode.
+    """
+
+
+class InvalidExpression(ExprIssue):
+    """Placeholder node for a position where an expression was expected but
+    an unparsable expression was given.  Only returned in fault-tolerant
+    mode.
+    """
+
+    attributes: tuple[str, ...] = ("original_str",)
+
+    # NOTE(review): presumably the source text that could not be parsed --
+    # confirm against the parser that creates these nodes.
+    original_str: str
+
+
 class BinExpr(Expr):
     """Baseclass for all binary expressions."""