From 05ddbbda8e7473d8e4cfffb9f0890e15f05410e7 Mon Sep 17 00:00:00 2001 From: InsanePrawn Date: Sun, 4 May 2025 23:19:08 +0200 Subject: [PATCH 01/19] parser: add fault_tolerant=False --- src/jinja2/nodes.py | 15 +++++++++++++++ src/jinja2/parser.py | 11 +++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/src/jinja2/nodes.py b/src/jinja2/nodes.py index 1e08d3c59..d684a7ebf 100644 --- a/src/jinja2/nodes.py +++ b/src/jinja2/nodes.py @@ -285,6 +285,12 @@ class Stmt(Node): abstract = True +class EmptyStatement(Stmt): + """Node where a statement should be but an empty statement was given. + Returned in Fault-tolerant Mode only + """ + + class Helper(Node): """Nodes that exist in a specific context only.""" @@ -487,6 +493,15 @@ def can_assign(self) -> bool: return False +class EmptyExpression(Expr): + """Node where an expression should be but an empty expression was given. + Returned in Fault-tolerant Mode only + """ + + comment: str + attributes: t.Tuple[str, ...] = ("comment",) + + class BinExpr(Expr): """Baseclass for all binary expressions.""" diff --git a/src/jinja2/parser.py b/src/jinja2/parser.py index 3ae857ebe..72daea735 100644 --- a/src/jinja2/parser.py +++ b/src/jinja2/parser.py @@ -57,7 +57,9 @@ def __init__( name: str | None = None, filename: str | None = None, state: str | None = None, + fault_tolerant: bool = False, ) -> None: + self.fault_tolerant = fault_tolerant self.environment = environment self.stream = environment._tokenize(source, name, filename, state) self.name = name @@ -685,6 +687,7 @@ def parse_tuple( extra_end_rules: tuple[str, ...] | None = None, explicit_parentheses: bool = False, with_namespace: bool = False, + allow_empty: bool = False, ) -> nodes.Tuple | nodes.Expr: """Works like `parse_expression` but if multiple expressions are delimited by a comma a :class:`~jinja2.nodes.Tuple` node is created. @@ -740,11 +743,12 @@ def parse() -> nodes.Expr: # nothing) in the spot of an expression would be an empty # tuple. if not explicit_parentheses: + if allow_empty: + return nodes.EmptyExpression(lineno=lineno, comment="") self.fail( "Expected an expression," f" got {describe_token(self.stream.current)!r}" ) - return nodes.Tuple(args, "load", lineno=lineno) def parse_list(self) -> nodes.List: @@ -1010,7 +1014,10 @@ def flush_data() -> None: next(self.stream) elif token.type == "variable_begin": next(self.stream) - add_data(self.parse_tuple(with_condexpr=True)) + data = self.parse_tuple(with_condexpr=True, allow_empty=self.fault_tolerant) + if isinstance(data, nodes.EmptyExpression): + data.comment = "Empty expression inside print statement" + add_data(data) self.stream.expect("variable_end") elif token.type == "block_begin": flush_data() From 42345cc1c4dc1d439f6bc14888f20e609fff68f5 Mon Sep 17 00:00:00 2001 From: InsanePrawn Date: Mon, 5 May 2025 02:52:48 +0200 Subject: [PATCH 02/19] move parser tolerance setting to Environment and tolerate more issues --- src/jinja2/environment.py | 6 +++++ src/jinja2/nodes.py | 27 +++++++++++++++++++--- src/jinja2/parser.py | 47 ++++++++++++++++++++++++++++++--------- 3 files changed, 67 insertions(+), 13 deletions(-) diff --git a/src/jinja2/environment.py b/src/jinja2/environment.py index acaaffb59..9d40c904d 100644 --- a/src/jinja2/environment.py +++ b/src/jinja2/environment.py @@ -252,6 +252,10 @@ class Environment: will reload the template. For higher performance it's possible to disable that. + `parser_tolerate_faults` + Instruct the parser to tolerate some invalid constructs that don't cause much semantic uncertainty, useful for linters and LSP to provide output on incomplete templates. + Defaults to False. + `bytecode_cache` If set to a bytecode cache object, this object will provide a cache for the internal Jinja bytecode so that templates don't @@ -316,6 +320,7 @@ def __init__( auto_reload: bool = True, bytecode_cache: t.Optional["BytecodeCache"] = None, enable_async: bool = False, + parser_tolerate_faults: bool = False, ): # !!Important notice!! # The constructor accepts quite a few arguments that should be @@ -360,6 +365,7 @@ def __init__( self.auto_reload = auto_reload # configurable policies + self.parser_tolerate_faults = parser_tolerate_faults self.policies = DEFAULT_POLICIES.copy() # load extensions diff --git a/src/jinja2/nodes.py b/src/jinja2/nodes.py index d684a7ebf..c791d8691 100644 --- a/src/jinja2/nodes.py +++ b/src/jinja2/nodes.py @@ -125,6 +125,7 @@ class Node(metaclass=NodeType): lineno: int environment: t.Optional["Environment"] + issues: list[t.Union["ParserIssue", "ExprIssue"]] def __init__(self, *fields: t.Any, **attributes: t.Any) -> None: if self.abstract: @@ -279,6 +280,13 @@ def _dump(node: Node | t.Any) -> None: return "".join(buf) +class ParserIssue(Node): + attributes: tuple[str, ...] = ("message", "lineno_end") + + message: str + lineno_end: int | None + + class Stmt(Node): """Base node for all statements.""" @@ -493,13 +501,26 @@ def can_assign(self) -> bool: return False -class EmptyExpression(Expr): +class ExprIssue(Expr): + attributes: tuple[str, ...] = ("message", "lineno_end") + message: str + lineno_end: int | None + + +class EmptyExpression(ExprIssue): """Node where an expression should be but an empty expression was given. Returned in Fault-tolerant Mode only """ - comment: str - attributes: t.Tuple[str, ...] = ("comment",) + +class InvalidExpression(ExprIssue): + """Node where an expression should be but an unparsable expression was given. + Returned in Fault-tolerant Mode only + """ + + attributes: tuple[str, ...] = ("original_str",) + + original_str: str class BinExpr(Expr): diff --git a/src/jinja2/parser.py b/src/jinja2/parser.py index 72daea735..14274c3c7 100644 --- a/src/jinja2/parser.py +++ b/src/jinja2/parser.py @@ -57,9 +57,7 @@ def __init__( name: str | None = None, filename: str | None = None, state: str | None = None, - fault_tolerant: bool = False, ) -> None: - self.fault_tolerant = fault_tolerant self.environment = environment self.stream = environment._tokenize(source, name, filename, state) self.name = name @@ -401,9 +399,17 @@ def parse_context() -> bool: node.with_context = False return node - def parse_signature(self, node: _MacroCall) -> None: + def parse_signature(self, node: _MacroCall) -> None | nodes.EmptyExpression | nodes.InvalidExpression: args = node.args = [] defaults = node.defaults = [] + if self.environment.parser_tolerate_faults and self.stream.current.type != "lparen": + if self.stream.current.type == "block_end": + return nodes.EmptyExpression( # type: ignore[assignment] + lineno=node.lineno, + lineno_end=self.stream.current.lineno, + message="Empty signature", + ) + self.stream.expect("lparen") while self.stream.current.type != "rparen": if args: @@ -413,14 +419,27 @@ def parse_signature(self, node: _MacroCall) -> None: if self.stream.skip_if("assign"): defaults.append(self.parse_expression()) elif defaults: - self.fail("non-default argument follows default argument") + msg = "non-default argument follows default argument" + if not self.environment.parser_tolerate_faults: + self.fail(msg) + err = nodes.InvalidExpression( + lineno=arg.lineno, + lineno_end=self.stream.current.lineno, + message=msg, + original_str=arg.name, + ) + arg.issues = [err] args.append(arg) self.stream.expect("rparen") + return None def parse_call_block(self) -> nodes.CallBlock: node = nodes.CallBlock(lineno=next(self.stream).lineno) if self.stream.current.type == "lparen": - self.parse_signature(node) + signature_issue = self.parse_signature(node) + if signature_issue: + assert self.environment.parser_tolerate_faults + node.args = signature_issue # type: ignore[assignment] else: node.args = [] node.defaults = [] @@ -439,9 +458,15 @@ def parse_filter_block(self) -> nodes.FilterBlock: return node def parse_macro(self) -> nodes.Macro: - node = nodes.Macro(lineno=next(self.stream).lineno) + node = nodes.Macro(lineno=next(self.stream).lineno, issues=None) node.name = self.parse_assign_target(name_only=True).name - self.parse_signature(node) + signature_issue = self.parse_signature(node) + if signature_issue: + assert self.environment.parser_tolerate_faults + node.args = signature_issue # type: ignore[assignment] + if node.issues is None: + node.issues = [] + node.issues.append(signature_issue) node.body = self.parse_statements(("name:endmacro",), drop_needle=True) return node @@ -744,7 +769,9 @@ def parse() -> nodes.Expr: # tuple. if not explicit_parentheses: if allow_empty: - return nodes.EmptyExpression(lineno=lineno, comment="") + empty = nodes.EmptyExpression(lineno=lineno, message="Expected an expression") + empty.issues = [empty] + return empty self.fail( "Expected an expression," f" got {describe_token(self.stream.current)!r}" @@ -1014,9 +1041,9 @@ def flush_data() -> None: next(self.stream) elif token.type == "variable_begin": next(self.stream) - data = self.parse_tuple(with_condexpr=True, allow_empty=self.fault_tolerant) + data = self.parse_tuple(with_condexpr=True, allow_empty=self.environment.parser_tolerate_faults) if isinstance(data, nodes.EmptyExpression): - data.comment = "Empty expression inside print statement" + data.message = "Empty expression inside print statement" add_data(data) self.stream.expect("variable_end") elif token.type == "block_begin": From 96e18404b42179cedf21801262e3571fcd1841a8 Mon Sep 17 00:00:00 2001 From: InsanePrawn Date: Fri, 9 May 2025 16:56:12 +0200 Subject: [PATCH 03/19] nodes: Node.iter_child_nodes(): add reverse param --- src/jinja2/nodes.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/jinja2/nodes.py b/src/jinja2/nodes.py index c791d8691..379d14068 100644 --- a/src/jinja2/nodes.py +++ b/src/jinja2/nodes.py @@ -120,7 +120,7 @@ class Node(metaclass=NodeType): """ fields: tuple[str, ...] = () - attributes: tuple[str, ...] = ("lineno", "environment") + attributes: tuple[str, ...] = ("lineno", "environment", "issues") abstract = True lineno: int @@ -171,13 +171,19 @@ def iter_child_nodes( self, exclude: t.Container[str] | None = None, only: t.Container[str] | None = None, + reverse: bool = False, ) -> t.Iterator["Node"]: """Iterates over all direct child nodes of the node. This iterates over all fields and yields the values of they are nodes. If the value of a field is a list all the nodes in that list are returned. """ - for _, item in self.iter_fields(exclude, only): + items: t.Iterable[t.Tuple[str, t.Any]] = self.iter_fields(exclude, only) + if reverse: + items = reversed(list(items)) + for _, item in items: if isinstance(item, list): + if reverse: + item = reversed(item) for n in item: if isinstance(n, Node): yield n From 3d3ad887630c00bc3aa9431481df7101bc28e02c Mon Sep 17 00:00:00 2001 From: InsanePrawn Date: Sat, 10 May 2025 14:16:11 +0200 Subject: [PATCH 04/19] nodes: add reverse=False to find() and find_all() --- src/jinja2/nodes.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/jinja2/nodes.py b/src/jinja2/nodes.py index 379d14068..84acc46aa 100644 --- a/src/jinja2/nodes.py +++ b/src/jinja2/nodes.py @@ -190,22 +190,28 @@ def iter_child_nodes( elif isinstance(item, Node): yield item - def find(self, node_type: type[_NodeBound]) -> _NodeBound | None: + def find( + self, node_type: type[_NodeBound], *, reverse: bool = False + ) -> _NodeBound | None: """Find the first node of a given type. If no such node exists the return value is `None`. + With reverse=True, the last node is returned instead """ - for result in self.find_all(node_type): + for result in self.find_all(node_type, reverse=reverse): return result return None def find_all( - self, node_type: type[_NodeBound] | tuple[type[_NodeBound], ...] + self, + node_type: type[_NodeBound] | tuple[type[_NodeBound], ...], + *, + reverse: bool = False, ) -> t.Iterator[_NodeBound]: """Find all the nodes of a given type. If the type is a tuple, the check is performed for any of the tuple items. """ - for child in self.iter_child_nodes(): + for child in self.iter_child_nodes(reverse=reverse): if isinstance(child, node_type): yield child # type: ignore yield from child.find_all(node_type) From 727e64dbeebe66829830128005d221a0db1c8a83 Mon Sep 17 00:00:00 2001 From: InsanePrawn Date: Sat, 10 May 2025 19:56:03 +0200 Subject: [PATCH 05/19] nodes: add lineno_end --- src/jinja2/nodes.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/jinja2/nodes.py b/src/jinja2/nodes.py index 84acc46aa..4264c28e5 100644 --- a/src/jinja2/nodes.py +++ b/src/jinja2/nodes.py @@ -120,12 +120,15 @@ class Node(metaclass=NodeType): """ fields: tuple[str, ...] = () - attributes: tuple[str, ...] = ("lineno", "environment", "issues") + attributes: tuple[str, ...] = ("lineno", "environment", "issues", "lineno_end") abstract = True lineno: int environment: t.Optional["Environment"] + + # only filled in diagnostic mode issues: list[t.Union["ParserIssue", "ExprIssue"]] + lineno_end: int | None def __init__(self, *fields: t.Any, **attributes: t.Any) -> None: if self.abstract: @@ -293,10 +296,9 @@ def _dump(node: Node | t.Any) -> None: class ParserIssue(Node): - attributes: tuple[str, ...] = ("message", "lineno_end") + attributes: tuple[str, ...] = ("message",) message: str - lineno_end: int | None class Stmt(Node): @@ -514,9 +516,8 @@ def can_assign(self) -> bool: class ExprIssue(Expr): - attributes: tuple[str, ...] = ("message", "lineno_end") + attributes: tuple[str, ...] = ("message",) message: str - lineno_end: int | None class EmptyExpression(ExprIssue): From 5ef1566c0dbe61f2fa721af23ac95f2a0b21a1e9 Mon Sep 17 00:00:00 2001 From: InsanePrawn Date: Fri, 26 Sep 2025 03:52:18 +0200 Subject: [PATCH 06/19] lexer: add tokeniter_linepos --- src/jinja2/lexer.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/src/jinja2/lexer.py b/src/jinja2/lexer.py index e35cd471e..228d0b4cb 100644 --- a/src/jinja2/lexer.py +++ b/src/jinja2/lexer.py @@ -666,13 +666,7 @@ def wrap( yield Token(lineno, token, value) - def tokeniter( - self, - source: str, - name: str | None, - filename: str | None = None, - state: str | None = None, - ) -> t.Iterator[tuple[int, str, str]]: + def tokeniter(self, *kargs, **kwargs) -> t.Iterator[tuple[int, str, str]]: """This method tokenizes the text and returns the tokens in a generator. Use this method if you just want to tokenize a template. @@ -680,6 +674,18 @@ def tokeniter( Only ``\\n``, ``\\r\\n`` and ``\\r`` are treated as line breaks. """ + yield from ( + (tup[0], tup[2], tup[3]) + for tup in self.tokeniter_linepos(*kargs, **kwargs) + ) + + def tokeniter_linepos( + self, + source: str, + name: str | None, + filename: str | None = None, + state: str | None = None, + ) -> t.Iterator[tuple[int, int, str, str]]: lines = newline_re.split(source)[::2] if not self.keep_trailing_newline and lines[-1] == "": @@ -765,7 +771,8 @@ def tokeniter( elif token == "#bygroup": for key, value in m.groupdict().items(): if value is not None: - yield lineno, key, value + yield lineno, pos, key, value + pos = 0 if value.endswith("\n") else len(value.splitlines(keepends=False)[-1]) lineno += value.count("\n") break else: @@ -778,7 +785,7 @@ def tokeniter( data = groups[idx] if data or token not in ignore_if_empty: - yield lineno, token, data # type: ignore[misc] + yield lineno, pos, token, data # type: ignore[misc] lineno += data.count("\n") + newlines_stripped newlines_stripped = 0 @@ -813,7 +820,7 @@ def tokeniter( # yield items if data or tokens not in ignore_if_empty: - yield lineno, tokens, data + yield lineno, pos, tokens, data lineno += data.count("\n") From b494babca21e9c17e9bc6e35f95f2e48fd9b8924 Mon Sep 17 00:00:00 2001 From: InsanePrawn Date: Mon, 29 Sep 2025 03:27:04 +0200 Subject: [PATCH 07/19] nodes: add linepos to tokens and all nodes --- src/jinja2/lexer.py | 30 +++-- src/jinja2/nodes.py | 9 +- src/jinja2/parser.py | 279 ++++++++++++++++++++++++++++++++----------- 3 files changed, 237 insertions(+), 81 deletions(-) diff --git a/src/jinja2/lexer.py b/src/jinja2/lexer.py index 228d0b4cb..4a853f073 100644 --- a/src/jinja2/lexer.py +++ b/src/jinja2/lexer.py @@ -7,6 +7,7 @@ import re import typing as t from ast import literal_eval +from dataclasses import dataclass from collections import deque from sys import intern @@ -266,10 +267,12 @@ def __call__(self, lineno: int, filename: str | None) -> "te.NoReturn": raise self.error_class(self.message, lineno, filename) -class Token(t.NamedTuple): +@dataclass +class Token: lineno: int type: str value: str + linepos: int | None = None def __str__(self) -> str: return describe_token(self) @@ -609,19 +612,23 @@ def tokenize( state: str | None = None, ) -> TokenStream: """Calls tokeniter + tokenize and wraps it in a token stream.""" - stream = self.tokeniter(source, name, filename, state) + stream = self.tokeniter_linepos(source, name, filename, state) return TokenStream(self.wrap(stream, name, filename), name, filename) def wrap( self, - stream: t.Iterable[tuple[int, str, str]], + stream: t.Iterable[tuple[int, str, str] | tuple[int, str, str, int]], name: str | None = None, filename: str | None = None, ) -> t.Iterator[Token]: """This is called with the stream as returned by `tokenize` and wraps every token in a :class:`Token` and converts the value. """ - for lineno, token, value_str in stream: + for tup in stream: + if len(tup) == 3: + tup = (*tup, -1) + assert len(tup) == 4 + lineno, token, value_str, linepos = tup if token in ignored_tokens: continue @@ -664,7 +671,7 @@ def wrap( elif token == TOKEN_OPERATOR: token = operators[value_str] - yield Token(lineno, token, value) + yield Token(lineno, token, value, linepos) def tokeniter(self, *kargs, **kwargs) -> t.Iterator[tuple[int, str, str]]: """This method tokenizes the text and returns the tokens in a @@ -674,10 +681,7 @@ def tokeniter(self, *kargs, **kwargs) -> t.Iterator[tuple[int, str, str]]: Only ``\\n``, ``\\r\\n`` and ``\\r`` are treated as line breaks. """ - yield from ( - (tup[0], tup[2], tup[3]) - for tup in self.tokeniter_linepos(*kargs, **kwargs) - ) + yield from (tup[0:3] for tup in self.tokeniter_linepos(*kargs, **kwargs)) def tokeniter_linepos( self, @@ -685,7 +689,7 @@ def tokeniter_linepos( name: str | None, filename: str | None = None, state: str | None = None, - ) -> t.Iterator[tuple[int, int, str, str]]: + ) -> t.Iterator[tuple[int, str, str, int]]: lines = newline_re.split(source)[::2] if not self.keep_trailing_newline and lines[-1] == "": @@ -771,7 +775,7 @@ def tokeniter_linepos( elif token == "#bygroup": for key, value in m.groupdict().items(): if value is not None: - yield lineno, pos, key, value + yield lineno, key, value, pos pos = 0 if value.endswith("\n") else len(value.splitlines(keepends=False)[-1]) lineno += value.count("\n") break @@ -785,7 +789,7 @@ def tokeniter_linepos( data = groups[idx] if data or token not in ignore_if_empty: - yield lineno, pos, token, data # type: ignore[misc] + yield lineno, token, data, pos # type: ignore[misc] lineno += data.count("\n") + newlines_stripped newlines_stripped = 0 @@ -820,7 +824,7 @@ def tokeniter_linepos( # yield items if data or tokens not in ignore_if_empty: - yield lineno, pos, tokens, data + yield lineno, tokens, data, pos lineno += data.count("\n") diff --git a/src/jinja2/nodes.py b/src/jinja2/nodes.py index 4264c28e5..74ebbd375 100644 --- a/src/jinja2/nodes.py +++ b/src/jinja2/nodes.py @@ -120,10 +120,17 @@ class Node(metaclass=NodeType): """ fields: tuple[str, ...] = () - attributes: tuple[str, ...] = ("lineno", "environment", "issues", "lineno_end") + attributes: tuple[str, ...] = ( + "lineno", + "linepos", + "environment", + "issues", + "lineno_end", + ) abstract = True lineno: int + linepos: int environment: t.Optional["Environment"] # only filled in diagnostic mode diff --git a/src/jinja2/parser.py b/src/jinja2/parser.py index 14274c3c7..152de1913 100644 --- a/src/jinja2/parser.py +++ b/src/jinja2/parser.py @@ -153,11 +153,15 @@ def is_tuple_end(self, extra_end_rules: tuple[str, ...] | None = None) -> bool: return self.stream.current.test_any(extra_end_rules) # type: ignore return False - def free_identifier(self, lineno: int | None = None) -> nodes.InternalName: + def free_identifier( + self, lineno: int | None = None, linepos: int | None = None + ) -> nodes.InternalName: """Return a new free identifier as :class:`~jinja2.nodes.InternalName`.""" self._last_identifier += 1 rv = object.__new__(nodes.InternalName) - nodes.Node.__init__(rv, f"fi{self._last_identifier}", lineno=lineno) + nodes.Node.__init__( + rv, f"fi{self._last_identifier}", lineno=lineno, linepos=linepos + ) return rv def parse_statement(self) -> nodes.Node | list[nodes.Node]: @@ -177,7 +181,10 @@ def parse_statement(self) -> nodes.Node | list[nodes.Node]: return self.parse_filter_block() ext = self.extensions.get(token.value) if ext is not None: - return ext(self) + res = ext(self) + if hasattr(res, "linepos") and res.linepos is None: + res.linepos = token.linepos + return res # did not work out, remove the token we pushed by accident # from the stack so that the unknown tag fail function can @@ -220,18 +227,24 @@ def parse_statements( def parse_set(self) -> nodes.Assign | nodes.AssignBlock: """Parse an assign statement.""" - lineno = next(self.stream).lineno + _next = next(self.stream) + lineno = _next.lineno + linepos = _next.linepos target = self.parse_assign_target(with_namespace=True) if self.stream.skip_if("assign"): expr = self.parse_tuple() - return nodes.Assign(target, expr, lineno=lineno) + return nodes.Assign(target, expr, lineno=lineno, linepos=linepos) filter_node = self.parse_filter(None) body = self.parse_statements(("name:endset",), drop_needle=True) - return nodes.AssignBlock(target, filter_node, body, lineno=lineno) + return nodes.AssignBlock( + target, filter_node, body, lineno=lineno, linepos=linepos + ) def parse_for(self) -> nodes.For: """Parse a for loop.""" - lineno = self.stream.expect("name:for").lineno + _next = self.stream.expect("name:for") + lineno = _next.lineno + linepos = _next.linepos target = self.parse_assign_target(extra_end_rules=("name:in",)) self.stream.expect("name:in") iter = self.parse_tuple( @@ -246,11 +259,14 @@ def parse_for(self) -> nodes.For: else_ = [] else: else_ = self.parse_statements(("name:endfor",), drop_needle=True) - return nodes.For(target, iter, body, else_, test, recursive, lineno=lineno) + return nodes.For( + target, iter, body, else_, test, recursive, lineno=lineno, linepos=linepos + ) def parse_if(self) -> nodes.If: """Parse an if construct.""" - node = result = nodes.If(lineno=self.stream.expect("name:if").lineno) + _next = self.stream.expect("name:if") + node = result = nodes.If(lineno=_next.lineno, linepos=_next.linepos) while True: node.test = self.parse_tuple(with_condexpr=False) node.body = self.parse_statements(("name:elif", "name:else", "name:endif")) @@ -258,7 +274,8 @@ def parse_if(self) -> nodes.If: node.else_ = [] token = next(self.stream) if token.test("name:elif"): - node = nodes.If(lineno=self.stream.current.lineno) + _current = self.stream.current + node = nodes.If(lineno=_current.lineno, linepos=_current.linepos) result.elif_.append(node) continue elif token.test("name:else"): @@ -267,7 +284,8 @@ def parse_if(self) -> nodes.If: return result def parse_with(self) -> nodes.With: - node = nodes.With(lineno=next(self.stream).lineno) + _next = next(self.stream) + node = nodes.With(lineno=_next.lineno, linepos=_next.linepos) targets: list[nodes.Expr] = [] values: list[nodes.Expr] = [] while self.stream.current.type != "block_end": @@ -284,13 +302,17 @@ def parse_with(self) -> nodes.With: return node def parse_autoescape(self) -> nodes.Scope: - node = nodes.ScopedEvalContextModifier(lineno=next(self.stream).lineno) + _next = next(self.stream) + node = nodes.ScopedEvalContextModifier( + lineno=_next.lineno, linepos=_next.linepos + ) node.options = [nodes.Keyword("autoescape", self.parse_expression())] node.body = self.parse_statements(("name:endautoescape",), drop_needle=True) - return nodes.Scope([node]) + return nodes.Scope([node], lineno=node.lineno, linepos=node.linepos) def parse_block(self) -> nodes.Block: - node = nodes.Block(lineno=next(self.stream).lineno) + _next = next(self.stream) + node = nodes.Block(lineno=_next.lineno, linepos=_next.linepos) node.name = self.stream.expect("name").value node.scoped = self.stream.skip_if("name:scoped") node.required = self.stream.skip_if("name:required") @@ -322,7 +344,8 @@ def parse_block(self) -> nodes.Block: return node def parse_extends(self) -> nodes.Extends: - node = nodes.Extends(lineno=next(self.stream).lineno) + _next = next(self.stream) + node = nodes.Extends(lineno=_next.lineno, linepos=_next.linepos) node.template = self.parse_expression() return node @@ -339,7 +362,8 @@ def parse_import_context( return node def parse_include(self) -> nodes.Include: - node = nodes.Include(lineno=next(self.stream).lineno) + _next = next(self.stream) + node = nodes.Include(lineno=_next.lineno, linepos=_next.linepos) node.template = self.parse_expression() if self.stream.current.test("name:ignore") and self.stream.look().test( "name:missing" @@ -351,14 +375,16 @@ def parse_include(self) -> nodes.Include: return self.parse_import_context(node, True) def parse_import(self) -> nodes.Import: - node = nodes.Import(lineno=next(self.stream).lineno) + _next = next(self.stream) + node = nodes.Import(lineno=_next.lineno, linepos=_next.linepos) node.template = self.parse_expression() self.stream.expect("name:as") node.target = self.parse_assign_target(name_only=True).name return self.parse_import_context(node, False) def parse_from(self) -> nodes.FromImport: - node = nodes.FromImport(lineno=next(self.stream).lineno) + _next = next(self.stream) + node = nodes.FromImport(lineno=_next.lineno, linepos=_next.linepos) node.template = self.parse_expression() self.stream.expect("name:import") node.names = [] @@ -399,13 +425,19 @@ def parse_context() -> bool: node.with_context = False return node - def parse_signature(self, node: _MacroCall) -> None | nodes.EmptyExpression | nodes.InvalidExpression: + def parse_signature( + self, node: _MacroCall + ) -> None | nodes.EmptyExpression | nodes.InvalidExpression: args = node.args = [] defaults = node.defaults = [] - if self.environment.parser_tolerate_faults and self.stream.current.type != "lparen": + if ( + self.environment.parser_tolerate_faults + and self.stream.current.type != "lparen" + ): if self.stream.current.type == "block_end": return nodes.EmptyExpression( # type: ignore[assignment] lineno=node.lineno, + linepos=node.linepos, lineno_end=self.stream.current.lineno, message="Empty signature", ) @@ -424,6 +456,7 @@ def parse_signature(self, node: _MacroCall) -> None | nodes.EmptyExpression | no self.fail(msg) err = nodes.InvalidExpression( lineno=arg.lineno, + linepos=arg.linepos, lineno_end=self.stream.current.lineno, message=msg, original_str=arg.name, @@ -434,7 +467,8 @@ def parse_signature(self, node: _MacroCall) -> None | nodes.EmptyExpression | no return None def parse_call_block(self) -> nodes.CallBlock: - node = nodes.CallBlock(lineno=next(self.stream).lineno) + _next = next(self.stream) + node = nodes.CallBlock(lineno=_next.lineno, linepos=_next.linepos) if self.stream.current.type == "lparen": signature_issue = self.parse_signature(node) if signature_issue: @@ -452,13 +486,15 @@ def parse_call_block(self) -> nodes.CallBlock: return node def parse_filter_block(self) -> nodes.FilterBlock: - node = nodes.FilterBlock(lineno=next(self.stream).lineno) + _next = next(self.stream) + node = nodes.FilterBlock(lineno=_next.lineno, linepos=_next.linepos) node.filter = self.parse_filter(None, start_inline=True) # type: ignore node.body = self.parse_statements(("name:endfilter",), drop_needle=True) return node def parse_macro(self) -> nodes.Macro: - node = nodes.Macro(lineno=next(self.stream).lineno, issues=None) + _next = next(self.stream) + node = nodes.Macro(lineno=_next.lineno, linepos=_next.linepos, issues=None) node.name = self.parse_assign_target(name_only=True).name signature_issue = self.parse_signature(node) if signature_issue: @@ -471,7 +507,8 @@ def parse_macro(self) -> nodes.Macro: return node def parse_print(self) -> nodes.Output: - node = nodes.Output(lineno=next(self.stream).lineno) + _next = next(self.stream) + node = nodes.Output(lineno=_next.lineno, linepos=_next.linepos) node.nodes = [] while self.stream.current.type != "block_end": if node.nodes: @@ -512,7 +549,9 @@ def parse_assign_target( if name_only: token = self.stream.expect("name") - target = nodes.Name(token.value, "store", lineno=token.lineno) + target = nodes.Name( + token.value, "store", lineno=token.lineno, linepos=token.linepos + ) else: if with_tuple: target = self.parse_tuple( @@ -543,6 +582,7 @@ def parse_expression(self, with_condexpr: bool = True) -> nodes.Expr: def parse_condexpr(self) -> nodes.Expr: lineno = self.stream.current.lineno + linepos = self.stream.current.linepos expr1 = self.parse_or() expr3: nodes.Expr | None @@ -552,110 +592,153 @@ def parse_condexpr(self) -> nodes.Expr: expr3 = self.parse_condexpr() else: expr3 = None - expr1 = nodes.CondExpr(expr2, expr1, expr3, lineno=lineno) + expr1 = nodes.CondExpr(expr2, expr1, expr3, lineno=lineno, linepos=linepos) lineno = self.stream.current.lineno + linepos = self.stream.current.linepos return expr1 def parse_or(self) -> nodes.Expr: lineno = self.stream.current.lineno + linepos = self.stream.current.linepos left = self.parse_and() while self.stream.skip_if("name:or"): right = self.parse_and() - left = nodes.Or(left, right, lineno=lineno) + left = nodes.Or(left, right, lineno=lineno, linepos=linepos) lineno = self.stream.current.lineno + linepos = self.stream.current.linepos return left def parse_and(self) -> nodes.Expr: lineno = self.stream.current.lineno + linepos = self.stream.current.linepos left = self.parse_not() while self.stream.skip_if("name:and"): right = self.parse_not() - left = nodes.And(left, right, lineno=lineno) + left = nodes.And(left, right, lineno=lineno, linepos=linepos) lineno = self.stream.current.lineno + linepos = self.stream.current.linepos return left def parse_not(self) -> nodes.Expr: if self.stream.current.test("name:not"): - lineno = next(self.stream).lineno - return nodes.Not(self.parse_not(), lineno=lineno) + _next = next(self.stream) + lineno = _next.lineno + linepos = _next.linepos + return nodes.Not(self.parse_not(), lineno=lineno, linepos=linepos) return self.parse_compare() def parse_compare(self) -> nodes.Expr: lineno = self.stream.current.lineno + linepos = self.stream.current.linepos expr = self.parse_math1() ops = [] while True: - token_type = self.stream.current.type + token = self.stream.current + token_type = token.type if token_type in _compare_operators: next(self.stream) - ops.append(nodes.Operand(token_type, self.parse_math1())) + token = self.stream.current + ops.append( + nodes.Operand( + token_type, + self.parse_math1(), + lineno=token.lineno, + linepos=token.linepos, + ) + ) elif self.stream.skip_if("name:in"): - ops.append(nodes.Operand("in", self.parse_math1())) + token = self.stream.current + ops.append( + nodes.Operand( + "in", + self.parse_math1(), + lineno=token.lineno, + linepos=token.linepos, + ) + ) elif self.stream.current.test("name:not") and self.stream.look().test( "name:in" ): self.stream.skip(2) - ops.append(nodes.Operand("notin", self.parse_math1())) + token = self.stream.current + ops.append( + nodes.Operand( + "notin", + self.parse_math1(), + lineno=token.lineno, + linepos=token.linepos, + ) + ) else: break lineno = self.stream.current.lineno + linepos = self.stream.current.linepos if not ops: return expr - return nodes.Compare(expr, ops, lineno=lineno) + token = self.stream.current + return nodes.Compare(expr, ops, lineno=lineno, linepos=linepos) def parse_math1(self) -> nodes.Expr: lineno = self.stream.current.lineno + linepos = self.stream.current.linepos left = self.parse_concat() while self.stream.current.type in ("add", "sub"): cls = _math_nodes[self.stream.current.type] next(self.stream) right = self.parse_concat() - left = cls(left, right, lineno=lineno) + left = cls(left, right, lineno=lineno, linepos=linepos) lineno = self.stream.current.lineno + linepos = self.stream.current.linepos return left def parse_concat(self) -> nodes.Expr: lineno = self.stream.current.lineno + linepos = self.stream.current.linepos args = [self.parse_math2()] while self.stream.current.type == "tilde": next(self.stream) args.append(self.parse_math2()) if len(args) == 1: return args[0] - return nodes.Concat(args, lineno=lineno) + return nodes.Concat(args, lineno=lineno, linepos=linepos) def parse_math2(self) -> nodes.Expr: lineno = self.stream.current.lineno + linepos = self.stream.current.linepos left = self.parse_pow() while self.stream.current.type in ("mul", "div", "floordiv", "mod"): cls = _math_nodes[self.stream.current.type] next(self.stream) right = self.parse_pow() - left = cls(left, right, lineno=lineno) + left = cls(left, right, lineno=lineno, linepos=linepos) lineno = self.stream.current.lineno + linepos = self.stream.current.linepos return left def parse_pow(self) -> nodes.Expr: lineno = self.stream.current.lineno + linepos = self.stream.current.linepos left = self.parse_unary() while self.stream.current.type == "pow": next(self.stream) right = self.parse_unary() - left = nodes.Pow(left, right, lineno=lineno) + left = nodes.Pow(left, right, lineno=lineno, linepos=linepos) lineno = self.stream.current.lineno + linepos = self.stream.current.linepos return left def parse_unary(self, with_filter: bool = True) -> nodes.Expr: token_type = self.stream.current.type lineno = self.stream.current.lineno + linepos = self.stream.current.linepos node: nodes.Expr if token_type == "sub": next(self.stream) - node = nodes.Neg(self.parse_unary(False), lineno=lineno) + node = nodes.Neg(self.parse_unary(False), lineno=lineno, linepos=linepos) elif token_type == "add": next(self.stream) - node = nodes.Pos(self.parse_unary(False), lineno=lineno) + node = nodes.Pos(self.parse_unary(False), lineno=lineno, linepos=linepos) else: node = self.parse_primary() node = self.parse_postfix(node) @@ -671,17 +754,25 @@ def parse_primary(self, with_namespace: bool = False) -> nodes.Expr: if token.type == "name": next(self.stream) if token.value in ("true", "false", "True", "False"): - node = nodes.Const(token.value in ("true", "True"), lineno=token.lineno) + node = nodes.Const( + token.value in ("true", "True"), + lineno=token.lineno, + linepos=token.linepos, + ) elif token.value in ("none", "None"): - node = nodes.Const(None, lineno=token.lineno) + node = nodes.Const(None, lineno=token.lineno, linepos=token.linepos) elif with_namespace and self.stream.current.type == "dot": # If namespace attributes are allowed at this point, and the next # token is a dot, produce a namespace reference. next(self.stream) attr = self.stream.expect("name") - node = nodes.NSRef(token.value, attr.value, lineno=token.lineno) + node = nodes.NSRef( + token.value, attr.value, lineno=token.lineno, linepos=token.linepos + ) else: - node = nodes.Name(token.value, "load", lineno=token.lineno) + node = nodes.Name( + token.value, "load", lineno=token.lineno, linepos=token.linepos + ) elif token.type == "string": next(self.stream) buf = [token.value] @@ -689,10 +780,10 @@ def parse_primary(self, with_namespace: bool = False) -> nodes.Expr: while self.stream.current.type == "string": buf.append(self.stream.current.value) next(self.stream) - node = nodes.Const("".join(buf), lineno=lineno) + node = nodes.Const("".join(buf), lineno=lineno, linepos=token.linepos) elif token.type in ("integer", "float"): next(self.stream) - node = nodes.Const(token.value, lineno=token.lineno) + node = nodes.Const(token.value, lineno=token.lineno, linepos=token.linepos) elif token.type == "lparen": next(self.stream) node = self.parse_tuple(explicit_parentheses=True) @@ -746,6 +837,7 @@ def parse() -> nodes.Expr: args: list[nodes.Expr] = [] is_tuple = False + linepos_start = self.stream.current.linepos while True: if args: @@ -769,14 +861,18 @@ def parse() -> nodes.Expr: # tuple. if not explicit_parentheses: if allow_empty: - empty = nodes.EmptyExpression(lineno=lineno, message="Expected an expression") + empty = nodes.EmptyExpression( + lineno=lineno, + linepos=linepos_start, + message="Expected an expression", + ) empty.issues = [empty] return empty self.fail( "Expected an expression," f" got {describe_token(self.stream.current)!r}" ) - return nodes.Tuple(args, "load", lineno=lineno) + return nodes.Tuple(args, "load", lineno=lineno, linepos=linepos_start) def parse_list(self) -> nodes.List: token = self.stream.expect("lbracket") @@ -788,7 +884,7 @@ def parse_list(self) -> nodes.List: break items.append(self.parse_expression()) self.stream.expect("rbracket") - return nodes.List(items, lineno=token.lineno) + return nodes.List(items, lineno=token.lineno, linepos=token.linepos) def parse_dict(self) -> nodes.Dict: token = self.stream.expect("lbrace") @@ -801,9 +897,9 @@ def parse_dict(self) -> nodes.Dict: key = self.parse_expression() self.stream.expect("colon") value = self.parse_expression() - items.append(nodes.Pair(key, value, lineno=key.lineno)) + items.append(nodes.Pair(key, value, lineno=key.lineno, linepos=key.linepos)) self.stream.expect("rbrace") - return nodes.Dict(items, lineno=token.lineno) + return nodes.Dict(items, lineno=token.lineno, linepos=token.linepos) def parse_postfix(self, node: nodes.Expr) -> nodes.Expr: while True: @@ -842,12 +938,20 @@ def parse_subscript(self, node: nodes.Expr) -> nodes.Getattr | nodes.Getitem: next(self.stream) if attr_token.type == "name": return nodes.Getattr( - node, attr_token.value, "load", lineno=token.lineno + node, + attr_token.value, + "load", + lineno=token.lineno, + linepos=token.linepos, ) elif attr_token.type != "integer": self.fail("expected name or number", attr_token.lineno) - arg = nodes.Const(attr_token.value, lineno=attr_token.lineno) - return nodes.Getitem(node, arg, "load", lineno=token.lineno) + arg = nodes.Const( + attr_token.value, lineno=attr_token.lineno, linepos=attr_token.linepos + ) + return nodes.Getitem( + node, arg, "load", lineno=token.lineno, linepos=token.linepos + ) if token.type == "lbracket": args: list[nodes.Expr] = [] while self.stream.current.type != "rbracket": @@ -858,12 +962,17 @@ def parse_subscript(self, node: nodes.Expr) -> nodes.Getattr | nodes.Getitem: if len(args) == 1: arg = args[0] else: - arg = nodes.Tuple(args, "load", lineno=token.lineno) - return nodes.Getitem(node, arg, "load", lineno=token.lineno) + arg = nodes.Tuple( + args, "load", lineno=token.lineno, linepos=token.linepos + ) + return nodes.Getitem( + node, arg, "load", lineno=token.lineno, linepos=token.linepos + ) self.fail("expected subscript expression", token.lineno) def parse_subscribed(self) -> nodes.Expr: lineno = self.stream.current.lineno + linepos = self.stream.current.linepos args: list[nodes.Expr | None] if self.stream.current.type == "colon": @@ -892,7 +1001,7 @@ def parse_subscribed(self) -> nodes.Expr: else: args.append(None) - return nodes.Slice(lineno=lineno, *args) # noqa: B026 + return nodes.Slice(lineno=lineno, linepos=linepos, *args) # noqa: B026 def parse_call_args( self, @@ -939,7 +1048,11 @@ def ensure(expr: bool) -> None: key = self.stream.current.value self.stream.skip(2) value = self.parse_expression() - kwargs.append(nodes.Keyword(key, value, lineno=value.lineno)) + kwargs.append( + nodes.Keyword( + key, value, lineno=value.lineno, linepos=value.linepos + ) + ) else: # Parsing an arg ensure(dyn_args is None and dyn_kwargs is None and not kwargs) @@ -955,7 +1068,15 @@ def parse_call(self, node: nodes.Expr) -> nodes.Call: # needs to be recorded before the stream is advanced. token = self.stream.current args, kwargs, dyn_args, dyn_kwargs = self.parse_call_args() - return nodes.Call(node, args, kwargs, dyn_args, dyn_kwargs, lineno=token.lineno) + return nodes.Call( + node, + args, + kwargs, + dyn_args, + dyn_kwargs, + lineno=token.lineno, + linepos=token.linepos, + ) def parse_filter( self, node: nodes.Expr | None, start_inline: bool = False @@ -975,7 +1096,14 @@ def parse_filter( kwargs = [] dyn_args = dyn_kwargs = None node = nodes.Filter( - node, name, args, kwargs, dyn_args, dyn_kwargs, lineno=token.lineno + node, + name, + args, + kwargs, + dyn_args, + dyn_kwargs, + lineno=token.lineno, + linepos=token.linepos, ) start_inline = False return node @@ -1012,10 +1140,17 @@ def parse_test(self, node: nodes.Expr) -> nodes.Expr: else: args = [] node = nodes.Test( - node, name, args, kwargs, dyn_args, dyn_kwargs, lineno=token.lineno + node, + name, + args, + kwargs, + dyn_args, + dyn_kwargs, + lineno=token.lineno, + linepos=token.linepos, ) if negated: - node = nodes.Not(node, lineno=token.lineno) + node = nodes.Not(node, lineno=token.lineno, linepos=token.linepos) return node def subparse(self, end_tokens: tuple[str, ...] | None = None) -> list[nodes.Node]: @@ -1029,7 +1164,10 @@ def subparse(self, end_tokens: tuple[str, ...] | None = None) -> list[nodes.Node def flush_data() -> None: if data_buffer: lineno = data_buffer[0].lineno - body.append(nodes.Output(data_buffer[:], lineno=lineno)) + linepos = data_buffer[0].linepos + body.append( + nodes.Output(data_buffer[:], lineno=lineno, linepos=linepos) + ) del data_buffer[:] try: @@ -1037,11 +1175,18 @@ def flush_data() -> None: token = self.stream.current if token.type == "data": if token.value: - add_data(nodes.TemplateData(token.value, lineno=token.lineno)) + add_data( + nodes.TemplateData( + token.value, lineno=token.lineno, linepos=token.linepos + ) + ) next(self.stream) elif token.type == "variable_begin": next(self.stream) - data = self.parse_tuple(with_condexpr=True, allow_empty=self.environment.parser_tolerate_faults) + data = self.parse_tuple( + with_condexpr=True, + allow_empty=self.environment.parser_tolerate_faults, + ) if isinstance(data, nodes.EmptyExpression): data.message = "Empty expression inside print statement" add_data(data) @@ -1070,6 +1215,6 @@ def flush_data() -> None: def parse(self) -> nodes.Template: """Parse the whole template into a `Template` node.""" - result = nodes.Template(self.subparse(), lineno=1) + result = nodes.Template(self.subparse(), lineno=1, linepos=0) result.set_environment(self.environment) return result From 31663590d4750abf1cd3a37cf19a57e5542a49e3 Mon Sep 17 00:00:00 2001 From: InsanePrawn Date: Sat, 11 Oct 2025 16:00:10 +0200 Subject: [PATCH 08/19] more linepos --- src/jinja2/lexer.py | 33 ++++++++++++++++++++++++++++----- src/jinja2/parser.py | 9 +++++++-- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/src/jinja2/lexer.py b/src/jinja2/lexer.py index 4a853f073..642c54fc3 100644 --- a/src/jinja2/lexer.py +++ b/src/jinja2/lexer.py @@ -698,6 +698,7 @@ def tokeniter_linepos( source = "\n".join(lines) pos = 0 lineno = 1 + linepos = 0 stack = ["root"] if state is not None and state != "root": @@ -710,11 +711,31 @@ def tokeniter_linepos( newlines_stripped = 0 line_starting = True + def linepos_from_str(line_or_more: str) -> int: + line = line_or_more.rsplit("\n", 1)[-1] + return len(line) + + + + old_pos = pos while True: + if old_pos != pos: + for i in range(5): + backwards_offset = (10 ** i) + backwards_location = max(0, pos - backwards_offset) + lookbehind = source[backwards_location:pos + 1] + last_line = lookbehind.rsplit("\n", 1)[-1] + linepos = len(last_line) + if len(last_line) != len(lookbehind): + # we found a line break + break + if backwards_location <= 0: + break + old_pos = pos + # tokenizer loop for regex, tokens, new_state in statetokens: m = regex.match(source, pos) - # if no match we try again with the next rule if m is None: continue @@ -775,8 +796,9 @@ def tokeniter_linepos( elif token == "#bygroup": for key, value in m.groupdict().items(): if value is not None: - yield lineno, key, value, pos + yield lineno, key, value, linepos pos = 0 if value.endswith("\n") else len(value.splitlines(keepends=False)[-1]) + linepos = pos lineno += value.count("\n") break else: @@ -789,14 +811,14 @@ def tokeniter_linepos( data = groups[idx] if data or token not in ignore_if_empty: - yield lineno, token, data, pos # type: ignore[misc] + yield lineno, token, data, linepos # type: ignore[misc] lineno += data.count("\n") + newlines_stripped newlines_stripped = 0 # strings as token just are yielded as it. else: - data = m.group() + data: str = m.group() # update brace/parentheses balance if tokens == TOKEN_OPERATOR: @@ -824,10 +846,11 @@ def tokeniter_linepos( # yield items if data or tokens not in ignore_if_empty: - yield lineno, tokens, data, pos + yield lineno, tokens, data, linepos lineno += data.count("\n") + line_starting = m.group()[-1:] == "\n" # fetch new position into new variable so that we can check # if there is a internal parsing error which would result diff --git a/src/jinja2/parser.py b/src/jinja2/parser.py index 152de1913..0ffde9c2d 100644 --- a/src/jinja2/parser.py +++ b/src/jinja2/parser.py @@ -232,8 +232,12 @@ def parse_set(self) -> nodes.Assign | nodes.AssignBlock: linepos = _next.linepos target = self.parse_assign_target(with_namespace=True) if self.stream.skip_if("assign"): - expr = self.parse_tuple() - return nodes.Assign(target, expr, lineno=lineno, linepos=linepos) + expr = self.parse_tuple(allow_empty=self.environment.parser_tolerate_faults) + result = nodes.Assign(target, expr, lineno=lineno, linepos=linepos) + if isinstance(expr, nodes.EmptyExpression): + result.issues = [expr] + expr.message = "Assignment to empty expression" + return result filter_node = self.parse_filter(None) body = self.parse_statements(("name:endset",), drop_needle=True) return nodes.AssignBlock( @@ -1188,6 +1192,7 @@ def flush_data() -> None: allow_empty=self.environment.parser_tolerate_faults, ) if isinstance(data, nodes.EmptyExpression): + data.lineno, data.linepos = token.lineno, token.linepos data.message = "Empty expression inside print statement" add_data(data) self.stream.expect("variable_end") From fee88ffe0ebc992d29c919067aea7653a2f7e4a0 Mon Sep 17 00:00:00 2001 From: InsanePrawn Date: Sat, 11 Oct 2025 18:08:45 +0200 Subject: [PATCH 09/19] lexer: fix linepos and lineno by brute force --- src/jinja2/lexer.py | 53 +++++++++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/src/jinja2/lexer.py b/src/jinja2/lexer.py index 642c54fc3..1d63ce429 100644 --- a/src/jinja2/lexer.py +++ b/src/jinja2/lexer.py @@ -715,26 +715,34 @@ def linepos_from_str(line_or_more: str) -> int: line = line_or_more.rsplit("\n", 1)[-1] return len(line) - - old_pos = pos - while True: - if old_pos != pos: - for i in range(5): - backwards_offset = (10 ** i) - backwards_location = max(0, pos - backwards_offset) - lookbehind = source[backwards_location:pos + 1] - last_line = lookbehind.rsplit("\n", 1)[-1] - linepos = len(last_line) - if len(last_line) != len(lookbehind): - # we found a line break - break - if backwards_location <= 0: - break - old_pos = pos + while True: # tokenizer loop for regex, tokens, new_state in statetokens: + if old_pos != pos: + lineno = source[:pos].count("\n") + 1 + inbetween = source[old_pos:pos] + if "\n" in inbetween: + linepos = len(inbetween.rsplit("\n", 1)[-1]) + else: + for backwards_buffer_expo in range(5): + backwards_offset = 10**backwards_buffer_expo + backwards_location = max(0, pos - backwards_offset) + lookbehind = source[backwards_location:pos] + last_line = lookbehind.rsplit("\n", 1)[-1] + if ( + len(last_line) != len(lookbehind) + or len(lookbehind) >= pos + ): + # we found a line break + linepos = len(last_line) + break + if backwards_location <= 0: + break + + old_pos = pos + m = regex.match(source, pos) # if no match we try again with the next rule if m is None: @@ -768,6 +776,7 @@ def linepos_from_str(line_or_more: str) -> int: # Strip all whitespace between the text and the tag. stripped = text.rstrip() newlines_stripped = text[len(stripped) :].count("\n") + linepos = len(text.rsplit("\n", 1)[-1]) groups = [stripped, *groups[1:]] elif ( # Not marked for preserving whitespace. @@ -797,8 +806,9 @@ def linepos_from_str(line_or_more: str) -> int: for key, value in m.groupdict().items(): if value is not None: yield lineno, key, value, linepos - pos = 0 if value.endswith("\n") else len(value.splitlines(keepends=False)[-1]) - linepos = pos + linepos = len(value.splitlines(keepends=False)[-1]) + # linepos = pos + # pos = linepos lineno += value.count("\n") break else: @@ -814,6 +824,9 @@ def linepos_from_str(line_or_more: str) -> int: yield lineno, token, data, linepos # type: ignore[misc] lineno += data.count("\n") + newlines_stripped + if "\n" in data: + linepos = 0 + linepos += len(data.rsplit("\n")[-1]) newlines_stripped = 0 # strings as token just are yielded as it. @@ -849,9 +862,11 @@ def linepos_from_str(line_or_more: str) -> int: yield lineno, tokens, data, linepos lineno += data.count("\n") - + if "\n" in data: + linepos = len(data.rsplit("\n", 1)[-1]) line_starting = m.group()[-1:] == "\n" + # fetch new position into new variable so that we can check # if there is a internal parsing error which would result # in an infinite loop From 474c3f524b57d87955f1302484b38a0cec50aa7c Mon Sep 17 00:00:00 2001 From: InsanePrawn Date: Fri, 31 Oct 2025 03:41:27 +0100 Subject: [PATCH 10/19] parser: moar fault tolerance --- src/jinja2/nodes.py | 4 +- src/jinja2/parser.py | 105 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 90 insertions(+), 19 deletions(-) diff --git a/src/jinja2/nodes.py b/src/jinja2/nodes.py index 74ebbd375..ac88a472d 100644 --- a/src/jinja2/nodes.py +++ b/src/jinja2/nodes.py @@ -304,7 +304,6 @@ def _dump(node: Node | t.Any) -> None: class ParserIssue(Node): attributes: tuple[str, ...] = ("message",) - message: str @@ -319,6 +318,9 @@ class EmptyStatement(Stmt): Returned in Fault-tolerant Mode only """ + attributes: tuple[str] = ("message",) + message: str | None + class Helper(Node): """Nodes that exist in a specific context only.""" diff --git a/src/jinja2/parser.py b/src/jinja2/parser.py index 0ffde9c2d..221efa12c 100644 --- a/src/jinja2/parser.py +++ b/src/jinja2/parser.py @@ -168,7 +168,11 @@ def parse_statement(self) -> nodes.Node | list[nodes.Node]: """Parse a single statement.""" token = self.stream.current if token.type != "name": - self.fail("tag name expected", token.lineno) + if not self.environment.parser_tolerate_faults: + self.fail("tag name expected", token.lineno) + return nodes.EmptyStatement( + message="tag name expected", lineno=token.lineno, linepos=token.linepos + ) self._tag_stack.append(token.value) pop_tag = True try: @@ -235,7 +239,8 @@ def parse_set(self) -> nodes.Assign | nodes.AssignBlock: expr = self.parse_tuple(allow_empty=self.environment.parser_tolerate_faults) result = nodes.Assign(target, expr, lineno=lineno, linepos=linepos) if isinstance(expr, nodes.EmptyExpression): - result.issues = [expr] + result.issues = result.issues or [] + result.issues.append(expr) expr.message = "Assignment to empty expression" return result filter_node = self.parse_filter(None) @@ -344,7 +349,19 @@ def parse_block(self) -> nodes.Block: ): self.fail("Required blocks can only contain comments or whitespace") - self.stream.skip_if("name:" + node.name) + if not self.environment.parser_tolerate_faults: + self.stream.skip_if("name:" + node.name) + elif self.stream.current.test("name"): + wrong = self.stream.expect("name") + if wrong.value != node.name: + node.issues = node.issues or [] + node.issues.append( + nodes.ParserIssue( + message=f"endblock used with incorrect name {wrong.value!r} for block {node.name!r}", + lineno=wrong.lineno, + linepos=wrong.linepos, + ) + ) return node def parse_extends(self) -> nodes.Extends: @@ -465,7 +482,8 @@ def parse_signature( message=msg, original_str=arg.name, ) - arg.issues = [err] + arg.issues = arg.issues or [] + arg.issues.append(err) args.append(arg) self.stream.expect("rparen") return None @@ -504,9 +522,6 @@ def parse_macro(self) -> nodes.Macro: if signature_issue: assert self.environment.parser_tolerate_faults node.args = signature_issue # type: ignore[assignment] - if node.issues is None: - node.issues = [] - node.issues.append(signature_issue) node.body = self.parse_statements(("name:endmacro",), drop_needle=True) return node @@ -870,7 +885,6 @@ def parse() -> nodes.Expr: linepos=linepos_start, message="Expected an expression", ) - empty.issues = [empty] return empty self.fail( "Expected an expression," @@ -939,8 +953,8 @@ def parse_subscript(self, node: nodes.Expr) -> nodes.Getattr | nodes.Getitem: if token.type == "dot": attr_token = self.stream.current - next(self.stream) if attr_token.type == "name": + next(self.stream) return nodes.Getattr( node, attr_token.value, @@ -948,11 +962,21 @@ def parse_subscript(self, node: nodes.Expr) -> nodes.Getattr | nodes.Getitem: lineno=token.lineno, linepos=token.linepos, ) - elif attr_token.type != "integer": - self.fail("expected name or number", attr_token.lineno) - arg = nodes.Const( - attr_token.value, lineno=attr_token.lineno, linepos=attr_token.linepos - ) + if attr_token.type != "integer": + if not self.environment.parser_tolerate_faults: + self.fail("expected name or number", attr_token.lineno) + arg = nodes.EmptyExpression( + message=f"Missing name for dot access! Got {attr_token.type}", + lineno=token.lineno, + linepos=token.linepos, + ) + else: + next(self.stream) + arg = nodes.Const( + attr_token.value, + lineno=attr_token.lineno, + linepos=attr_token.linepos, + ) return nodes.Getitem( node, arg, "load", lineno=token.lineno, linepos=token.linepos ) @@ -1088,8 +1112,26 @@ def parse_filter( while self.stream.current.type == "pipe" or start_inline: if not start_inline: next(self.stream) - token = self.stream.expect("name") - name = token.value + issues: list[nodes.ExprIssue] = [] + + def _get_name() -> str: + nonlocal issues + if ( + self.environment.parser_tolerate_faults + and not self.stream.current.test("name") + ): + issues.append( + nodes.EmptyExpression( + message="Missing name: Filter expected", + lineno=self.stream.current.lineno, + linepos=self.stream.current.linepos, + ) + ) + return "" + return self.stream.expect("name").value + + name = _get_name() + token = self.stream.current while self.stream.current.type == "dot": next(self.stream) name += "." + self.stream.expect("name").value @@ -1108,6 +1150,7 @@ def parse_filter( dyn_kwargs, lineno=token.lineno, linepos=token.linepos, + issues=issues, ) start_inline = False return node @@ -1119,10 +1162,29 @@ def parse_test(self, node: nodes.Expr) -> nodes.Expr: negated = True else: negated = False - name = self.stream.expect("name").value + issues: list[nodes.ExprIssue] = [] + + def _get_name() -> str: + nonlocal issues + if self.environment.parser_tolerate_faults and not self.stream.current.test( + "name" + ): + issues.append( + nodes.EmptyExpression( + message="Missing name: Test expected", + lineno=self.stream.current.lineno, + linepos=self.stream.current.linepos, + ) + ) + return "" + + return self.stream.expect("name").value + + name = _get_name() + while self.stream.current.type == "dot": next(self.stream) - name += "." + self.stream.expect("name").value + name += "." + _get_name() dyn_args = dyn_kwargs = None kwargs: list[nodes.Keyword] = [] if self.stream.current.type == "lparen": @@ -1152,6 +1214,7 @@ def parse_test(self, node: nodes.Expr) -> nodes.Expr: dyn_kwargs, lineno=token.lineno, linepos=token.linepos, + issues=issues, ) if negated: node = nodes.Not(node, lineno=token.lineno, linepos=token.linepos) @@ -1207,6 +1270,12 @@ def flush_data() -> None: if isinstance(rv, list): body.extend(rv) else: + if self.environment.parser_tolerate_faults and isinstance( + rv, (nodes.ParserIssue, nodes.EmptyStatement) + ): + rv = nodes.Output( + [rv], lineno=token.lineno, linepos=token.linepos + ) body.append(rv) self.stream.expect("block_end") else: From 5e8bdf86f61c9ca94caf8eb899b33fd963a10b78 Mon Sep 17 00:00:00 2001 From: InsanePrawn Date: Mon, 3 Nov 2025 01:49:42 +0100 Subject: [PATCH 11/19] parser: track Blocko.endblock_with_name in fault_tolerant mode --- src/jinja2/nodes.py | 2 ++ src/jinja2/parser.py | 1 + 2 files changed, 3 insertions(+) diff --git a/src/jinja2/nodes.py b/src/jinja2/nodes.py index ac88a472d..e584c7918 100644 --- a/src/jinja2/nodes.py +++ b/src/jinja2/nodes.py @@ -435,10 +435,12 @@ class Block(Stmt): """ fields = ("name", "body", "scoped", "required") + attributes = ("endblock_with_name",) name: str body: list[Node] scoped: bool required: bool + endblock_with_name: bool | None class Include(Stmt): diff --git a/src/jinja2/parser.py b/src/jinja2/parser.py index 221efa12c..cce283ca1 100644 --- a/src/jinja2/parser.py +++ b/src/jinja2/parser.py @@ -352,6 +352,7 @@ def parse_block(self) -> nodes.Block: if not self.environment.parser_tolerate_faults: self.stream.skip_if("name:" + node.name) elif self.stream.current.test("name"): + node.endblock_with_name = True wrong = self.stream.expect("name") if wrong.value != node.name: node.issues = node.issues or [] From 738259a5ebf2a6e003651a0276b892579f16b57a Mon Sep 17 00:00:00 2001 From: InsanePrawn Date: Mon, 3 Nov 2025 21:20:13 +0100 Subject: [PATCH 12/19] nodes: add Node.linepos_end --- src/jinja2/nodes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/jinja2/nodes.py b/src/jinja2/nodes.py index e584c7918..7abc34b2f 100644 --- a/src/jinja2/nodes.py +++ b/src/jinja2/nodes.py @@ -126,6 +126,7 @@ class Node(metaclass=NodeType): "environment", "issues", "lineno_end", + "linepos_end", ) abstract = True @@ -136,6 +137,7 @@ class Node(metaclass=NodeType): # only filled in diagnostic mode issues: list[t.Union["ParserIssue", "ExprIssue"]] lineno_end: int | None + linepos_end: int | None def __init__(self, *fields: t.Any, **attributes: t.Any) -> None: if self.abstract: From b166ebacccea5293c75dd98ee7f5dcb4861296cb Mon Sep 17 00:00:00 2001 From: InsanePrawn Date: Tue, 4 Nov 2025 01:58:48 +0100 Subject: [PATCH 13/19] lexer: add linepos to eof node --- src/jinja2/lexer.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/jinja2/lexer.py b/src/jinja2/lexer.py index 1d63ce429..e14872882 100644 --- a/src/jinja2/lexer.py +++ b/src/jinja2/lexer.py @@ -272,7 +272,7 @@ class Token: lineno: int type: str value: str - linepos: int | None = None + linepos: int def __str__(self) -> str: return describe_token(self) @@ -336,7 +336,7 @@ def __init__( self.name = name self.filename = filename self.closed = False - self.current = Token(1, TOKEN_INITIAL, "") + self.current = Token(1, TOKEN_INITIAL, "", 0) next(self) def __iter__(self) -> TokenStreamIterator: @@ -399,7 +399,11 @@ def __next__(self) -> Token: def close(self) -> None: """Close the stream.""" - self.current = Token(self.current.lineno, TOKEN_EOF, "") + lineno, linepos = self.current.lineno, self.current.linepos + value = self.current.value + lineno += value.count("\n") + linepos += len(value.rsplit("\n", 1)[-1]) + self.current = Token(lineno, TOKEN_EOF, "", linepos=linepos) self._iter = iter(()) self.closed = True From 312771c5fea9ea6bbc0df89dd2fdefb96b044091 Mon Sep 17 00:00:00 2001 From: InsanePrawn Date: Tue, 4 Nov 2025 01:59:35 +0100 Subject: [PATCH 14/19] parser: add lineno_end and linepos_end to all nodes --- src/jinja2/parser.py | 439 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 392 insertions(+), 47 deletions(-) diff --git a/src/jinja2/parser.py b/src/jinja2/parser.py index cce283ca1..563486750 100644 --- a/src/jinja2/parser.py +++ b/src/jinja2/parser.py @@ -160,7 +160,12 @@ def free_identifier( self._last_identifier += 1 rv = object.__new__(nodes.InternalName) nodes.Node.__init__( - rv, f"fi{self._last_identifier}", lineno=lineno, linepos=linepos + rv, + f"fi{self._last_identifier}", + lineno=lineno, + linepos=linepos, + lineno_end=lineno, + linepos_end=linepos, ) return rv @@ -170,8 +175,13 @@ def parse_statement(self) -> nodes.Node | list[nodes.Node]: if token.type != "name": if not self.environment.parser_tolerate_faults: self.fail("tag name expected", token.lineno) + nxt = self.stream.look() if not self.stream.closed else self.stream.current return nodes.EmptyStatement( - message="tag name expected", lineno=token.lineno, linepos=token.linepos + message="tag name expected", + lineno=token.lineno, + linepos=token.linepos, + lineno_end=nxt.lineno, + linepos_end=nxt.linepos, ) self._tag_stack.append(token.value) pop_tag = True @@ -237,7 +247,15 @@ def parse_set(self) -> nodes.Assign | nodes.AssignBlock: target = self.parse_assign_target(with_namespace=True) if self.stream.skip_if("assign"): expr = self.parse_tuple(allow_empty=self.environment.parser_tolerate_faults) - result = nodes.Assign(target, expr, lineno=lineno, linepos=linepos) + end_token = self.stream.current + result = nodes.Assign( + target, + expr, + lineno=lineno, + linepos=linepos, + lineno_end=end_token.lineno, + linepos_end=end_token.linepos, + ) if isinstance(expr, nodes.EmptyExpression): result.issues = result.issues or [] result.issues.append(expr) @@ -245,8 +263,15 @@ def parse_set(self) -> nodes.Assign | nodes.AssignBlock: return result filter_node = self.parse_filter(None) body = self.parse_statements(("name:endset",), drop_needle=True) + end_token = self.stream.current return nodes.AssignBlock( - target, filter_node, body, lineno=lineno, linepos=linepos + target, + filter_node, + body, + lineno=lineno, + linepos=linepos, + lineno_end=end_token.lineno, + linepos_end=end_token.linepos, ) def parse_for(self) -> nodes.For: @@ -268,28 +293,52 @@ def parse_for(self) -> nodes.For: else_ = [] else: else_ = self.parse_statements(("name:endfor",), drop_needle=True) + end_token = self.stream.current return nodes.For( - target, iter, body, else_, test, recursive, lineno=lineno, linepos=linepos + target, + iter, + body, + else_, + test, + recursive, + lineno=lineno, + linepos=linepos, + lineno_end=end_token.lineno, + linepos_end=end_token.linepos, ) def parse_if(self) -> nodes.If: """Parse an if construct.""" + current = self.stream.current _next = self.stream.expect("name:if") - node = result = nodes.If(lineno=_next.lineno, linepos=_next.linepos) + node = result = nodes.If( + lineno=current.lineno, + linepos=current.linepos, + lineno_end=_next.lineno, + linepos_end=_next.linepos, + ) while True: node.test = self.parse_tuple(with_condexpr=False) node.body = self.parse_statements(("name:elif", "name:else", "name:endif")) node.elif_ = [] node.else_ = [] token = next(self.stream) + nxt = self.stream.look() if not self.stream.closed else self.stream.current if token.test("name:elif"): - _current = self.stream.current - node = nodes.If(lineno=_current.lineno, linepos=_current.linepos) + node = nodes.If( + lineno=token.lineno, + linepos=token.linepos, + lineno_end=nxt.lineno, + linepos_end=nxt.linepos, + ) result.elif_.append(node) continue elif token.test("name:else"): result.else_ = self.parse_statements(("name:endif",), drop_needle=True) break + end_token = self.stream.current + result.lineno_end = end_token.lineno + result.linepos_end = end_token.linepos return result def parse_with(self) -> nodes.With: @@ -308,6 +357,9 @@ def parse_with(self) -> nodes.With: node.targets = targets node.values = values node.body = self.parse_statements(("name:endwith",), drop_needle=True) + end_token = self.stream.current + node.lineno_end = end_token.lineno + node.linepos_end = end_token.linepos return node def parse_autoescape(self) -> nodes.Scope: @@ -317,7 +369,16 @@ def parse_autoescape(self) -> nodes.Scope: ) node.options = [nodes.Keyword("autoescape", self.parse_expression())] node.body = self.parse_statements(("name:endautoescape",), drop_needle=True) - return nodes.Scope([node], lineno=node.lineno, linepos=node.linepos) + end_token = self.stream.current + node.lineno_end = end_token.lineno + node.linepos_end = end_token.linepos + return nodes.Scope( + [node], + lineno=node.lineno, + linepos=node.linepos, + lineno_end=end_token.lineno, + linepos_end=end_token.linepos, + ) def parse_block(self) -> nodes.Block: _next = next(self.stream) @@ -361,14 +422,22 @@ def parse_block(self) -> nodes.Block: message=f"endblock used with incorrect name {wrong.value!r} for block {node.name!r}", lineno=wrong.lineno, linepos=wrong.linepos, + lineno_end=wrong.lineno, + linepos_end=wrong.linepos + len(wrong.value), ) ) + end_token = self.stream.current + node.lineno_end = end_token.lineno + node.linepos_end = end_token.linepos return node def parse_extends(self) -> nodes.Extends: _next = next(self.stream) node = nodes.Extends(lineno=_next.lineno, linepos=_next.linepos) node.template = self.parse_expression() + end_token = self.stream.current + node.lineno_end = end_token.lineno + node.linepos_end = end_token.linepos return node def parse_import_context( @@ -394,7 +463,11 @@ def parse_include(self) -> nodes.Include: self.stream.skip(2) else: node.ignore_missing = False - return self.parse_import_context(node, True) + result = self.parse_import_context(node, True) + end_token = self.stream.current + result.lineno_end = end_token.lineno + result.linepos_end = end_token.linepos + return result def parse_import(self) -> nodes.Import: _next = next(self.stream) @@ -402,7 +475,11 @@ def parse_import(self) -> nodes.Import: node.template = self.parse_expression() self.stream.expect("name:as") node.target = self.parse_assign_target(name_only=True).name - return self.parse_import_context(node, False) + result = self.parse_import_context(node, False) + end_token = self.stream.current + result.lineno_end = end_token.lineno + result.linepos_end = end_token.linepos + return result def parse_from(self) -> nodes.FromImport: _next = next(self.stream) @@ -445,6 +522,9 @@ def parse_context() -> bool: self.stream.expect("name") if not hasattr(node, "with_context"): node.with_context = False + end_token = self.stream.current + node.lineno_end = end_token.lineno + node.linepos_end = end_token.linepos return node def parse_signature( @@ -461,6 +541,7 @@ def parse_signature( lineno=node.lineno, linepos=node.linepos, lineno_end=self.stream.current.lineno, + linepos_end=self.stream.current.linepos, message="Empty signature", ) @@ -480,6 +561,7 @@ def parse_signature( lineno=arg.lineno, linepos=arg.linepos, lineno_end=self.stream.current.lineno, + linepos_end=self.stream.current.linepos, message=msg, original_str=arg.name, ) @@ -506,6 +588,9 @@ def parse_call_block(self) -> nodes.CallBlock: self.fail("expected call", node.lineno) node.call = call_node node.body = self.parse_statements(("name:endcall",), drop_needle=True) + end_token = self.stream.current + node.lineno_end = end_token.lineno + node.linepos_end = end_token.linepos return node def parse_filter_block(self) -> nodes.FilterBlock: @@ -513,6 +598,9 @@ def parse_filter_block(self) -> nodes.FilterBlock: node = nodes.FilterBlock(lineno=_next.lineno, linepos=_next.linepos) node.filter = self.parse_filter(None, start_inline=True) # type: ignore node.body = self.parse_statements(("name:endfilter",), drop_needle=True) + end_token = self.stream.current + node.lineno_end = end_token.lineno + node.linepos_end = end_token.linepos return node def parse_macro(self) -> nodes.Macro: @@ -524,6 +612,9 @@ def parse_macro(self) -> nodes.Macro: assert self.environment.parser_tolerate_faults node.args = signature_issue # type: ignore[assignment] node.body = self.parse_statements(("name:endmacro",), drop_needle=True) + end_token = self.stream.current + node.lineno_end = end_token.lineno + node.linepos_end = end_token.linepos return node def parse_print(self) -> nodes.Output: @@ -534,6 +625,9 @@ def parse_print(self) -> nodes.Output: if node.nodes: self.stream.expect("comma") node.nodes.append(self.parse_expression()) + end_token = self.stream.current + node.lineno_end = end_token.lineno + node.linepos_end = end_token.linepos return node @typing.overload @@ -569,8 +663,14 @@ def parse_assign_target( if name_only: token = self.stream.expect("name") + nxt = self.stream.look() if not self.stream.closed else self.stream.current target = nodes.Name( - token.value, "store", lineno=token.lineno, linepos=token.linepos + token.value, + "store", + lineno=token.lineno, + linepos=token.linepos, + lineno_end=nxt.lineno, + linepos_end=nxt.linepos, ) else: if with_tuple: @@ -612,7 +712,16 @@ def parse_condexpr(self) -> nodes.Expr: expr3 = self.parse_condexpr() else: expr3 = None - expr1 = nodes.CondExpr(expr2, expr1, expr3, lineno=lineno, linepos=linepos) + end_token = self.stream.current + expr1 = nodes.CondExpr( + expr2, + expr1, + expr3, + lineno=lineno, + linepos=linepos, + lineno_end=end_token.lineno, + linepos_end=end_token.linepos, + ) lineno = self.stream.current.lineno linepos = self.stream.current.linepos return expr1 @@ -623,7 +732,15 @@ def parse_or(self) -> nodes.Expr: left = self.parse_and() while self.stream.skip_if("name:or"): right = self.parse_and() - left = nodes.Or(left, right, lineno=lineno, linepos=linepos) + end_token = self.stream.current + left = nodes.Or( + left, + right, + lineno=lineno, + linepos=linepos, + lineno_end=end_token.lineno, + linepos_end=end_token.linepos, + ) lineno = self.stream.current.lineno linepos = self.stream.current.linepos return left @@ -634,7 +751,15 @@ def parse_and(self) -> nodes.Expr: left = self.parse_not() while self.stream.skip_if("name:and"): right = self.parse_not() - left = nodes.And(left, right, lineno=lineno, linepos=linepos) + end_token = self.stream.current + left = nodes.And( + left, + right, + lineno=lineno, + linepos=linepos, + lineno_end=end_token.lineno, + linepos_end=end_token.linepos, + ) lineno = self.stream.current.lineno linepos = self.stream.current.linepos return left @@ -644,7 +769,15 @@ def parse_not(self) -> nodes.Expr: _next = next(self.stream) lineno = _next.lineno linepos = _next.linepos - return nodes.Not(self.parse_not(), lineno=lineno, linepos=linepos) + result = self.parse_not() + end_token = self.stream.current + return nodes.Not( + result, + lineno=lineno, + linepos=linepos, + lineno_end=end_token.lineno, + linepos_end=end_token.linepos, + ) return self.parse_compare() def parse_compare(self) -> nodes.Expr: @@ -657,46 +790,63 @@ def parse_compare(self) -> nodes.Expr: token_type = token.type if token_type in _compare_operators: next(self.stream) - token = self.stream.current + nxt = self.stream.current ops.append( nodes.Operand( token_type, self.parse_math1(), lineno=token.lineno, linepos=token.linepos, + lineno_end=nxt.lineno, + linepos_end=nxt.linepos, ) ) elif self.stream.skip_if("name:in"): token = self.stream.current + nxt = self.stream.look() if not self.stream.closed else token ops.append( nodes.Operand( "in", self.parse_math1(), lineno=token.lineno, linepos=token.linepos, + lineno_end=nxt.lineno, + linepos_end=nxt.linepos, ) ) elif self.stream.current.test("name:not") and self.stream.look().test( "name:in" ): - self.stream.skip(2) token = self.stream.current + self.stream.skip(2) + nxt = ( + self.stream.look() + if not self.stream.closed + else self.stream.current + ) ops.append( nodes.Operand( "notin", self.parse_math1(), lineno=token.lineno, linepos=token.linepos, + lineno_end=nxt.lineno, + linepos_end=nxt.linepos, ) ) else: break - lineno = self.stream.current.lineno - linepos = self.stream.current.linepos if not ops: return expr - token = self.stream.current - return nodes.Compare(expr, ops, lineno=lineno, linepos=linepos) + end_token = self.stream.current + return nodes.Compare( + expr, + ops, + lineno=lineno, + linepos=linepos, + lineno_end=end_token.lineno, + linepos_end=end_token.linepos, + ) def parse_math1(self) -> nodes.Expr: lineno = self.stream.current.lineno @@ -706,7 +856,15 @@ def parse_math1(self) -> nodes.Expr: cls = _math_nodes[self.stream.current.type] next(self.stream) right = self.parse_concat() - left = cls(left, right, lineno=lineno, linepos=linepos) + end_token = self.stream.current + left = cls( + left, + right, + lineno=lineno, + linepos=linepos, + lineno_end=end_token.lineno, + linepos_end=end_token.linepos, + ) lineno = self.stream.current.lineno linepos = self.stream.current.linepos return left @@ -720,7 +878,14 @@ def parse_concat(self) -> nodes.Expr: args.append(self.parse_math2()) if len(args) == 1: return args[0] - return nodes.Concat(args, lineno=lineno, linepos=linepos) + end_token = self.stream.current + return nodes.Concat( + args, + lineno=lineno, + linepos=linepos, + lineno_end=end_token.lineno, + linepos_end=end_token.linepos, + ) def parse_math2(self) -> nodes.Expr: lineno = self.stream.current.lineno @@ -730,7 +895,15 @@ def parse_math2(self) -> nodes.Expr: cls = _math_nodes[self.stream.current.type] next(self.stream) right = self.parse_pow() - left = cls(left, right, lineno=lineno, linepos=linepos) + end_token = self.stream.current + left = cls( + left, + right, + lineno=lineno, + linepos=linepos, + lineno_end=end_token.lineno, + linepos_end=end_token.linepos, + ) lineno = self.stream.current.lineno linepos = self.stream.current.linepos return left @@ -742,7 +915,15 @@ def parse_pow(self) -> nodes.Expr: while self.stream.current.type == "pow": next(self.stream) right = self.parse_unary() - left = nodes.Pow(left, right, lineno=lineno, linepos=linepos) + end_token = self.stream.current + left = nodes.Pow( + left, + right, + lineno=lineno, + linepos=linepos, + lineno_end=end_token.lineno, + linepos_end=end_token.linepos, + ) lineno = self.stream.current.lineno linepos = self.stream.current.linepos return left @@ -755,10 +936,26 @@ def parse_unary(self, with_filter: bool = True) -> nodes.Expr: if token_type == "sub": next(self.stream) - node = nodes.Neg(self.parse_unary(False), lineno=lineno, linepos=linepos) + inner = self.parse_unary(False) + end_token = self.stream.current + node = nodes.Neg( + inner, + lineno=lineno, + linepos=linepos, + lineno_end=end_token.lineno, + linepos_end=end_token.linepos, + ) elif token_type == "add": next(self.stream) - node = nodes.Pos(self.parse_unary(False), lineno=lineno, linepos=linepos) + inner = self.parse_unary(False) + end_token = self.stream.current + node = nodes.Pos( + inner, + lineno=lineno, + linepos=linepos, + lineno_end=end_token.lineno, + linepos_end=end_token.linepos, + ) else: node = self.parse_primary() node = self.parse_postfix(node) @@ -778,32 +975,71 @@ def parse_primary(self, with_namespace: bool = False) -> nodes.Expr: token.value in ("true", "True"), lineno=token.lineno, linepos=token.linepos, + lineno_end=token.lineno, + linepos_end=token.linepos + len(token.value), ) elif token.value in ("none", "None"): - node = nodes.Const(None, lineno=token.lineno, linepos=token.linepos) + node = nodes.Const( + None, + lineno=token.lineno, + linepos=token.linepos, + lineno_end=token.lineno, + linepos_end=token.linepos + len(token.value), + ) elif with_namespace and self.stream.current.type == "dot": # If namespace attributes are allowed at this point, and the next # token is a dot, produce a namespace reference. next(self.stream) attr = self.stream.expect("name") + nxt = self.stream.current node = nodes.NSRef( - token.value, attr.value, lineno=token.lineno, linepos=token.linepos + token.value, + attr.value, + lineno=token.lineno, + linepos=token.linepos, + lineno_end=nxt.lineno, + linepos_end=nxt.linepos, ) else: + nxt = ( + self.stream.look() + if not self.stream.closed + else self.stream.current + ) node = nodes.Name( - token.value, "load", lineno=token.lineno, linepos=token.linepos + token.value, + "load", + lineno=token.lineno, + linepos=token.linepos, + lineno_end=nxt.lineno, + linepos_end=nxt.linepos, ) elif token.type == "string": next(self.stream) buf = [token.value] lineno = token.lineno + linepos_end = token.linepos while self.stream.current.type == "string": buf.append(self.stream.current.value) + linepos_end = self.stream.current.linepos next(self.stream) - node = nodes.Const("".join(buf), lineno=lineno, linepos=token.linepos) + node = nodes.Const( + "".join(buf), + lineno=lineno, + linepos=token.linepos, + lineno_end=self.stream.current.lineno, + linepos_end=linepos_end, + ) elif token.type in ("integer", "float"): next(self.stream) - node = nodes.Const(token.value, lineno=token.lineno, linepos=token.linepos) + nxt = self.stream.look() if not self.stream.closed else self.stream.current + node = nodes.Const( + token.value, + lineno=token.lineno, + linepos=token.linepos, + lineno_end=nxt.lineno, + linepos_end=nxt.linepos, + ) elif token.type == "lparen": next(self.stream) node = self.parse_tuple(explicit_parentheses=True) @@ -884,6 +1120,8 @@ def parse() -> nodes.Expr: empty = nodes.EmptyExpression( lineno=lineno, linepos=linepos_start, + lineno_end=self.stream.current.lineno, + linepos_end=self.stream.current.linepos, message="Expected an expression", ) return empty @@ -891,7 +1129,15 @@ def parse() -> nodes.Expr: "Expected an expression," f" got {describe_token(self.stream.current)!r}" ) - return nodes.Tuple(args, "load", lineno=lineno, linepos=linepos_start) + end_token = self.stream.current + return nodes.Tuple( + args, + "load", + lineno=lineno, + linepos=linepos_start, + lineno_end=end_token.lineno, + linepos_end=end_token.linepos, + ) def parse_list(self) -> nodes.List: token = self.stream.expect("lbracket") @@ -902,8 +1148,14 @@ def parse_list(self) -> nodes.List: if self.stream.current.type == "rbracket": break items.append(self.parse_expression()) - self.stream.expect("rbracket") - return nodes.List(items, lineno=token.lineno, linepos=token.linepos) + end_token = self.stream.expect("rbracket") + return nodes.List( + items, + lineno=token.lineno, + linepos=token.linepos, + lineno_end=end_token.lineno, + linepos_end=end_token.linepos, + ) def parse_dict(self) -> nodes.Dict: token = self.stream.expect("lbrace") @@ -917,8 +1169,14 @@ def parse_dict(self) -> nodes.Dict: self.stream.expect("colon") value = self.parse_expression() items.append(nodes.Pair(key, value, lineno=key.lineno, linepos=key.linepos)) - self.stream.expect("rbrace") - return nodes.Dict(items, lineno=token.lineno, linepos=token.linepos) + end_token = self.stream.expect("rbrace") + return nodes.Dict( + items, + lineno=token.lineno, + linepos=token.linepos, + lineno_end=end_token.lineno, + linepos_end=end_token.linepos, + ) def parse_postfix(self, node: nodes.Expr) -> nodes.Expr: while True: @@ -962,6 +1220,8 @@ def parse_subscript(self, node: nodes.Expr) -> nodes.Getattr | nodes.Getitem: "load", lineno=token.lineno, linepos=token.linepos, + lineno_end=attr_token.lineno, + linepos_end=attr_token.linepos, ) if attr_token.type != "integer": if not self.environment.parser_tolerate_faults: @@ -970,6 +1230,8 @@ def parse_subscript(self, node: nodes.Expr) -> nodes.Getattr | nodes.Getitem: message=f"Missing name for dot access! Got {attr_token.type}", lineno=token.lineno, linepos=token.linepos, + lineno_end=attr_token.lineno, + linepos_end=attr_token.linepos, ) else: next(self.stream) @@ -977,9 +1239,18 @@ def parse_subscript(self, node: nodes.Expr) -> nodes.Getattr | nodes.Getitem: attr_token.value, lineno=attr_token.lineno, linepos=attr_token.linepos, + lineno_end=attr_token.lineno, + linepos_end=attr_token.linepos, ) + end_token = self.stream.current return nodes.Getitem( - node, arg, "load", lineno=token.lineno, linepos=token.linepos + node, + arg, + "load", + lineno=token.lineno, + linepos=token.linepos, + lineno_end=end_token.lineno, + linepos_end=end_token.linepos, ) if token.type == "lbracket": args: list[nodes.Expr] = [] @@ -987,15 +1258,26 @@ def parse_subscript(self, node: nodes.Expr) -> nodes.Getattr | nodes.Getitem: if args: self.stream.expect("comma") args.append(self.parse_subscribed()) - self.stream.expect("rbracket") + end_token = self.stream.expect("rbracket") if len(args) == 1: arg = args[0] else: arg = nodes.Tuple( - args, "load", lineno=token.lineno, linepos=token.linepos + args, + "load", + lineno=token.lineno, + linepos=token.linepos, + lineno_end=end_token.lineno, + linepos_end=end_token.linepos, ) return nodes.Getitem( - node, arg, "load", lineno=token.lineno, linepos=token.linepos + node, + arg, + "load", + lineno=token.lineno, + linepos=token.linepos, + lineno_end=end_token.lineno, + linepos_end=end_token.linepos, ) self.fail("expected subscript expression", token.lineno) @@ -1030,7 +1312,14 @@ def parse_subscribed(self) -> nodes.Expr: else: args.append(None) - return nodes.Slice(lineno=lineno, linepos=linepos, *args) # noqa: B026 + end_token = self.stream.current + return nodes.Slice( + lineno=lineno, + linepos=linepos, + lineno_end=end_token.lineno, + linepos_end=end_token.linepos, + *args, + ) # noqa: B026 def parse_call_args( self, @@ -1097,6 +1386,7 @@ def parse_call(self, node: nodes.Expr) -> nodes.Call: # needs to be recorded before the stream is advanced. token = self.stream.current args, kwargs, dyn_args, dyn_kwargs = self.parse_call_args() + end_token = self.stream.current return nodes.Call( node, args, @@ -1105,6 +1395,8 @@ def parse_call(self, node: nodes.Expr) -> nodes.Call: dyn_kwargs, lineno=token.lineno, linepos=token.linepos, + lineno_end=end_token.lineno, + linepos_end=end_token.linepos, ) def parse_filter( @@ -1126,6 +1418,8 @@ def _get_name() -> str: message="Missing name: Filter expected", lineno=self.stream.current.lineno, linepos=self.stream.current.linepos, + lineno_end=self.stream.current.lineno, + linepos_end=self.stream.current.linepos, ) ) return "" @@ -1142,6 +1436,7 @@ def _get_name() -> str: args = [] kwargs = [] dyn_args = dyn_kwargs = None + end_token = self.stream.current node = nodes.Filter( node, name, @@ -1151,6 +1446,8 @@ def _get_name() -> str: dyn_kwargs, lineno=token.lineno, linepos=token.linepos, + lineno_end=end_token.lineno, + linepos_end=end_token.linepos, issues=issues, ) start_inline = False @@ -1175,6 +1472,8 @@ def _get_name() -> str: message="Missing name: Test expected", lineno=self.stream.current.lineno, linepos=self.stream.current.linepos, + lineno_end=self.stream.current.lineno, + linepos_end=self.stream.current.linepos, ) ) return "" @@ -1206,6 +1505,7 @@ def _get_name() -> str: args = [arg_node] else: args = [] + end_token = self.stream.current node = nodes.Test( node, name, @@ -1215,10 +1515,18 @@ def _get_name() -> str: dyn_kwargs, lineno=token.lineno, linepos=token.linepos, + lineno_end=end_token.lineno, + linepos_end=end_token.linepos, issues=issues, ) if negated: - node = nodes.Not(node, lineno=token.lineno, linepos=token.linepos) + node = nodes.Not( + node, + lineno=token.lineno, + linepos=token.linepos, + lineno_end=end_token.lineno, + linepos_end=end_token.linepos, + ) return node def subparse(self, end_tokens: tuple[str, ...] | None = None) -> list[nodes.Node]: @@ -1233,8 +1541,16 @@ def flush_data() -> None: if data_buffer: lineno = data_buffer[0].lineno linepos = data_buffer[0].linepos + lineno_end = data_buffer[-1].lineno_end + linepos_end = data_buffer[-1].linepos_end body.append( - nodes.Output(data_buffer[:], lineno=lineno, linepos=linepos) + nodes.Output( + data_buffer[:], + lineno=lineno, + linepos=linepos, + lineno_end=lineno_end, + linepos_end=linepos_end, + ) ) del data_buffer[:] @@ -1242,10 +1558,21 @@ def flush_data() -> None: while self.stream: token = self.stream.current if token.type == "data": + if "\n" not in token.value: + end = token.lineno, token.linepos + len(token.value) + else: + end = ( + token.lineno + token.value.count("\n"), + len(token.value.rsplit("\n", 1)[-1]), + ) if token.value: add_data( nodes.TemplateData( - token.value, lineno=token.lineno, linepos=token.linepos + token.value, + lineno=token.lineno, + linepos=token.linepos, + lineno_end=end[0], + linepos_end=end[1], ) ) next(self.stream) @@ -1257,6 +1584,8 @@ def flush_data() -> None: ) if isinstance(data, nodes.EmptyExpression): data.lineno, data.linepos = token.lineno, token.linepos + nxt = self.stream.current + data.lineno_end, data.linepos_end = nxt.lineno, nxt.linepos data.message = "Empty expression inside print statement" add_data(data) self.stream.expect("variable_end") @@ -1271,11 +1600,16 @@ def flush_data() -> None: if isinstance(rv, list): body.extend(rv) else: + nxt = self.stream.current if self.environment.parser_tolerate_faults and isinstance( rv, (nodes.ParserIssue, nodes.EmptyStatement) ): rv = nodes.Output( - [rv], lineno=token.lineno, linepos=token.linepos + [rv], + lineno=token.lineno, + linepos=token.linepos, + lineno_end=nxt.lineno, + linepos_end=nxt.linepos, ) body.append(rv) self.stream.expect("block_end") @@ -1292,4 +1626,15 @@ def parse(self) -> nodes.Template: """Parse the whole template into a `Template` node.""" result = nodes.Template(self.subparse(), lineno=1, linepos=0) result.set_environment(self.environment) + # Set end position to the last token + end_token = self.stream.current + end_element = result.body[-1] if result.body else None + end = max( + (end_token.lineno, end_token.linepos), + (-1, -1) + if not end_element + else (end_element.lineno_end, end_element.linepos_end), + ) + result.lineno_end = end[0] + result.linepos_end = end[1] return result From 74589c4c5950c24a1fe95c8efa13cf6380378469 Mon Sep 17 00:00:00 2001 From: InsanePrawn Date: Wed, 5 Nov 2025 04:42:51 +0100 Subject: [PATCH 15/19] parser: tolerate faults on parsing empty If block test --- src/jinja2/parser.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/jinja2/parser.py b/src/jinja2/parser.py index 563486750..b1b9a198c 100644 --- a/src/jinja2/parser.py +++ b/src/jinja2/parser.py @@ -318,7 +318,10 @@ def parse_if(self) -> nodes.If: linepos_end=_next.linepos, ) while True: - node.test = self.parse_tuple(with_condexpr=False) + node.test = self.parse_tuple( + with_condexpr=False, + allow_empty=self.environment.parser_tolerate_faults, + ) node.body = self.parse_statements(("name:elif", "name:else", "name:endif")) node.elif_ = [] node.else_ = [] From 10fd3b2168632794b8d0c801aad4cfb54a22b7af Mon Sep 17 00:00:00 2001 From: InsanePrawn Date: Sun, 9 Nov 2025 05:37:11 +0100 Subject: [PATCH 16/19] parser: more fault-tolerance --- src/jinja2/parser.py | 80 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 64 insertions(+), 16 deletions(-) diff --git a/src/jinja2/parser.py b/src/jinja2/parser.py index b1b9a198c..184e36df7 100644 --- a/src/jinja2/parser.py +++ b/src/jinja2/parser.py @@ -245,7 +245,8 @@ def parse_set(self) -> nodes.Assign | nodes.AssignBlock: lineno = _next.lineno linepos = _next.linepos target = self.parse_assign_target(with_namespace=True) - if self.stream.skip_if("assign"): + expr_start = self.stream.next_if("assign") + if expr_start: expr = self.parse_tuple(allow_empty=self.environment.parser_tolerate_faults) end_token = self.stream.current result = nodes.Assign( @@ -257,9 +258,9 @@ def parse_set(self) -> nodes.Assign | nodes.AssignBlock: linepos_end=end_token.linepos, ) if isinstance(expr, nodes.EmptyExpression): - result.issues = result.issues or [] - result.issues.append(expr) expr.message = "Assignment to empty expression" + expr.lineno, expr.linepos = expr_start.lineno, expr_start.linepos + expr.linepos_end += 1 return result filter_node = self.parse_filter(None) body = self.parse_statements(("name:endset",), drop_needle=True) @@ -280,10 +281,16 @@ def parse_for(self) -> nodes.For: lineno = _next.lineno linepos = _next.linepos target = self.parse_assign_target(extra_end_rules=("name:in",)) - self.stream.expect("name:in") + iter_start = self.stream.expect("name:in") iter = self.parse_tuple( - with_condexpr=False, extra_end_rules=("name:recursive",) + with_condexpr=False, + extra_end_rules=("name:recursive",), + allow_empty=self.environment.parser_tolerate_faults, ) + if isinstance(iter, nodes.EmptyExpression): + iter.message = "Empty For-loop iterator" + iter.lineno, iter.linepos = iter_start.lineno, iter_start.linepos + iter.linepos_end += 1 test = None if self.stream.skip_if("name:if"): test = self.parse_expression() @@ -540,13 +547,16 @@ def parse_signature( and self.stream.current.type != "lparen" ): if self.stream.current.type == "block_end": - return nodes.EmptyExpression( # type: ignore[assignment] + node.issues = node.issues or [] + issue = nodes.EmptyExpression( # type: ignore[assignment] lineno=node.lineno, linepos=node.linepos, lineno_end=self.stream.current.lineno, linepos_end=self.stream.current.linepos, - message="Empty signature", + message=f"Missing {type(node).__name__} signature", ) + node.issues.append(issue) + return issue self.stream.expect("lparen") while self.stream.current.type != "rparen": @@ -588,7 +598,19 @@ def parse_call_block(self) -> nodes.CallBlock: call_node = self.parse_expression() if not isinstance(call_node, nodes.Call): - self.fail("expected call", node.lineno) + if not ( + self.environment.parser_tolerate_faults or isinstance(call_node, Name) + ): + self.fail("expected call", node.lineno) + call_node.issues = call_node.issues or [] + issue = nodes.EmptyExpression( + message="Expected function call; missing parentheses", + lineno=call_node.lineno, + linepos=call_node.linepos, + lineno_end=call_node.lineno_end, + linepos_end=call_node.linepos_end, + ) + call_node.issues.append(issue) node.call = call_node node.body = self.parse_statements(("name:endcall",), drop_needle=True) end_token = self.stream.current @@ -611,9 +633,6 @@ def parse_macro(self) -> nodes.Macro: node = nodes.Macro(lineno=_next.lineno, linepos=_next.linepos, issues=None) node.name = self.parse_assign_target(name_only=True).name signature_issue = self.parse_signature(node) - if signature_issue: - assert self.environment.parser_tolerate_faults - node.args = signature_issue # type: ignore[assignment] node.body = self.parse_statements(("name:endmacro",), drop_needle=True) end_token = self.stream.current node.lineno_end = end_token.lineno @@ -733,9 +752,12 @@ def parse_or(self) -> nodes.Expr: lineno = self.stream.current.lineno linepos = self.stream.current.linepos left = self.parse_and() - while self.stream.skip_if("name:or"): + while self.stream.current.test("name:or"): + token = next(self.stream) right = self.parse_and() end_token = self.stream.current + if isinstance(right, nodes.EmptyExpression): + right.lineno, right.linepos = token.lineno, token.linepos left = nodes.Or( left, right, @@ -752,8 +774,11 @@ def parse_and(self) -> nodes.Expr: lineno = self.stream.current.lineno linepos = self.stream.current.linepos left = self.parse_not() - while self.stream.skip_if("name:and"): + while self.stream.current.test("name:and"): + token = next(self.stream) right = self.parse_not() + if isinstance(right, nodes.EmptyExpression): + right.lineno, right.linepos = token.lineno, token.linepos end_token = self.stream.current left = nodes.And( left, @@ -805,7 +830,6 @@ def parse_compare(self) -> nodes.Expr: ) ) elif self.stream.skip_if("name:in"): - token = self.stream.current nxt = self.stream.look() if not self.stream.closed else token ops.append( nodes.Operand( @@ -840,6 +864,8 @@ def parse_compare(self) -> nodes.Expr: else: break if not ops: + if isinstance(expr, nodes.EmptyExpression): + expr.lineno, expr.linepos = lineno, linepos return expr end_token = self.stream.current return nodes.Compare( @@ -961,6 +987,7 @@ def parse_unary(self, with_filter: bool = True) -> nodes.Expr: ) else: node = self.parse_primary() + node.lineno, node.linepos = lineno, linepos node = self.parse_postfix(node) if with_filter: node = self.parse_filter_expr(node) @@ -1052,7 +1079,24 @@ def parse_primary(self, with_namespace: bool = False) -> nodes.Expr: elif token.type == "lbrace": node = self.parse_dict() else: - self.fail(f"unexpected {describe_token(token)!r}", token.lineno) + msg = f"unexpected {describe_token(token)!r}" + if not self.environment.parser_tolerate_faults: + self.fail(msg, token.lineno) + if token.type == "variable_end": + nxt = ( + self.stream.look() + if not self.stream.closed + else self.stream.current + ) + node = nodes.EmptyExpression( + message="Unexpected end of print statement", + lineno=token.lineno, + linepos=token.linepos, + lineno_end=nxt.lineno, + linepos_end=nxt.linepos, + ) + else: + self.fail(msg, token.lineno) return node def parse_tuple( @@ -1084,6 +1128,7 @@ def parse_tuple( tuple is a valid expression or not. """ lineno = self.stream.current.lineno + lineno_start = lineno if simplified: def parse() -> nodes.Expr: @@ -1121,7 +1166,7 @@ def parse() -> nodes.Expr: if not explicit_parentheses: if allow_empty: empty = nodes.EmptyExpression( - lineno=lineno, + lineno=lineno_start, linepos=linepos_start, lineno_end=self.stream.current.lineno, linepos_end=self.stream.current.linepos, @@ -1589,6 +1634,8 @@ def flush_data() -> None: data.lineno, data.linepos = token.lineno, token.linepos nxt = self.stream.current data.lineno_end, data.linepos_end = nxt.lineno, nxt.linepos + if nxt.type == "variable_end": + data.linepos_end += len(nxt.value) data.message = "Empty expression inside print statement" add_data(data) self.stream.expect("variable_end") @@ -1607,6 +1654,7 @@ def flush_data() -> None: if self.environment.parser_tolerate_faults and isinstance( rv, (nodes.ParserIssue, nodes.EmptyStatement) ): + rv.lineno, rv.linepos = token.lineno, token.linepos rv = nodes.Output( [rv], lineno=token.lineno, From f40c82c720c546ca789e361ba623d93315456362 Mon Sep 17 00:00:00 2001 From: InsanePrawn Date: Mon, 10 Nov 2025 06:35:25 +0100 Subject: [PATCH 17/19] add issue_context to issue nodes --- src/jinja2/nodes.py | 9 ++++++--- src/jinja2/parser.py | 21 ++++++++++++++++++--- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/jinja2/nodes.py b/src/jinja2/nodes.py index 7abc34b2f..5e30792bd 100644 --- a/src/jinja2/nodes.py +++ b/src/jinja2/nodes.py @@ -305,8 +305,9 @@ def _dump(node: Node | t.Any) -> None: class ParserIssue(Node): - attributes: tuple[str, ...] = ("message",) + attributes: tuple[str, ...] = ("message", "issue_context") message: str + issue_context: str | None class Stmt(Node): @@ -320,8 +321,9 @@ class EmptyStatement(Stmt): Returned in Fault-tolerant Mode only """ - attributes: tuple[str] = ("message",) + attributes: tuple[str, ...] = ("message", "issue_context") message: str | None + issue_context: str | None class Helper(Node): @@ -529,8 +531,9 @@ def can_assign(self) -> bool: class ExprIssue(Expr): - attributes: tuple[str, ...] = ("message",) + attributes: tuple[str, ...] = ("message", "issue_context") message: str + issue_context: str | None class EmptyExpression(ExprIssue): diff --git a/src/jinja2/parser.py b/src/jinja2/parser.py index 184e36df7..7bb00f9ca 100644 --- a/src/jinja2/parser.py +++ b/src/jinja2/parser.py @@ -177,11 +177,12 @@ def parse_statement(self) -> nodes.Node | list[nodes.Node]: self.fail("tag name expected", token.lineno) nxt = self.stream.look() if not self.stream.closed else self.stream.current return nodes.EmptyStatement( - message="tag name expected", + message="Tag name expected", lineno=token.lineno, linepos=token.linepos, lineno_end=nxt.lineno, linepos_end=nxt.linepos, + issue_context="tag", ) self._tag_stack.append(token.value) pop_tag = True @@ -259,6 +260,7 @@ def parse_set(self) -> nodes.Assign | nodes.AssignBlock: ) if isinstance(expr, nodes.EmptyExpression): expr.message = "Assignment to empty expression" + expr.issue_context = "assignment" expr.lineno, expr.linepos = expr_start.lineno, expr_start.linepos expr.linepos_end += 1 return result @@ -287,10 +289,15 @@ def parse_for(self) -> nodes.For: extra_end_rules=("name:recursive",), allow_empty=self.environment.parser_tolerate_faults, ) - if isinstance(iter, nodes.EmptyExpression): + if self.environment.parser_tolerate_faults and isinstance( + iter, nodes.EmptyExpression + ): iter.message = "Empty For-loop iterator" iter.lineno, iter.linepos = iter_start.lineno, iter_start.linepos + assert iter.linepos_end is not None iter.linepos_end += 1 + iter.issue_context = "for_iterator" + test = None if self.stream.skip_if("name:if"): test = self.parse_expression() @@ -434,6 +441,7 @@ def parse_block(self) -> nodes.Block: linepos=wrong.linepos, lineno_end=wrong.lineno, linepos_end=wrong.linepos + len(wrong.value), + issue_context="endblock", ) ) end_token = self.stream.current @@ -554,6 +562,7 @@ def parse_signature( lineno_end=self.stream.current.lineno, linepos_end=self.stream.current.linepos, message=f"Missing {type(node).__name__} signature", + issue_context="signature", ) node.issues.append(issue) return issue @@ -609,6 +618,7 @@ def parse_call_block(self) -> nodes.CallBlock: linepos=call_node.linepos, lineno_end=call_node.lineno_end, linepos_end=call_node.linepos_end, + issue_context="function_call", ) call_node.issues.append(issue) node.call = call_node @@ -1089,11 +1099,12 @@ def parse_primary(self, with_namespace: bool = False) -> nodes.Expr: else self.stream.current ) node = nodes.EmptyExpression( - message="Unexpected end of print statement", + message="Unexpected end of primary statement", lineno=token.lineno, linepos=token.linepos, lineno_end=nxt.lineno, linepos_end=nxt.linepos, + issue_context="primary", ) else: self.fail(msg, token.lineno) @@ -1280,6 +1291,7 @@ def parse_subscript(self, node: nodes.Expr) -> nodes.Getattr | nodes.Getitem: linepos=token.linepos, lineno_end=attr_token.lineno, linepos_end=attr_token.linepos, + issue_context="attribute", ) else: next(self.stream) @@ -1468,6 +1480,7 @@ def _get_name() -> str: linepos=self.stream.current.linepos, lineno_end=self.stream.current.lineno, linepos_end=self.stream.current.linepos, + issue_context="filter", ) ) return "" @@ -1522,6 +1535,7 @@ def _get_name() -> str: linepos=self.stream.current.linepos, lineno_end=self.stream.current.lineno, linepos_end=self.stream.current.linepos, + issue_context="test", ) ) return "" @@ -1637,6 +1651,7 @@ def flush_data() -> None: if nxt.type == "variable_end": data.linepos_end += len(nxt.value) data.message = "Empty expression inside print statement" + data.issue_context = "print" add_data(data) self.stream.expect("variable_end") elif token.type == "block_begin": From 48a133f636c8976b6621411f3fa4c74d7c56b581 Mon Sep 17 00:00:00 2001 From: InsanePrawn Date: Sat, 6 Dec 2025 12:08:39 +0100 Subject: [PATCH 18/19] more lineno and errors is always list of nodes --- src/jinja2/nodes.py | 2 +- src/jinja2/parser.py | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/jinja2/nodes.py b/src/jinja2/nodes.py index 5e30792bd..05bfbd70e 100644 --- a/src/jinja2/nodes.py +++ b/src/jinja2/nodes.py @@ -135,7 +135,7 @@ class Node(metaclass=NodeType): environment: t.Optional["Environment"] # only filled in diagnostic mode - issues: list[t.Union["ParserIssue", "ExprIssue"]] + issues: list["Node"] lineno_end: int | None linepos_end: int | None diff --git a/src/jinja2/parser.py b/src/jinja2/parser.py index 7bb00f9ca..018aac08d 100644 --- a/src/jinja2/parser.py +++ b/src/jinja2/parser.py @@ -1273,14 +1273,15 @@ def parse_subscript(self, node: nodes.Expr) -> nodes.Getattr | nodes.Getitem: attr_token = self.stream.current if attr_token.type == "name": next(self.stream) + nxt = self.stream.current return nodes.Getattr( node, attr_token.value, "load", lineno=token.lineno, linepos=token.linepos, - lineno_end=attr_token.lineno, - linepos_end=attr_token.linepos, + lineno_end=nxt.lineno, + linepos_end=nxt.linepos, ) if attr_token.type != "integer": if not self.environment.parser_tolerate_faults: @@ -1295,12 +1296,13 @@ def parse_subscript(self, node: nodes.Expr) -> nodes.Getattr | nodes.Getitem: ) else: next(self.stream) + nxt = self.stream.current arg = nodes.Const( attr_token.value, lineno=attr_token.lineno, linepos=attr_token.linepos, - lineno_end=attr_token.lineno, - linepos_end=attr_token.linepos, + lineno_end=nxt.lineno, + linepos_end=nxt.linepos, ) end_token = self.stream.current return nodes.Getitem( From 1333f8b73065b67f709ae656c1541d200972c682 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci-lite[bot]" <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com> Date: Sun, 17 May 2026 18:36:27 +0000 Subject: [PATCH 19/19] [pre-commit.ci lite] apply automatic fixes --- src/jinja2/lexer.py | 2 +- src/jinja2/nodes.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/jinja2/lexer.py b/src/jinja2/lexer.py index e14872882..7cd5c9cf0 100644 --- a/src/jinja2/lexer.py +++ b/src/jinja2/lexer.py @@ -7,8 +7,8 @@ import re import typing as t from ast import literal_eval -from dataclasses import dataclass from collections import deque +from dataclasses import dataclass from sys import intern from ._identifier import pattern as name_re diff --git a/src/jinja2/nodes.py b/src/jinja2/nodes.py index 05bfbd70e..e7631c7e3 100644 --- a/src/jinja2/nodes.py +++ b/src/jinja2/nodes.py @@ -189,7 +189,7 @@ def iter_child_nodes( over all fields and yields the values of they are nodes. If the value of a field is a list all the nodes in that list are returned. """ - items: t.Iterable[t.Tuple[str, t.Any]] = self.iter_fields(exclude, only) + items: t.Iterable[tuple[str, t.Any]] = self.iter_fields(exclude, only) if reverse: items = reversed(list(items)) for _, item in items: